forked from commaai/openpilot
-
Notifications
You must be signed in to change notification settings - Fork 144
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
59 changed files
with
34,344 additions
and
1,563 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import glob | ||
|
||
Import('env', 'envCython', 'arch', 'cereal', 'messaging', 'common', 'gpucommon', 'visionipc', 'transformations') | ||
lenv = env.Clone() | ||
lenvCython = envCython.Clone() | ||
|
||
libs = [cereal, messaging, visionipc, gpucommon, common, 'capnp', 'zmq', 'kj', 'pthread'] | ||
frameworks = [] | ||
|
||
common_src = [ | ||
"models/commonmodel.cc", | ||
"transforms/loadyuv.cc", | ||
"transforms/transform.cc", | ||
] | ||
|
||
thneed_src_common = [ | ||
"thneed/thneed_common.cc", | ||
"thneed/serialize.cc", | ||
] | ||
|
||
thneed_src_qcom = thneed_src_common + ["thneed/thneed_qcom2.cc"] | ||
thneed_src_pc = thneed_src_common + ["thneed/thneed_pc.cc"] | ||
thneed_src = thneed_src_qcom if arch == "larch64" else thneed_src_pc | ||
|
||
# SNPE except on Mac and ARM Linux | ||
snpe_lib = [] | ||
if arch != "Darwin" and arch != "aarch64": | ||
common_src += ['runners/snpemodel.cc'] | ||
snpe_lib += ['SNPE'] | ||
|
||
# OpenCL is a framework on Mac | ||
if arch == "Darwin": | ||
frameworks += ['OpenCL'] | ||
else: | ||
libs += ['OpenCL'] | ||
|
||
# Set path definitions | ||
for pathdef, fn in {'TRANSFORM': 'transforms/transform.cl', 'LOADYUV': 'transforms/loadyuv.cl'}.items(): | ||
for xenv in (lenv, lenvCython): | ||
xenv['CXXFLAGS'].append(f'-D{pathdef}_PATH=\\"{File(fn).abspath}\\"') | ||
|
||
# Compile cython | ||
snpe_rpath_qcom = "/data/pythonpath/third_party/snpe/larch64" | ||
snpe_rpath_pc = f"{Dir('#').abspath}/third_party/snpe/x86_64-linux-clang" | ||
snpe_rpath = lenvCython['RPATH'] + [snpe_rpath_qcom if arch == "larch64" else snpe_rpath_pc] | ||
|
||
cython_libs = envCython["LIBS"] + libs | ||
snpemodel_lib = lenv.Library('snpemodel', ['runners/snpemodel.cc']) | ||
commonmodel_lib = lenv.Library('commonmodel', common_src) | ||
|
||
lenvCython.Program('runners/runmodel_pyx.so', 'runners/runmodel_pyx.pyx', LIBS=cython_libs, FRAMEWORKS=frameworks) | ||
lenvCython.Program('runners/snpemodel_pyx.so', 'runners/snpemodel_pyx.pyx', LIBS=[snpemodel_lib, snpe_lib, *cython_libs], FRAMEWORKS=frameworks, RPATH=snpe_rpath) | ||
lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LIBS=[commonmodel_lib, *cython_libs], FRAMEWORKS=frameworks) | ||
|
||
tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath)] | ||
|
||
# Get model metadata | ||
fn = File("models/supercombo").abspath | ||
cmd = f'python3 {Dir("#selfdrive/classic_modeld").abspath}/get_model_metadata.py {fn}.onnx' | ||
lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files, cmd) | ||
|
||
# Build thneed model | ||
if arch == "larch64" or GetOption('pc_thneed'): | ||
tinygrad_opts = [] | ||
if not GetOption('pc_thneed'): | ||
# use FLOAT16 on device for speed + don't cache the CL kernels for space | ||
tinygrad_opts += ["FLOAT16=1", "PYOPENCL_NO_CACHE=1"] | ||
cmd = f"cd {Dir('#').abspath}/tinygrad_repo && " + ' '.join(tinygrad_opts) + f" python3 openpilot/compile2.py {fn}.onnx {fn}.thneed" | ||
|
||
lenv.Command(fn + ".thneed", [fn + ".onnx"] + tinygrad_files, cmd) | ||
|
||
thneed_lib = env.SharedLibrary('thneed', thneed_src, LIBS=[gpucommon, common, 'zmq', 'OpenCL', 'dl']) | ||
thneedmodel_lib = env.Library('thneedmodel', ['runners/thneedmodel.cc']) | ||
lenvCython.Program('runners/thneedmodel_pyx.so', 'runners/thneedmodel_pyx.pyx', LIBS=envCython["LIBS"]+[thneedmodel_lib, thneed_lib, gpucommon, common, 'dl', 'zmq', 'OpenCL']) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
5ec97a39-0095-4cea-adfa-6d72b1966cc1 | ||
26cac7a9757a27c783a365403040a1bd27ccdaea |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#include "selfdrive/classic_modeld/transforms/transform.h" | ||
|
||
#include <cassert> | ||
#include <cstring> | ||
|
||
#include "common/clutil.h" | ||
|
||
void transform_init(Transform* s, cl_context ctx, cl_device_id device_id) { | ||
memset(s, 0, sizeof(*s)); | ||
|
||
cl_program prg = cl_program_from_file(ctx, device_id, TRANSFORM_PATH, ""); | ||
s->krnl = CL_CHECK_ERR(clCreateKernel(prg, "warpPerspective", &err)); | ||
// done with this | ||
CL_CHECK(clReleaseProgram(prg)); | ||
|
||
s->m_y_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err)); | ||
s->m_uv_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err)); | ||
} | ||
|
||
void transform_destroy(Transform* s) { | ||
CL_CHECK(clReleaseMemObject(s->m_y_cl)); | ||
CL_CHECK(clReleaseMemObject(s->m_uv_cl)); | ||
CL_CHECK(clReleaseKernel(s->krnl)); | ||
} | ||
|
||
void transform_queue(Transform* s, | ||
cl_command_queue q, | ||
cl_mem in_yuv, int in_width, int in_height, int in_stride, int in_uv_offset, | ||
cl_mem out_y, cl_mem out_u, cl_mem out_v, | ||
int out_width, int out_height, | ||
const mat3& projection) { | ||
const int zero = 0; | ||
|
||
// sampled using pixel center origin | ||
// (because that's how fastcv and opencv does it) | ||
|
||
mat3 projection_y = projection; | ||
|
||
// in and out uv is half the size of y. | ||
mat3 projection_uv = transform_scale_buffer(projection, 0.5); | ||
|
||
CL_CHECK(clEnqueueWriteBuffer(q, s->m_y_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_y.v, 0, NULL, NULL)); | ||
CL_CHECK(clEnqueueWriteBuffer(q, s->m_uv_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_uv.v, 0, NULL, NULL)); | ||
|
||
const int in_y_width = in_width; | ||
const int in_y_height = in_height; | ||
const int in_y_px_stride = 1; | ||
const int in_uv_width = in_width/2; | ||
const int in_uv_height = in_height/2; | ||
const int in_uv_px_stride = 2; | ||
const int in_u_offset = in_uv_offset; | ||
const int in_v_offset = in_uv_offset + 1; | ||
|
||
const int out_y_width = out_width; | ||
const int out_y_height = out_height; | ||
const int out_uv_width = out_width/2; | ||
const int out_uv_height = out_height/2; | ||
|
||
CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv)); // src | ||
CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_stride)); // src_row_stride | ||
CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_px_stride)); // src_px_stride | ||
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &zero)); // src_offset | ||
CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_height)); // src_rows | ||
CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_y_width)); // src_cols | ||
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_y)); // dst | ||
CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_y_width)); // dst_row_stride | ||
CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset | ||
CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_height)); // dst_rows | ||
CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_y_width)); // dst_cols | ||
CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_y_cl)); // M | ||
|
||
const size_t work_size_y[2] = {(size_t)out_y_width, (size_t)out_y_height}; | ||
|
||
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL, | ||
(const size_t*)&work_size_y, NULL, 0, 0, NULL)); | ||
|
||
const size_t work_size_uv[2] = {(size_t)out_uv_width, (size_t)out_uv_height}; | ||
|
||
CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_uv_px_stride)); // src_px_stride | ||
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_u_offset)); // src_offset | ||
CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_height)); // src_rows | ||
CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_uv_width)); // src_cols | ||
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_u)); // dst | ||
CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_uv_width)); // dst_row_stride | ||
CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset | ||
CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_height)); // dst_rows | ||
CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_uv_width)); // dst_cols | ||
CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_uv_cl)); // M | ||
|
||
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL, | ||
(const size_t*)&work_size_uv, NULL, 0, 0, NULL)); | ||
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_v_offset)); // src_ofset | ||
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_v)); // dst | ||
|
||
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL, | ||
(const size_t*)&work_size_uv, NULL, 0, 0, NULL)); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.