Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

Permalink
Support two layers of SYCL
Browse files Browse the repository at this point in the history
  • Loading branch information
ThanatosShinji authored and luoyu-intel committed Jun 4, 2024
1 parent 16d119a commit 0dcacb1
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 236 deletions.
2 changes: 1 addition & 1 deletion CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@
"description": "x64 SYCL",
"inherits": "x64-debug-sycl",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release"
"CMAKE_BUILD_TYPE": "RelWithDebInfo"
}
}
]
Expand Down
15 changes: 15 additions & 0 deletions neural_speed/core/layers/ne_bestla.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,21 @@ static inline int ne_nrows(const struct ne_tensor* tensor) {
return tensor->ne[1] * tensor->ne[2] * tensor->ne[3];
}

// Decide which backend should execute the given op.
//
// @param src0  First operand; for NE_OP_MUL_MAT this is the weight tensor.
// @param src1  Second operand; currently unused, reserved for future per-op
//              checks (e.g. activation dtype/shape constraints).
// @param op    The operation being scheduled.
// @return NE_BACKEND_SYCL when the op can be offloaded to the SYCL device,
//         NE_BACKEND_CPU otherwise.
ne_backend bestla_backend_support(struct ne_tensor* src0, struct ne_tensor* src1, enum ne_op op) {
  (void)src1;  // reserved for future use; silence unused-parameter warnings
  ne_backend bk = NE_BACKEND_CPU;
  switch (op) {
    case NE_OP_MUL_MAT:
      // Offload matmul only when the weight tensor is BTLA-packed,
      // since that is the layout the SYCL kernels consume.
      if (src0->type == NE_TYPE_BTLA) {
        bk = NE_BACKEND_SYCL;
      }
      break;
    default:
      break;
  }
  return bk;
}

bool bestla_sycl_support(struct ne_tensor* node) {
bool support = false;
switch (node->op) {
Expand Down
1 change: 0 additions & 1 deletion neural_speed/core/layers/ne_bestla_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ void bestla_device_memcpy(void* dstptr, const void* srcptr, size_t size, void* q
if (queue && srcptr && dstptr) {
auto ptr = (sycl::queue*)queue;
ptr->memcpy(dstptr, srcptr, size);
ptr->wait();
}
}

Expand Down
1 change: 1 addition & 0 deletions neural_speed/core/ne_bestla.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ void bestla_mul(int batch, int vsize, const float* tensor, const float* vector,
void bestla_add(int batch, int vsize, const float* tensor, const float* vector, int vstep, float* out);

bool bestla_sycl_support(struct ne_tensor* node);
enum ne_backend bestla_backend_support(struct ne_tensor* src0, struct ne_tensor* src1, enum ne_op op);
bool bestla_support(struct ne_tensor* node, int n_threads, size_t* workspace, size_t* dev_workspace);

#ifdef NS_SYCL
Expand Down
Loading

0 comments on commit 0dcacb1

Please sign in to comment.