This repository has been archived by the owner on Aug 30, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
directory reorg Signed-off-by: Hengyu Meng <[email protected]>
- Loading branch information
Showing
47 changed files
with
27,524 additions
and
6,081 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[submodule "third_party/pybind11"] | ||
path = third_party/pybind11 | ||
url = https://github.com/pybind/pybind11.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
# target_link_options() is used further down and only exists since CMake 3.13
# (cxx_std_17 needs 3.8, the OpenMP::OpenMP_CXX imported target needs 3.9),
# so 3.13 is the real minimum for this file.
cmake_minimum_required(VERSION 3.13)

project(jblas LANGUAGES CXX VERSION 0.1.0)

# Public headers: added to the unit-test executable and installed at the end of this file.
file(GLOB headers ${PROJECT_NAME}/*.h ${PROJECT_NAME}/*.hpp)
file(GLOB xbyak_headers ${PROJECT_NAME}/xbyak/*.h ${PROJECT_NAME}/xbyak/*.hpp)

# Unit-test switches. Each JBLAS_UT_* test switch that is ON is forwarded to the
# unit-test executable as a compile definition of the same name.
option(JBLAS_UT_ALL "Enable all unit tests" OFF)
option(JBLAS_UT_DEBUG "Enable debug unit tests" ON)
option(JBLAS_UT_EPILOGUE "Enable unit test for epilogue" OFF)
option(JBLAS_UT_PROLOGUE_A "Enable unit test for activation prologue" OFF)
option(JBLAS_UT_PROLOGUE_B "Enable unit test for weight prologue" OFF)
option(JBLAS_UT_GEMM "Enable unit test for micro gemm kernels" OFF)
option(JBLAS_UT_WRAPPER "Enable unit test for parallel gemms" OFF)
option(JBLAS_UT_PARALLEL "Enable unit test for parallel set" OFF)
option(JBLAS_UT_KERNEL_JIT "Enable unit test for jit kernels" OFF)
option(JBLAS_UT_KERNEL_INTRIN "Enable unit test for intrinsic kernels" OFF)
option(JBLAS_UT_KERNEL_WRAPPER "Enable unit test for runtime ISA kernels" OFF)
option(JBLAS_UT_NOASAN "Disable sanitize" OFF)
option(JBLAS_UT_BENCHMARK "Benchmark ON may take a long time to finish all tests" OFF)
option(JBLAS_UT_OPENMP "Use OpenMP" ON)

# JBLAS_UT_ALL turns on every per-component test switch. It does not touch
# JBLAS_UT_DEBUG or JBLAS_UT_BENCHMARK. Normal variables are used so the
# user's cached choices are shadowed rather than overwritten.
if(JBLAS_UT_ALL)
  set(JBLAS_UT_EPILOGUE ON)
  set(JBLAS_UT_PROLOGUE_A ON)
  set(JBLAS_UT_PROLOGUE_B ON)
  set(JBLAS_UT_GEMM ON)
  set(JBLAS_UT_WRAPPER ON)
  set(JBLAS_UT_PARALLEL ON)
  set(JBLAS_UT_KERNEL_JIT ON)
  set(JBLAS_UT_KERNEL_INTRIN ON)
  set(JBLAS_UT_KERNEL_WRAPPER ON)
endif()

# The unit-test executable is built as soon as any test switch is ON.
# JBLAS_UT_NOASAN, JBLAS_UT_BENCHMARK and JBLAS_UT_OPENMP alone do not trigger it.
set(UT_BUILD FALSE)
if(JBLAS_UT_DEBUG OR JBLAS_UT_PROLOGUE_A OR JBLAS_UT_PROLOGUE_B OR JBLAS_UT_EPILOGUE OR JBLAS_UT_GEMM
   OR JBLAS_UT_WRAPPER OR JBLAS_UT_PARALLEL OR JBLAS_UT_KERNEL_JIT OR JBLAS_UT_KERNEL_INTRIN
   OR JBLAS_UT_KERNEL_WRAPPER)
  set(UT_BUILD TRUE)
endif()
|
||
# Helper modules: standard install directories and package-config generators.
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

# jblas is header-only: an INTERFACE target that carries usage requirements,
# plus a namespaced alias so in-tree consumers can link jblas::jblas.
add_library(${PROJECT_NAME} INTERFACE)
add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})

# Build tree: headers are found relative to this directory.
# Install tree: headers are found under the install include directory.
target_include_directories(${PROJECT_NAME}
  INTERFACE
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
    "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)

# Register the target in the export set consumed by install(EXPORT) below.
install(TARGETS ${PROJECT_NAME}
  EXPORT ${PROJECT_NAME}-targets
  INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
)

# Generate <name>-config.cmake from the template and a matching version file
# that accepts any request with the same major version.
configure_package_config_file(
  cmake/config.cmake.in
  "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
  INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
)
write_basic_package_version_file(
  "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
  COMPATIBILITY SameMajorVersion
)

# Install the generated package files next to the exported targets file.
install(FILES
    "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
    "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
)

# Write <name>-targets.cmake, exposing the target as <name>::<name>.
install(EXPORT ${PROJECT_NAME}-targets
  NAMESPACE ${PROJECT_NAME}::
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
)
|
||
# Windows-wide preprocessor settings, propagated to every consumer of the target.
if(WIN32)
  target_compile_definitions(${PROJECT_NAME} INTERFACE _CRT_SECURE_NO_WARNINGS NOMINMAX)
endif()

# The /wd and /STACK switches use cl.exe / link.exe syntax, so they are applied
# only to MSVC-style toolchains (MSVC is also true for compilers that simulate
# the cl command line). Other Windows compilers would reject these flags.
if(MSVC)
  target_compile_options(${PROJECT_NAME} INTERFACE /wd4068 /wd4849 /wd6262 /wd4702 /wd4100)
  #4068 ignore unroll and GCC flags
  #4849 ignore collapse
  #6262 ignore stack too large
  #4702 unreachable code(false warning on constexpr condition)
  #4100 unreferenced formal parameter

  target_link_options(${PROJECT_NAME} INTERFACE /STACK:5242880) #Stack requires up to L2 cache size
endif()

# Go through find_package() rather than including the Find module directly:
# REQUIRED stops configuration with a clear message when OpenMP is missing,
# instead of failing later on the absent OpenMP::OpenMP_CXX target.
if(JBLAS_UT_OPENMP)
  find_package(OpenMP REQUIRED)
  target_link_libraries(${PROJECT_NAME} INTERFACE OpenMP::OpenMP_CXX)
endif()

# Default language standard for targets created after this point.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||
# add_ut_flag(<option-name>)
#
# If the variable named by <option-name> is true, adds <option-name> as a
# PRIVATE compile definition on the ${PROJECT_NAME}_ut target.
# The ${PROJECT_NAME}_ut target must already exist when this is called.
#
# Example: add_ut_flag(JBLAS_UT_GEMM)
function(add_ut_flag UT_OPTION)
  # Expand once to get the variable *name* and let if() look it up. Expanding
  # twice would hand the option's value to if(), which could then be
  # re-interpreted as yet another variable name.
  if(${UT_OPTION})
    target_compile_definitions(${PROJECT_NAME}_ut PRIVATE ${UT_OPTION})
  endif()
endfunction()
|
||
# Consumers of the header-only library need at least C++17.
target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_17)

# Unit-test executable, built only when at least one JBLAS_UT_* test switch is ON.
if(UT_BUILD)
  # Compile every UT source even when only some of the tests are enabled.
  file(GLOB srcs ${PROJECT_NAME}/ut/*.cc ${PROJECT_NAME}/ut/*.cpp)
  file(GLOB ut_headers ${PROJECT_NAME}/ut/*.h)
  add_executable(${PROJECT_NAME}_ut ${srcs} ${headers} ${ut_headers})

  if(NOT WIN32)
    # AddressSanitizer is on by default; JBLAS_UT_NOASAN opts out.
    if(NOT JBLAS_UT_NOASAN)
      target_compile_options(${PROJECT_NAME}_ut PRIVATE -fsanitize=address)
      target_link_options(${PROJECT_NAME}_ut PRIVATE -fsanitize=address)
    endif()

    # Link the thread library through its imported target. A raw -lpthread
    # passed as a link *option* is order-sensitive and not portable.
    find_package(Threads REQUIRED)
    target_link_libraries(${PROJECT_NAME}_ut PRIVATE Threads::Threads)
  endif()

  # Forward each enabled switch to the test sources as a compile definition.
  add_ut_flag(JBLAS_UT_DEBUG)
  add_ut_flag(JBLAS_UT_EPILOGUE)
  add_ut_flag(JBLAS_UT_PROLOGUE_A)
  add_ut_flag(JBLAS_UT_PROLOGUE_B)
  add_ut_flag(JBLAS_UT_GEMM)
  add_ut_flag(JBLAS_UT_PARALLEL)
  add_ut_flag(JBLAS_UT_WRAPPER)
  add_ut_flag(JBLAS_UT_KERNEL_INTRIN)
  add_ut_flag(JBLAS_UT_KERNEL_JIT)
  add_ut_flag(JBLAS_UT_KERNEL_WRAPPER)
  add_ut_flag(JBLAS_UT_BENCHMARK)

  target_link_libraries(${PROJECT_NAME}_ut PRIVATE ${PROJECT_NAME})
endif()
|
||
# Install the top-level public headers under <includedir>/<project>.
install(FILES ${headers}
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
)

# Install the xbyak headers into an xbyak/ subdirectory of the same location.
install(FILES ${xbyak_headers}
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/xbyak/
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
{ | ||
"version": 3, | ||
"configurePresets": [ | ||
{ | ||
"name": "linux-debug", | ||
"displayName": "Linux Debug", | ||
"description": "Target the Windows Subsystem for Linux (WSL) or a remote Linux system.", | ||
"generator": "Ninja", | ||
"binaryDir": "${sourceDir}/out/build/${presetName}", | ||
"installDir": "${sourceDir}/out/install/${presetName}", | ||
"cacheVariables": { | ||
"CMAKE_BUILD_TYPE": "Debug" | ||
}, | ||
"condition": { | ||
"type": "equals", | ||
"lhs": "${hostSystemName}", | ||
"rhs": "Linux" | ||
}, | ||
"vendor": { "microsoft.com/VisualStudioRemoteSettings/CMake/1.0": { "sourceDir": "$env{HOME}/.vs/$ms{projectDirName}" } } | ||
}, | ||
{ | ||
"name": "linux-release-UT", | ||
"displayName": "linux Release for UT", | ||
"description": "Run all UT", | ||
"inherits": "linux-debug", | ||
"cacheVariables": { | ||
"CMAKE_BUILD_TYPE": "Release", | ||
"JBLAS_UT_ALL": "ON" | ||
} | ||
}, | ||
{ | ||
"name": "linux-release", | ||
"displayName": "linux Release", | ||
"description": "Release", | ||
"inherits": "linux-debug", | ||
"cacheVariables": { | ||
"CMAKE_BUILD_TYPE": "Release", | ||
"JBLAS_UT_ALL": "OFF" | ||
} | ||
}, | ||
{ | ||
"name": "windows-base", | ||
"description": "Target Windows with the Visual Studio development environment.", | ||
"hidden": true, | ||
"generator": "Ninja", | ||
"binaryDir": "${sourceDir}/out/build/${presetName}", | ||
"installDir": "${sourceDir}/out/install/${presetName}", | ||
"cacheVariables": { | ||
"CMAKE_C_COMPILER": "cl.exe", | ||
"CMAKE_CXX_COMPILER": "cl.exe", | ||
"JBLAS_UT_ALL": "OFF" | ||
}, | ||
"condition": { | ||
"type": "equals", | ||
"lhs": "${hostSystemName}", | ||
"rhs": "Windows" | ||
} | ||
}, | ||
{ | ||
"name": "x64-debug", | ||
"displayName": "x64 Debug", | ||
"description": "Target Windows (64-bit) with the Visual Studio development environment. (Debug)", | ||
"inherits": "windows-base", | ||
"architecture": { | ||
"value": "x64", | ||
"strategy": "external" | ||
}, | ||
"cacheVariables": { | ||
"CMAKE_BUILD_TYPE": "Debug", | ||
"JBLAS_UT_DEBUG": "ON" | ||
} | ||
}, | ||
{ | ||
"name": "x64-release", | ||
"displayName": "x64 Release", | ||
"description": "Target Windows (64-bit) with the Visual Studio development environment. (Release)", | ||
"inherits": "x64-debug", | ||
"cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } | ||
}, | ||
{ | ||
"name": "x64-release-UT", | ||
"displayName": "x64 Release for UT", | ||
"description": "Target Windows (64-bit) with the Visual Studio development environment. (Release)", | ||
"inherits": "x64-release", | ||
"cacheVariables": { "JBLAS_UT_ALL": "ON" } | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Jblas | ||
Jblas is a lightweight, header-only acceleration library for high-performance GEMM and related computations on Intel platforms. Inspired by Cutlass, it provides high-level template class abstractions for various elements required for computation, and allows flexible kernel construction through template combinations to meet specific needs, maximizing the reuse of existing template classes. Users can also develop custom template classes to expand Jblas’s computational capabilities. Jblas includes several different types of template classes, specifically: | ||
|
||
- `Interface`: Exposes gemm computation interface to users, while allowing users to specify their own Launcher template classes and Parallel template classes. | ||
- `Launcher`: Schedules computation-related template classes, allowing users to specify their own computation-related template classes, including GemmCore, Prologue, and Epilogue. | ||
- `Parallel`: Specifies data splitting strategy for task distribution among different cores. Jblas’s default Parallel template class adopts an L2-cache-fusion concept, i.e., each core tries to temporarily store the data it processes in its L2-cache during each round of gemm-tile computation. | ||
- `GemmCore`: A computation-related template class that provides a micro-kernel for performing a tile gemm computation with a specific ISA. It is the most important template class in Jblas. Currently, GemmCore supports the following ISAs: | ||
- AVX2 | ||
- AVX_VNNI | ||
- AVX512F | ||
- AVX512_VNNI | ||
- AMX_BF16 | ||
- AMX_INT8 | ||
- AVX512_FP16 | ||
- `Prologue`: A computation-related template class that preprocesses (such as data type conversion/padding) input data to meet GemmCore’s input data requirements. | ||
- `Epilogue`: A computation-related template class that post-processes (such as eltwiseop-fusion) the results of gemm-core computations to expand Jblas’s application scenarios. | ||
|
||
The interaction logic between different template classes and the calculation process of gemm are shown in the following figure. | ||
![Jblas workflow](docs/workflow.png) | ||
# Highlights | ||
## Weight-only | ||
Jblas provides weight-only linear computational capabilities for LLM inference. We provide a series of Prologues that quantize, compress, serialize, and deserialize fp32 weights in different ways. Specifically, we support compressed weights of the following data types: | ||
|
||
- S8 | ||
- S4_CLIP | ||
- S4_FULLRANGE | ||
- FP4 | ||
- NF4 | ||
## Postop-fusion | ||
Jblas provides assembly-level postop-fusion through epilogue to minimize the overhead caused by data movement. Specifically, we support the following postop-fusions: | ||
|
||
- GELU | ||
- SWISH | ||
- RELU | ||
- EXP | ||
- TANH | ||
## Compilation Requirements and Usage | ||
Compile: | ||
|
||
- GCC version >=8.5.0 | ||
- CMake version >=3.13 (the build script uses `target_link_options`) | ||
|
||
Usage: | ||
```cmake | ||
add_subdirectory(jblas) | ||
target_link_libraries("${YOUR_PROJECT}" jblas::jblas) | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
@PACKAGE_INIT@

# Load the imported targets written by install(EXPORT ...) into this directory.
# The file name must match the export set name, "<project>-targets".
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake")

# Report failure if the caller asked for components this package does not provide.
check_required_components(@PROJECT_NAME@)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
@PACKAGE_INIT@

# Define the header-only imported target once; repeated find_package() calls
# skip the definition because the target already exists.
if(NOT TARGET @TARGET_NAME@)
  add_library(@TARGET_NAME@ INTERFACE IMPORTED)
  set_property(TARGET @TARGET_NAME@ PROPERTY
    INTERFACE_INCLUDE_DIRECTORIES "@ABSOLUTE_INCLUDE_DIR@")
endif()
Oops, something went wrong.