Commit b482e4c

Merge branch 'develop' into remove_deprecated_unittest_alias
Ainavo committed Mar 20, 2023
2 parents 89bff67 + b81188f commit b482e4c
Showing 320 changed files with 3,798 additions and 1,463 deletions.
12 changes: 12 additions & 0 deletions CMakeLists.txt
@@ -298,6 +298,7 @@ option(WITH_CRYPTO "Compile PaddlePaddle with crypto support" ON)
 option(WITH_ARM "Compile PaddlePaddle with arm support" OFF)
 option(WITH_SW "Compile PaddlePaddle with sw support" OFF)
 option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF)
+option(WITH_LOONGARCH "Compile PaddlePaddle with loongarch support" OFF)
 option(WITH_MUSL "Compile with musl libc instead of glibc" OFF)
 option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF)
 option(WITH_STRIP "Strip so files of Whl packages" OFF)
@@ -659,6 +660,16 @@ if(WITH_MIPS)
   add_definitions(-DPADDLE_WITH_MIPS)
 endif()
 
+if(WITH_LOONGARCH)
+  set(WITH_XBYAK
+      OFF
+      CACHE STRING "Disable XBYAK when compiling WITH_LOONGARCH=ON" FORCE)
+  set(WITH_MKL
+      OFF
+      CACHE STRING "Disable MKL when compiling WITH_LOONGARCH=ON." FORCE)
+  add_definitions(-DPADDLE_WITH_LOONGARCH)
+endif()
+
 if(WITH_ONEMKL)
   add_definitions(-DPADDLE_WITH_ONEMKL)
 endif()
@@ -703,6 +714,7 @@ add_subdirectory(paddle)
 if(WITH_PYTHON)
   add_subdirectory(python)
 endif()
+add_subdirectory(test)
 
 get_directory_property(all_inc_dirs INCLUDE_DIRECTORIES)
 list(JOIN all_inc_dirs "\r\n" all_inc_dirs)
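
Note: the block above both defines the user-facing switch and propagates it to C++ via add_definitions(-DPADDLE_WITH_LOONGARCH). A minimal sketch of how such a macro is typically consumed in source, assuming nothing beyond the define itself (the guarded body is illustrative, not taken from this commit):

// Illustrative sketch only: compile-time branch on the macro defined when
// WITH_LOONGARCH=ON. On this target the build also forces XBYAK and MKL
// off, so only generic kernel paths (e.g. OpenBLAS-backed) apply.
#ifdef PADDLE_WITH_LOONGARCH
static constexpr bool kUseJitKernels = false;  // no XBYAK JIT on LoongArch
#else
static constexpr bool kUseJitKernels = true;   // hypothetical default
#endif
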
4 changes: 4 additions & 0 deletions cmake/external/openblas.cmake
@@ -26,6 +26,10 @@ if(WITH_MIPS)
   set(CBLAS_TAG v0.3.13)
 endif()
 
+if(WITH_LOONGARCH)
+  set(CBLAS_TAG v0.3.18)
+endif()
+
 if(NOT WIN32)
   set(CBLAS_LIBRARIES
       "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
2 changes: 2 additions & 0 deletions cmake/external/xpu.cmake
@@ -142,6 +142,8 @@ if(WITH_XPU_XFT)
   message(STATUS "Compile with XPU XFT!")
   add_definitions(-DPADDLE_WITH_XPU_XFT)
 
+  set(XPU_XFT_INC_DIR "${XPU_INC_DIR}/xft")
+  include_directories(${XPU_XFT_INC_DIR})
   set(XPU_XFT_LIB "${XPU_LIB_DIR}/${XPU_XFT_LIB_NAME}")
 endif()
 
3 changes: 2 additions & 1 deletion cmake/flags.cmake
@@ -196,7 +196,8 @@ if(NOT WIN32)
   if(NOT WITH_NV_JETSON
      AND NOT WITH_ARM
      AND NOT WITH_SW
-     AND NOT WITH_MIPS)
+     AND NOT WITH_MIPS
+     AND NOT WITH_LOONGARCH)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
   endif()
 endif()
105 changes: 0 additions & 105 deletions paddle/CMakeLists.txt
@@ -1,6 +1,3 @@
-set(CC_TESTS_DIR
-    ${PADDLE_BINARY_DIR}/paddle/tests
-    CACHE INTERNAL "c++ tests directory")
 set(PYTHON_TESTS_DIR
     ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests
     CACHE INTERNAL "python tests directory")
@@ -36,105 +33,3 @@ add_subdirectory(ir)
 # (2) naturally, and configure tests in only one `CMakeLists.txt`,
 # (3) cc tests support linking pre-built dynamic libraries. For example, use the dynamic
 #     library in the installed paddle by `pip`.
-
-# add all tests here
-get_property(test_srcs GLOBAL PROPERTY TEST_SRCS)
-get_property(test_names GLOBAL PROPERTY TEST_NAMES)
-# message("test_srcs ${test_srcs}")
-
-get_property(paddle_lib GLOBAL PROPERTY PADDLE_LIB_NAME)
-
-set(POSTFIX ".so")
-if(WIN32)
-  set(POSTFIX ".dll")
-endif()
-
-list(LENGTH test_names len)
-if(${len} GREATER_EQUAL 1)
-  message("Total cpp tests using dynamic link: ${len}")
-  math(EXPR stop "${len} - 1")
-  foreach(idx RANGE ${stop})
-    if(WITH_TESTING)
-      list(GET test_srcs ${idx} test_src)
-      list(GET test_names ${idx} test_name)
-      get_property(test_arg GLOBAL PROPERTY "${test_name}_ARGS")
-      # message("add test ${test_name}")
-      add_executable(${test_name} ${test_src})
-      target_link_libraries(${test_name} paddle_gtest_main_new)
-      target_link_libraries(${test_name} $<TARGET_LINKER_FILE:${paddle_lib}>)
-      add_dependencies(${test_name} ${paddle_lib} paddle_gtest_main_new)
-      if(WITH_GPU)
-        target_link_libraries(${test_name} ${CUDA_CUDART_LIBRARY}
-                              "-Wl,--as-needed")
-      endif()
-      if(WITH_ROCM)
-        target_link_libraries(${test_name} ${ROCM_HIPRTC_LIB})
-      endif()
-      if(APPLE)
-        target_link_libraries(${test_name}
-                              "-Wl,-rpath,$<TARGET_FILE_DIR:${paddle_lib}>")
-      endif()
-      if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
-        target_link_libraries(${test_name} ${PYTHON_LIBRARIES})
-      endif()
-      if(WITH_XPU)
-        target_link_libraries(${test_name} xpulib)
-      endif()
-      if(WITH_MLU)
-        target_link_libraries(${test_name} neuware_lib)
-      endif()
-      if(NOT
-         ("${test_name}" STREQUAL "c_broadcast_op_npu_test"
-          OR "${test_name}" STREQUAL "c_allreduce_sum_op_npu_test"
-          OR "${test_name}" STREQUAL "c_allreduce_max_op_npu_test"
-          OR "${test_name}" STREQUAL "c_reducescatter_op_npu_test"
-          OR "${test_name}" STREQUAL "c_allgather_op_npu_test"
-          OR "${test_name}" STREQUAL "send_v2_op_npu_test"
-          OR "${test_name}" STREQUAL "c_reduce_sum_op_npu_test"
-          OR "${test_name}" STREQUAL "recv_v2_op_npu_test"))
-        cc_test_run(
-          ${test_name}
-          COMMAND
-          ${test_name}
-          ARGS
-          ${test_arg}
-          DIR
-          ${CC_TESTS_DIR})
-      endif()
-    elseif(WITH_TESTING AND NOT TEST ${test_name})
-      add_test(NAME ${test_name} COMMAND ${CMAKE_COMMAND} -E echo CI skip
-                                         ${test_name}.)
-    endif()
-    set_target_properties(${test_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY
-                                                  "${CC_TESTS_DIR}")
-  endforeach()
-endif()
-
-# set properties for some tests, it should be set after the tests defined.
-if(TARGET standalone_executor_test)
-  set_tests_properties(standalone_executor_test PROPERTIES TIMEOUT 100)
-  if(NOT WIN32)
-    add_dependencies(standalone_executor_test download_program)
-  endif()
-endif()
-
-if(TARGET layer_test)
-  add_dependencies(layer_test jit_download_program)
-  add_dependencies(layer_test_new jit_download_program)
-  set_tests_properties(layer_test_new PROPERTIES ENVIRONMENT
-                                                 "FLAGS_jit_engine_type=New")
-endif()
-
-if(TEST buddy_allocator_test)
-  if(NOT WIN32)
-    add_dependencies(buddy_allocator_test download_data)
-  endif()
-  set_tests_properties(buddy_allocator_test PROPERTIES LABELS
-                                                       "RUN_TYPE=EXCLUSIVE")
-endif()
-
-add_custom_target(build_tests)
-# add target to build all cpp tests
-if(${len} GREATER_EQUAL 1)
-  add_dependencies(build_tests ${test_names})
-endif()
6 changes: 6 additions & 0 deletions paddle/fluid/framework/ir/CMakeLists.txt
@@ -235,6 +235,8 @@ if(WITH_XPU)
   pass_library(link_xpu_op_max_pass inference DIR xpu DEPS ${XPU_PASS_DEPS})
   pass_library(delete_isolated_node_pass inference DIR xpu DEPS
                ${XPU_PASS_DEPS})
+  pass_library(fused_multi_transformer_xpu_quant_pass inference DIR xpu DEPS
+               ${XPU_PASS_DEPS})
 endif()
 
 cc_library(
@@ -493,4 +495,8 @@ if(WITH_XPU)
     test_delete_isolated_node_pass
     SRCS xpu/delete_isolated_node_pass_test.cc
     DEPS delete_isolated_node_pass)
+  cc_test(
+    test_fused_multi_transformer_xpu_quant_pass
+    SRCS xpu/fused_multi_transformer_xpu_quant_pass_tester.cc
+    DEPS fused_multi_transformer_xpu_quant_pass)
 endif()
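
For orientation, a hedged sketch of how a pass wired in above is usually obtained and run by such a tester; PassRegistry::Instance().Get() and Pass::Apply() are the standard ir::Pass entry points, but this exact snippet is an assumption rather than part of the diff:

#include <memory>

#include "paddle/fluid/framework/ir/pass.h"

// Assumed test-style driver (not from this commit): fetch the registered
// pass by name and let it rewrite matched subgraphs in place.
void RunQuantPass(std::unique_ptr<paddle::framework::ir::Graph>* graph) {
  auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
      "fused_multi_transformer_xpu_quant_pass");
  graph->reset(pass->Apply(graph->release()));
}
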
@@ -75,7 +75,7 @@ TEST(FuseMultiTransformerLayerPass, encoder_fp) {
                               1,
                               {2, -1, 16, 1024, 64},
                               0);
-    auto* out = layers.fused_multi_transformer(x,
+    auto outs = layers.fused_multi_transformer(x,
                                                cache_kv,
                                                src_mask,
                                                qkv_w,
@@ -93,7 +93,7 @@ TEST(FuseMultiTransformerLayerPass, encoder_fp) {
                                                0.1,
                                                1e-12);
 
-    x = out;
+    x = outs[0];
   }
   std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
   graph->Set("__param_scope__", CreateParamScope());
@@ -126,7 +126,7 @@ TEST(FuseMultiTransformerLayerPass, decoder_fp) {
   for (int i = 0; i < num_layers; ++i) {
     auto* shape_out = layers.shape(src_mask);
     auto* time_stamp = layers.slice(shape_out, {0}, {3}, {4});
-    auto* out = layers.fused_multi_transformer(x,
+    auto outs = layers.fused_multi_transformer(x,
                                                cache_kv,
                                                src_mask,
                                                qkv_w,
@@ -145,7 +145,7 @@ TEST(FuseMultiTransformerLayerPass, decoder_fp) {
                                                1e-12,
                                                time_stamp);
 
-    x = out;
+    x = outs[0];
   }
   std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
   auto param_scope = CreateParamScope();
9 changes: 9 additions & 0 deletions paddle/fluid/framework/ir/node.h
@@ -151,6 +151,15 @@ class Node {
     var_desc_->SetName(new_name);
   }
 
+  void RenameOp(const std::string& new_name) {
+    PADDLE_ENFORCE_EQ(
+        type_ == Type::kOperation && op_desc_,
+        true,
+        platform::errors::InvalidArgument("Node must be type of operation."));
+    name_ = new_name;
+    op_desc_->SetType(new_name);
+  }
+
   int DescOrder() const { return desc_order_; }
 
   int GetVarNodeBlockId() const {
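
RenameOp mirrors the existing RenameVar above it: it keeps Node::Name() and the underlying OpDesc type in sync. A hypothetical usage sketch (the helper and op name are placeholders, not from this commit):

// Hypothetical caller, e.g. inside a fuse pass: retarget a matched
// operator node to a different kernel. IsOp() and Op() are existing
// Node accessors; "my_fused_op" is a placeholder name.
void RetargetOpNode(paddle::framework::ir::Node* node) {
  if (node->IsOp() && node->Op() != nullptr) {
    node->RenameOp("my_fused_op");  // updates name_ and op_desc_ together
  }
}
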
1 change: 1 addition & 0 deletions paddle/fluid/framework/ir/pass.cc
@@ -49,6 +49,7 @@ static const std::vector<std::string> support_subgraph_passes = {
     "fuse_multi_transformer_layer_pass",
     "delete_quant_dequant_linear_op_pass",
     "delete_weight_dequant_linear_op_pass",
+    "fused_multi_transformer_xpu_quant_pass",
     "fc_xpu_fuse_pass",
     "delete_op_device_pass"};
 
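
support_subgraph_passes acts as an allow-list: only passes named in it may also be applied to control-flow sub-graphs, so adding the new pass name opts it in. A hedged illustration of the kind of membership check this implies (the helper is hypothetical; the real lookup in pass.cc may differ in detail):

#include <algorithm>
#include <string>
#include <vector>

// Hypothetical helper: true if pass_name appears in the allow-list above,
// i.e. the pass may run on sub-graphs as well as the main graph.
bool SupportsSubgraph(const std::vector<std::string>& allow_list,
                      const std::string& pass_name) {
  return std::find(allow_list.begin(), allow_list.end(), pass_name) !=
         allow_list.end();
}
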
58 changes: 31 additions & 27 deletions paddle/fluid/framework/ir/pass_tester_helper.h
@@ -571,33 +571,35 @@ struct Layers {
     return out;
   }
 
-  VarDesc* fused_multi_transformer(VarDesc* x,
-                                   VarDesc* cache_kv,
-                                   VarDesc* src_mask,
-                                   VarDesc* qkv_w,
-                                   VarDesc* qkv_bias,
-                                   VarDesc* out_linear_w,
-                                   VarDesc* out_linear_bias,
-                                   VarDesc* ffn1_w,
-                                   VarDesc* ffn1_bias,
-                                   VarDesc* ffn2_w,
-                                   VarDesc* ffn2_bias,
-                                   VarDesc* ln_scale,
-                                   VarDesc* ln_bias,
-                                   VarDesc* ffn_ln_scale,
-                                   VarDesc* ffn_ln_bias,
-                                   float epsilon,
-                                   float dropout_rate,
-                                   VarDesc* time_stamp = nullptr,
-                                   VarDesc* qkv_out_scale = nullptr,
-                                   VarDesc* out_linear_out_scale = nullptr,
-                                   VarDesc* ffn1_out_scale = nullptr,
-                                   VarDesc* ffn2_out_scale = nullptr,
-                                   std::vector<float> qkv_in_scale = {},
-                                   std::vector<float> out_linear_in_scale = {},
-                                   std::vector<float> ffn1_in_scale = {},
-                                   std::vector<float> ffn2_in_scale = {}) {
+  std::vector<VarDesc*> fused_multi_transformer(
+      VarDesc* x,
+      VarDesc* cache_kv,
+      VarDesc* src_mask,
+      VarDesc* qkv_w,
+      VarDesc* qkv_bias,
+      VarDesc* out_linear_w,
+      VarDesc* out_linear_bias,
+      VarDesc* ffn1_w,
+      VarDesc* ffn1_bias,
+      VarDesc* ffn2_w,
+      VarDesc* ffn2_bias,
+      VarDesc* ln_scale,
+      VarDesc* ln_bias,
+      VarDesc* ffn_ln_scale,
+      VarDesc* ffn_ln_bias,
+      float epsilon,
+      float dropout_rate,
+      VarDesc* time_stamp = nullptr,
+      VarDesc* qkv_out_scale = nullptr,
+      VarDesc* out_linear_out_scale = nullptr,
+      VarDesc* ffn1_out_scale = nullptr,
+      VarDesc* ffn2_out_scale = nullptr,
+      std::vector<float> qkv_in_scale = {},
+      std::vector<float> out_linear_in_scale = {},
+      std::vector<float> ffn1_in_scale = {},
+      std::vector<float> ffn2_in_scale = {}) {
     VarDesc* out = lod_tensor(unique_name());
+    VarDesc* cache_kv_out = lod_tensor(unique_name());
     OpDesc* op = program_.MutableBlock(0)->AppendOp();
     std::string op_type = qkv_out_scale ? "fused_multi_transformer_int8"
                                         : "fused_multi_transformer";
@@ -623,6 +625,7 @@ struct Layers {
     op->SetAttr("dropout_rate", dropout_rate);
     op->SetAttr("epsilon", epsilon);
     op->SetOutput("Out", {out->Name()});
+    op->SetOutput("CacheKVOut", {cache_kv_out->Name()});
 
     if (time_stamp) {
       op->SetInput("TimeStep", {time_stamp->Name()});
@@ -638,7 +641,8 @@ struct Layers {
       op->SetAttr("ffn1_in_scale", ffn1_in_scale);
       op->SetAttr("ffn2_in_scale", ffn2_in_scale);
     }
-    return out;
+    std::vector<VarDesc*> outs = {out, cache_kv_out};
+    return outs;
   }
 
   VarDesc* dequantize_linear(VarDesc* x,
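
Net effect of the helper change: fused_multi_transformer now appends both the "Out" and "CacheKVOut" outputs and returns them as a vector, which is why the testers above switched from `auto* out` to `auto outs` and chain on `outs[0]`. A condensed sketch of the new call convention, with the literal values the testers pass in signature order (variables are set up as in the tester code above):

// Sketch mirroring the updated testers: outs[0] is "Out", outs[1] is
// "CacheKVOut" as wired by the helper.
std::vector<VarDesc*> outs = layers.fused_multi_transformer(
    x, cache_kv, src_mask, qkv_w, qkv_bias, out_linear_w, out_linear_bias,
    ffn1_w, ffn1_bias, ffn2_w, ffn2_bias, ln_scale, ln_bias, ffn_ln_scale,
    ffn_ln_bias, /*epsilon=*/0.1f, /*dropout_rate=*/1e-12f);
x = outs[0];  // feed "Out" into the next layer; outs[1] carries the cache
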