Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

air-dependency pass failed on mmult code generated from Triton #278

Open
zzzDavid opened this issue Aug 18, 2023 · 0 comments
Open

air-dependency pass failed on mmult code generated from Triton #278

zzzDavid opened this issue Aug 18, 2023 · 0 comments

Comments

@zzzDavid
Copy link
Contributor

Description

It looks like an empty vector is being indexed inside the `AIRDependency::createPartialMemref` function, which triggers the `idx < size()` assertion failure shown in the stack trace below.

Tool commit points

  • MLIR-AIR @c3a9b505f06936a3e4c81c221ca9fac2a7d6dbad
  • MLIR-AIE @d21ca563e0c0fd100a4bbd98d194e770ce33bd79

Steps to reproduce

Input: mmult.triton.air.mlir

#map = affine_map<(d0, d1) -> (d0, d1)>
module {
  func.func @matmul_kernel(%arg0: memref<*xi32>, %arg1: memref<*xi32>, %arg2: memref<*xi32>, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32, %arg7: i32, %arg8: i32, %arg9: i32, %arg10: i32, %arg11: i32, %arg12: i32, %arg13: i32, %arg14: i32) {
    %c0_i32 = arith.constant 0 : i32
    %c128_i32 = arith.constant 128 : i32
    %c128 = arith.constant 128 : index
    %alloc = memref.alloc() {alignment = 64 : i64} : memref<128x128xi32>
    linalg.fill ins(%c0_i32 : i32) outs(%alloc : memref<128x128xi32>)
    %c1_i32 = arith.constant 1 : i32
    %c0_i32_0 = arith.constant 0 : i32
    %c-1_i32 = arith.constant -1 : i32
    %0 = arith.cmpi sgt, %c128_i32, %c0_i32_0 : i32
    %1 = arith.select %0, %c-1_i32, %c1_i32 : i32
    %2 = arith.addi %1, %arg4 : i32
    %3 = arith.divsi %2, %c128_i32 : i32
    %4 = arith.addi %c1_i32, %3 : i32
    %5 = arith.subi %c0_i32_0, %arg4 : i32
    %6 = arith.divsi %5, %c128_i32 : i32
    %7 = arith.subi %c0_i32_0, %6 : i32
    %8 = arith.cmpi slt, %arg4, %c0_i32_0 : i32
    %9 = arith.cmpi sgt, %arg4, %c0_i32_0 : i32
    %10 = arith.cmpi slt, %c128_i32, %c0_i32_0 : i32
    %11 = arith.cmpi sgt, %c128_i32, %c0_i32_0 : i32
    %12 = arith.andi %8, %10 : i1
    %13 = arith.andi %9, %11 : i1
    %14 = arith.ori %12, %13 : i1
    %15 = arith.select %14, %4, %7 : i32
    %c1_i32_1 = arith.constant 1 : i32
    %c0_i32_2 = arith.constant 0 : i32
    %c-1_i32_3 = arith.constant -1 : i32
    %16 = arith.cmpi slt, %15, %c0_i32_2 : i32
    %17 = arith.select %16, %c1_i32_1, %c-1_i32_3 : i32
    %18 = arith.subi %17, %arg12 : i32
    %19 = arith.divsi %18, %15 : i32
    %20 = arith.subi %c-1_i32_3, %19 : i32
    %21 = arith.divsi %arg12, %15 : i32
    %22 = arith.cmpi slt, %arg12, %c0_i32_2 : i32
    %23 = arith.cmpi sgt, %arg12, %c0_i32_2 : i32
    %24 = arith.cmpi slt, %15, %c0_i32_2 : i32
    %25 = arith.cmpi sgt, %15, %c0_i32_2 : i32
    %26 = arith.andi %22, %25 : i1
    %27 = arith.andi %23, %24 : i1
    %28 = arith.ori %26, %27 : i1
    %29 = arith.select %28, %20, %21 : i32
    %30 = arith.remsi %arg12, %15 : i32
    %31 = arith.muli %29, %c128_i32 : i32
    %32 = arith.muli %30, %c128_i32 : i32
    %33 = arith.index_cast %31 : i32 to index
    %34 = arith.index_cast %arg6 : i32 to index
    %35 = arith.muli %33, %34 : index
    %36 = arith.index_cast %arg7 : i32 to index
    %37 = arith.index_cast %arg8 : i32 to index
    %38 = arith.index_cast %32 : i32 to index
    %39 = arith.index_cast %arg9 : i32 to index
    %40 = arith.muli %38, %39 : index
    %reinterpret_cast = memref.reinterpret_cast %arg0 to offset: [%35], sizes: [128, 128], strides: [%34, %36] : memref<*xi32> to memref<128x128xi32, strided<[?, ?], offset: ?>>
    %reinterpret_cast_4 = memref.reinterpret_cast %arg1 to offset: [%40], sizes: [128, 128], strides: [%37, %39] : memref<*xi32> to memref<128x128xi32, strided<[?, ?], offset: ?>>
    %alloc_5 = memref.alloc() : memref<128x128xi32>
    %41 = arith.index_cast %arg5 : i32 to index
    %42 = arith.minsi %41, %c128 : index
    %subview = memref.subview %reinterpret_cast[0, 0] [128, %42] [1, 1] : memref<128x128xi32, strided<[?, ?], offset: ?>> to memref<128x?xi32, strided<[?, ?], offset: ?>>
    %subview_6 = memref.subview %alloc_5[0, 0] [128, %42] [1, 1] : memref<128x128xi32> to memref<128x?xi32, strided<[128, 1]>>
    %43 = arith.cmpi slt, %42, %c128 : index
    scf.if %43 {
      linalg.fill ins(%c0_i32 : i32) outs(%alloc_5 : memref<128x128xi32>)
    }
    linalg.copy {cast = #linalg.type_fn<cast_signed>} ins(%subview : memref<128x?xi32, strided<[?, ?], offset: ?>>) outs(%subview_6 : memref<128x?xi32, strided<[128, 1]>>)
    %alloc_7 = memref.alloc() : memref<128x128xi32>
    %44 = arith.index_cast %arg5 : i32 to index
    %45 = arith.minsi %44, %c128 : index
    %subview_8 = memref.subview %reinterpret_cast_4[0, 0] [%45, 128] [1, 1] : memref<128x128xi32, strided<[?, ?], offset: ?>> to memref<?x128xi32, strided<[?, ?], offset: ?>>
    %subview_9 = memref.subview %alloc_7[0, 0] [%45, 128] [1, 1] : memref<128x128xi32> to memref<?x128xi32, strided<[128, 1]>>
    %46 = arith.cmpi slt, %45, %c128 : index
    scf.if %46 {
      linalg.fill ins(%c0_i32 : i32) outs(%alloc_7 : memref<128x128xi32>)
    }
    linalg.copy {cast = #linalg.type_fn<cast_signed>} ins(%subview_8 : memref<?x128xi32, strided<[?, ?], offset: ?>>) outs(%subview_9 : memref<?x128xi32, strided<[128, 1]>>)
    %alloc_10 = memref.alloc() {alignment = 64 : i64} : memref<128x128xi32>
    %alloc_11 = memref.alloc() {alignment = 64 : i64} : memref<128x128xi32>
    memref.copy %alloc_10, %alloc_11 : memref<128x128xi32> to memref<128x128xi32>
    memref.dealloc %alloc_10 : memref<128x128xi32>
    linalg.matmul ins(%alloc_5, %alloc_7 : memref<128x128xi32>, memref<128x128xi32>) outs(%alloc_11 : memref<128x128xi32>)
    memref.dealloc %alloc_7 : memref<128x128xi32>
    memref.dealloc %alloc_5 : memref<128x128xi32>
    %alloc_12 = memref.alloc() {alignment = 64 : i64} : memref<128x128xi32>
    linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%alloc_11, %alloc : memref<128x128xi32>, memref<128x128xi32>) outs(%alloc_12 : memref<128x128xi32>) {
    ^bb0(%in: i32, %in_17: i32, %out: i32):
      %68 = arith.addi %in, %in_17 : i32
      linalg.yield %68 : i32
    }
    memref.dealloc %alloc_11 : memref<128x128xi32>
    %alloc_13 = memref.alloc() {alignment = 64 : i64} : memref<128x128xi32>
    linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%alloc, %alloc_12 : memref<128x128xi32>, memref<128x128xi32>) outs(%alloc_13 : memref<128x128xi32>) {
    ^bb0(%in: i32, %in_17: i32, %out: i32):
      %68 = arith.addi %in, %in_17 : i32
      linalg.yield %68 : i32
    }
    memref.dealloc %alloc_12 : memref<128x128xi32>
    memref.dealloc %alloc : memref<128x128xi32>
    %47 = arith.muli %29, %c128_i32 : i32
    %48 = arith.muli %30, %c128_i32 : i32
    %49 = arith.index_cast %arg10 : i32 to index
    %50 = arith.index_cast %47 : i32 to index
    %51 = arith.muli %50, %49 : index
    %52 = arith.index_cast %arg11 : i32 to index
    %53 = arith.index_cast %48 : i32 to index
    %54 = arith.muli %53, %52 : index
    %55 = arith.addi %51, %54 : index
    %reinterpret_cast_14 = memref.reinterpret_cast %arg2 to offset: [%55], sizes: [128, 128], strides: [%49, %52] : memref<*xi32> to memref<128x128xi32, strided<[?, ?], offset: ?>>
    %56 = arith.index_cast %47 : i32 to index
    %57 = arith.addi %56, %c128 : index
    %58 = arith.index_cast %arg3 : i32 to index
    %59 = arith.minsi %57, %58 : index
    %60 = arith.subi %59, %56 : index
    %61 = arith.index_cast %48 : i32 to index
    %62 = arith.addi %61, %c128 : index
    %63 = arith.index_cast %arg4 : i32 to index
    %64 = arith.minsi %62, %63 : index
    %65 = arith.subi %64, %61 : index
    %66 = arith.minsi %60, %c128 : index
    %67 = arith.minsi %65, %c128 : index
    %subview_15 = memref.subview %alloc_13[0, 0] [%66, %67] [1, 1] : memref<128x128xi32> to memref<?x?xi32, strided<[128, 1]>>
    %subview_16 = memref.subview %reinterpret_cast_14[0, 0] [%66, %67] [1, 1] : memref<128x128xi32, strided<[?, ?], offset: ?>> to memref<?x?xi32, strided<[?, ?], offset: ?>>
    %cast = memref.cast %subview_15 : memref<?x?xi32, strided<[128, 1]>> to memref<?x?xi32, strided<[?, ?], offset: ?>>
    linalg.copy {cast = #linalg.type_fn<cast_signed>} ins(%subview_15 : memref<?x?xi32, strided<[128, 1]>>) outs(%subview_16 : memref<?x?xi32, strided<[?, ?], offset: ?>>)
    memref.dealloc %alloc_13 : memref<128x128xi32>
    return
  }
  func.func @kernel(%arg0: memref<128x128xi32>, %arg1: memref<128x128xi32>, %arg2: memref<128x128xi32>, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32, %arg7: i32, %arg8: i32, %arg9: i32, %arg10: i32, %arg11: i32, %arg12: i32, %arg13: i32, %arg14: i32) {
    %cast = memref.cast %arg0 : memref<128x128xi32> to memref<*xi32>
    %cast_0 = memref.cast %arg1 : memref<128x128xi32> to memref<*xi32>
    %cast_1 = memref.cast %arg2 : memref<128x128xi32> to memref<*xi32>
    call @matmul_kernel(%cast, %cast_0, %cast_1, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14) : (memref<*xi32>, memref<*xi32>, memref<*xi32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
    return
  }
}

Compilation command:

air-opt mmult.triton.air.mlir \
    -buffer-results-to-out-params \
    -air-linalg-codegen \
    -air-par-to-herd \
    -air-copy-to-dma \
    -air-dependency \
    -canonicalize -cse \
    -o mmult.air.mlir

Error message and stack trace

air-opt: /home/niansong/mlir-air/llvm/llvm/include/llvm/ADT/SmallVector.h:294: reference llvm::SmallVectorTemplateCommon<mlir::Value>::operator[](size_type) [T = mlir::Value]: Assertion `idx < size()' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.      Program arguments: air-opt mmult.triton.air.mlir -buffer-results-to-out-params -air-linalg-codegen -air-par-to-herd -air-copy-to-dma -air-dependency -canonicalize -cse -o mmult.air.mlir
 #0 0x000055cbc9a8c007 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/niansong/mlir-air/install/bin/air-opt+0x2854007)
 #1 0x000055cbc9a89e5e llvm::sys::RunSignalHandlers() (/home/niansong/mlir-air/install/bin/air-opt+0x2851e5e)
 #2 0x000055cbc9a8c80f SignalHandler(int) Signals.cpp:0:0
 #3 0x00007f54362a1420 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x14420)
 #4 0x00007f5435d3400b raise /build/glibc-SzIz7B/glibc-2.31/signal/../sysdeps/unix/sysv/linux/raise.c:51:1
 #5 0x00007f5435d13859 abort /build/glibc-SzIz7B/glibc-2.31/stdlib/abort.c:81:7
 #6 0x00007f5435d13729 get_sysdep_segment_value /build/glibc-SzIz7B/glibc-2.31/intl/loadmsgcat.c:509:8
 #7 0x00007f5435d13729 _nl_load_domain /build/glibc-SzIz7B/glibc-2.31/intl/loadmsgcat.c:970:34
 #8 0x00007f5435d24fd6 (/lib/x86_64-linux-gnu/libc.so.6+0x33fd6)
 #9 0x000055cbc8049ac9 llvm::SmallVectorTemplateCommon<mlir::Value, void>::operator[](unsigned long) AIRDependencyScheduleOpt.cpp:0:0
#10 0x000055cbc815728d (anonymous namespace)::AIRDependency::createPartialMemref(mlir::Value, unsigned int, llvm::SmallVector<mlir::Value, 2u>) AIRDependency.cpp:0:0
#11 0x000055cbc815778c void (anonymous namespace)::AIRDependency::traceDeps<xilinx::air::ExecuteOp>(llvm::SmallVector<(anonymous namespace)::AIRDependency::partialMemref, 1u>, xilinx::air::ExecuteOp, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>) AIRDependency.cpp:0:0
#12 0x000055cbc81569e3 (anonymous namespace)::AIRDependency::runOnOperation()::'lambda0'(mlir::Operation*)::operator()(mlir::Operation*) const AIRDependency.cpp:0:0
#13 0x000055cbc815470d void llvm::function_ref<void (mlir::Operation*)>::callback_fn<(anonymous namespace)::AIRDependency::runOnOperation()::'lambda0'(mlir::Operation*)>(long, mlir::Operation*) AIRDependency.cpp:0:0
#14 0x000055cbc84d5dce mlir::detail::walk(mlir::Operation*, llvm::function_ref<void (mlir::Operation*)>, mlir::WalkOrder) (/home/niansong/mlir-air/install/bin/air-opt+0x129ddce)
#15 0x000055cbc81546b2 std::enable_if<llvm::is_one_of<mlir::Operation*, mlir::Operation*, mlir::Region*, mlir::Block*>::value, void>::type mlir::detail::walk<(mlir::WalkOrder)1, (anonymous namespace)::AIRDependency::runOnOperation()::'lambda0'(mlir::Operation*), mlir::Operation*, void>(mlir::Operation*, (anonymous namespace)::AIRDependency::runOnOperation()::'lambda0'(mlir::Operation*)&&) AIRDependency.cpp:0:0
#16 0x000055cbc815465d std::enable_if<llvm::function_traits<std::decay<(anonymous namespace)::AIRDependency::runOnOperation()::'lambda0'(mlir::Operation*)>::type>::num_args == 1, void>::type mlir::Operation::walk<(mlir::WalkOrder)1, (anonymous namespace)::AIRDependency::runOnOperation()::'lambda0'(mlir::Operation*), void>((anonymous namespace)::AIRDependency::runOnOperation()::'lambda0'(mlir::Operation*)&&) AIRDependency.cpp:0:0
#17 0x000055cbc8148ba0 std::enable_if<llvm::function_traits<std::decay<(anonymous namespace)::AIRDependency::runOnOperation()::'lambda0'(mlir::Operation*)>::type>::num_args == 1, void>::type mlir::OpState::walk<(mlir::WalkOrder)1, (anonymous namespace)::AIRDependency::runOnOperation()::'lambda0'(mlir::Operation*), void>((anonymous namespace)::AIRDependency::runOnOperation()::'lambda0'(mlir::Operation*)&&) AIRDependency.cpp:0:0
#18 0x000055cbc81472de (anonymous namespace)::AIRDependency::runOnOperation() AIRDependency.cpp:0:0
#19 0x000055cbc8388c9f mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) (/home/niansong/mlir-air/install/bin/air-opt+0x1150c9f)
#20 0x000055cbc83892c9 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) (/home/niansong/mlir-air/install/bin/air-opt+0x11512c9)
#21 0x000055cbc838b446 mlir::PassManager::run(mlir::Operation*) (/home/niansong/mlir-air/install/bin/air-opt+0x1153446)
#22 0x000055cbc8385b86 performActions(llvm::raw_ostream&, bool, bool, std::shared_ptr<llvm::SourceMgr> const&, mlir::MLIRContext*, llvm::function_ref<mlir::LogicalResult (mlir::PassManager&)>, bool, bool) MlirOptMain.cpp:0:0
#23 0x000055cbc838585d mlir::LogicalResult llvm::function_ref<mlir::LogicalResult (std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&)>::callback_fn<mlir::MlirOptMain(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::function_ref<mlir::LogicalResult (mlir::PassManager&)>, mlir::DialectRegistry&, bool, bool, bool, bool, bool, bool, bool)::$_0>(long, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&) MlirOptMain.cpp:0:0
#24 0x000055cbc840e4c8 mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::function_ref<mlir::LogicalResult (std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&)>, llvm::raw_ostream&, bool, bool) (/home/niansong/mlir-air/install/bin/air-opt+0x11d64c8)
#25 0x000055cbc8383dfe mlir::MlirOptMain(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::function_ref<mlir::LogicalResult (mlir::PassManager&)>, mlir::DialectRegistry&, bool, bool, bool, bool, bool, bool, bool) (/home/niansong/mlir-air/install/bin/air-opt+0x114bdfe)
#26 0x000055cbc838429f mlir::MlirOptMain(int, char**, llvm::StringRef, mlir::DialectRegistry&, bool) (/home/niansong/mlir-air/install/bin/air-opt+0x114c29f)
#27 0x000055cbc7fc1a3a main (/home/niansong/mlir-air/install/bin/air-opt+0xd89a3a)
#28 0x00007f5435d15083 __libc_start_main /build/glibc-SzIz7B/glibc-2.31/csu/../csu/libc-start.c:342:3
#29 0x000055cbc7fc173e _start (/home/niansong/mlir-air/install/bin/air-opt+0xd8973e)
./compile.sh: line 9: 643162 Aborted                 air-opt mmult.triton.air.mlir -buffer-results-to-out-params -air-linalg-codegen -air-par-to-herd -air-copy-to-dma -air-dependency -canonicalize -cse -o mmult.air.mlir
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant