diff --git a/.clang-format-ignore b/.clang-format-ignore index 0755ae72..a2350e1d 100644 --- a/.clang-format-ignore +++ b/.clang-format-ignore @@ -7,5 +7,7 @@ ./sw/vendor/* ./sw/banshee/vendor/* ./sw/banshee/build/* +./sw/banshee/tests/runtime/printf.* +./sw/banshee/tests/runtime/encoding.h # Ignore any checked-out directories */.bender/* diff --git a/sw/banshee/tests/Makefile b/sw/banshee/tests/Makefile index 916556b8..90a2c9d4 100644 --- a/sw/banshee/tests/Makefile +++ b/sw/banshee/tests/Makefile @@ -2,9 +2,7 @@ # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 -DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))/../../sw -VPATH += $(DIR) -include $(DIR)/runtime/runtime.mk +include runtime/runtime.mk all: bin/large_lfsr all: bin/dummy @@ -25,13 +23,13 @@ all: bin/far_jump bin/%: %.c mkdir -p $(shell dirname $@) dump - $(RISCV_CC) -Iinclude $(RISCV_LDFLAGS) -o $@ $^ -T$(DIR)/runtime/link.ld $(DIR)/runtime/billywig_crt0.S + $(RISCV_CC) -Iinclude $(RISCV_LDFLAGS) -o $@ $^ -Truntime/link.ld runtime/billywig_crt0.S $(RISCV_STRIP) $@ -g -S -d --strip-debug $(RISCV_OBJDUMP) -D $@ > dump/$*.dump bin/%: %.s mkdir -p $(shell dirname $@) dump - $(RISCV_CC) -Iinclude $(RISCV_LDFLAGS) -o $@ $^ -T$(DIR)/runtime/link.ld + $(RISCV_CC) -Iinclude $(RISCV_LDFLAGS) -o $@ $^ -Truntime/link.ld $(RISCV_STRIP) $@ -g -S -d --strip-debug $(RISCV_OBJDUMP) -D $@ > dump/$*.dump diff --git a/sw/banshee/tests/bin/far_jump b/sw/banshee/tests/bin/far_jump index 1af9e8d6..c50c8e38 100755 Binary files a/sw/banshee/tests/bin/far_jump and b/sw/banshee/tests/bin/far_jump differ diff --git a/sw/banshee/tests/bin/frep_multiple b/sw/banshee/tests/bin/frep_multiple index ff1ad20f..57e814e3 100755 Binary files a/sw/banshee/tests/bin/frep_multiple and b/sw/banshee/tests/bin/frep_multiple differ diff --git a/sw/banshee/tests/bin/frep_single b/sw/banshee/tests/bin/frep_single index 966866f1..b2dfc3ae 100755 Binary files 
a/sw/banshee/tests/bin/frep_single and b/sw/banshee/tests/bin/frep_single differ diff --git a/sw/banshee/tests/bin/large_lfsr b/sw/banshee/tests/bin/large_lfsr index 3968c6e0..e779ea69 100755 Binary files a/sw/banshee/tests/bin/large_lfsr and b/sw/banshee/tests/bin/large_lfsr differ diff --git a/sw/banshee/tests/bin/loads b/sw/banshee/tests/bin/loads index afd80d86..a98c6a39 100755 Binary files a/sw/banshee/tests/bin/loads and b/sw/banshee/tests/bin/loads differ diff --git a/sw/banshee/tests/bin/matmul_baseline b/sw/banshee/tests/bin/matmul_baseline index 59104269..81cb1155 100755 Binary files a/sw/banshee/tests/bin/matmul_baseline and b/sw/banshee/tests/bin/matmul_baseline differ diff --git a/sw/banshee/tests/bin/matmul_ssr b/sw/banshee/tests/bin/matmul_ssr index f8648e53..b2f1d9c9 100755 Binary files a/sw/banshee/tests/bin/matmul_ssr and b/sw/banshee/tests/bin/matmul_ssr differ diff --git a/sw/banshee/tests/bin/matmul_ssr_frep b/sw/banshee/tests/bin/matmul_ssr_frep index 136948d9..07bbae05 100755 Binary files a/sw/banshee/tests/bin/matmul_ssr_frep and b/sw/banshee/tests/bin/matmul_ssr_frep differ diff --git a/sw/banshee/tests/bin/multi_cluster b/sw/banshee/tests/bin/multi_cluster index 1c72e633..eaf1327d 100755 Binary files a/sw/banshee/tests/bin/multi_cluster and b/sw/banshee/tests/bin/multi_cluster differ diff --git a/sw/banshee/tests/bin/multi_core b/sw/banshee/tests/bin/multi_core index b0105980..619aebc3 100755 Binary files a/sw/banshee/tests/bin/multi_core and b/sw/banshee/tests/bin/multi_core differ diff --git a/sw/banshee/tests/bin/stores b/sw/banshee/tests/bin/stores index a58b7d40..bedde636 100755 Binary files a/sw/banshee/tests/bin/stores and b/sw/banshee/tests/bin/stores differ diff --git a/sw/banshee/tests/bin/unpred_jr_1 b/sw/banshee/tests/bin/unpred_jr_1 index 6617fbea..361ef475 100755 Binary files a/sw/banshee/tests/bin/unpred_jr_1 and b/sw/banshee/tests/bin/unpred_jr_1 differ diff --git a/sw/banshee/tests/bin/unpred_jr_2 
b/sw/banshee/tests/bin/unpred_jr_2 index 9b054e4e..f719e2b0 100755 Binary files a/sw/banshee/tests/bin/unpred_jr_2 and b/sw/banshee/tests/bin/unpred_jr_2 differ diff --git a/sw/banshee/tests/dump/frep_multiple.dump b/sw/banshee/tests/dump/frep_multiple.dump index 8684f325..3fd001aa 100644 --- a/sw/banshee/tests/dump/frep_multiple.dump +++ b/sw/banshee/tests/dump/frep_multiple.dump @@ -20,54 +20,57 @@ Disassembly of section .text: 80010010: ffc10113 addi sp,sp,-4 # 40000008 80010014: 00012103 lw sp,0(sp) 80010018: f1402573 csrr a0,mhartid -8001001c: 00351293 slli t0,a0,0x3 -80010020: 40510133 sub sp,sp,t0 -80010024: 00629293 slli t0,t0,0x6 -80010028: 40510133 sub sp,sp,t0 -8001002c: 00010213 mv tp,sp -80010030: 00100293 li t0,1 -80010034: 00929293 slli t0,t0,0x9 -80010038: 40520233 sub tp,tp,t0 -8001003c: bfff0297 auipc t0,0xbfff0 -80010040: fd428293 addi t0,t0,-44 # 40000010 -80010044: 0002a583 lw a1,0(t0) -80010048: 038000ef jal ra,80010080
-8001004c: 00151513 slli a0,a0,0x1 -80010050: 00156513 ori a0,a0,1 -80010054: 0100006f j 80010064 - -80010058 : -80010058: fff00293 li t0,-1 -8001005c: 00554533 xor a0,a0,t0 -80010060: 0040006f j 80010064 - -80010064 : -80010064: f14022f3 csrr t0,mhartid -80010068: 00029863 bnez t0,80010078 -8001006c: bfff0297 auipc t0,0xbfff0 -80010070: fb428293 addi t0,t0,-76 # 40000020 -80010074: 00a2a023 sw a0,0(t0) - -80010078 : -80010078: 10500073 wfi -8001007c: ffdff06f j 80010078 +8001001c: bfff0297 auipc t0,0xbfff0 +80010020: 0242a283 lw t0,36(t0) # 40000040 +80010024: 40550533 sub a0,a0,t0 +80010028: 00351293 slli t0,a0,0x3 +8001002c: 40510133 sub sp,sp,t0 +80010030: 00629293 slli t0,t0,0x6 +80010034: 40510133 sub sp,sp,t0 +80010038: 00010213 mv tp,sp +8001003c: 00100293 li t0,1 +80010040: 00929293 slli t0,t0,0x9 +80010044: 40520233 sub tp,tp,t0 +80010048: bfff0297 auipc t0,0xbfff0 +8001004c: fc828293 addi t0,t0,-56 # 40000010 +80010050: 0002a583 lw a1,0(t0) +80010054: 038000ef jal ra,8001008c
+80010058: 00151513 slli a0,a0,0x1 +8001005c: 00156513 ori a0,a0,1 +80010060: 0100006f j 80010070 + +80010064 : +80010064: fff00293 li t0,-1 +80010068: 00554533 xor a0,a0,t0 +8001006c: 0040006f j 80010070 + +80010070 : +80010070: f14022f3 csrr t0,mhartid +80010074: 00029863 bnez t0,80010084 +80010078: bfff0297 auipc t0,0xbfff0 +8001007c: fa828293 addi t0,t0,-88 # 40000020 +80010080: 00a2a023 sw a0,0(t0) + +80010084 : +80010084: 10500073 wfi +80010088: ffdff06f j 80010084 Disassembly of section .text.startup: -80010080
: -80010080: 00000797 auipc a5,0x0 -80010084: 0407b707 fld fa4,64(a5) # 800100c0 -80010088: 00000797 auipc a5,0x0 -8001008c: 0407b687 fld fa3,64(a5) # 800100c8 -80010090: d20007d3 fcvt.d.w fa5,zero -80010094: 0012808b 0x12808b -80010098: 02e7f7d3 fadd.d fa5,fa5,fa4 -8001009c: 12d7f7d3 fmul.d fa5,fa5,fa3 -800100a0: 00000797 auipc a5,0x0 -800100a4: 0307b707 fld fa4,48(a5) # 800100d0 -800100a8: a2e7a553 feq.d a0,fa5,fa4 -800100ac: 00154513 xori a0,a0,1 -800100b0: 00008067 ret +8001008c
: +8001008c: 00000797 auipc a5,0x0 +80010090: 0347b707 fld fa4,52(a5) # 800100c0 +80010094: 00000797 auipc a5,0x0 +80010098: 0347b687 fld fa3,52(a5) # 800100c8 +8001009c: d20007d3 fcvt.d.w fa5,zero +800100a0: 0012808b 0x12808b +800100a4: 02e7f7d3 fadd.d fa5,fa5,fa4 +800100a8: 12d7f7d3 fmul.d fa5,fa5,fa3 +800100ac: 00000797 auipc a5,0x0 +800100b0: 0247b707 fld fa4,36(a5) # 800100d0 +800100b4: a2e7a553 feq.d a0,fa5,fa4 +800100b8: 00154513 xori a0,a0,1 +800100bc: 00008067 ret Disassembly of section .sdata: @@ -91,20 +94,20 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm - a: 3920 fld fs0,112(a0) - c: 322e fld ft4,232(sp) - e: 302e fld ft0,232(sp) - ... + a: 3120 fld fs0,96(a0) + c: 2e30 fld fa2,88(a2) + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2f41 jal 790 + 0: 3341 jal fffffd80 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 - c: 0025 c.nop 9 + c: 0029 c.nop 10 e: 0000 unimp 10: 1004 addi s1,sp,32 12: 7205 lui tp,0xfffe1 @@ -118,3 +121,5 @@ Disassembly of section .riscv.attributes: 26: 3266 fld ft4,120(sp) 28: 3070 fld fa2,224(s0) 2a: 645f 7032 0030 0x307032645f + 30: 0108 addi a0,sp,128 + 32: 0b0a slli s6,s6,0x2 diff --git a/sw/banshee/tests/dump/frep_single.dump b/sw/banshee/tests/dump/frep_single.dump index be847618..4e46210d 100644 --- a/sw/banshee/tests/dump/frep_single.dump +++ b/sw/banshee/tests/dump/frep_single.dump @@ -12,7 +12,7 @@ Disassembly of section .text: 80010000 <_start>: 80010000: 00001197 auipc gp,0x1 -80010004: 8b018193 addi gp,gp,-1872 # 800108b0 <__global_pointer$> +80010004: 8c018193 addi gp,gp,-1856 # 800108c0 <__global_pointer$> 80010008: 0040006f j 8001000c 8001000c : @@ -20,60 +20,63 @@ Disassembly of section .text: 80010010: ffc10113 addi sp,sp,-4 # 40000008 80010014: 00012103 lw sp,0(sp) 80010018: f1402573 csrr a0,mhartid 
-8001001c: 00351293 slli t0,a0,0x3 -80010020: 40510133 sub sp,sp,t0 -80010024: 00629293 slli t0,t0,0x6 -80010028: 40510133 sub sp,sp,t0 -8001002c: 00010213 mv tp,sp -80010030: 00100293 li t0,1 -80010034: 00929293 slli t0,t0,0x9 -80010038: 40520233 sub tp,tp,t0 -8001003c: bfff0297 auipc t0,0xbfff0 -80010040: fd428293 addi t0,t0,-44 # 40000010 -80010044: 0002a583 lw a1,0(t0) -80010048: 038000ef jal ra,80010080
-8001004c: 00151513 slli a0,a0,0x1 -80010050: 00156513 ori a0,a0,1 -80010054: 0100006f j 80010064 - -80010058 : -80010058: fff00293 li t0,-1 -8001005c: 00554533 xor a0,a0,t0 -80010060: 0040006f j 80010064 - -80010064 : -80010064: f14022f3 csrr t0,mhartid -80010068: 00029863 bnez t0,80010078 -8001006c: bfff0297 auipc t0,0xbfff0 -80010070: fb428293 addi t0,t0,-76 # 40000020 -80010074: 00a2a023 sw a0,0(t0) - -80010078 : -80010078: 10500073 wfi -8001007c: ffdff06f j 80010078 +8001001c: bfff0297 auipc t0,0xbfff0 +80010020: 0242a283 lw t0,36(t0) # 40000040 +80010024: 40550533 sub a0,a0,t0 +80010028: 00351293 slli t0,a0,0x3 +8001002c: 40510133 sub sp,sp,t0 +80010030: 00629293 slli t0,t0,0x6 +80010034: 40510133 sub sp,sp,t0 +80010038: 00010213 mv tp,sp +8001003c: 00100293 li t0,1 +80010040: 00929293 slli t0,t0,0x9 +80010044: 40520233 sub tp,tp,t0 +80010048: bfff0297 auipc t0,0xbfff0 +8001004c: fc828293 addi t0,t0,-56 # 40000010 +80010050: 0002a583 lw a1,0(t0) +80010054: 038000ef jal ra,8001008c
+80010058: 00151513 slli a0,a0,0x1 +8001005c: 00156513 ori a0,a0,1 +80010060: 0100006f j 80010070 + +80010064 : +80010064: fff00293 li t0,-1 +80010068: 00554533 xor a0,a0,t0 +8001006c: 0040006f j 80010070 + +80010070 : +80010070: f14022f3 csrr t0,mhartid +80010074: 00029863 bnez t0,80010084 +80010078: bfff0297 auipc t0,0xbfff0 +8001007c: fa828293 addi t0,t0,-88 # 40000020 +80010080: 00a2a023 sw a0,0(t0) + +80010084 : +80010084: 10500073 wfi +80010088: ffdff06f j 80010084 Disassembly of section .text.startup: -80010080
: -80010080: 00400293 li t0,4 -80010084: 00000797 auipc a5,0x0 -80010088: 02c7b707 fld fa4,44(a5) # 800100b0 -8001008c: d20007d3 fcvt.d.w fa5,zero -80010090: 0002808b 0x2808b -80010094: 02e7f7d3 fadd.d fa5,fa5,fa4 -80010098: 00500793 li a5,5 -8001009c: d2078753 fcvt.d.w fa4,a5 -800100a0: a2f72553 feq.d a0,fa4,fa5 -800100a4: 00154513 xori a0,a0,1 -800100a8: 00008067 ret +8001008c
: +8001008c: 00400293 li t0,4 +80010090: 00000797 auipc a5,0x0 +80010094: 0307b707 fld fa4,48(a5) # 800100c0 +80010098: d20007d3 fcvt.d.w fa5,zero +8001009c: 0002808b 0x2808b +800100a0: 02e7f7d3 fadd.d fa5,fa5,fa4 +800100a4: 00500793 li a5,5 +800100a8: d2078753 fcvt.d.w fa4,a5 +800100ac: a2f72553 feq.d a0,fa4,fa5 +800100b0: 00154513 xori a0,a0,1 +800100b4: 00008067 ret Disassembly of section .sdata: -800100b0 <__bss_end-0x8>: -800100b0: 0000 unimp -800100b2: 0000 unimp -800100b4: 0000 unimp -800100b6: 3ff0 fld fa2,248(a5) +800100c0 <__bss_end-0x8>: +800100c0: 0000 unimp +800100c2: 0000 unimp +800100c4: 0000 unimp +800100c6: 3ff0 fld fa2,248(a5) Disassembly of section .comment: @@ -83,18 +86,18 @@ Disassembly of section .comment: 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm a: 3120 fld fs0,96(a0) c: 2e30 fld fa2,88(a2) - e: 2e31 jal 32a + e: 2e32 fld ft8,264(sp) 10: 0030 addi a2,sp,8 Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2f41 jal 790 + 0: 3341 jal fffffd80 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 - c: 0025 c.nop 9 + c: 0029 c.nop 10 e: 0000 unimp 10: 1004 addi s1,sp,32 12: 7205 lui tp,0xfffe1 @@ -108,3 +111,5 @@ Disassembly of section .riscv.attributes: 26: 3266 fld ft4,120(sp) 28: 3070 fld fa2,224(s0) 2a: 645f 7032 0030 0x307032645f + 30: 0108 addi a0,sp,128 + 32: 0b0a slli s6,s6,0x2 diff --git a/sw/banshee/tests/dump/large_lfsr.dump b/sw/banshee/tests/dump/large_lfsr.dump index 48f1b679..79bfc797 100644 --- a/sw/banshee/tests/dump/large_lfsr.dump +++ b/sw/banshee/tests/dump/large_lfsr.dump @@ -12,7 +12,7 @@ Disassembly of section .text: 80010000 <_start>: 80010000: 00001197 auipc gp,0x1 -80010004: 8d018193 addi gp,gp,-1840 # 800108d0 <__global_pointer$> +80010004: 8e018193 addi gp,gp,-1824 # 800108e0 <__global_pointer$> 80010008: 0040006f j 8001000c 8001000c : @@ -20,59 +20,62 @@ Disassembly of section .text: 80010010: ffc10113 addi sp,sp,-4 # 40000008 80010014: 00012103 lw sp,0(sp) 
80010018: f1402573 csrr a0,mhartid -8001001c: 00351293 slli t0,a0,0x3 -80010020: 40510133 sub sp,sp,t0 -80010024: 00629293 slli t0,t0,0x6 -80010028: 40510133 sub sp,sp,t0 -8001002c: 00010213 mv tp,sp -80010030: 00100293 li t0,1 -80010034: 00929293 slli t0,t0,0x9 -80010038: 40520233 sub tp,tp,t0 -8001003c: bfff0297 auipc t0,0xbfff0 -80010040: fd428293 addi t0,t0,-44 # 40000010 -80010044: 0002a583 lw a1,0(t0) -80010048: 038000ef jal ra,80010080
-8001004c: 00151513 slli a0,a0,0x1 -80010050: 00156513 ori a0,a0,1 -80010054: 0100006f j 80010064 - -80010058 : -80010058: fff00293 li t0,-1 -8001005c: 00554533 xor a0,a0,t0 -80010060: 0040006f j 80010064 - -80010064 : -80010064: f14022f3 csrr t0,mhartid -80010068: 00029863 bnez t0,80010078 -8001006c: bfff0297 auipc t0,0xbfff0 -80010070: fb428293 addi t0,t0,-76 # 40000020 -80010074: 00a2a023 sw a0,0(t0) - -80010078 : -80010078: 10500073 wfi -8001007c: ffdff06f j 80010078 +8001001c: bfff0297 auipc t0,0xbfff0 +80010020: 0242a283 lw t0,36(t0) # 40000040 +80010024: 40550533 sub a0,a0,t0 +80010028: 00351293 slli t0,a0,0x3 +8001002c: 40510133 sub sp,sp,t0 +80010030: 00629293 slli t0,t0,0x6 +80010034: 40510133 sub sp,sp,t0 +80010038: 00010213 mv tp,sp +8001003c: 00100293 li t0,1 +80010040: 00929293 slli t0,t0,0x9 +80010044: 40520233 sub tp,tp,t0 +80010048: bfff0297 auipc t0,0xbfff0 +8001004c: fc828293 addi t0,t0,-56 # 40000010 +80010050: 0002a583 lw a1,0(t0) +80010054: 038000ef jal ra,8001008c
+80010058: 00151513 slli a0,a0,0x1 +8001005c: 00156513 ori a0,a0,1 +80010060: 0100006f j 80010070 + +80010064 : +80010064: fff00293 li t0,-1 +80010068: 00554533 xor a0,a0,t0 +8001006c: 0040006f j 80010070 + +80010070 : +80010070: f14022f3 csrr t0,mhartid +80010074: 00029863 bnez t0,80010084 +80010078: bfff0297 auipc t0,0xbfff0 +8001007c: fa828293 addi t0,t0,-88 # 40000020 +80010080: 00a2a023 sw a0,0(t0) + +80010084 : +80010084: 10500073 wfi +80010088: ffdff06f j 80010084 Disassembly of section .text.startup: -80010080
: -80010080: 400007b7 lui a5,0x40000 -80010084: 0087a783 lw a5,8(a5) # 40000008 -80010088: 05f5e6b7 lui a3,0x5f5e -8001008c: d0000637 lui a2,0xd0000 -80010090: 0017e713 ori a4,a5,1 -80010094: 10068693 addi a3,a3,256 # 5f5e100 -80010098: 00160613 addi a2,a2,1 # d0000001 -8001009c: 00177793 andi a5,a4,1 -800100a0: 40f007b3 neg a5,a5 -800100a4: 00175713 srli a4,a4,0x1 -800100a8: 00c7f7b3 and a5,a5,a2 -800100ac: fff68693 addi a3,a3,-1 -800100b0: 00e7c733 xor a4,a5,a4 -800100b4: fe0694e3 bnez a3,8001009c -800100b8: 400007b7 lui a5,0x40000 -800100bc: 02e7a423 sw a4,40(a5) # 40000028 -800100c0: 00000513 li a0,0 -800100c4: 00008067 ret +8001008c
: +8001008c: 400007b7 lui a5,0x40000 +80010090: 0087a783 lw a5,8(a5) # 40000008 +80010094: 05f5e6b7 lui a3,0x5f5e +80010098: d0000637 lui a2,0xd0000 +8001009c: 0017e713 ori a4,a5,1 +800100a0: 10068693 addi a3,a3,256 # 5f5e100 +800100a4: 00160613 addi a2,a2,1 # d0000001 +800100a8: 00177793 andi a5,a4,1 +800100ac: 40f007b3 neg a5,a5 +800100b0: 00175713 srli a4,a4,0x1 +800100b4: 00c7f7b3 and a5,a5,a2 +800100b8: fff68693 addi a3,a3,-1 +800100bc: 00e7c733 xor a4,a5,a4 +800100c0: fe0694e3 bnez a3,800100a8 +800100c4: 400007b7 lui a5,0x40000 +800100c8: 02e7a423 sw a4,40(a5) # 40000028 +800100cc: 00000513 li a0,0 +800100d0: 00008067 ret Disassembly of section .comment: @@ -80,20 +83,20 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm - a: 3920 fld fs0,112(a0) - c: 322e fld ft4,232(sp) - e: 302e fld ft0,232(sp) - ... + a: 3120 fld fs0,96(a0) + c: 2e30 fld fa2,88(a2) + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2f41 jal 790 + 0: 3341 jal fffffd80 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 - c: 0025 c.nop 9 + c: 0029 c.nop 10 e: 0000 unimp 10: 1004 addi s1,sp,32 12: 7205 lui tp,0xfffe1 @@ -107,3 +110,5 @@ Disassembly of section .riscv.attributes: 26: 3266 fld ft4,120(sp) 28: 3070 fld fa2,224(s0) 2a: 645f 7032 0030 0x307032645f + 30: 0108 addi a0,sp,128 + 32: 0b0a slli s6,s6,0x2 diff --git a/sw/banshee/tests/dump/matmul_baseline.dump b/sw/banshee/tests/dump/matmul_baseline.dump index 25d9ef79..43a2a7c0 100644 --- a/sw/banshee/tests/dump/matmul_baseline.dump +++ b/sw/banshee/tests/dump/matmul_baseline.dump @@ -74,7 +74,7 @@ Disassembly of section .text: 80010000 <_start>: 80010000: 00001197 auipc gp,0x1 -80010004: b6018193 addi gp,gp,-1184 # 80010b60 <__global_pointer$> +80010004: b7018193 addi gp,gp,-1168 # 80010b70 <__global_pointer$> 80010008: 0040006f j 8001000c 
8001000c : @@ -82,234 +82,238 @@ Disassembly of section .text: 80010010: ffc10113 addi sp,sp,-4 # 40000008 80010014: 00012103 lw sp,0(sp) 80010018: f1402573 csrr a0,mhartid -8001001c: 00351293 slli t0,a0,0x3 -80010020: 40510133 sub sp,sp,t0 -80010024: 00629293 slli t0,t0,0x6 -80010028: 40510133 sub sp,sp,t0 -8001002c: 00010213 mv tp,sp -80010030: 00100293 li t0,1 -80010034: 00929293 slli t0,t0,0x9 -80010038: 40520233 sub tp,tp,t0 -8001003c: bfff0297 auipc t0,0xbfff0 -80010040: fd428293 addi t0,t0,-44 # 40000010 -80010044: 0002a583 lw a1,0(t0) -80010048: 038000ef jal ra,80010080
-8001004c: 00151513 slli a0,a0,0x1 -80010050: 00156513 ori a0,a0,1 -80010054: 0100006f j 80010064 +8001001c: bfff0297 auipc t0,0xbfff0 +80010020: 0242a283 lw t0,36(t0) # 40000040 +80010024: 40550533 sub a0,a0,t0 +80010028: 00351293 slli t0,a0,0x3 +8001002c: 40510133 sub sp,sp,t0 +80010030: 00629293 slli t0,t0,0x6 +80010034: 40510133 sub sp,sp,t0 +80010038: 00010213 mv tp,sp +8001003c: 00100293 li t0,1 +80010040: 00929293 slli t0,t0,0x9 +80010044: 40520233 sub tp,tp,t0 +80010048: bfff0297 auipc t0,0xbfff0 +8001004c: fc828293 addi t0,t0,-56 # 40000010 +80010050: 0002a583 lw a1,0(t0) +80010054: 038000ef jal ra,8001008c
+80010058: 00151513 slli a0,a0,0x1 +8001005c: 00156513 ori a0,a0,1 +80010060: 0100006f j 80010070 -80010058 : -80010058: fff00293 li t0,-1 -8001005c: 00554533 xor a0,a0,t0 -80010060: 0040006f j 80010064 +80010064 : +80010064: fff00293 li t0,-1 +80010068: 00554533 xor a0,a0,t0 +8001006c: 0040006f j 80010070 -80010064 : -80010064: f14022f3 csrr t0,mhartid -80010068: 00029863 bnez t0,80010078 -8001006c: bfff0297 auipc t0,0xbfff0 -80010070: fb428293 addi t0,t0,-76 # 40000020 -80010074: 00a2a023 sw a0,0(t0) +80010070 : +80010070: f14022f3 csrr t0,mhartid +80010074: 00029863 bnez t0,80010084 +80010078: bfff0297 auipc t0,0xbfff0 +8001007c: fa828293 addi t0,t0,-88 # 40000020 +80010080: 00a2a023 sw a0,0(t0) -80010078 : -80010078: 10500073 wfi -8001007c: ffdff06f j 80010078 +80010084 : +80010084: 10500073 wfi +80010088: ffdff06f j 80010084 Disassembly of section .text.startup: -80010080
: -80010080: ff010113 addi sp,sp,-16 -80010084: 00812623 sw s0,12(sp) -80010088: 00912423 sw s1,8(sp) -8001008c: 01212223 sw s2,4(sp) -80010090: 00002683 lw a3,0(zero) # 0 -80010094: 00058f13 mv t5,a1 -80010098: 00d5e463 bltu a1,a3,800100a0 -8001009c: 00068f13 mv t5,a3 -800100a0: 00168e13 addi t3,a3,1 -800100a4: 03c68733 mul a4,a3,t3 -800100a8: 09000293 li t0,144 -800100ac: 00170713 addi a4,a4,1 -800100b0: 00371713 slli a4,a4,0x3 -800100b4: 00e28fb3 add t6,t0,a4 -800100b8: 00ef8eb3 add t4,t6,a4 -800100bc: 0a051863 bnez a0,8001016c -800100c0: 06068663 beqz a3,8001012c -800100c4: 003e1313 slli t1,t3,0x3 -800100c8: 00028893 mv a7,t0 -800100cc: 000e0613 mv a2,t3 -800100d0: 00000813 li a6,0 -800100d4: 00100593 li a1,1 -800100d8: 00000797 auipc a5,0x0 -800100dc: 2887b707 fld fa4,648(a5) # 80010360 -800100e0: 00088713 mv a4,a7 -800100e4: 00058793 mv a5,a1 -800100e8: d21787d3 fcvt.d.wu fa5,a5 -800100ec: 00870713 addi a4,a4,8 -800100f0: 00178793 addi a5,a5,1 -800100f4: 12e7f7d3 fmul.d fa5,fa5,fa4 -800100f8: fef73c27 fsd fa5,-8(a4) -800100fc: fef616e3 bne a2,a5,800100e8 -80010100: 00180813 addi a6,a6,1 -80010104: 00d585b3 add a1,a1,a3 -80010108: 00d60633 add a2,a2,a3 -8001010c: 006888b3 add a7,a7,t1 -80010110: fd0698e3 bne a3,a6,800100e0 -80010114: 00100793 li a5,1 -80010118: 03e7f7b3 remu a5,a5,t5 -8001011c: 1c078863 beqz a5,800102ec -80010120: 00200793 li a5,2 -80010124: 03e7f7b3 remu a5,a5,t5 -80010128: 06078463 beqz a5,80010190 -8001012c: bfff0917 auipc s2,0xbfff0 -80010130: f0c90913 addi s2,s2,-244 # 40000038 -80010134: 00092783 lw a5,0(s2) -80010138: 00078013 mv zero,a5 -8001013c: b00027f3 csrr a5,mcycle -80010140: 11e56a63 bltu a0,t5,80010254 -80010144: b00027f3 csrr a5,mcycle -80010148: 00092903 lw s2,0(s2) -8001014c: 00090013 mv zero,s2 -80010150: 08050e63 beqz a0,800101ec -80010154: 00000513 li a0,0 -80010158: 00c12403 lw s0,12(sp) -8001015c: 00812483 lw s1,8(sp) -80010160: 00412903 lw s2,4(sp) -80010164: 01010113 addi sp,sp,16 -80010168: 00008067 ret -8001016c: 
00100793 li a5,1 -80010170: 03e7f7b3 remu a5,a5,t5 -80010174: 16a78663 beq a5,a0,800102e0 -80010178: 00200793 li a5,2 -8001017c: 03e7f7b3 remu a5,a5,t5 -80010180: faa796e3 bne a5,a0,8001012c -80010184: fa0684e3 beqz a3,8001012c -80010188: 00000797 auipc a5,0x0 -8001018c: 1d87b707 fld fa4,472(a5) # 80010360 -80010190: 00000893 li a7,0 -80010194: 00300593 li a1,3 -80010198: 00000813 li a6,0 -8001019c: fff68313 addi t1,a3,-1 -800101a0: 00389613 slli a2,a7,0x3 -800101a4: 00ce8633 add a2,t4,a2 -800101a8: 00000713 li a4,0 -800101ac: 00b707b3 add a5,a4,a1 -800101b0: d21787d3 fcvt.d.wu fa5,a5 -800101b4: 00860613 addi a2,a2,8 -800101b8: 00170713 addi a4,a4,1 -800101bc: 12e7f7d3 fmul.d fa5,fa5,fa4 -800101c0: fef63c27 fsd fa5,-8(a2) -800101c4: fed764e3 bltu a4,a3,800101ac -800101c8: 00000793 li a5,0 -800101cc: 00068463 beqz a3,800101d4 -800101d0: 00030793 mv a5,t1 -800101d4: 00178793 addi a5,a5,1 -800101d8: 00180813 addi a6,a6,1 -800101dc: 00f585b3 add a1,a1,a5 -800101e0: 01c888b3 add a7,a7,t3 -800101e4: fad86ee3 bltu a6,a3,800101a0 -800101e8: f45ff06f j 8001012c -800101ec: f60686e3 beqz a3,80010158 -800101f0: ff800813 li a6,-8 -800101f4: 03068833 mul a6,a3,a6 -800101f8: 003e1e13 slli t3,t3,0x3 -800101fc: ff8e0713 addi a4,t3,-8 -80010200: 00800593 li a1,8 -80010204: 00ee8733 add a4,t4,a4 -80010208: 00000613 li a2,0 -8001020c: 00000797 auipc a5,0x0 -80010210: 15c7b687 fld fa3,348(a5) # 80010368 -80010214: d20007d3 fcvt.d.w fa5,zero -80010218: 00e807b3 add a5,a6,a4 -8001021c: 0007b707 fld fa4,0(a5) -80010220: 00878793 addi a5,a5,8 -80010224: 02e7f7d3 fadd.d fa5,fa5,fa4 -80010228: fef71ae3 bne a4,a5,8001021c -8001022c: 0005b707 fld fa4,0(a1) -80010230: 0ae7f7d3 fsub.d fa5,fa5,fa4 -80010234: 22f7a7d3 fabs.d fa5,fa5 -80010238: a2f697d3 flt.d a5,fa3,fa5 -8001023c: 00160613 addi a2,a2,1 -80010240: 00858593 addi a1,a1,8 -80010244: 00f50533 add a0,a0,a5 -80010248: 01c70733 add a4,a4,t3 -8001024c: fcc694e3 bne a3,a2,80010214 -80010250: f09ff06f j 80010158 -80010254: efe6e8e3 bltu 
a3,t5,80010144 -80010258: 003e1313 slli t1,t3,0x3 -8001025c: 026503b3 mul t2,a0,t1 -80010260: ff830893 addi a7,t1,-8 -80010264: 011288b3 add a7,t0,a7 -80010268: 00000493 li s1,0 -8001026c: 026f0433 mul s0,t5,t1 -80010270: 007282b3 add t0,t0,t2 -80010274: 007888b3 add a7,a7,t2 -80010278: 007e83b3 add t2,t4,t2 -8001027c: ec0684e3 beqz a3,80010144 -80010280: 000f8813 mv a6,t6 -80010284: 00038613 mv a2,t2 -80010288: 00000593 li a1,0 -8001028c: 00063787 fld fa5,0(a2) -80010290: 00080713 mv a4,a6 -80010294: 00028793 mv a5,t0 -80010298: 0007b687 fld fa3,0(a5) -8001029c: 00073707 fld fa4,0(a4) +8001008c
: +8001008c: ff010113 addi sp,sp,-16 +80010090: 00812623 sw s0,12(sp) +80010094: 00912423 sw s1,8(sp) +80010098: 01212223 sw s2,4(sp) +8001009c: 00002803 lw a6,0(zero) # 0 +800100a0: 00058893 mv a7,a1 +800100a4: 0105e463 bltu a1,a6,800100ac +800100a8: 00080893 mv a7,a6 +800100ac: 00180e13 addi t3,a6,1 +800100b0: 03c80733 mul a4,a6,t3 +800100b4: 09000f93 li t6,144 +800100b8: 00170713 addi a4,a4,1 +800100bc: 00371713 slli a4,a4,0x3 +800100c0: 00ef8f33 add t5,t6,a4 +800100c4: 00ef0eb3 add t4,t5,a4 +800100c8: 16051e63 bnez a0,80010244 +800100cc: 06080663 beqz a6,80010138 +800100d0: 003e1293 slli t0,t3,0x3 +800100d4: 000f8313 mv t1,t6 +800100d8: 000e0693 mv a3,t3 +800100dc: 00000593 li a1,0 +800100e0: 00100613 li a2,1 +800100e4: 00000797 auipc a5,0x0 +800100e8: 28c7b707 fld fa4,652(a5) # 80010370 +800100ec: 00030713 mv a4,t1 +800100f0: 00060793 mv a5,a2 +800100f4: d21787d3 fcvt.d.wu fa5,a5 +800100f8: 00870713 addi a4,a4,8 +800100fc: 00178793 addi a5,a5,1 +80010100: 12e7f7d3 fmul.d fa5,fa5,fa4 +80010104: fef73c27 fsd fa5,-8(a4) +80010108: fef696e3 bne a3,a5,800100f4 +8001010c: 00158593 addi a1,a1,1 +80010110: 01060633 add a2,a2,a6 +80010114: 010686b3 add a3,a3,a6 +80010118: 00530333 add t1,t1,t0 +8001011c: fcb818e3 bne a6,a1,800100ec +80010120: 00100793 li a5,1 +80010124: 0317f7b3 remu a5,a5,a7 +80010128: 04078e63 beqz a5,80010184 +8001012c: 00200793 li a5,2 +80010130: 0317f7b3 remu a5,a5,a7 +80010134: 0a078a63 beqz a5,800101e8 +80010138: bfff0497 auipc s1,0xbfff0 +8001013c: f0048493 addi s1,s1,-256 # 40000038 +80010140: 0004a783 lw a5,0(s1) +80010144: 00078013 mv zero,a5 +80010148: b00027f3 csrr a5,mcycle +8001014c: 19156463 bltu a0,a7,800102d4 +80010150: b00027f3 csrr a5,mcycle +80010154: 0004a483 lw s1,0(s1) +80010158: 00048013 mv zero,s1 +8001015c: 10050863 beqz a0,8001026c +80010160: 00000513 li a0,0 +80010164: 00c12403 lw s0,12(sp) +80010168: 00812483 lw s1,8(sp) +8001016c: 00412903 lw s2,4(sp) +80010170: 01010113 addi sp,sp,16 +80010174: 00008067 ret +80010178: 
fc0800e3 beqz a6,80010138 +8001017c: 00000797 auipc a5,0x0 +80010180: 1f47b707 fld fa4,500(a5) # 80010370 +80010184: 00000313 li t1,0 +80010188: 00200613 li a2,2 +8001018c: 00000593 li a1,0 +80010190: fff80293 addi t0,a6,-1 +80010194: 00331713 slli a4,t1,0x3 +80010198: 00ef0733 add a4,t5,a4 +8001019c: 00000793 li a5,0 +800101a0: 00c786b3 add a3,a5,a2 +800101a4: d21687d3 fcvt.d.wu fa5,a3 +800101a8: 00870713 addi a4,a4,8 +800101ac: 00178793 addi a5,a5,1 +800101b0: 12e7f7d3 fmul.d fa5,fa5,fa4 +800101b4: fef73c27 fsd fa5,-8(a4) +800101b8: ff07e4e3 bltu a5,a6,800101a0 +800101bc: 00000793 li a5,0 +800101c0: 00080463 beqz a6,800101c8 +800101c4: 00028793 mv a5,t0 +800101c8: 00178793 addi a5,a5,1 +800101cc: 00158593 addi a1,a1,1 +800101d0: 00f60633 add a2,a2,a5 +800101d4: 01c30333 add t1,t1,t3 +800101d8: fb05eee3 bltu a1,a6,80010194 +800101dc: 00200793 li a5,2 +800101e0: 0317f7b3 remu a5,a5,a7 +800101e4: f4f51ae3 bne a0,a5,80010138 +800101e8: 00000313 li t1,0 +800101ec: 00300613 li a2,3 +800101f0: 00000593 li a1,0 +800101f4: fff80293 addi t0,a6,-1 +800101f8: 00331713 slli a4,t1,0x3 +800101fc: 00ee8733 add a4,t4,a4 +80010200: 00000793 li a5,0 +80010204: 00c786b3 add a3,a5,a2 +80010208: d21687d3 fcvt.d.wu fa5,a3 +8001020c: 00870713 addi a4,a4,8 +80010210: 00178793 addi a5,a5,1 +80010214: 12e7f7d3 fmul.d fa5,fa5,fa4 +80010218: fef73c27 fsd fa5,-8(a4) +8001021c: ff07e4e3 bltu a5,a6,80010204 +80010220: 00000793 li a5,0 +80010224: 00080463 beqz a6,8001022c +80010228: 00028793 mv a5,t0 +8001022c: 00178793 addi a5,a5,1 +80010230: 00158593 addi a1,a1,1 +80010234: 00f60633 add a2,a2,a5 +80010238: 01c30333 add t1,t1,t3 +8001023c: fb05eee3 bltu a1,a6,800101f8 +80010240: ef9ff06f j 80010138 +80010244: 00100793 li a5,1 +80010248: 0317f7b3 remu a5,a5,a7 +8001024c: f2a786e3 beq a5,a0,80010178 +80010250: 00200793 li a5,2 +80010254: 0317f7b3 remu a5,a5,a7 +80010258: eea790e3 bne a5,a0,80010138 +8001025c: ec080ee3 beqz a6,80010138 +80010260: 00000797 auipc a5,0x0 +80010264: 1107b707 fld 
fa4,272(a5) # 80010370 +80010268: f81ff06f j 800101e8 +8001026c: ee080ce3 beqz a6,80010164 +80010270: ff800593 li a1,-8 +80010274: 02b805b3 mul a1,a6,a1 +80010278: 003e1e13 slli t3,t3,0x3 +8001027c: ff8e0713 addi a4,t3,-8 +80010280: 00800613 li a2,8 +80010284: 00ee8733 add a4,t4,a4 +80010288: 00000693 li a3,0 +8001028c: 00000797 auipc a5,0x0 +80010290: 0ec7b687 fld fa3,236(a5) # 80010378 +80010294: d20007d3 fcvt.d.w fa5,zero +80010298: 00e587b3 add a5,a1,a4 +8001029c: 0007b707 fld fa4,0(a5) 800102a0: 00878793 addi a5,a5,8 -800102a4: 00670733 add a4,a4,t1 -800102a8: 7ae6f7c3 fmadd.d fa5,fa3,fa4,fa5 -800102ac: fef896e3 bne a7,a5,80010298 -800102b0: 00f63027 fsd fa5,0(a2) -800102b4: 00158593 addi a1,a1,1 -800102b8: 00860613 addi a2,a2,8 -800102bc: 00880813 addi a6,a6,8 -800102c0: fcb696e3 bne a3,a1,8001028c -800102c4: 03e6d7b3 divu a5,a3,t5 -800102c8: 00148493 addi s1,s1,1 -800102cc: 008282b3 add t0,t0,s0 -800102d0: 008888b3 add a7,a7,s0 -800102d4: 008383b3 add t2,t2,s0 -800102d8: faf4e2e3 bltu s1,a5,8001027c -800102dc: e69ff06f j 80010144 -800102e0: e40686e3 beqz a3,8001012c -800102e4: 00000797 auipc a5,0x0 -800102e8: 07c7b707 fld fa4,124(a5) # 80010360 -800102ec: 00000893 li a7,0 -800102f0: 00200593 li a1,2 -800102f4: 00000813 li a6,0 -800102f8: fff68313 addi t1,a3,-1 -800102fc: 00389613 slli a2,a7,0x3 -80010300: 00cf8633 add a2,t6,a2 -80010304: 00000713 li a4,0 -80010308: 00b707b3 add a5,a4,a1 -8001030c: d21787d3 fcvt.d.wu fa5,a5 -80010310: 00860613 addi a2,a2,8 -80010314: 00170713 addi a4,a4,1 -80010318: 12e7f7d3 fmul.d fa5,fa5,fa4 -8001031c: fef63c27 fsd fa5,-8(a2) -80010320: fed764e3 bltu a4,a3,80010308 -80010324: 00000793 li a5,0 -80010328: 00068463 beqz a3,80010330 -8001032c: 00030793 mv a5,t1 -80010330: 00178793 addi a5,a5,1 -80010334: 00180813 addi a6,a6,1 -80010338: 00f585b3 add a1,a1,a5 -8001033c: 01c888b3 add a7,a7,t3 -80010340: fad86ee3 bltu a6,a3,800102fc -80010344: 00200793 li a5,2 -80010348: 03e7f7b3 remu a5,a5,t5 -8001034c: def510e3 bne 
a0,a5,8001012c -80010350: e41ff06f j 80010190 +800102a4: 02e7f7d3 fadd.d fa5,fa5,fa4 +800102a8: fef71ae3 bne a4,a5,8001029c +800102ac: 00063707 fld fa4,0(a2) +800102b0: 0ae7f7d3 fsub.d fa5,fa5,fa4 +800102b4: 22f7a7d3 fabs.d fa5,fa5 +800102b8: a2f697d3 flt.d a5,fa3,fa5 +800102bc: 00168693 addi a3,a3,1 +800102c0: 00860613 addi a2,a2,8 +800102c4: 00f50533 add a0,a0,a5 +800102c8: 01c70733 add a4,a4,t3 +800102cc: fcd814e3 bne a6,a3,80010294 +800102d0: e95ff06f j 80010164 +800102d4: e7186ee3 bltu a6,a7,80010150 +800102d8: 03185933 divu s2,a6,a7 +800102dc: 003e1613 slli a2,t3,0x3 +800102e0: ff860693 addi a3,a2,-8 +800102e4: 00df86b3 add a3,t6,a3 +800102e8: 00000413 li s0,0 +800102ec: 02c502b3 mul t0,a0,a2 +800102f0: 031e03b3 mul t2,t3,a7 +800102f4: 005f8fb3 add t6,t6,t0 +800102f8: 005686b3 add a3,a3,t0 +800102fc: 005e82b3 add t0,t4,t0 +80010300: 00339393 slli t2,t2,0x3 +80010304: e40806e3 beqz a6,80010150 +80010308: 000f0313 mv t1,t5 +8001030c: 00028593 mv a1,t0 +80010310: 00000893 li a7,0 +80010314: 0005b787 fld fa5,0(a1) +80010318: 00030713 mv a4,t1 +8001031c: 000f8793 mv a5,t6 +80010320: 0007b687 fld fa3,0(a5) +80010324: 00073707 fld fa4,0(a4) +80010328: 00878793 addi a5,a5,8 +8001032c: 00c70733 add a4,a4,a2 +80010330: 7ae6f7c3 fmadd.d fa5,fa3,fa4,fa5 +80010334: fef696e3 bne a3,a5,80010320 +80010338: 00f5b027 fsd fa5,0(a1) +8001033c: 00188893 addi a7,a7,1 +80010340: 00858593 addi a1,a1,8 +80010344: 00830313 addi t1,t1,8 +80010348: fd1816e3 bne a6,a7,80010314 +8001034c: 00140413 addi s0,s0,1 +80010350: 007f8fb3 add t6,t6,t2 +80010354: 007686b3 add a3,a3,t2 +80010358: 007282b3 add t0,t0,t2 +8001035c: fb2464e3 bltu s0,s2,80010304 +80010360: df1ff06f j 80010150 Disassembly of section .sdata: -80010360 <__bss_end-0x10>: -80010360: e354 fsw fa3,4(a4) -80010362: 9ba5 andi a5,a5,-23 -80010364: 20c4 fld fs1,128(s1) -80010366: 4009 c.li zero,2 -80010368: a9fc fsd fa5,208(a1) -8001036a: d2f1 beqz a3,8001032e -8001036c: 624d lui tp,0x13 -8001036e: 3f50 fld fa2,184(a4) +80010370 
<__bss_end-0x10>: +80010370: e354 fsw fa3,4(a4) +80010372: 9ba5 andi a5,a5,-23 +80010374: 20c4 fld fs1,128(s1) +80010376: 4009 c.li zero,2 +80010378: a9fc fsd fa5,208(a1) +8001037a: d2f1 beqz a3,8001033e +8001037c: 624d lui tp,0x13 +8001037e: 3f50 fld fa2,184(a4) Disassembly of section .comment: @@ -317,20 +321,20 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm - a: 3920 fld fs0,112(a0) - c: 322e fld ft4,232(sp) - e: 302e fld ft0,232(sp) - ... + a: 3120 fld fs0,96(a0) + c: 2e30 fld fa2,88(a2) + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2f41 jal 790 + 0: 3341 jal fffffd80 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 - c: 0025 c.nop 9 + c: 0029 c.nop 10 e: 0000 unimp 10: 1004 addi s1,sp,32 12: 7205 lui tp,0xfffe1 @@ -344,3 +348,5 @@ Disassembly of section .riscv.attributes: 26: 3266 fld ft4,120(sp) 28: 3070 fld fa2,224(s0) 2a: 645f 7032 0030 0x307032645f + 30: 0108 addi a0,sp,128 + 32: 0b0a slli s6,s6,0x2 diff --git a/sw/banshee/tests/dump/matmul_ssr.dump b/sw/banshee/tests/dump/matmul_ssr.dump index 2ca8d6ac..46605164 100644 --- a/sw/banshee/tests/dump/matmul_ssr.dump +++ b/sw/banshee/tests/dump/matmul_ssr.dump @@ -74,7 +74,7 @@ Disassembly of section .text: 80010000 <_start>: 80010000: 00001197 auipc gp,0x1 -80010004: c5018193 addi gp,gp,-944 # 80010c50 <__global_pointer$> +80010004: c6018193 addi gp,gp,-928 # 80010c60 <__global_pointer$> 80010008: 0040006f j 8001000c 8001000c : @@ -82,299 +82,302 @@ Disassembly of section .text: 80010010: ffc10113 addi sp,sp,-4 # 40000008 80010014: 00012103 lw sp,0(sp) 80010018: f1402573 csrr a0,mhartid -8001001c: 00351293 slli t0,a0,0x3 -80010020: 40510133 sub sp,sp,t0 -80010024: 00629293 slli t0,t0,0x6 -80010028: 40510133 sub sp,sp,t0 -8001002c: 00010213 mv tp,sp -80010030: 00100293 li t0,1 -80010034: 00929293 slli 
t0,t0,0x9 -80010038: 40520233 sub tp,tp,t0 -8001003c: bfff0297 auipc t0,0xbfff0 -80010040: fd428293 addi t0,t0,-44 # 40000010 -80010044: 0002a583 lw a1,0(t0) -80010048: 174000ef jal ra,800101bc
-8001004c: 00151513 slli a0,a0,0x1 -80010050: 00156513 ori a0,a0,1 -80010054: 0100006f j 80010064 +8001001c: bfff0297 auipc t0,0xbfff0 +80010020: 0242a283 lw t0,36(t0) # 40000040 +80010024: 40550533 sub a0,a0,t0 +80010028: 00351293 slli t0,a0,0x3 +8001002c: 40510133 sub sp,sp,t0 +80010030: 00629293 slli t0,t0,0x6 +80010034: 40510133 sub sp,sp,t0 +80010038: 00010213 mv tp,sp +8001003c: 00100293 li t0,1 +80010040: 00929293 slli t0,t0,0x9 +80010044: 40520233 sub tp,tp,t0 +80010048: bfff0297 auipc t0,0xbfff0 +8001004c: fc828293 addi t0,t0,-56 # 40000010 +80010050: 0002a583 lw a1,0(t0) +80010054: 174000ef jal ra,800101c8
+80010058: 00151513 slli a0,a0,0x1 +8001005c: 00156513 ori a0,a0,1 +80010060: 0100006f j 80010070 -80010058 : -80010058: fff00293 li t0,-1 -8001005c: 00554533 xor a0,a0,t0 -80010060: 0040006f j 80010064 +80010064 : +80010064: fff00293 li t0,-1 +80010068: 00554533 xor a0,a0,t0 +8001006c: 0040006f j 80010070 -80010064 : -80010064: f14022f3 csrr t0,mhartid -80010068: 00029863 bnez t0,80010078 -8001006c: bfff0297 auipc t0,0xbfff0 -80010070: fb428293 addi t0,t0,-76 # 40000020 -80010074: 00a2a023 sw a0,0(t0) +80010070 : +80010070: f14022f3 csrr t0,mhartid +80010074: 00029863 bnez t0,80010084 +80010078: bfff0297 auipc t0,0xbfff0 +8001007c: fa828293 addi t0,t0,-88 # 40000020 +80010080: 00a2a023 sw a0,0(t0) -80010078 : -80010078: 10500073 wfi -8001007c: ffdff06f j 80010078 +80010084 : +80010084: 10500073 wfi +80010088: ffdff06f j 80010084 -80010080 : -80010080: ff010113 addi sp,sp,-16 -80010084: 00050313 mv t1,a0 -80010088: 00068f13 mv t5,a3 -8001008c: 00812623 sw s0,12(sp) -80010090: 00070513 mv a0,a4 -80010094: 00078e93 mv t4,a5 -80010098: 00088693 mv a3,a7 -8001009c: fff60893 addi a7,a2,-1 -800100a0: 00381813 slli a6,a6,0x3 -800100a4: 01089893 slli a7,a7,0x10 -800100a8: 01081813 slli a6,a6,0x10 -800100ac: 0108d893 srli a7,a7,0x10 -800100b0: 01085813 srli a6,a6,0x10 -800100b4: 03180fb3 mul t6,a6,a7 -800100b8: fff58713 addi a4,a1,-1 -800100bc: 002057b7 lui a5,0x205 -800100c0: fff30293 addi t0,t1,-1 -800100c4: 01071713 slli a4,a4,0x10 -800100c8: 8117a823 sw a7,-2032(a5) # 204810 -800100cc: 01075713 srli a4,a4,0x10 -800100d0: 00389393 slli t2,a7,0x3 -800100d4: 01029293 slli t0,t0,0x10 -800100d8: 80e7ac23 sw a4,-2024(a5) -800100dc: 0102d293 srli t0,t0,0x10 -800100e0: 01039393 slli t2,t2,0x10 -800100e4: 00351513 slli a0,a0,0x3 -800100e8: 8257a023 sw t0,-2016(a5) +8001008c : +8001008c: ff010113 addi sp,sp,-16 +80010090: 00068f93 mv t6,a3 +80010094: 01012e03 lw t3,16(sp) +80010098: 00812623 sw s0,12(sp) +8001009c: 00912423 sw s1,8(sp) +800100a0: 00050313 mv t1,a0 +800100a4: 
00070693 mv a3,a4 +800100a8: 00078f13 mv t5,a5 +800100ac: fff60e93 addi t4,a2,-1 +800100b0: 00381813 slli a6,a6,0x3 +800100b4: 01081513 slli a0,a6,0x10 +800100b8: 010e9e93 slli t4,t4,0x10 +800100bc: 010ede93 srli t4,t4,0x10 +800100c0: 01055513 srli a0,a0,0x10 +800100c4: 03d502b3 mul t0,a0,t4 +800100c8: fff58713 addi a4,a1,-1 +800100cc: 002057b7 lui a5,0x205 +800100d0: fff30393 addi t2,t1,-1 +800100d4: 01071713 slli a4,a4,0x10 +800100d8: 81d7a823 sw t4,-2032(a5) # 204810 +800100dc: 01075713 srli a4,a4,0x10 +800100e0: 003e9413 slli s0,t4,0x3 +800100e4: 01039393 slli t2,t2,0x10 +800100e8: 80e7ac23 sw a4,-2024(a5) 800100ec: 0103d393 srli t2,t2,0x10 -800100f0: 00800e13 li t3,8 -800100f4: 01051513 slli a0,a0,0x10 -800100f8: 83c7a823 sw t3,-2000(a5) -800100fc: 40700433 neg s0,t2 -80010100: 01055513 srli a0,a0,0x10 -80010104: 8287ac23 sw s0,-1992(a5) -80010108: 40750533 sub a0,a0,t2 -8001010c: 84a7a023 sw a0,-1984(a5) -80010110: 9117a823 sw a7,-1776(a5) -80010114: 010f9513 slli a0,t6,0x10 -80010118: 90e7ac23 sw a4,-1768(a5) -8001011c: 01055513 srli a0,a0,0x10 -80010120: 00371713 slli a4,a4,0x3 -80010124: 00e50733 add a4,a0,a4 -80010128: 9257a023 sw t0,-1760(a5) -8001012c: 01071713 slli a4,a4,0x10 -80010130: 9307a823 sw a6,-1744(a5) -80010134: 40ae0e33 sub t3,t3,a0 -80010138: 01075713 srli a4,a4,0x10 -8001013c: 93c7ac23 sw t3,-1736(a5) -80010140: 40e00733 neg a4,a4 -80010144: 94e7a023 sw a4,-1728(a5) -80010148: 8de7a823 sw t5,-1840(a5) -8001014c: 9dd7a823 sw t4,-1584(a5) -80010150: 7c00e073 csrsi 0x7c0,1 -80010154: 04030c63 beqz t1,800101ac -80010158: 04058a63 beqz a1,800101ac -8001015c: 01012803 lw a6,16(sp) -80010160: 00359793 slli a5,a1,0x3 -80010164: 40b005b3 neg a1,a1 -80010168: 00359513 slli a0,a1,0x3 -8001016c: 00381813 slli a6,a6,0x3 -80010170: 00f686b3 add a3,a3,a5 -80010174: 00000593 li a1,0 -80010178: 00a68733 add a4,a3,a0 -8001017c: 00073787 fld fa5,0(a4) -80010180: 00060a63 beqz a2,80010194 -80010184: 00000793 li a5,0 -80010188: 7a1077c3 fmadd.d fa5,ft0,ft1,fa5 
-8001018c: 00178793 addi a5,a5,1 -80010190: fef61ce3 bne a2,a5,80010188 -80010194: 00f73027 fsd fa5,0(a4) -80010198: 00870713 addi a4,a4,8 -8001019c: fee690e3 bne a3,a4,8001017c -800101a0: 00158593 addi a1,a1,1 -800101a4: 010686b3 add a3,a3,a6 -800101a8: fcb318e3 bne t1,a1,80010178 -800101ac: 7c00f073 csrci 0x7c0,1 -800101b0: 00c12403 lw s0,12(sp) -800101b4: 01010113 addi sp,sp,16 -800101b8: 00008067 ret +800100f0: 01041413 slli s0,s0,0x10 +800100f4: 00369693 slli a3,a3,0x3 +800100f8: 8277a023 sw t2,-2016(a5) +800100fc: 01045413 srli s0,s0,0x10 +80010100: 00800813 li a6,8 +80010104: 01069693 slli a3,a3,0x10 +80010108: 8307a823 sw a6,-2000(a5) +8001010c: 408004b3 neg s1,s0 +80010110: 0106d693 srli a3,a3,0x10 +80010114: 8297ac23 sw s1,-1992(a5) +80010118: 408686b3 sub a3,a3,s0 +8001011c: 84d7a023 sw a3,-1984(a5) +80010120: 91d7a823 sw t4,-1776(a5) +80010124: 01029693 slli a3,t0,0x10 +80010128: 90e7ac23 sw a4,-1768(a5) +8001012c: 0106d693 srli a3,a3,0x10 +80010130: 00371713 slli a4,a4,0x3 +80010134: 00e68733 add a4,a3,a4 +80010138: 9277a023 sw t2,-1760(a5) +8001013c: 01071713 slli a4,a4,0x10 +80010140: 92a7a823 sw a0,-1744(a5) +80010144: 40d80833 sub a6,a6,a3 +80010148: 01075713 srli a4,a4,0x10 +8001014c: 9307ac23 sw a6,-1736(a5) +80010150: 40e00733 neg a4,a4 +80010154: 94e7a023 sw a4,-1728(a5) +80010158: 8df7a823 sw t6,-1840(a5) +8001015c: 9de7a823 sw t5,-1584(a5) +80010160: 7c00e073 csrsi 0x7c0,1 +80010164: 04030863 beqz t1,800101b4 +80010168: 00000813 li a6,0 +8001016c: 00000513 li a0,0 +80010170: 04058263 beqz a1,800101b4 +80010174: 00381713 slli a4,a6,0x3 +80010178: 00e88733 add a4,a7,a4 +8001017c: 00000693 li a3,0 +80010180: 00073787 fld fa5,0(a4) +80010184: 00060a63 beqz a2,80010198 +80010188: 00000793 li a5,0 +8001018c: 7a1077c3 fmadd.d fa5,ft0,ft1,fa5 +80010190: 00178793 addi a5,a5,1 +80010194: fef61ce3 bne a2,a5,8001018c +80010198: 00f73027 fsd fa5,0(a4) +8001019c: 00168693 addi a3,a3,1 +800101a0: 00870713 addi a4,a4,8 +800101a4: fcd59ee3 bne a1,a3,80010180 
+800101a8: 00150513 addi a0,a0,1 +800101ac: 01c80833 add a6,a6,t3 +800101b0: fca312e3 bne t1,a0,80010174 +800101b4: 7c00f073 csrci 0x7c0,1 +800101b8: 00c12403 lw s0,12(sp) +800101bc: 00812483 lw s1,8(sp) +800101c0: 01010113 addi sp,sp,16 +800101c4: 00008067 ret Disassembly of section .text.startup: -800101bc
: -800101bc: fd010113 addi sp,sp,-48 -800101c0: 03212023 sw s2,32(sp) -800101c4: 00000913 li s2,0 -800101c8: 00092603 lw a2,0(s2) -800101cc: 02812423 sw s0,40(sp) -800101d0: 02112623 sw ra,44(sp) -800101d4: 02912223 sw s1,36(sp) -800101d8: 01312e23 sw s3,28(sp) -800101dc: 00050413 mv s0,a0 -800101e0: 00058e93 mv t4,a1 -800101e4: 00c5e463 bltu a1,a2,800101ec -800101e8: 00060e93 mv t4,a2 -800101ec: 00160813 addi a6,a2,1 -800101f0: 03060733 mul a4,a2,a6 -800101f4: 09000693 li a3,144 -800101f8: 00170713 addi a4,a4,1 -800101fc: 00371713 slli a4,a4,0x3 -80010200: 00e687b3 add a5,a3,a4 -80010204: 00e784b3 add s1,a5,a4 -80010208: 0a041c63 bnez s0,800102c0 -8001020c: 06060663 beqz a2,80010278 -80010210: 00381f13 slli t5,a6,0x3 -80010214: 00068e13 mv t3,a3 -80010218: 00080893 mv a7,a6 -8001021c: 00000313 li t1,0 -80010220: 00100513 li a0,1 -80010224: 00000717 auipc a4,0x0 -80010228: 22c73707 fld fa4,556(a4) # 80010450 -8001022c: 000e0593 mv a1,t3 -80010230: 00050713 mv a4,a0 -80010234: d21707d3 fcvt.d.wu fa5,a4 -80010238: 00858593 addi a1,a1,8 -8001023c: 00170713 addi a4,a4,1 -80010240: 12e7f7d3 fmul.d fa5,fa5,fa4 -80010244: fef5bc27 fsd fa5,-8(a1) -80010248: ff1716e3 bne a4,a7,80010234 -8001024c: 00130313 addi t1,t1,1 -80010250: 00c50533 add a0,a0,a2 -80010254: 00c708b3 add a7,a4,a2 -80010258: 01ee0e33 add t3,t3,t5 -8001025c: fc6618e3 bne a2,t1,8001022c -80010260: 00100713 li a4,1 -80010264: 03d77733 remu a4,a4,t4 -80010268: 18070063 beqz a4,800103e8 -8001026c: 00200713 li a4,2 +800101c8
: +800101c8: fd010113 addi sp,sp,-48 +800101cc: 03212023 sw s2,32(sp) +800101d0: 00000913 li s2,0 +800101d4: 00092603 lw a2,0(s2) +800101d8: 02812423 sw s0,40(sp) +800101dc: 02112623 sw ra,44(sp) +800101e0: 02912223 sw s1,36(sp) +800101e4: 01312e23 sw s3,28(sp) +800101e8: 00050413 mv s0,a0 +800101ec: 00058e93 mv t4,a1 +800101f0: 00c5e463 bltu a1,a2,800101f8 +800101f4: 00060e93 mv t4,a2 +800101f8: 00160813 addi a6,a2,1 +800101fc: 03060733 mul a4,a2,a6 +80010200: 09000693 li a3,144 +80010204: 00170713 addi a4,a4,1 +80010208: 00371713 slli a4,a4,0x3 +8001020c: 00e687b3 add a5,a3,a4 +80010210: 00e784b3 add s1,a5,a4 +80010214: 0a041c63 bnez s0,800102cc +80010218: 06060663 beqz a2,80010284 +8001021c: 00381f13 slli t5,a6,0x3 +80010220: 00068e13 mv t3,a3 +80010224: 00080893 mv a7,a6 +80010228: 00000313 li t1,0 +8001022c: 00100513 li a0,1 +80010230: 00000717 auipc a4,0x0 +80010234: 23073707 fld fa4,560(a4) # 80010460 +80010238: 000e0593 mv a1,t3 +8001023c: 00050713 mv a4,a0 +80010240: d21707d3 fcvt.d.wu fa5,a4 +80010244: 00858593 addi a1,a1,8 +80010248: 00170713 addi a4,a4,1 +8001024c: 12e7f7d3 fmul.d fa5,fa5,fa4 +80010250: fef5bc27 fsd fa5,-8(a1) +80010254: ff1716e3 bne a4,a7,80010240 +80010258: 00130313 addi t1,t1,1 +8001025c: 00c50533 add a0,a0,a2 +80010260: 00c708b3 add a7,a4,a2 +80010264: 01ee0e33 add t3,t3,t5 +80010268: fc6618e3 bne a2,t1,80010238 +8001026c: 00100713 li a4,1 80010270: 03d77733 remu a4,a4,t4 -80010274: 06070863 beqz a4,800102e4 -80010278: bfff0997 auipc s3,0xbfff0 -8001027c: dc098993 addi s3,s3,-576 # 40000038 -80010280: 0009a703 lw a4,0(s3) -80010284: 00070013 mv zero,a4 -80010288: b0002773 csrr a4,mcycle -8001028c: 13d46463 bltu s0,t4,800103b4 -80010290: b00027f3 csrr a5,mcycle -80010294: 0009a983 lw s3,0(s3) -80010298: 00098013 mv zero,s3 -8001029c: 00000513 li a0,0 -800102a0: 0a040063 beqz s0,80010340 -800102a4: 02c12083 lw ra,44(sp) -800102a8: 02812403 lw s0,40(sp) -800102ac: 02412483 lw s1,36(sp) -800102b0: 02012903 lw s2,32(sp) -800102b4: 
01c12983 lw s3,28(sp) -800102b8: 03010113 addi sp,sp,48 -800102bc: 00008067 ret -800102c0: 00100713 li a4,1 -800102c4: 03d77733 remu a4,a4,t4 -800102c8: 10870a63 beq a4,s0,800103dc -800102cc: 00200713 li a4,2 +80010274: 18070063 beqz a4,800103f4 +80010278: 00200713 li a4,2 +8001027c: 03d77733 remu a4,a4,t4 +80010280: 06070863 beqz a4,800102f0 +80010284: bfff0997 auipc s3,0xbfff0 +80010288: db498993 addi s3,s3,-588 # 40000038 +8001028c: 0009a703 lw a4,0(s3) +80010290: 00070013 mv zero,a4 +80010294: b0002773 csrr a4,mcycle +80010298: 13d46463 bltu s0,t4,800103c0 +8001029c: b00027f3 csrr a5,mcycle +800102a0: 0009a983 lw s3,0(s3) +800102a4: 00098013 mv zero,s3 +800102a8: 00000513 li a0,0 +800102ac: 0a040063 beqz s0,8001034c +800102b0: 02c12083 lw ra,44(sp) +800102b4: 02812403 lw s0,40(sp) +800102b8: 02412483 lw s1,36(sp) +800102bc: 02012903 lw s2,32(sp) +800102c0: 01c12983 lw s3,28(sp) +800102c4: 03010113 addi sp,sp,48 +800102c8: 00008067 ret +800102cc: 00100713 li a4,1 800102d0: 03d77733 remu a4,a4,t4 -800102d4: fa8712e3 bne a4,s0,80010278 -800102d8: fa0600e3 beqz a2,80010278 -800102dc: 00000717 auipc a4,0x0 -800102e0: 17473707 fld fa4,372(a4) # 80010450 -800102e4: 00000e13 li t3,0 -800102e8: 00300313 li t1,3 -800102ec: 00000513 li a0,0 -800102f0: fff60f13 addi t5,a2,-1 -800102f4: 003e1893 slli a7,t3,0x3 -800102f8: 011488b3 add a7,s1,a7 -800102fc: 00000593 li a1,0 -80010300: 00658733 add a4,a1,t1 -80010304: d21707d3 fcvt.d.wu fa5,a4 -80010308: 00888893 addi a7,a7,8 -8001030c: 00158593 addi a1,a1,1 -80010310: 12e7f7d3 fmul.d fa5,fa5,fa4 -80010314: fef8bc27 fsd fa5,-8(a7) -80010318: fec5e4e3 bltu a1,a2,80010300 -8001031c: 00000713 li a4,0 -80010320: 00060463 beqz a2,80010328 -80010324: 000f0713 mv a4,t5 -80010328: 00170713 addi a4,a4,1 -8001032c: 00150513 addi a0,a0,1 -80010330: 00e30333 add t1,t1,a4 -80010334: 010e0e33 add t3,t3,a6 -80010338: fac56ee3 bltu a0,a2,800102f4 -8001033c: f3dff06f j 80010278 -80010340: 00092583 lw a1,0(s2) -80010344: 06058463 beqz a1,800103ac 
-80010348: ff800813 li a6,-8 -8001034c: 03058833 mul a6,a1,a6 -80010350: 00158513 addi a0,a1,1 -80010354: 00351513 slli a0,a0,0x3 -80010358: ff850713 addi a4,a0,-8 -8001035c: 00800613 li a2,8 -80010360: 00e48733 add a4,s1,a4 -80010364: 00000693 li a3,0 -80010368: 00000797 auipc a5,0x0 -8001036c: 0f07b687 fld fa3,240(a5) # 80010458 -80010370: d20007d3 fcvt.d.w fa5,zero -80010374: 00e807b3 add a5,a6,a4 -80010378: 0007b707 fld fa4,0(a5) -8001037c: 00878793 addi a5,a5,8 -80010380: 02e7f7d3 fadd.d fa5,fa5,fa4 -80010384: fef71ae3 bne a4,a5,80010378 -80010388: 00063707 fld fa4,0(a2) -8001038c: 0ae7f7d3 fsub.d fa5,fa5,fa4 -80010390: 22f7a7d3 fabs.d fa5,fa5 -80010394: a2f697d3 flt.d a5,fa3,fa5 -80010398: 00168693 addi a3,a3,1 -8001039c: 00860613 addi a2,a2,8 -800103a0: 00f40433 add s0,s0,a5 -800103a4: 00a70733 add a4,a4,a0 -800103a8: fcd594e3 bne a1,a3,80010370 -800103ac: 00040513 mv a0,s0 -800103b0: ef5ff06f j 800102a4 -800103b4: 03d65533 divu a0,a2,t4 -800103b8: 00060593 mv a1,a2 -800103bc: 02880333 mul t1,a6,s0 -800103c0: 03d80733 mul a4,a6,t4 -800103c4: 00331313 slli t1,t1,0x3 -800103c8: 006488b3 add a7,s1,t1 -800103cc: 006686b3 add a3,a3,t1 -800103d0: 00e12023 sw a4,0(sp) -800103d4: cadff0ef jal ra,80010080 -800103d8: eb9ff06f j 80010290 -800103dc: e8060ee3 beqz a2,80010278 -800103e0: 00000717 auipc a4,0x0 -800103e4: 07073707 fld fa4,112(a4) # 80010450 -800103e8: 00000e13 li t3,0 -800103ec: 00200313 li t1,2 -800103f0: 00000513 li a0,0 -800103f4: fff60f13 addi t5,a2,-1 -800103f8: 003e1893 slli a7,t3,0x3 -800103fc: 011788b3 add a7,a5,a7 -80010400: 00000593 li a1,0 -80010404: 00658733 add a4,a1,t1 -80010408: d21707d3 fcvt.d.wu fa5,a4 -8001040c: 00888893 addi a7,a7,8 -80010410: 00158593 addi a1,a1,1 -80010414: 12e7f7d3 fmul.d fa5,fa5,fa4 -80010418: fef8bc27 fsd fa5,-8(a7) -8001041c: fec5e4e3 bltu a1,a2,80010404 -80010420: 00000713 li a4,0 -80010424: 00060463 beqz a2,8001042c -80010428: 000f0713 mv a4,t5 -8001042c: 00170713 addi a4,a4,1 -80010430: 00150513 addi a0,a0,1 
-80010434: 00e30333 add t1,t1,a4 -80010438: 010e0e33 add t3,t3,a6 -8001043c: fac56ee3 bltu a0,a2,800103f8 -80010440: 00200713 li a4,2 -80010444: 03d77733 remu a4,a4,t4 -80010448: e2e418e3 bne s0,a4,80010278 -8001044c: e99ff06f j 800102e4 +800102d4: 10870a63 beq a4,s0,800103e8 +800102d8: 00200713 li a4,2 +800102dc: 03d77733 remu a4,a4,t4 +800102e0: fa8712e3 bne a4,s0,80010284 +800102e4: fa0600e3 beqz a2,80010284 +800102e8: 00000717 auipc a4,0x0 +800102ec: 17873707 fld fa4,376(a4) # 80010460 +800102f0: 00000e13 li t3,0 +800102f4: 00300313 li t1,3 +800102f8: 00000513 li a0,0 +800102fc: fff60f13 addi t5,a2,-1 +80010300: 003e1593 slli a1,t3,0x3 +80010304: 00b485b3 add a1,s1,a1 +80010308: 00000713 li a4,0 +8001030c: 006708b3 add a7,a4,t1 +80010310: d21887d3 fcvt.d.wu fa5,a7 +80010314: 00858593 addi a1,a1,8 +80010318: 00170713 addi a4,a4,1 +8001031c: 12e7f7d3 fmul.d fa5,fa5,fa4 +80010320: fef5bc27 fsd fa5,-8(a1) +80010324: fec764e3 bltu a4,a2,8001030c +80010328: 00000713 li a4,0 +8001032c: 00060463 beqz a2,80010334 +80010330: 000f0713 mv a4,t5 +80010334: 00170713 addi a4,a4,1 +80010338: 00150513 addi a0,a0,1 +8001033c: 00e30333 add t1,t1,a4 +80010340: 010e0e33 add t3,t3,a6 +80010344: fac56ee3 bltu a0,a2,80010300 +80010348: f3dff06f j 80010284 +8001034c: 00092583 lw a1,0(s2) +80010350: 06058463 beqz a1,800103b8 +80010354: ff800813 li a6,-8 +80010358: 03058833 mul a6,a1,a6 +8001035c: 00158513 addi a0,a1,1 +80010360: 00351513 slli a0,a0,0x3 +80010364: ff850713 addi a4,a0,-8 +80010368: 00800613 li a2,8 +8001036c: 00e48733 add a4,s1,a4 +80010370: 00000693 li a3,0 +80010374: 00000797 auipc a5,0x0 +80010378: 0f47b687 fld fa3,244(a5) # 80010468 +8001037c: d20007d3 fcvt.d.w fa5,zero +80010380: 00e807b3 add a5,a6,a4 +80010384: 0007b707 fld fa4,0(a5) +80010388: 00878793 addi a5,a5,8 +8001038c: 02e7f7d3 fadd.d fa5,fa5,fa4 +80010390: fef71ae3 bne a4,a5,80010384 +80010394: 00063707 fld fa4,0(a2) +80010398: 0ae7f7d3 fsub.d fa5,fa5,fa4 +8001039c: 22f7a7d3 fabs.d fa5,fa5 +800103a0: 
a2f697d3 flt.d a5,fa3,fa5 +800103a4: 00168693 addi a3,a3,1 +800103a8: 00860613 addi a2,a2,8 +800103ac: 00f40433 add s0,s0,a5 +800103b0: 00a70733 add a4,a4,a0 +800103b4: fcd594e3 bne a1,a3,8001037c +800103b8: 00040513 mv a0,s0 +800103bc: ef5ff06f j 800102b0 +800103c0: 03d65533 divu a0,a2,t4 +800103c4: 00060593 mv a1,a2 +800103c8: 02880333 mul t1,a6,s0 +800103cc: 03d80733 mul a4,a6,t4 +800103d0: 00331313 slli t1,t1,0x3 +800103d4: 006488b3 add a7,s1,t1 +800103d8: 006686b3 add a3,a3,t1 +800103dc: 00e12023 sw a4,0(sp) +800103e0: cadff0ef jal ra,8001008c +800103e4: eb9ff06f j 8001029c +800103e8: e8060ee3 beqz a2,80010284 +800103ec: 00000717 auipc a4,0x0 +800103f0: 07473707 fld fa4,116(a4) # 80010460 +800103f4: 00000e13 li t3,0 +800103f8: 00200313 li t1,2 +800103fc: 00000513 li a0,0 +80010400: fff60f13 addi t5,a2,-1 +80010404: 003e1593 slli a1,t3,0x3 +80010408: 00b785b3 add a1,a5,a1 +8001040c: 00000713 li a4,0 +80010410: 006708b3 add a7,a4,t1 +80010414: d21887d3 fcvt.d.wu fa5,a7 +80010418: 00858593 addi a1,a1,8 +8001041c: 00170713 addi a4,a4,1 +80010420: 12e7f7d3 fmul.d fa5,fa5,fa4 +80010424: fef5bc27 fsd fa5,-8(a1) +80010428: fec764e3 bltu a4,a2,80010410 +8001042c: 00000713 li a4,0 +80010430: 00060463 beqz a2,80010438 +80010434: 000f0713 mv a4,t5 +80010438: 00170713 addi a4,a4,1 +8001043c: 00150513 addi a0,a0,1 +80010440: 00e30333 add t1,t1,a4 +80010444: 010e0e33 add t3,t3,a6 +80010448: fac56ee3 bltu a0,a2,80010404 +8001044c: 00200713 li a4,2 +80010450: 03d77733 remu a4,a4,t4 +80010454: e2e418e3 bne s0,a4,80010284 +80010458: e99ff06f j 800102f0 Disassembly of section .sdata: -80010450 <__bss_end-0x10>: -80010450: e354 fsw fa3,4(a4) -80010452: 9ba5 andi a5,a5,-23 -80010454: 20c4 fld fs1,128(s1) -80010456: 4009 c.li zero,2 -80010458: a9fc fsd fa5,208(a1) -8001045a: d2f1 beqz a3,8001041e -8001045c: 624d lui tp,0x13 -8001045e: 3f50 fld fa2,184(a4) +80010460 <__bss_end-0x10>: +80010460: e354 fsw fa3,4(a4) +80010462: 9ba5 andi a5,a5,-23 +80010464: 20c4 fld fs1,128(s1) 
+80010466: 4009 c.li zero,2 +80010468: a9fc fsd fa5,208(a1) +8001046a: d2f1 beqz a3,8001042e +8001046c: 624d lui tp,0x13 +8001046e: 3f50 fld fa2,184(a4) Disassembly of section .comment: @@ -382,20 +385,20 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm - a: 3920 fld fs0,112(a0) - c: 322e fld ft4,232(sp) - e: 302e fld ft0,232(sp) - ... + a: 3120 fld fs0,96(a0) + c: 2e30 fld fa2,88(a2) + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2f41 jal 790 + 0: 3341 jal fffffd80 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 - c: 0025 c.nop 9 + c: 0029 c.nop 10 e: 0000 unimp 10: 1004 addi s1,sp,32 12: 7205 lui tp,0xfffe1 @@ -409,3 +412,5 @@ Disassembly of section .riscv.attributes: 26: 3266 fld ft4,120(sp) 28: 3070 fld fa2,224(s0) 2a: 645f 7032 0030 0x307032645f + 30: 0108 addi a0,sp,128 + 32: 0b0a slli s6,s6,0x2 diff --git a/sw/banshee/tests/dump/matmul_ssr_frep.dump b/sw/banshee/tests/dump/matmul_ssr_frep.dump index 31b4d92f..f2c296ae 100644 --- a/sw/banshee/tests/dump/matmul_ssr_frep.dump +++ b/sw/banshee/tests/dump/matmul_ssr_frep.dump @@ -82,302 +82,301 @@ Disassembly of section .text: 80010010: ffc10113 addi sp,sp,-4 # 40000008 80010014: 00012103 lw sp,0(sp) 80010018: f1402573 csrr a0,mhartid -8001001c: 00351293 slli t0,a0,0x3 -80010020: 40510133 sub sp,sp,t0 -80010024: 00629293 slli t0,t0,0x6 -80010028: 40510133 sub sp,sp,t0 -8001002c: 00010213 mv tp,sp -80010030: 00100293 li t0,1 -80010034: 00929293 slli t0,t0,0x9 -80010038: 40520233 sub tp,tp,t0 -8001003c: bfff0297 auipc t0,0xbfff0 -80010040: fd428293 addi t0,t0,-44 # 40000010 -80010044: 0002a583 lw a1,0(t0) -80010048: 1b0000ef jal ra,800101f8
-8001004c: 00151513 slli a0,a0,0x1 -80010050: 00156513 ori a0,a0,1 -80010054: 0100006f j 80010064 +8001001c: bfff0297 auipc t0,0xbfff0 +80010020: 0242a283 lw t0,36(t0) # 40000040 +80010024: 40550533 sub a0,a0,t0 +80010028: 00351293 slli t0,a0,0x3 +8001002c: 40510133 sub sp,sp,t0 +80010030: 00629293 slli t0,t0,0x6 +80010034: 40510133 sub sp,sp,t0 +80010038: 00010213 mv tp,sp +8001003c: 00100293 li t0,1 +80010040: 00929293 slli t0,t0,0x9 +80010044: 40520233 sub tp,tp,t0 +80010048: bfff0297 auipc t0,0xbfff0 +8001004c: fc828293 addi t0,t0,-56 # 40000010 +80010050: 0002a583 lw a1,0(t0) +80010054: 1a0000ef jal ra,800101f4
+80010058: 00151513 slli a0,a0,0x1 +8001005c: 00156513 ori a0,a0,1 +80010060: 0100006f j 80010070 -80010058 : -80010058: fff00293 li t0,-1 -8001005c: 00554533 xor a0,a0,t0 -80010060: 0040006f j 80010064 +80010064 : +80010064: fff00293 li t0,-1 +80010068: 00554533 xor a0,a0,t0 +8001006c: 0040006f j 80010070 -80010064 : -80010064: f14022f3 csrr t0,mhartid -80010068: 00029863 bnez t0,80010078 -8001006c: bfff0297 auipc t0,0xbfff0 -80010070: fb428293 addi t0,t0,-76 # 40000020 -80010074: 00a2a023 sw a0,0(t0) +80010070 : +80010070: f14022f3 csrr t0,mhartid +80010074: 00029863 bnez t0,80010084 +80010078: bfff0297 auipc t0,0xbfff0 +8001007c: fa828293 addi t0,t0,-88 # 40000020 +80010080: 00a2a023 sw a0,0(t0) -80010078 : -80010078: 10500073 wfi -8001007c: ffdff06f j 80010078 +80010084 : +80010084: 10500073 wfi +80010088: ffdff06f j 80010084 -80010080 : -80010080: ff010113 addi sp,sp,-16 -80010084: 01012e03 lw t3,16(sp) -80010088: 00812623 sw s0,12(sp) -8001008c: 00912423 sw s1,8(sp) -80010090: fff60293 addi t0,a2,-1 -80010094: 00381813 slli a6,a6,0x3 -80010098: 01029f93 slli t6,t0,0x10 -8001009c: 01081313 slli t1,a6,0x10 -800100a0: 010fdf93 srli t6,t6,0x10 -800100a4: 01035313 srli t1,t1,0x10 -800100a8: 03f30eb3 mul t4,t1,t6 -800100ac: 0025d813 srli a6,a1,0x2 -800100b0: fff80813 addi a6,a6,-1 -800100b4: 00205637 lui a2,0x205 -800100b8: fff50f13 addi t5,a0,-1 -800100bc: 01081813 slli a6,a6,0x10 -800100c0: 81f62823 sw t6,-2032(a2) # 204810 -800100c4: 01085813 srli a6,a6,0x10 -800100c8: 003f9393 slli t2,t6,0x3 -800100cc: 010f1f13 slli t5,t5,0x10 -800100d0: 81062c23 sw a6,-2024(a2) -800100d4: 010f5f13 srli t5,t5,0x10 -800100d8: 01039393 slli t2,t2,0x10 -800100dc: 00371713 slli a4,a4,0x3 -800100e0: 83e62023 sw t5,-2016(a2) -800100e4: 00800413 li s0,8 -800100e8: 0103d393 srli t2,t2,0x10 -800100ec: 01071713 slli a4,a4,0x10 -800100f0: 82862823 sw s0,-2000(a2) -800100f4: 407004b3 neg s1,t2 -800100f8: 01075713 srli a4,a4,0x10 -800100fc: 82962c23 sw s1,-1992(a2) -80010100: 40770733 sub 
a4,a4,t2 -80010104: 84e62023 sw a4,-1984(a2) -80010108: 00300713 li a4,3 -8001010c: 80e62423 sw a4,-2040(a2) -80010110: 90e62823 sw a4,-1776(a2) -80010114: 018e8e93 addi t4,t4,24 -80010118: 91f62c23 sw t6,-1768(a2) -8001011c: 010e9e93 slli t4,t4,0x10 -80010120: 93062023 sw a6,-1760(a2) -80010124: 010ede93 srli t4,t4,0x10 -80010128: 00581713 slli a4,a6,0x5 -8001012c: 93e62423 sw t5,-1752(a2) -80010130: 00ee8733 add a4,t4,a4 -80010134: 92862823 sw s0,-1744(a2) -80010138: fe830313 addi t1,t1,-24 -8001013c: 02000813 li a6,32 -80010140: 01071713 slli a4,a4,0x10 -80010144: 92662c23 sw t1,-1736(a2) -80010148: 41d80eb3 sub t4,a6,t4 -8001014c: 01075713 srli a4,a4,0x10 -80010150: 95d62023 sw t4,-1728(a2) -80010154: 40e00733 neg a4,a4 -80010158: 94e62423 sw a4,-1720(a2) -8001015c: 8cd62823 sw a3,-1840(a2) -80010160: 9cf62c23 sw a5,-1576(a2) -80010164: 7c00e073 csrsi 0x7c0,1 -80010168: 06050c63 beqz a0,800101e0 -8001016c: 06058a63 beqz a1,800101e0 -80010170: fff58593 addi a1,a1,-1 -80010174: ffc5f593 andi a1,a1,-4 -80010178: 00000693 li a3,0 -8001017c: 00000613 li a2,0 -80010180: 02088813 addi a6,a7,32 -80010184: 00b68733 add a4,a3,a1 -80010188: 00369793 slli a5,a3,0x3 -8001018c: 00371713 slli a4,a4,0x3 -80010190: 011787b3 add a5,a5,a7 -80010194: 00e80733 add a4,a6,a4 -80010198: 0007b607 fld fa2,0(a5) -8001019c: 0087b687 fld fa3,8(a5) -800101a0: 0107b707 fld fa4,16(a5) -800101a4: 0187b787 fld fa5,24(a5) -800101a8: 0032808b 0x32808b -800101ac: 62107643 fmadd.d fa2,ft0,ft1,fa2 -800101b0: 6a1076c3 fmadd.d fa3,ft0,ft1,fa3 -800101b4: 72107743 fmadd.d fa4,ft0,ft1,fa4 -800101b8: 7a1077c3 fmadd.d fa5,ft0,ft1,fa5 -800101bc: 00c7b027 fsd fa2,0(a5) -800101c0: 00d7b427 fsd fa3,8(a5) -800101c4: 00e7b827 fsd fa4,16(a5) -800101c8: 00f7bc27 fsd fa5,24(a5) -800101cc: 02078793 addi a5,a5,32 -800101d0: fce794e3 bne a5,a4,80010198 -800101d4: 00160613 addi a2,a2,1 -800101d8: 01c686b3 add a3,a3,t3 -800101dc: fac514e3 bne a0,a2,80010184 -800101e0: e0050053 fmv.x.w zero,fa0 -800101e4: 7c00f073 csrci 
0x7c0,1 -800101e8: 00c12403 lw s0,12(sp) -800101ec: 00812483 lw s1,8(sp) -800101f0: 01010113 addi sp,sp,16 -800101f4: 00008067 ret +8001008c : +8001008c: ff010113 addi sp,sp,-16 +80010090: 01012e03 lw t3,16(sp) +80010094: 00812623 sw s0,12(sp) +80010098: 00912423 sw s1,8(sp) +8001009c: fff60293 addi t0,a2,-1 +800100a0: 00381813 slli a6,a6,0x3 +800100a4: 01029f93 slli t6,t0,0x10 +800100a8: 01081313 slli t1,a6,0x10 +800100ac: 010fdf93 srli t6,t6,0x10 +800100b0: 01035313 srli t1,t1,0x10 +800100b4: 03f30eb3 mul t4,t1,t6 +800100b8: 0025d813 srli a6,a1,0x2 +800100bc: fff80813 addi a6,a6,-1 +800100c0: 00205637 lui a2,0x205 +800100c4: fff50f13 addi t5,a0,-1 +800100c8: 01081813 slli a6,a6,0x10 +800100cc: 81f62823 sw t6,-2032(a2) # 204810 +800100d0: 01085813 srli a6,a6,0x10 +800100d4: 003f9393 slli t2,t6,0x3 +800100d8: 010f1f13 slli t5,t5,0x10 +800100dc: 81062c23 sw a6,-2024(a2) +800100e0: 010f5f13 srli t5,t5,0x10 +800100e4: 01039393 slli t2,t2,0x10 +800100e8: 00371713 slli a4,a4,0x3 +800100ec: 83e62023 sw t5,-2016(a2) +800100f0: 00800413 li s0,8 +800100f4: 0103d393 srli t2,t2,0x10 +800100f8: 01071713 slli a4,a4,0x10 +800100fc: 82862823 sw s0,-2000(a2) +80010100: 407004b3 neg s1,t2 +80010104: 01075713 srli a4,a4,0x10 +80010108: 82962c23 sw s1,-1992(a2) +8001010c: 40770733 sub a4,a4,t2 +80010110: 84e62023 sw a4,-1984(a2) +80010114: 00300713 li a4,3 +80010118: 80e62423 sw a4,-2040(a2) +8001011c: 90e62823 sw a4,-1776(a2) +80010120: 018e8e93 addi t4,t4,24 +80010124: 91f62c23 sw t6,-1768(a2) +80010128: 010e9e93 slli t4,t4,0x10 +8001012c: 93062023 sw a6,-1760(a2) +80010130: 010ede93 srli t4,t4,0x10 +80010134: 00581713 slli a4,a6,0x5 +80010138: 93e62423 sw t5,-1752(a2) +8001013c: 00ee8733 add a4,t4,a4 +80010140: 92862823 sw s0,-1744(a2) +80010144: fe830313 addi t1,t1,-24 +80010148: 02000813 li a6,32 +8001014c: 01071713 slli a4,a4,0x10 +80010150: 92662c23 sw t1,-1736(a2) +80010154: 41d80eb3 sub t4,a6,t4 +80010158: 01075713 srli a4,a4,0x10 +8001015c: 95d62023 sw t4,-1728(a2) 
+80010160: 40e00733 neg a4,a4 +80010164: 94e62423 sw a4,-1720(a2) +80010168: 8cd62823 sw a3,-1840(a2) +8001016c: 9cf62c23 sw a5,-1576(a2) +80010170: 7c00e073 csrsi 0x7c0,1 +80010174: 06050463 beqz a0,800101dc +80010178: 00000613 li a2,0 +8001017c: 00000693 li a3,0 +80010180: 04058e63 beqz a1,800101dc +80010184: 00361793 slli a5,a2,0x3 +80010188: 00f887b3 add a5,a7,a5 +8001018c: 00000713 li a4,0 +80010190: 0007b607 fld fa2,0(a5) +80010194: 0087b687 fld fa3,8(a5) +80010198: 0107b707 fld fa4,16(a5) +8001019c: 0187b787 fld fa5,24(a5) +800101a0: 0032808b 0x32808b +800101a4: 62107643 fmadd.d fa2,ft0,ft1,fa2 +800101a8: 6a1076c3 fmadd.d fa3,ft0,ft1,fa3 +800101ac: 72107743 fmadd.d fa4,ft0,ft1,fa4 +800101b0: 7a1077c3 fmadd.d fa5,ft0,ft1,fa5 +800101b4: 00c7b027 fsd fa2,0(a5) +800101b8: 00d7b427 fsd fa3,8(a5) +800101bc: 00e7b827 fsd fa4,16(a5) +800101c0: 00f7bc27 fsd fa5,24(a5) +800101c4: 00470713 addi a4,a4,4 +800101c8: 02078793 addi a5,a5,32 +800101cc: fcb762e3 bltu a4,a1,80010190 +800101d0: 00168693 addi a3,a3,1 +800101d4: 01c60633 add a2,a2,t3 +800101d8: fad516e3 bne a0,a3,80010184 +800101dc: e0050053 fmv.x.w zero,fa0 +800101e0: 7c00f073 csrci 0x7c0,1 +800101e4: 00c12403 lw s0,12(sp) +800101e8: 00812483 lw s1,8(sp) +800101ec: 01010113 addi sp,sp,16 +800101f0: 00008067 ret Disassembly of section .text.startup: -800101f8
: -800101f8: fd010113 addi sp,sp,-48 -800101fc: 03212023 sw s2,32(sp) -80010200: 00000913 li s2,0 -80010204: 00092603 lw a2,0(s2) -80010208: 02812423 sw s0,40(sp) -8001020c: 02112623 sw ra,44(sp) -80010210: 02912223 sw s1,36(sp) -80010214: 01312e23 sw s3,28(sp) -80010218: 00050413 mv s0,a0 -8001021c: 00058e93 mv t4,a1 -80010220: 00c5e463 bltu a1,a2,80010228 -80010224: 00060e93 mv t4,a2 -80010228: 00160813 addi a6,a2,1 -8001022c: 03060733 mul a4,a2,a6 -80010230: 09000693 li a3,144 -80010234: 00170713 addi a4,a4,1 -80010238: 00371713 slli a4,a4,0x3 -8001023c: 00e687b3 add a5,a3,a4 -80010240: 00e784b3 add s1,a5,a4 -80010244: 0a041c63 bnez s0,800102fc -80010248: 06060663 beqz a2,800102b4 -8001024c: 00381f13 slli t5,a6,0x3 -80010250: 00068e13 mv t3,a3 -80010254: 00080893 mv a7,a6 -80010258: 00000313 li t1,0 -8001025c: 00100513 li a0,1 -80010260: 00000717 auipc a4,0x0 -80010264: 23073707 fld fa4,560(a4) # 80010490 -80010268: 000e0593 mv a1,t3 -8001026c: 00050713 mv a4,a0 -80010270: d21707d3 fcvt.d.wu fa5,a4 -80010274: 00858593 addi a1,a1,8 -80010278: 00170713 addi a4,a4,1 -8001027c: 12e7f7d3 fmul.d fa5,fa5,fa4 -80010280: fef5bc27 fsd fa5,-8(a1) -80010284: ff1716e3 bne a4,a7,80010270 -80010288: 00130313 addi t1,t1,1 -8001028c: 00c50533 add a0,a0,a2 -80010290: 00c708b3 add a7,a4,a2 -80010294: 01ee0e33 add t3,t3,t5 -80010298: fc6618e3 bne a2,t1,80010268 -8001029c: 00100713 li a4,1 -800102a0: 03d77733 remu a4,a4,t4 -800102a4: 18070063 beqz a4,80010424 -800102a8: 00200713 li a4,2 -800102ac: 03d77733 remu a4,a4,t4 -800102b0: 06070863 beqz a4,80010320 -800102b4: bfff0997 auipc s3,0xbfff0 -800102b8: d8498993 addi s3,s3,-636 # 40000038 -800102bc: 0009a703 lw a4,0(s3) -800102c0: 00070013 mv zero,a4 -800102c4: b0002773 csrr a4,mcycle -800102c8: 13d46463 bltu s0,t4,800103f0 -800102cc: b00027f3 csrr a5,mcycle -800102d0: 0009a983 lw s3,0(s3) -800102d4: 00098013 mv zero,s3 -800102d8: 00000513 li a0,0 -800102dc: 0a040063 beqz s0,8001037c -800102e0: 02c12083 lw ra,44(sp) -800102e4: 
02812403 lw s0,40(sp) -800102e8: 02412483 lw s1,36(sp) -800102ec: 02012903 lw s2,32(sp) -800102f0: 01c12983 lw s3,28(sp) -800102f4: 03010113 addi sp,sp,48 -800102f8: 00008067 ret -800102fc: 00100713 li a4,1 -80010300: 03d77733 remu a4,a4,t4 -80010304: 10870a63 beq a4,s0,80010418 -80010308: 00200713 li a4,2 -8001030c: 03d77733 remu a4,a4,t4 -80010310: fa8712e3 bne a4,s0,800102b4 -80010314: fa0600e3 beqz a2,800102b4 -80010318: 00000717 auipc a4,0x0 -8001031c: 17873707 fld fa4,376(a4) # 80010490 -80010320: 00000e13 li t3,0 -80010324: 00300313 li t1,3 -80010328: 00000513 li a0,0 -8001032c: fff60f13 addi t5,a2,-1 -80010330: 003e1893 slli a7,t3,0x3 -80010334: 011488b3 add a7,s1,a7 -80010338: 00000593 li a1,0 -8001033c: 00658733 add a4,a1,t1 -80010340: d21707d3 fcvt.d.wu fa5,a4 -80010344: 00888893 addi a7,a7,8 -80010348: 00158593 addi a1,a1,1 -8001034c: 12e7f7d3 fmul.d fa5,fa5,fa4 -80010350: fef8bc27 fsd fa5,-8(a7) -80010354: fec5e4e3 bltu a1,a2,8001033c -80010358: 00000713 li a4,0 -8001035c: 00060463 beqz a2,80010364 -80010360: 000f0713 mv a4,t5 -80010364: 00170713 addi a4,a4,1 -80010368: 00150513 addi a0,a0,1 -8001036c: 00e30333 add t1,t1,a4 -80010370: 010e0e33 add t3,t3,a6 -80010374: fac56ee3 bltu a0,a2,80010330 -80010378: f3dff06f j 800102b4 -8001037c: 00092583 lw a1,0(s2) -80010380: 06058463 beqz a1,800103e8 -80010384: ff800813 li a6,-8 -80010388: 03058833 mul a6,a1,a6 -8001038c: 00158513 addi a0,a1,1 -80010390: 00351513 slli a0,a0,0x3 -80010394: ff850713 addi a4,a0,-8 -80010398: 00800613 li a2,8 -8001039c: 00e48733 add a4,s1,a4 -800103a0: 00000693 li a3,0 -800103a4: 00000797 auipc a5,0x0 -800103a8: 0f47b687 fld fa3,244(a5) # 80010498 -800103ac: d20007d3 fcvt.d.w fa5,zero -800103b0: 00e807b3 add a5,a6,a4 -800103b4: 0007b707 fld fa4,0(a5) -800103b8: 00878793 addi a5,a5,8 -800103bc: 02e7f7d3 fadd.d fa5,fa5,fa4 -800103c0: fef71ae3 bne a4,a5,800103b4 -800103c4: 00063707 fld fa4,0(a2) -800103c8: 0ae7f7d3 fsub.d fa5,fa5,fa4 -800103cc: 22f7a7d3 fabs.d fa5,fa5 -800103d0: 
a2f697d3 flt.d a5,fa3,fa5 -800103d4: 00168693 addi a3,a3,1 -800103d8: 00860613 addi a2,a2,8 -800103dc: 00f40433 add s0,s0,a5 -800103e0: 00a70733 add a4,a4,a0 -800103e4: fcd594e3 bne a1,a3,800103ac -800103e8: 00040513 mv a0,s0 -800103ec: ef5ff06f j 800102e0 -800103f0: 03d65533 divu a0,a2,t4 -800103f4: 00060593 mv a1,a2 -800103f8: 02880333 mul t1,a6,s0 -800103fc: 03d80733 mul a4,a6,t4 -80010400: 00331313 slli t1,t1,0x3 -80010404: 006488b3 add a7,s1,t1 -80010408: 006686b3 add a3,a3,t1 -8001040c: 00e12023 sw a4,0(sp) -80010410: c71ff0ef jal ra,80010080 -80010414: eb9ff06f j 800102cc -80010418: e8060ee3 beqz a2,800102b4 -8001041c: 00000717 auipc a4,0x0 -80010420: 07473707 fld fa4,116(a4) # 80010490 -80010424: 00000e13 li t3,0 -80010428: 00200313 li t1,2 -8001042c: 00000513 li a0,0 -80010430: fff60f13 addi t5,a2,-1 -80010434: 003e1893 slli a7,t3,0x3 -80010438: 011788b3 add a7,a5,a7 -8001043c: 00000593 li a1,0 -80010440: 00658733 add a4,a1,t1 -80010444: d21707d3 fcvt.d.wu fa5,a4 -80010448: 00888893 addi a7,a7,8 -8001044c: 00158593 addi a1,a1,1 -80010450: 12e7f7d3 fmul.d fa5,fa5,fa4 -80010454: fef8bc27 fsd fa5,-8(a7) -80010458: fec5e4e3 bltu a1,a2,80010440 -8001045c: 00000713 li a4,0 -80010460: 00060463 beqz a2,80010468 -80010464: 000f0713 mv a4,t5 -80010468: 00170713 addi a4,a4,1 -8001046c: 00150513 addi a0,a0,1 -80010470: 00e30333 add t1,t1,a4 -80010474: 010e0e33 add t3,t3,a6 -80010478: fac56ee3 bltu a0,a2,80010434 -8001047c: 00200713 li a4,2 -80010480: 03d77733 remu a4,a4,t4 -80010484: e2e418e3 bne s0,a4,800102b4 -80010488: e99ff06f j 80010320 +800101f4
: +800101f4: fd010113 addi sp,sp,-48 +800101f8: 03212023 sw s2,32(sp) +800101fc: 00000913 li s2,0 +80010200: 00092603 lw a2,0(s2) +80010204: 02812423 sw s0,40(sp) +80010208: 02112623 sw ra,44(sp) +8001020c: 02912223 sw s1,36(sp) +80010210: 01312e23 sw s3,28(sp) +80010214: 00050413 mv s0,a0 +80010218: 00058e93 mv t4,a1 +8001021c: 00c5e463 bltu a1,a2,80010224 +80010220: 00060e93 mv t4,a2 +80010224: 00160813 addi a6,a2,1 +80010228: 03060733 mul a4,a2,a6 +8001022c: 09000693 li a3,144 +80010230: 00170713 addi a4,a4,1 +80010234: 00371713 slli a4,a4,0x3 +80010238: 00e687b3 add a5,a3,a4 +8001023c: 00e784b3 add s1,a5,a4 +80010240: 0a041c63 bnez s0,800102f8 +80010244: 06060663 beqz a2,800102b0 +80010248: 00381f13 slli t5,a6,0x3 +8001024c: 00068e13 mv t3,a3 +80010250: 00080893 mv a7,a6 +80010254: 00000313 li t1,0 +80010258: 00100513 li a0,1 +8001025c: 00000717 auipc a4,0x0 +80010260: 23473707 fld fa4,564(a4) # 80010490 +80010264: 000e0593 mv a1,t3 +80010268: 00050713 mv a4,a0 +8001026c: d21707d3 fcvt.d.wu fa5,a4 +80010270: 00858593 addi a1,a1,8 +80010274: 00170713 addi a4,a4,1 +80010278: 12e7f7d3 fmul.d fa5,fa5,fa4 +8001027c: fef5bc27 fsd fa5,-8(a1) +80010280: ff1716e3 bne a4,a7,8001026c +80010284: 00130313 addi t1,t1,1 +80010288: 00c50533 add a0,a0,a2 +8001028c: 00c708b3 add a7,a4,a2 +80010290: 01ee0e33 add t3,t3,t5 +80010294: fc6618e3 bne a2,t1,80010264 +80010298: 00100713 li a4,1 +8001029c: 03d77733 remu a4,a4,t4 +800102a0: 18070063 beqz a4,80010420 +800102a4: 00200713 li a4,2 +800102a8: 03d77733 remu a4,a4,t4 +800102ac: 06070863 beqz a4,8001031c +800102b0: bfff0997 auipc s3,0xbfff0 +800102b4: d8898993 addi s3,s3,-632 # 40000038 +800102b8: 0009a703 lw a4,0(s3) +800102bc: 00070013 mv zero,a4 +800102c0: b0002773 csrr a4,mcycle +800102c4: 13d46463 bltu s0,t4,800103ec +800102c8: b00027f3 csrr a5,mcycle +800102cc: 0009a983 lw s3,0(s3) +800102d0: 00098013 mv zero,s3 +800102d4: 00000513 li a0,0 +800102d8: 0a040063 beqz s0,80010378 +800102dc: 02c12083 lw ra,44(sp) +800102e0: 
02812403 lw s0,40(sp) +800102e4: 02412483 lw s1,36(sp) +800102e8: 02012903 lw s2,32(sp) +800102ec: 01c12983 lw s3,28(sp) +800102f0: 03010113 addi sp,sp,48 +800102f4: 00008067 ret +800102f8: 00100713 li a4,1 +800102fc: 03d77733 remu a4,a4,t4 +80010300: 10870a63 beq a4,s0,80010414 +80010304: 00200713 li a4,2 +80010308: 03d77733 remu a4,a4,t4 +8001030c: fa8712e3 bne a4,s0,800102b0 +80010310: fa0600e3 beqz a2,800102b0 +80010314: 00000717 auipc a4,0x0 +80010318: 17c73707 fld fa4,380(a4) # 80010490 +8001031c: 00000e13 li t3,0 +80010320: 00300313 li t1,3 +80010324: 00000513 li a0,0 +80010328: fff60f13 addi t5,a2,-1 +8001032c: 003e1593 slli a1,t3,0x3 +80010330: 00b485b3 add a1,s1,a1 +80010334: 00000713 li a4,0 +80010338: 006708b3 add a7,a4,t1 +8001033c: d21887d3 fcvt.d.wu fa5,a7 +80010340: 00858593 addi a1,a1,8 +80010344: 00170713 addi a4,a4,1 +80010348: 12e7f7d3 fmul.d fa5,fa5,fa4 +8001034c: fef5bc27 fsd fa5,-8(a1) +80010350: fec764e3 bltu a4,a2,80010338 +80010354: 00000713 li a4,0 +80010358: 00060463 beqz a2,80010360 +8001035c: 000f0713 mv a4,t5 +80010360: 00170713 addi a4,a4,1 +80010364: 00150513 addi a0,a0,1 +80010368: 00e30333 add t1,t1,a4 +8001036c: 010e0e33 add t3,t3,a6 +80010370: fac56ee3 bltu a0,a2,8001032c +80010374: f3dff06f j 800102b0 +80010378: 00092583 lw a1,0(s2) +8001037c: 06058463 beqz a1,800103e4 +80010380: ff800813 li a6,-8 +80010384: 03058833 mul a6,a1,a6 +80010388: 00158513 addi a0,a1,1 +8001038c: 00351513 slli a0,a0,0x3 +80010390: ff850713 addi a4,a0,-8 +80010394: 00800613 li a2,8 +80010398: 00e48733 add a4,s1,a4 +8001039c: 00000693 li a3,0 +800103a0: 00000797 auipc a5,0x0 +800103a4: 0f87b687 fld fa3,248(a5) # 80010498 +800103a8: d20007d3 fcvt.d.w fa5,zero +800103ac: 00e807b3 add a5,a6,a4 +800103b0: 0007b707 fld fa4,0(a5) +800103b4: 00878793 addi a5,a5,8 +800103b8: 02e7f7d3 fadd.d fa5,fa5,fa4 +800103bc: fef71ae3 bne a4,a5,800103b0 +800103c0: 00063707 fld fa4,0(a2) +800103c4: 0ae7f7d3 fsub.d fa5,fa5,fa4 +800103c8: 22f7a7d3 fabs.d fa5,fa5 +800103cc: 
a2f697d3 flt.d a5,fa3,fa5 +800103d0: 00168693 addi a3,a3,1 +800103d4: 00860613 addi a2,a2,8 +800103d8: 00f40433 add s0,s0,a5 +800103dc: 00a70733 add a4,a4,a0 +800103e0: fcd594e3 bne a1,a3,800103a8 +800103e4: 00040513 mv a0,s0 +800103e8: ef5ff06f j 800102dc +800103ec: 03d65533 divu a0,a2,t4 +800103f0: 00060593 mv a1,a2 +800103f4: 02880333 mul t1,a6,s0 +800103f8: 03d80733 mul a4,a6,t4 +800103fc: 00331313 slli t1,t1,0x3 +80010400: 006488b3 add a7,s1,t1 +80010404: 006686b3 add a3,a3,t1 +80010408: 00e12023 sw a4,0(sp) +8001040c: c81ff0ef jal ra,8001008c +80010410: eb9ff06f j 800102c8 +80010414: e8060ee3 beqz a2,800102b0 +80010418: 00000717 auipc a4,0x0 +8001041c: 07873707 fld fa4,120(a4) # 80010490 +80010420: 00000e13 li t3,0 +80010424: 00200313 li t1,2 +80010428: 00000513 li a0,0 +8001042c: fff60f13 addi t5,a2,-1 +80010430: 003e1593 slli a1,t3,0x3 +80010434: 00b785b3 add a1,a5,a1 +80010438: 00000713 li a4,0 +8001043c: 006708b3 add a7,a4,t1 +80010440: d21887d3 fcvt.d.wu fa5,a7 +80010444: 00858593 addi a1,a1,8 +80010448: 00170713 addi a4,a4,1 +8001044c: 12e7f7d3 fmul.d fa5,fa5,fa4 +80010450: fef5bc27 fsd fa5,-8(a1) +80010454: fec764e3 bltu a4,a2,8001043c +80010458: 00000713 li a4,0 +8001045c: 00060463 beqz a2,80010464 +80010460: 000f0713 mv a4,t5 +80010464: 00170713 addi a4,a4,1 +80010468: 00150513 addi a0,a0,1 +8001046c: 00e30333 add t1,t1,a4 +80010470: 010e0e33 add t3,t3,a6 +80010474: fac56ee3 bltu a0,a2,80010430 +80010478: 00200713 li a4,2 +8001047c: 03d77733 remu a4,a4,t4 +80010480: e2e418e3 bne s0,a4,800102b0 +80010484: e99ff06f j 8001031c Disassembly of section .sdata: @@ -387,7 +386,7 @@ Disassembly of section .sdata: 80010494: 20c4 fld fs1,128(s1) 80010496: 4009 c.li zero,2 80010498: a9fc fsd fa5,208(a1) -8001049a: d2f1 beqz a3,8001045e +8001049a: d2f1 beqz a3,8001045e 8001049c: 624d lui tp,0x13 8001049e: 3f50 fld fa2,184(a4) @@ -397,20 +396,20 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s 
ft8,fa0,fs5,ft5,rmm - a: 3920 fld fs0,112(a0) - c: 322e fld ft4,232(sp) - e: 302e fld ft0,232(sp) - ... + a: 3120 fld fs0,96(a0) + c: 2e30 fld fa2,88(a2) + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2f41 jal 790 + 0: 3341 jal fffffd80 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 - c: 0025 c.nop 9 + c: 0029 c.nop 10 e: 0000 unimp 10: 1004 addi s1,sp,32 12: 7205 lui tp,0xfffe1 @@ -424,3 +423,5 @@ Disassembly of section .riscv.attributes: 26: 3266 fld ft4,120(sp) 28: 3070 fld fa2,224(s0) 2a: 645f 7032 0030 0x307032645f + 30: 0108 addi a0,sp,128 + 32: 0b0a slli s6,s6,0x2 diff --git a/sw/banshee/tests/dump/multi_cluster.dump b/sw/banshee/tests/dump/multi_cluster.dump index 5ac2efa9..edb406b5 100644 --- a/sw/banshee/tests/dump/multi_cluster.dump +++ b/sw/banshee/tests/dump/multi_cluster.dump @@ -7,7 +7,7 @@ Disassembly of section .text: 80010000 <_start>: 80010000: f1402573 csrr a0,mhartid 80010004: 400005b7 lui a1,0x40000 -80010008: 04058593 addi a1,a1,64 # 40000040 +80010008: 04058593 addi a1,a1,64 # 40000040 8001000c: 0005a583 lw a1,0(a1) 80010010: 00451293 slli t0,a0,0x4 80010014: 20000337 lui t1,0x20000 @@ -19,12 +19,12 @@ Disassembly of section .text: Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2d41 jal 690 + 0: 3141 jal fffffc80 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 - c: 00000023 sb zero,0(zero) # 0 + c: 00000027 0x27 10: 7205 lui tp,0xfffe1 12: 3376 fld ft6,376(sp) 14: 6932 flw fs2,12(sp) @@ -36,3 +36,5 @@ Disassembly of section .riscv.attributes: 24: 3266 fld ft4,120(sp) 26: 3070 fld fa2,224(s0) 28: 645f 7032 0030 0x307032645f + 2e: 0108 addi a0,sp,128 + 30: 0b0a slli s6,s6,0x2 diff --git a/sw/banshee/tests/dump/multi_core.dump b/sw/banshee/tests/dump/multi_core.dump index f1860cf4..eea97fa7 100644 --- a/sw/banshee/tests/dump/multi_core.dump +++ 
b/sw/banshee/tests/dump/multi_core.dump @@ -15,12 +15,12 @@ Disassembly of section .text: Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2d41 jal 690 + 0: 3141 jal fffffc80 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 - c: 00000023 sb zero,0(zero) # 0 + c: 00000027 0x27 10: 7205 lui tp,0xfffe1 12: 3376 fld ft6,376(sp) 14: 6932 flw fs2,12(sp) @@ -32,3 +32,5 @@ Disassembly of section .riscv.attributes: 24: 3266 fld ft4,120(sp) 26: 3070 fld fa2,224(s0) 28: 645f 7032 0030 0x307032645f + 2e: 0108 addi a0,sp,128 + 30: 0b0a slli s6,s6,0x2 diff --git a/sw/banshee/tests/matmul/gen_data.py b/sw/banshee/tests/matmul/gen_data.py index 5b1ff231..753c5439 100644 --- a/sw/banshee/tests/matmul/gen_data.py +++ b/sw/banshee/tests/matmul/gen_data.py @@ -33,6 +33,12 @@ def emit(name, array): print(" .word 0x%s" % s) +print("# Copyright 2020 ETH Zurich and University of Bologna.") +print( + "# Licensed under the Apache License, Version 2.0, see LICENSE for details." +) +print("# SPDX-License-Identifier: Apache-2.0") +print() print(".section .l1,\"aw\",@progbits") emit("input_size", np.array(N, dtype=np.uint32)) # emit("input_A", A) diff --git a/sw/banshee/tests/runtime/atomic.h b/sw/banshee/tests/runtime/atomic.h new file mode 100644 index 00000000..43e145b7 --- /dev/null +++ b/sw/banshee/tests/runtime/atomic.h @@ -0,0 +1,26 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \ + static __always_inline void atomic##prefix##_##op(c_type i, \ + atomic##prefix##_t *v) { \ + __asm__ __volatile__(" amo" #asm_op "." 
#asm_type " zero, %1, %0" \ + : "+A"(v->counter) \ + : "r"(I) \ + : "memory"); \ + } + +#ifdef CONFIG_GENERIC_ATOMIC64 +#define ATOMIC_OPS(op, asm_op, I) ATOMIC_OP(op, asm_op, I, w, int, ) +#else +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_OP(op, asm_op, I, w, int, ) \ + ATOMIC_OP(op, asm_op, I, d, long, 64) +#endif + +ATOMIC_OPS(add, add, i) +ATOMIC_OPS(sub, add, -i) +ATOMIC_OPS(and, and, i) +ATOMIC_OPS(or, or, i) +ATOMIC_OPS(xor, xor, i) diff --git a/sw/banshee/tests/runtime/billywig_crt0.S b/sw/banshee/tests/runtime/billywig_crt0.S new file mode 100644 index 00000000..752366f1 --- /dev/null +++ b/sw/banshee/tests/runtime/billywig_crt0.S @@ -0,0 +1,52 @@ +# Copyright 2020 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 + +.globl _start +.section .text.init; +_start: + # Initialize global pointer + .option push + .option norelax + 1:auipc gp, %pcrel_hi(__global_pointer$) + addi gp, gp, %pcrel_lo(1b) + .option pop + /* reset vector */ + j reset_vector +reset_vector: + la sp, tcdm_end_address_reg # load stack top from peripheral register + lw sp, 0(sp) + csrr a0, mhartid # get hart id + lw t0, cluster_base_hart_id_reg + sub a0, a0, t0 # subtract cluster base hartid + slli t0, a0, 3 # misalign stacks in the TCDM + sub sp, sp, t0 + slli t0, t0, 6 # set some stack-space aside for each hart + sub sp, sp, t0 + mv tp, sp # place thread pointer on top of stack + li t0, 1 + slli t0, t0, 9 + sub tp, tp, t0 # subtract stack-size again + la t0, nr_cores_address_reg + lw a1, 0(t0) # load number of cores + call main # main(core_id, core_num) + slli a0, a0, 1 + ori a0, a0, 1 + j eoc +fail: + li t0, 0xFFFFFFFF + xor a0, a0, t0 + j eoc +eoc: + csrr t0, mhartid + bnez t0, halt # only write exit code for core 0 + la t0, scratch_reg + sw a0, 0(t0) +halt: + wfi + j halt + +.globl atomic_barrier +.section .l1,"aw",@progbits +atomic_barrier: + .word 0 diff --git 
a/sw/banshee/tests/runtime/billywig_runtime.h b/sw/banshee/tests/runtime/billywig_runtime.h new file mode 100644 index 00000000..d78bc782 --- /dev/null +++ b/sw/banshee/tests/runtime/billywig_runtime.h @@ -0,0 +1,184 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#pragma once +#include +#include + +#include "encoding.h" + +#define PULP_NOINLINE __attribute__((noinline)) + +extern char l1_alloc_base; +extern uint32_t atomic_barrier; +extern uint32_t wake_up_reg; + +typedef uint32_t pulp_id_t; +typedef uint32_t pulp_timer_t; + +/// Obtain the number of cores in the current cluster. +static inline pulp_id_t pulp_get_core_count() { + extern uint32_t nr_cores_address_reg; + return nr_cores_address_reg; +} + +/// Obtain the ID of the current core. +static inline pulp_id_t pulp_get_core_id() { + pulp_id_t r; + asm volatile("csrr %0, mhartid" : "=r"(r)); + return r; +} + +/// Obtain a monotonically increasing cycle count. +static inline pulp_timer_t pulp_get_timer() { return read_csr(mcycle); } + +/// A cluster-local barrier. +static inline void pulp_barrier() { + // // The following is a software-only barrier using AMOs. 
+ // uint32_t core_id = pulp_get_core_id(); + // uint32_t core_count = pulp_get_core_count(); + // uint32_t mask = 1 << core_id; + // uint32_t others = ((1 << core_count) - 1) ^ mask; + // if (core_id == 0) { + // while ((__atomic_load_n(&atomic_barrier, __ATOMIC_RELAXED) & others) + // != others); + // __atomic_or_fetch(&atomic_barrier, mask, __ATOMIC_RELAXED); + // while ((__atomic_load_n(&atomic_barrier, __ATOMIC_RELAXED) & others) + // != 0); + // __atomic_and_fetch(&atomic_barrier, ~mask, __ATOMIC_RELAXED); + // } else { + // while ((__atomic_load_n(&atomic_barrier, __ATOMIC_RELAXED) & 1) != + // 0); + // __atomic_or_fetch(&atomic_barrier, mask, __ATOMIC_RELAXED); + // while ((__atomic_load_n(&atomic_barrier, __ATOMIC_RELAXED) & 1) != + // 1); + // __atomic_and_fetch(&atomic_barrier, ~mask, __ATOMIC_RELAXED); + // } + + // The following uses the hardware barrier. + extern uint32_t barrier_reg; + uint32_t tmp; + asm volatile( + "lw %[tmp], 0(%[addr]) \n" + "mv zero, %[tmp] \n" + : [ tmp ] "=r"(tmp) + : [ addr ] "r"(&barrier_reg) + : "memory"); +} + +/// The different SSR data movers. +enum ssr_dm { SSR_DM0 = 0, SSR_DM1 = 1 }; + +/// The different dimensions. +enum ssr_dim { + SSR_1D = 0, + SSR_2D = 1, + SSR_3D = 2, + SSR_4D = 3, +}; + +/// The SSR configuration registers. +typedef union { + uint32_t value __attribute__((aligned(8))); +} ssr_reg32_t; +typedef struct { + ssr_reg32_t status; + ssr_reg32_t repeat; + ssr_reg32_t bounds[4]; + ssr_reg32_t stride[4]; + ssr_reg32_t _reserved4[14]; + ssr_reg32_t rptr[4]; + ssr_reg32_t wptr[4]; +} ssr_cfg_t; +// extern volatile ssr_cfg_t ssr_config_reg[2]; // linker-provided address +static volatile ssr_cfg_t *const ssr_config_reg = (void *)0x204800; + +// Configure an SSR data mover for a 1D loop nest. 
+static inline void pulp_ssr_loop_1d(enum ssr_dm dm, uint16_t b0, uint16_t i0) {
+    // The bound register receives b0 - 1, not b0 itself.
+    --b0;
+    ssr_config_reg[dm].bounds[0].value = b0;
+    // Innermost stride is the increment as-is: no inner loop precedes it.
+    ssr_config_reg[dm].stride[0].value = i0;
+}
+
+// Configure an SSR data mover for a 2D loop nest.
+static inline void pulp_ssr_loop_2d(enum ssr_dm dm, uint16_t b0, uint16_t b1,
+                                    uint16_t i0, uint16_t i1) {
+    --b0;
+    --b1;
+    ssr_config_reg[dm].bounds[0].value = b0;
+    ssr_config_reg[dm].bounds[1].value = b1;
+    uint16_t a = 0;
+    ssr_config_reg[dm].stride[0].value = i0 - a;
+    // a = span of the inner loop; widen, as uint16_t * uint16_t promotes to int.
+    a += (uint32_t)i0 * b0;
+    ssr_config_reg[dm].stride[1].value = i1 - a;
+}
+
+// Configure an SSR data mover for a 3D loop nest.
+static inline void pulp_ssr_loop_3d(enum ssr_dm dm, uint16_t b0, uint16_t b1,
+                                    uint16_t b2, uint16_t i0, uint16_t i1,
+                                    uint16_t i2) {
+    --b0;
+    --b1;
+    --b2;
+    ssr_config_reg[dm].bounds[0].value = b0;
+    ssr_config_reg[dm].bounds[1].value = b1;
+    ssr_config_reg[dm].bounds[2].value = b2;
+    uint16_t a = 0;
+    ssr_config_reg[dm].stride[0].value = i0 - a;
+    // a = span of inner loops; widen, as uint16_t * uint16_t promotes to int.
+    a += (uint32_t)i0 * b0;
+    ssr_config_reg[dm].stride[1].value = i1 - a;
+    a += (uint32_t)i1 * b1;
+    ssr_config_reg[dm].stride[2].value = i2 - a;
+}
+
+// Configure an SSR data mover for a 4D loop nest.
+// b0: Inner-most bound (limit of loop)
+// b3: Outer-most bound (limit of loop)
+// i0: increment size of inner-most loop
+static inline void pulp_ssr_loop_4d(enum ssr_dm dm, uint16_t b0, uint16_t b1,
+                                    uint16_t b2, uint16_t b3, uint16_t i0,
+                                    uint16_t i1, uint16_t i2, uint16_t i3) {
+    --b0;
+    --b1;
+    --b2;
+    --b3;
+    ssr_config_reg[dm].bounds[0].value = b0;
+    ssr_config_reg[dm].bounds[1].value = b1;
+    ssr_config_reg[dm].bounds[2].value = b2;
+    ssr_config_reg[dm].bounds[3].value = b3;
+    uint16_t a = 0;
+    ssr_config_reg[dm].stride[0].value = i0 - a;
+    // a = span of inner loops; widen, as uint16_t * uint16_t promotes to int.
+    a += (uint32_t)i0 * b0;
+    ssr_config_reg[dm].stride[1].value = i1 - a;
+    a += (uint32_t)i1 * b1;
+    ssr_config_reg[dm].stride[2].value = i2 - a;
+    a += (uint32_t)i2 * b2;
+    ssr_config_reg[dm].stride[3].value = i3 - a;
+}
+
+/// Enable SSR.
+static inline void pulp_ssr_enable() { asm volatile("csrsi 0x7C0, 1"); }
+
+/// Disable SSR.
+static inline void pulp_ssr_disable() { asm volatile("csrci 0x7C0, 1"); }
+
+/// Start a streaming read.
+static inline void pulp_ssr_read(enum ssr_dm dm, enum ssr_dim dim,
+                                 volatile void *ptr) {
+    ssr_config_reg[dm].rptr[dim].value = (uint32_t)ptr;
+}
+
+/// Start a streaming write.
+static inline void pulp_ssr_write(enum ssr_dm dm, enum ssr_dim dim,
+                                  volatile void *ptr) {
+    ssr_config_reg[dm].wptr[dim].value = (uint32_t)ptr;
+}
+
+/// Synchronize the integer and float pipelines.
+static inline void fpu_fence() { asm volatile("fmv.x.w zero, fa0"); }
diff --git a/sw/banshee/tests/runtime/bowtruckle.ld b/sw/banshee/tests/runtime/bowtruckle.ld
new file mode 100644
index 00000000..ee481aa3
--- /dev/null
+++ b/sw/banshee/tests/runtime/bowtruckle.ld
@@ -0,0 +1,50 @@
+/* Copyright 2020 ETH Zurich and University of Bologna. */
+/* Licensed under the Apache License, Version 2.0, see LICENSE for details. */
+/* SPDX-License-Identifier: Apache-2.0 */
+
+OUTPUT_ARCH( "riscv" )
+ENTRY(_start)
+
+SECTIONS
+{
+  . = 0x0;
+  .l1 : { *(.l1) }
+  l1_alloc_base = ALIGN(0x10);
+  tcdm_start_address_reg = 0x800000;
+  tcdm_end_address_reg = 0x800008;
+  nr_cores_address_reg = 0x800010;
+  fetch_enable_reg = 0x800018;
+  /* remap this to the magic address which can be observed by the TB*/
+  scratch_reg = 0xD0000000;
+  wake_up_reg = 0x800028;
+  cycle_count_reg = 0x800030;
+  barrier_reg = 0x800038;
+  ssr_config_reg = 0x204800;
+  fake_uart = 0xC0000000;
+  . = 0xD0000000;
+  .eoc_address (NOLOAD): { *(.eoc_address) }
+  . = 0x80010000;
+  .text : {
+    *(.text.init)
+    *(.text)
+  }
+  . = ALIGN(0x10);
+  .data : { *(.data)}
+  .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+  .rodata1 : { *(.rodata1) }
+  .sdata2 :
+  {
+    *(.sdata2 .sdata2.* .gnu.linkonce.s2.*)
+  }
+  .sdata :
+  {
+    __global_pointer$ = . + 0x800;
+    *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*)
+    *(.sdata .sdata.* .gnu.linkonce.s.*)
+  }
+  . = .;
+  __bss_start = .;
+  .sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
+  .bss : { *(.bss) }
+  __bss_end = .;
+}
diff --git a/sw/banshee/tests/runtime/crt0.S b/sw/banshee/tests/runtime/crt0.S
new file mode 100644
index 00000000..f41ec88a
--- /dev/null
+++ b/sw/banshee/tests/runtime/crt0.S
@@ -0,0 +1,113 @@
+# Copyright 2020 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+.globl _start
+.section .text.init;
+_start:
+    ## Initialize global pointer (relaxation off so the gp setup is not itself gp-relative)
+    .option push
+    .option norelax
+    1:auipc gp, %pcrel_hi(__global_pointer$)
+    addi gp, gp, %pcrel_lo(1b)
+    .option pop
+    /* reset vector */
+    j reset_vector
+.section .text;
+reset_vector:
+    li x1, 0 # clear integer registers; x2 (sp) is loaded below, x3 (gp) was set in _start
+    li x4, 0
+    li x5, 0
+    li x6, 0
+    li x7, 0
+    li x8, 0
+    li x9, 0
+    li x10, 0
+    li x11, 0
+    li x12, 0
+    li x13, 0
+    li x14, 0
+    li x15, 0
+    li x16, 0
+    li x17, 0
+    li x18, 0
+    li x19, 0
+    li x20, 0
+    li x21, 0
+    li x22, 0
+    li x23, 0
+    li x24, 0
+    li x25, 0
+    li x26, 0
+    li x27, 0
+    li x28, 0
+    li x29, 0
+    li x30, 0
+    li x31, 0
+    ## get system info
+    la sp, tcdm_end_address_reg # load stack top from peripheral register
+    lw sp, 0(sp)
+    ## get hart id and number of cores in the cluster
+    csrr a0, mhartid
+    la a1, nr_cores_address_reg # get the number of cores per cluster
+    lw a1, 0(a1)
+    # check if the core has the F-extension
+    csrr t0, misa
+    andi t0, t0, (1 << 5)
+    beqz t0, 1f
+    ## clear FP registers (all of f0-f31; f2 and f3 have no reserved role)
+    fmv.s.x f0, x0
+    fmv.s.x f1, x0
+    fmv.s.x f2, x0
+    fmv.s.x f3, x0
+    fmv.s.x f4, x0
+    fmv.s.x f5, x0
+    fmv.s.x f6, x0
+    fmv.s.x f7, x0
+    fmv.s.x f8, x0
+    fmv.s.x f9, x0
+    fmv.s.x f10, x0
+    fmv.s.x f11, x0
+    fmv.s.x f12, x0
+    fmv.s.x f13, x0
+    fmv.s.x f14, x0
+    fmv.s.x f15, x0
+    fmv.s.x f16, x0
+    fmv.s.x f17, x0
+    fmv.s.x f18, x0
+    fmv.s.x f19, x0
+    fmv.s.x f20, x0
+    fmv.s.x f21, x0
+    fmv.s.x f22, x0
+    fmv.s.x f23, x0
+    fmv.s.x f24, x0
+    fmv.s.x f25, x0
+    fmv.s.x f26, x0
+    fmv.s.x f27, x0
+    fmv.s.x f28, x0
+    fmv.s.x f29, x0
+    fmv.s.x f30, x0
+    fmv.s.x f31, x0
+1: la t0, cluser_base_hart_id_reg # NOTE(review): spelled cluser here, billywig_crt0.S uses cluster_base_hart_id_reg - confirm against the linker script
+    lw a2, 0(t0)
+    sub t0, a0, a2 # hart index relative to the value just loaded into a2
+    slli t0, t0, 12 # set some stack-space aside for each hart
+    sub sp, sp, t0
+run:
+    call main # a0 = mhartid, a1 = value read from nr_cores_address_reg
+eoc:
+    la t0, eoc_address
+    sw a0, 0(t0) # publish the return value of main, then keep rewriting it forever
+    jal x0, eoc
+fail:
+    la t0, eoc_address
+    sw a0, 0(t0)
+    jal x0, eoc
+
+.section ".eoc_address","aw",@progbits
+.align 6
+.globl eoc_address
+eoc_address: .dword 0
+
+.section .text
+.section .data
diff --git a/sw/banshee/tests/runtime/crt0.S.o b/sw/banshee/tests/runtime/crt0.S.o new file mode 100644 index 00000000..2e3f065e Binary files /dev/null and b/sw/banshee/tests/runtime/crt0.S.o differ diff --git a/sw/banshee/tests/runtime/encoding.h b/sw/banshee/tests/runtime/encoding.h new file mode 100644 index 00000000..abf8845d --- /dev/null +++ b/sw/banshee/tests/runtime/encoding.h @@ -0,0 +1,223 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef RISCV_CSR_ENCODING_H +#define RISCV_CSR_ENCODING_H + +#define MSTATUS_UIE 0x00000001 +#define MSTATUS_SIE 0x00000002 +#define MSTATUS_HIE 0x00000004 +#define MSTATUS_MIE 0x00000008 +#define MSTATUS_UPIE 0x00000010 +#define MSTATUS_SPIE 0x00000020 +#define MSTATUS_HPIE 0x00000040 +#define MSTATUS_MPIE 0x00000080 +#define MSTATUS_SPP 0x00000100 +#define MSTATUS_HPP 0x00000600 +#define MSTATUS_MPP 0x00001800 +#define MSTATUS_FS 0x00006000 +#define MSTATUS_XS 0x00018000 +#define MSTATUS_MPRV 0x00020000 +#define MSTATUS_SUM 0x00040000 +#define MSTATUS_MXR 0x00080000 +#define MSTATUS_TVM 0x00100000 +#define MSTATUS_TW 0x00200000 +#define MSTATUS_TSR 0x00400000 +#define MSTATUS32_SD 0x80000000 +#define MSTATUS_UXL 0x0000000300000000 +#define MSTATUS_SXL 0x0000000C00000000 +#define MSTATUS64_SD 0x8000000000000000 + +#define SSTATUS_UIE 0x00000001 +#define SSTATUS_SIE 0x00000002 +#define SSTATUS_UPIE 0x00000010 +#define SSTATUS_SPIE 0x00000020 +#define SSTATUS_SPP 0x00000100 +#define SSTATUS_FS 0x00006000 +#define SSTATUS_XS 0x00018000 +#define SSTATUS_SUM 0x00040000 +#define SSTATUS_MXR 0x00080000 +#define SSTATUS32_SD 0x80000000 +#define SSTATUS_UXL 0x0000000300000000 +#define SSTATUS64_SD 0x8000000000000000 + +#define DCSR_XDEBUGVER (3U<<30) +#define DCSR_NDRESET (1<<29) +#define DCSR_FULLRESET (1<<28) +#define DCSR_EBREAKM (1<<15) +#define DCSR_EBREAKH (1<<14) +#define DCSR_EBREAKS (1<<13) 
+#define DCSR_EBREAKU (1<<12) +#define DCSR_STOPCYCLE (1<<10) +#define DCSR_STOPTIME (1<<9) +#define DCSR_CAUSE (7<<6) +#define DCSR_DEBUGINT (1<<5) +#define DCSR_HALT (1<<3) +#define DCSR_STEP (1<<2) +#define DCSR_PRV (3<<0) + +#define DCSR_CAUSE_NONE 0 +#define DCSR_CAUSE_SWBP 1 +#define DCSR_CAUSE_HWBP 2 +#define DCSR_CAUSE_DEBUGINT 3 +#define DCSR_CAUSE_STEP 4 +#define DCSR_CAUSE_HALT 5 + +#define MCONTROL_TYPE(xlen) (0xfULL<<((xlen)-4)) +#define MCONTROL_DMODE(xlen) (1ULL<<((xlen)-5)) +#define MCONTROL_MASKMAX(xlen) (0x3fULL<<((xlen)-11)) + +#define MCONTROL_SELECT (1<<19) +#define MCONTROL_TIMING (1<<18) +#define MCONTROL_ACTION (0x3f<<12) +#define MCONTROL_CHAIN (1<<11) +#define MCONTROL_MATCH (0xf<<7) +#define MCONTROL_M (1<<6) +#define MCONTROL_H (1<<5) +#define MCONTROL_S (1<<4) +#define MCONTROL_U (1<<3) +#define MCONTROL_EXECUTE (1<<2) +#define MCONTROL_STORE (1<<1) +#define MCONTROL_LOAD (1<<0) + +#define MCONTROL_TYPE_NONE 0 +#define MCONTROL_TYPE_MATCH 2 + +#define MCONTROL_ACTION_DEBUG_EXCEPTION 0 +#define MCONTROL_ACTION_DEBUG_MODE 1 +#define MCONTROL_ACTION_TRACE_START 2 +#define MCONTROL_ACTION_TRACE_STOP 3 +#define MCONTROL_ACTION_TRACE_EMIT 4 + +#define MCONTROL_MATCH_EQUAL 0 +#define MCONTROL_MATCH_NAPOT 1 +#define MCONTROL_MATCH_GE 2 +#define MCONTROL_MATCH_LT 3 +#define MCONTROL_MATCH_MASK_LOW 4 +#define MCONTROL_MATCH_MASK_HIGH 5 + +#define MIP_SSIP (1 << IRQ_S_SOFT) +#define MIP_HSIP (1 << IRQ_H_SOFT) +#define MIP_MSIP (1 << IRQ_M_SOFT) +#define MIP_STIP (1 << IRQ_S_TIMER) +#define MIP_HTIP (1 << IRQ_H_TIMER) +#define MIP_MTIP (1 << IRQ_M_TIMER) +#define MIP_SEIP (1 << IRQ_S_EXT) +#define MIP_HEIP (1 << IRQ_H_EXT) +#define MIP_MEIP (1 << IRQ_M_EXT) + +#define SIP_SSIP MIP_SSIP +#define SIP_STIP MIP_STIP + +#define PRV_U 0 +#define PRV_S 1 +#define PRV_H 2 +#define PRV_M 3 + +#define SATP32_MODE 0x80000000 +#define SATP32_ASID 0x7FC00000 +#define SATP32_PPN 0x003FFFFF +#define SATP64_MODE 0xF000000000000000 +#define SATP64_ASID 
0x0FFFF00000000000 +#define SATP64_PPN 0x00000FFFFFFFFFFF + +#define SATP_MODE_OFF 0 +#define SATP_MODE_SV32 1 +#define SATP_MODE_SV39 8 +#define SATP_MODE_SV48 9 +#define SATP_MODE_SV57 10 +#define SATP_MODE_SV64 11 + +#define PMP_R 0x01 +#define PMP_W 0x02 +#define PMP_X 0x04 +#define PMP_A 0x18 +#define PMP_L 0x80 +#define PMP_SHIFT 2 + +#define PMP_TOR 0x08 +#define PMP_NA4 0x10 +#define PMP_NAPOT 0x18 + +#define IRQ_S_SOFT 1 +#define IRQ_H_SOFT 2 +#define IRQ_M_SOFT 3 +#define IRQ_S_TIMER 5 +#define IRQ_H_TIMER 6 +#define IRQ_M_TIMER 7 +#define IRQ_S_EXT 9 +#define IRQ_H_EXT 10 +#define IRQ_M_EXT 11 +#define IRQ_COP 12 +#define IRQ_HOST 13 + +#define DEFAULT_RSTVEC 0x00001000 +#define CLINT_BASE 0x02000000 +#define CLINT_SIZE 0x000c0000 +#define EXT_IO_BASE 0x40000000 +#define DRAM_BASE 0x80000000 + +/* page table entry (PTE) fields */ +#define PTE_V 0x001 /* Valid */ +#define PTE_R 0x002 /* Read */ +#define PTE_W 0x004 /* Write */ +#define PTE_X 0x008 /* Execute */ +#define PTE_U 0x010 /* User */ +#define PTE_G 0x020 /* Global */ +#define PTE_A 0x040 /* Accessed */ +#define PTE_D 0x080 /* Dirty */ +#define PTE_SOFT 0x300 /* Reserved for Software */ + +#define PTE_PPN_SHIFT 10 + +#define PTE_TABLE(PTE) (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V) + +#ifdef __riscv + +#if __riscv_xlen == 64 +# define MSTATUS_SD MSTATUS64_SD +# define SSTATUS_SD SSTATUS64_SD +# define SATP_MODE SATP64_MODE +#else +# define MSTATUS_SD MSTATUS32_SD +# define SSTATUS_SD SSTATUS32_SD +# define SATP_MODE SATP32_MODE +#endif +#define RISCV_PGSHIFT 12 +#define RISCV_PGSIZE (1 << RISCV_PGSHIFT) + +#ifndef __ASSEMBLER__ + +#ifdef __GNUC__ + +#define read_csr(reg) ({ unsigned long __tmp; \ + asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ + __tmp; }) + +#define write_csr(reg, val) ({ \ + asm volatile ("csrw " #reg ", %0" :: "rK"(val)); }) + +#define swap_csr(reg, val) ({ unsigned long __tmp; \ + asm volatile ("csrrw %0, " #reg ", %1" : "=r"(__tmp) : "rK"(val)); \ + __tmp; }) + 
+#define set_csr(reg, bit) ({ unsigned long __tmp; \ + asm volatile ("csrrs %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ + __tmp; }) + +#define clear_csr(reg, bit) ({ unsigned long __tmp; \ + asm volatile ("csrrc %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ + __tmp; }) + +#define rdtime() read_csr(time) +#define rdcycle() read_csr(cycle) +#define rdinstret() read_csr(instret) + +#endif + +#endif + +#endif + +#endif diff --git a/sw/banshee/tests/runtime/libsdma.h b/sw/banshee/tests/runtime/libsdma.h new file mode 100644 index 00000000..ecf2e5e3 --- /dev/null +++ b/sw/banshee/tests/runtime/libsdma.h @@ -0,0 +1,151 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include + +// 1D transfer +inline volatile static uint32_t sdma__start_oned(volatile uint32_t src_low, + volatile uint32_t src_high, + volatile uint32_t dst_low, + volatile uint32_t dst_high, + volatile uint32_t num_bytes) { + volatile register uint32_t reg_src_high asm("s2"); // 19 + volatile register uint32_t reg_src_low asm("s3"); // 18 + volatile register uint32_t reg_dst_high asm("s4"); // 21 + volatile register uint32_t reg_dst_low asm("s5"); // 20 + volatile register uint32_t reg_tf_id asm("s6"); // 22 + volatile register uint32_t reg_num_bytes asm("s7"); // 23 + + reg_src_low = src_low; + reg_src_high = src_high; + reg_dst_low = dst_low; + reg_dst_high = dst_high; + reg_num_bytes = num_bytes; + + // set source + asm volatile( + ".word (0b0000000 << 25) | \ + ( (18) << 20) | \ + ( (19) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" + : + : "r"(reg_src_high), "r"(reg_src_low)); + + // set dest + asm volatile( + ".word (0b0000001 << 25) | \ + ( (20) << 20) | \ + ( (21) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" + : + : "r"(reg_dst_high), "r"(reg_dst_low)); + + // start immediate + asm volatile( + ".word (0b0010100 << 25) | \ + ( (23) << 15) | \ + ( 
0b001 << 12) | \ + ( (22) << 7) | \ + (0b0101011 << 0) \n" + : "=r"(reg_tf_id) + : "r"(reg_num_bytes)); + + return reg_tf_id; +} + +// 2D transfer: like the 1D transfer, plus source/destination strides and a repetition count +inline volatile static uint32_t sdma__start_twod( + volatile uint32_t src_low, volatile uint32_t src_high, + volatile uint32_t dst_low, volatile uint32_t dst_high, + volatile uint32_t num_bytes, volatile uint32_t src_strd, + volatile uint32_t dst_strd, volatile uint32_t num_reps) { + volatile register uint32_t reg_src_high asm("s2"); // x18 + volatile register uint32_t reg_src_low asm("s3"); // x19 + volatile register uint32_t reg_dst_high asm("s4"); // x20 + volatile register uint32_t reg_dst_low asm("s5"); // x21 + volatile register uint32_t reg_tf_id asm("s6"); // x22 + volatile register uint32_t reg_num_bytes asm("s7"); // x23 + volatile register uint32_t reg_src_strd asm("s8"); // x24 + volatile register uint32_t reg_dst_strd asm("s9"); // x25 + volatile register uint32_t reg_num_reps asm("s10"); // x26 + + reg_src_low = src_low; + reg_src_high = src_high; + reg_dst_low = dst_low; + reg_dst_high = dst_high; + reg_num_bytes = num_bytes; + reg_src_strd = src_strd; + reg_dst_strd = dst_strd; + reg_num_reps = num_reps; + + // set source (rs2 = x18 = src_high, rs1 = x19 = src_low) + asm volatile( + ".word (0b0000000 << 25) | \ + ( (18) << 20) | \ + ( (19) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" + : + : "r"(reg_src_high), "r"(reg_src_low)); + + // set dest (rs2 = x20 = dst_high, rs1 = x21 = dst_low) + asm volatile( + ".word (0b0000001 << 25) | \ + ( (20) << 20) | \ + ( (21) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" + : + : "r"(reg_dst_high), "r"(reg_dst_low)); + + // strides (rs2 = x25 = dst_strd, rs1 = x24 = src_strd) + asm volatile( + ".word (0b0000101 << 25) | \ + ( (25) << 20) | \ + ( (24) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" + : + : "r"(reg_dst_strd), "r"(reg_src_strd)); + + // num repetitions (rs1 = x26) + asm volatile( + ".word (0b0000110 << 25) | \ + ( (26) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" + : + : "r"(reg_num_reps)); + + // start immediate + asm volatile( + ".word (0b0010110 << 25) | \ + ( (23) <<
15) | \ + ( 0b001 << 12) | \ + ( (22) << 7) | \ + (0b0101011 << 0) \n" + : "=r"(reg_tf_id) + : "r"(reg_num_bytes)); + + return reg_tf_id; +} + +// poll until DMA is idle +static inline void sdma__wait_for_idle() { + volatile register uint32_t arg0 asm("a0"); // x10 + asm volatile( + "0:" + "nop \n nop \n nop \n nop \n" + "li t0, 1 \n" + ".word (0b0010110 << 25) | \ + ( (10) << 15) | \ + ( 0b010 << 12) | \ + ( (5) << 7) | \ + (0b0101011 << 0) \n" + "nop \n nop \n nop \n nop \n" + "bne t0, zero, 0b \n" ::"r"(arg0) + : "t0", "memory"); +} diff --git a/sw/banshee/tests/runtime/link.ld b/sw/banshee/tests/runtime/link.ld new file mode 100644 index 00000000..2fc53b2c --- /dev/null +++ b/sw/banshee/tests/runtime/link.ld @@ -0,0 +1,51 @@ +/* Copyright 2020 ETH Zurich and University of Bologna. */ +/* Licensed under the Apache License, Version 2.0, see LICENSE for details. */ +/* SPDX-License-Identifier: Apache-2.0 */ + +OUTPUT_ARCH( "riscv" ) +ENTRY(_start) + +SECTIONS +{ + ROM_BASE = 0x80000000; /* ... but actually position independent */ + . = 0x0; + .l1 : { *(.l1) } + l1_alloc_base = ALIGN(0x10); + tcdm_start_address_reg = 0x40000000; + tcdm_end_address_reg = 0x40000008; + nr_cores_address_reg = 0x40000010; + fetch_enable_reg = 0x40000018; + scratch_reg = 0x40000020; + wake_up_reg = 0x40000028; + cycle_count_reg = 0x40000030; + barrier_reg = 0x40000038; + cluster_base_hart_id_reg = 0x40000040; + ssr_config_reg = 0x204800; + fake_uart = 0xC0000000; + . = 0xD0000000; + .eoc_address (NOLOAD): { *(.eoc_address) } + . = 0x80010000; + .text : { + *(.text.init) + *(.text) + } + . = ALIGN(0x10); + .data : { *(.data)} + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .sdata2 : + { + *(.sdata2 .sdata2.* .gnu.linkonce.s2.*) + } + .sdata : + { + __global_pointer$ = . + 0x800; + *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*) + *(.sdata .sdata.* .gnu.linkonce.s.*) + } + . 
= .; + __bss_start = .; + .sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) } + .bss : { *(.bss) } + __bss_end = .; +} diff --git a/sw/banshee/tests/runtime/printf.c b/sw/banshee/tests/runtime/printf.c new file mode 100644 index 00000000..d837f34f --- /dev/null +++ b/sw/banshee/tests/runtime/printf.c @@ -0,0 +1,765 @@ +/////////////////////////////////////////////////////////////////////////////// +// \author (c) Marco Paland (info@paland.com) +// 2014-2019, PALANDesign Hannover, Germany +// +// \license The MIT License (MIT) +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// \brief Tiny printf, sprintf and (v)snprintf implementation, optimized for speed on +// embedded systems with a very limited resources. These routines are thread +// safe and reentrant! +// Use this instead of the bloated standard/newlib printf cause these use +// malloc for printf (and may not be thread safe). 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include <stdbool.h> +#include <stdint.h> + +#include "printf.h" + + +// define this globally (e.g. gcc -DPRINTF_INCLUDE_CONFIG_H ...) to include the +// printf_config.h header file +// default: undefined +#ifdef PRINTF_INCLUDE_CONFIG_H +#include "printf_config.h" +#endif + + +// 'ntoa' conversion buffer size, this must be big enough to hold one converted +// numeric number including padded zeros (dynamically created on stack) +// default: 32 byte +#ifndef PRINTF_NTOA_BUFFER_SIZE +#define PRINTF_NTOA_BUFFER_SIZE 32U +#endif + +// 'ftoa' conversion buffer size, this must be big enough to hold one converted +// float number including padded zeros (dynamically created on stack) +// default: 32 byte +#ifndef PRINTF_FTOA_BUFFER_SIZE +#define PRINTF_FTOA_BUFFER_SIZE 32U +#endif + +// support for the floating point type (%f) +// default: activated +#ifndef PRINTF_DISABLE_SUPPORT_FLOAT +#define PRINTF_SUPPORT_FLOAT +#endif + +// support for the long long types (%llu or %p) +// default: activated +#ifndef PRINTF_DISABLE_SUPPORT_LONG_LONG +#define PRINTF_SUPPORT_LONG_LONG +#endif + +// support for the ptrdiff_t type (%t) +// ptrdiff_t is normally defined in <stddef.h> as long or long long type +// default: activated +#ifndef PRINTF_DISABLE_SUPPORT_PTRDIFF_T +#define PRINTF_SUPPORT_PTRDIFF_T +#endif + +/////////////////////////////////////////////////////////////////////////////// + +// internal flag definitions +#define FLAGS_ZEROPAD (1U << 0U) +#define FLAGS_LEFT (1U << 1U) +#define FLAGS_PLUS (1U << 2U) +#define FLAGS_SPACE (1U << 3U) +#define FLAGS_HASH (1U << 4U) +#define FLAGS_UPPERCASE (1U << 5U) +#define FLAGS_CHAR (1U << 6U) +#define FLAGS_SHORT (1U << 7U) +#define FLAGS_LONG (1U << 8U) +#define FLAGS_LONG_LONG (1U << 9U) +#define FLAGS_PRECISION (1U << 10U) + + +// output function type +typedef void (*out_fct_type)(char character, void* buffer, size_t idx, size_t maxlen); + + +// wrapper (used as buffer) for
output function type +typedef struct { + void (*fct)(char character, void* arg); + void* arg; +} out_fct_wrap_type; + + +// internal buffer output +static inline void _out_buffer(char character, void* buffer, size_t idx, size_t maxlen) +{ + if (idx < maxlen) { + ((char*)buffer)[idx] = character; + } +} + + +// internal null output +static inline void _out_null(char character, void* buffer, size_t idx, size_t maxlen) +{ + (void)character; (void)buffer; (void)idx; (void)maxlen; +} + + +// internal _putchar wrapper +static inline void _out_char(char character, void* buffer, size_t idx, size_t maxlen) +{ + (void)buffer; (void)idx; (void)maxlen; + if (character) { + _putchar(character); + } +} + + +// internal output function wrapper +static inline void _out_fct(char character, void* buffer, size_t idx, size_t maxlen) +{ + (void)idx; (void)maxlen; + if (character) { + // buffer is the output fct pointer + ((out_fct_wrap_type*)buffer)->fct(character, ((out_fct_wrap_type*)buffer)->arg); + } +} + + +// internal secure strlen +// \return The length of the string (excluding the terminating 0) limited by 'maxsize' +static inline unsigned int _strnlen_s(const char* str, size_t maxsize) +{ + const char* s; + for (s = str; *s && maxsize--; ++s); + return (unsigned int)(s - str); +} + + +// internal test if char is a digit (0-9) +// \return true if char is a digit +static inline bool _is_digit(char ch) +{ + return (ch >= '0') && (ch <= '9'); +} + + +// internal ASCII string to unsigned int conversion +static unsigned int _atoi(const char** str) +{ + unsigned int i = 0U; + while (_is_digit(**str)) { + i = i * 10U + (unsigned int)(*((*str)++) - '0'); + } + return i; +} + + +// internal itoa format +static size_t _ntoa_format(out_fct_type out, char* buffer, size_t idx, size_t maxlen, char* buf, size_t len, bool negative, unsigned int base, unsigned int prec, unsigned int width, unsigned int flags) +{ + const size_t start_idx = idx; + + // pad leading zeros + if (!(flags & 
FLAGS_LEFT)) { + if (width && (flags & FLAGS_ZEROPAD) && (negative || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) { + width--; + } + while ((len < prec) && (len < PRINTF_NTOA_BUFFER_SIZE)) { + buf[len++] = '0'; + } + while ((flags & FLAGS_ZEROPAD) && (len < width) && (len < PRINTF_NTOA_BUFFER_SIZE)) { + buf[len++] = '0'; + } + } + + // handle hash + if (flags & FLAGS_HASH) { + if (!(flags & FLAGS_PRECISION) && len && ((len == prec) || (len == width))) { + len--; + if (len && (base == 16U)) { + len--; + } + } + if ((base == 16U) && !(flags & FLAGS_UPPERCASE) && (len < PRINTF_NTOA_BUFFER_SIZE)) { + buf[len++] = 'x'; + } + else if ((base == 16U) && (flags & FLAGS_UPPERCASE) && (len < PRINTF_NTOA_BUFFER_SIZE)) { + buf[len++] = 'X'; + } + else if ((base == 2U) && (len < PRINTF_NTOA_BUFFER_SIZE)) { + buf[len++] = 'b'; + } + if (len < PRINTF_NTOA_BUFFER_SIZE) { + buf[len++] = '0'; + } + } + + if (len < PRINTF_NTOA_BUFFER_SIZE) { + if (negative) { + buf[len++] = '-'; + } + else if (flags & FLAGS_PLUS) { + buf[len++] = '+'; // ignore the space if the '+' exists + } + else if (flags & FLAGS_SPACE) { + buf[len++] = ' '; + } + } + + // pad spaces up to given width + if (!(flags & FLAGS_LEFT) && !(flags & FLAGS_ZEROPAD)) { + for (size_t i = len; i < width; i++) { + out(' ', buffer, idx++, maxlen); + } + } + + // reverse string + for (size_t i = 0U; i < len; i++) { + out(buf[len - i - 1U], buffer, idx++, maxlen); + } + + // append pad spaces up to given width + if (flags & FLAGS_LEFT) { + while (idx - start_idx < width) { + out(' ', buffer, idx++, maxlen); + } + } + + return idx; +} + + +// internal itoa for 'long' type +static size_t _ntoa_long(out_fct_type out, char* buffer, size_t idx, size_t maxlen, unsigned long value, bool negative, unsigned long base, unsigned int prec, unsigned int width, unsigned int flags) +{ + char buf[PRINTF_NTOA_BUFFER_SIZE]; + size_t len = 0U; + + // no hash for 0 values + if (!value) { + flags &= ~FLAGS_HASH; + } + + // write if precision != 0 and 
value is != 0 + if (!(flags & FLAGS_PRECISION) || value) { + do { + const char digit = (char)(value % base); + buf[len++] = digit < 10 ? '0' + digit : (flags & FLAGS_UPPERCASE ? 'A' : 'a') + digit - 10; + value /= base; + } while (value && (len < PRINTF_NTOA_BUFFER_SIZE)); + } + + return _ntoa_format(out, buffer, idx, maxlen, buf, len, negative, (unsigned int)base, prec, width, flags); +} + + +// internal itoa for 'long long' type +#if defined(PRINTF_SUPPORT_LONG_LONG) +static size_t _ntoa_long_long(out_fct_type out, char* buffer, size_t idx, size_t maxlen, unsigned long long value, bool negative, unsigned long long base, unsigned int prec, unsigned int width, unsigned int flags) +{ + char buf[PRINTF_NTOA_BUFFER_SIZE]; + size_t len = 0U; + + // no hash for 0 values + if (!value) { + flags &= ~FLAGS_HASH; + } + + // write if precision != 0 and value is != 0 + if (!(flags & FLAGS_PRECISION) || value) { + do { + const char digit = (char)(value % base); + buf[len++] = digit < 10 ? '0' + digit : (flags & FLAGS_UPPERCASE ? 
'A' : 'a') + digit - 10; + value /= base; + } while (value && (len < PRINTF_NTOA_BUFFER_SIZE)); + } + + return _ntoa_format(out, buffer, idx, maxlen, buf, len, negative, (unsigned int)base, prec, width, flags); +} +#endif // PRINTF_SUPPORT_LONG_LONG + + +#if defined(PRINTF_SUPPORT_FLOAT) +static size_t _ftoa(out_fct_type out, char* buffer, size_t idx, size_t maxlen, double value, unsigned int prec, unsigned int width, unsigned int flags) +{ + const size_t start_idx = idx; + + char buf[PRINTF_FTOA_BUFFER_SIZE]; + size_t len = 0U; + double diff = 0.0; + + // if input is larger than thres_max, revert to exponential + const double thres_max = (double)0x7FFFFFFF; + + // powers of 10 + static const double pow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 }; + + // test for NaN + if (value != value) { + out('n', buffer, idx++, maxlen); + out('a', buffer, idx++, maxlen); + out('n', buffer, idx++, maxlen); + return idx; + } + + // test for negative + bool negative = false; + if (value < 0) { + negative = true; + value = 0 - value; + } + + // set default precision to 6, if not set explicitly + if (!(flags & FLAGS_PRECISION)) { + prec = 6U; + } + // limit precision to 9, cause a prec >= 10 can lead to overflow errors + while ((len < PRINTF_FTOA_BUFFER_SIZE) && (prec > 9U)) { + buf[len++] = '0'; + prec--; + } + + int whole = (int)value; + double tmp = (value - whole) * pow10[prec]; + unsigned long frac = (unsigned long)tmp; + diff = tmp - frac; + + if (diff > 0.5) { + ++frac; + // handle rollover, e.g. case 0.99 with prec 1 is 1.0 + if (frac >= pow10[prec]) { + frac = 0; + ++whole; + } + } + else if (diff < 0.5) { + } + else if ((frac == 0U) || (frac & 1U)) { + // if halfway, round up if odd OR if last digit is 0 + ++frac; + } + + // TBD: for very large numbers switch back to native sprintf for exponentials. Anyone want to write code to replace this? 
+ // Normal printf behavior is to print EVERY whole number digit which can be 100s of characters overflowing your buffers == bad + if (value > thres_max) { + return idx; // emit nothing, but keep the caller's running output index instead of rewinding it to 0 + } + + if (prec == 0U) { + diff = value - (double)whole; + if ((!(diff < 0.5) || (diff > 0.5)) && (whole & 1)) { + // exactly 0.5 and ODD, then round up + // 1.5 -> 2, but 2.5 -> 2 + ++whole; + } + } + else { + unsigned int count = prec; + // now do fractional part, as an unsigned number + while (len < PRINTF_FTOA_BUFFER_SIZE) { + --count; + buf[len++] = (char)(48U + (frac % 10U)); + if (!(frac /= 10U)) { + break; + } + } + // add extra 0s + while ((len < PRINTF_FTOA_BUFFER_SIZE) && (count-- > 0U)) { + buf[len++] = '0'; + } + if (len < PRINTF_FTOA_BUFFER_SIZE) { + // add decimal + buf[len++] = '.'; + } + } + + // do whole part, number is reversed + while (len < PRINTF_FTOA_BUFFER_SIZE) { + buf[len++] = (char)(48 + (whole % 10)); + if (!(whole /= 10)) { + break; + } + } + + // pad leading zeros + if (!(flags & FLAGS_LEFT) && (flags & FLAGS_ZEROPAD)) { + if (width && (negative || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) { + width--; + } + while ((len < width) && (len < PRINTF_FTOA_BUFFER_SIZE)) { + buf[len++] = '0'; + } + } + + if (len < PRINTF_FTOA_BUFFER_SIZE) { + if (negative) { + buf[len++] = '-'; + } + else if (flags & FLAGS_PLUS) { + buf[len++] = '+'; // ignore the space if the '+' exists + } + else if (flags & FLAGS_SPACE) { + buf[len++] = ' '; + } + } + + // pad spaces up to given width + if (!(flags & FLAGS_LEFT) && !(flags & FLAGS_ZEROPAD)) { + for (size_t i = len; i < width; i++) { + out(' ', buffer, idx++, maxlen); + } + } + + // reverse string + for (size_t i = 0U; i < len; i++) { + out(buf[len - i - 1U], buffer, idx++, maxlen); + } + + // append pad spaces up to given width + if (flags & FLAGS_LEFT) { + while (idx - start_idx < width) { + out(' ', buffer, idx++, maxlen); + } + } + + return idx; +} +#endif // PRINTF_SUPPORT_FLOAT + + +// internal vsnprintf +static int
_vsnprintf(out_fct_type out, char* buffer, const size_t maxlen, const char* format, va_list va) +{ + unsigned int flags, width, precision, n; + size_t idx = 0U; + + if (!buffer) { + // use null output function + out = _out_null; + } + + while (*format) + { + // format specifier? %[flags][width][.precision][length] + if (*format != '%') { + // no + out(*format, buffer, idx++, maxlen); + format++; + continue; + } + else { + // yes, evaluate it + format++; + } + + // evaluate flags + flags = 0U; + do { + switch (*format) { + case '0': flags |= FLAGS_ZEROPAD; format++; n = 1U; break; + case '-': flags |= FLAGS_LEFT; format++; n = 1U; break; + case '+': flags |= FLAGS_PLUS; format++; n = 1U; break; + case ' ': flags |= FLAGS_SPACE; format++; n = 1U; break; + case '#': flags |= FLAGS_HASH; format++; n = 1U; break; + default : n = 0U; break; + } + } while (n); + + // evaluate width field + width = 0U; + if (_is_digit(*format)) { + width = _atoi(&format); + } + else if (*format == '*') { + const int w = va_arg(va, int); + if (w < 0) { + flags |= FLAGS_LEFT; // reverse padding + width = (unsigned int)-w; + } + else { + width = (unsigned int)w; + } + format++; + } + + // evaluate precision field + precision = 0U; + if (*format == '.') { + flags |= FLAGS_PRECISION; + format++; + if (_is_digit(*format)) { + precision = _atoi(&format); + } + else if (*format == '*') { + const int prec = (int)va_arg(va, int); + precision = prec > 0 ? (unsigned int)prec : 0U; + format++; + } + } + + // evaluate length field + switch (*format) { + case 'l' : + flags |= FLAGS_LONG; + format++; + if (*format == 'l') { + flags |= FLAGS_LONG_LONG; + format++; + } + break; + case 'h' : + flags |= FLAGS_SHORT; + format++; + if (*format == 'h') { + flags |= FLAGS_CHAR; + format++; + } + break; +#if defined(PRINTF_SUPPORT_PTRDIFF_T) + case 't' : + flags |= (sizeof(ptrdiff_t) == sizeof(long) ? 
FLAGS_LONG : FLAGS_LONG_LONG); + format++; + break; +#endif + case 'j' : + flags |= (sizeof(intmax_t) == sizeof(long) ? FLAGS_LONG : FLAGS_LONG_LONG); + format++; + break; + case 'z' : + flags |= (sizeof(size_t) == sizeof(long) ? FLAGS_LONG : FLAGS_LONG_LONG); + format++; + break; + default : + break; + } + + // evaluate specifier + switch (*format) { + case 'd' : + case 'i' : + case 'u' : + case 'x' : + case 'X' : + case 'o' : + case 'b' : { + // set the base + unsigned int base; + if (*format == 'x' || *format == 'X') { + base = 16U; + } + else if (*format == 'o') { + base = 8U; + } + else if (*format == 'b') { + base = 2U; + } + else { + base = 10U; + flags &= ~FLAGS_HASH; // no hash for dec format + } + // uppercase + if (*format == 'X') { + flags |= FLAGS_UPPERCASE; + } + + // no plus or space flag for u, x, X, o, b + if ((*format != 'i') && (*format != 'd')) { + flags &= ~(FLAGS_PLUS | FLAGS_SPACE); + } + + // ignore '0' flag when precision is given + if (flags & FLAGS_PRECISION) { + flags &= ~FLAGS_ZEROPAD; + } + + // convert the integer + if ((*format == 'i') || (*format == 'd')) { + // signed + if (flags & FLAGS_LONG_LONG) { +#if defined(PRINTF_SUPPORT_LONG_LONG) + const long long value = va_arg(va, long long); + idx = _ntoa_long_long(out, buffer, idx, maxlen, (unsigned long long)(value > 0 ? value : 0 - value), value < 0, base, precision, width, flags); +#endif + } + else if (flags & FLAGS_LONG) { + const long value = va_arg(va, long); + idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned long)(value > 0 ? value : 0 - value), value < 0, base, precision, width, flags); + } + else { + const int value = (flags & FLAGS_CHAR) ? (char)va_arg(va, int) : (flags & FLAGS_SHORT) ? (short int)va_arg(va, int) : va_arg(va, int); + idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned int)(value > 0 ? 
value : 0 - value), value < 0, base, precision, width, flags); + } + } + else { + // unsigned + if (flags & FLAGS_LONG_LONG) { +#if defined(PRINTF_SUPPORT_LONG_LONG) + idx = _ntoa_long_long(out, buffer, idx, maxlen, va_arg(va, unsigned long long), false, base, precision, width, flags); +#endif + } + else if (flags & FLAGS_LONG) { + idx = _ntoa_long(out, buffer, idx, maxlen, va_arg(va, unsigned long), false, base, precision, width, flags); + } + else { + const unsigned int value = (flags & FLAGS_CHAR) ? (unsigned char)va_arg(va, unsigned int) : (flags & FLAGS_SHORT) ? (unsigned short int)va_arg(va, unsigned int) : va_arg(va, unsigned int); + idx = _ntoa_long(out, buffer, idx, maxlen, value, false, base, precision, width, flags); + } + } + format++; + break; + } +#if defined(PRINTF_SUPPORT_FLOAT) + case 'f' : + case 'F' : + idx = _ftoa(out, buffer, idx, maxlen, va_arg(va, double), precision, width, flags); + format++; + break; +#endif // PRINTF_SUPPORT_FLOAT + case 'c' : { + unsigned int l = 1U; + // pre padding + if (!(flags & FLAGS_LEFT)) { + while (l++ < width) { + out(' ', buffer, idx++, maxlen); + } + } + // char output + out((char)va_arg(va, int), buffer, idx++, maxlen); + // post padding + if (flags & FLAGS_LEFT) { + while (l++ < width) { + out(' ', buffer, idx++, maxlen); + } + } + format++; + break; + } + + case 's' : { + const char* p = va_arg(va, char*); + unsigned int l = _strnlen_s(p, precision ? precision : (size_t)-1); + // pre padding + if (flags & FLAGS_PRECISION) { + l = (l < precision ? 
l : precision); + } + if (!(flags & FLAGS_LEFT)) { + while (l++ < width) { + out(' ', buffer, idx++, maxlen); + } + } + // string output + while ((*p != 0) && (!(flags & FLAGS_PRECISION) || precision--)) { + out(*(p++), buffer, idx++, maxlen); + } + // post padding + if (flags & FLAGS_LEFT) { + while (l++ < width) { + out(' ', buffer, idx++, maxlen); + } + } + format++; + break; + } + + case 'p' : { + width = sizeof(void*) * 2U; + flags |= FLAGS_ZEROPAD | FLAGS_UPPERCASE; +#if defined(PRINTF_SUPPORT_LONG_LONG) + const bool is_ll = sizeof(uintptr_t) == sizeof(long long); + if (is_ll) { + idx = _ntoa_long_long(out, buffer, idx, maxlen, (uintptr_t)va_arg(va, void*), false, 16U, precision, width, flags); + } + else { +#endif + idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned long)((uintptr_t)va_arg(va, void*)), false, 16U, precision, width, flags); +#if defined(PRINTF_SUPPORT_LONG_LONG) + } +#endif + format++; + break; + } + + case '%' : + out('%', buffer, idx++, maxlen); + format++; + break; + + default : + out(*format, buffer, idx++, maxlen); + format++; + break; + } + } + + // termination + out((char)0, buffer, idx < maxlen ? idx : maxlen - 1U, maxlen); + + // return written chars without terminating \0 + return (int)idx; +} + + +/////////////////////////////////////////////////////////////////////////////// + +int printf_(const char* format, ...) +{ + va_list va; + va_start(va, format); + char buffer[1]; + const int ret = _vsnprintf(_out_char, buffer, (size_t)-1, format, va); + va_end(va); + return ret; +} + + +int sprintf_(char* buffer, const char* format, ...) +{ + va_list va; + va_start(va, format); + const int ret = _vsnprintf(_out_buffer, buffer, (size_t)-1, format, va); + va_end(va); + return ret; +} + + +int snprintf_(char* buffer, size_t count, const char* format, ...) 
+{ + va_list va; + va_start(va, format); + const int ret = _vsnprintf(_out_buffer, buffer, count, format, va); + va_end(va); + return ret; +} + + +int vsnprintf_(char* buffer, size_t count, const char* format, va_list va) +{ + return _vsnprintf(_out_buffer, buffer, count, format, va); +} + + +int fctprintf(void (*out)(char character, void* arg), void* arg, const char* format, ...) +{ + va_list va; + va_start(va, format); + const out_fct_wrap_type out_fct_wrap = { out, arg }; + const int ret = _vsnprintf(_out_fct, (char*)(uintptr_t)&out_fct_wrap, (size_t)-1, format, va); + va_end(va); + return ret; +} \ No newline at end of file diff --git a/sw/banshee/tests/runtime/printf.c.o b/sw/banshee/tests/runtime/printf.c.o new file mode 100644 index 00000000..64cc928e Binary files /dev/null and b/sw/banshee/tests/runtime/printf.c.o differ diff --git a/sw/banshee/tests/runtime/printf.h b/sw/banshee/tests/runtime/printf.h new file mode 100644 index 00000000..19233922 --- /dev/null +++ b/sw/banshee/tests/runtime/printf.h @@ -0,0 +1,105 @@ +/////////////////////////////////////////////////////////////////////////////// +// \author (c) Marco Paland (info@paland.com) +// 2014-2018, PALANDesign Hannover, Germany +// +// \license The MIT License (MIT) +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// \brief Tiny printf, sprintf and snprintf implementation, optimized for speed on +// embedded systems with a very limited resources. +// Use this instead of bloated standard/newlib printf. +// These routines are thread safe and reentrant. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _PRINTF_H_ +#define _PRINTF_H_ + +#include <stdarg.h> +#include <stddef.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Output a character to a custom device like UART, used by the printf() function + * This function is declared here only. You have to write your custom implementation somewhere + * \param character Character to output + */ +void _putchar(char character); + + +/** + * Tiny printf implementation + * You have to implement _putchar if you use printf() + * To avoid conflicts with the regular printf() API it is overridden by macro defines + * and internal underscore-appended functions like printf_() are used + * \param format A string that specifies the format of the output + * \return The number of characters that are written into the array, not counting the terminating null character + */ +#define printf printf_ +int printf_(const char* format, ...); + + +/** + * Tiny sprintf implementation + * Due to security reasons (buffer overflow) YOU SHOULD CONSIDER USING (V)SNPRINTF INSTEAD! + * \param buffer A pointer to the buffer where to store the formatted string. MUST be big enough to store the output!
+ * \param format A string that specifies the format of the output + * \return The number of characters that are WRITTEN into the buffer, not counting the terminating null character + */ +#define sprintf sprintf_ +int sprintf_(char* buffer, const char* format, ...); + + +/** + * Tiny snprintf/vsnprintf implementation + * \param buffer A pointer to the buffer where to store the formatted string + * \param count The maximum number of characters to store in the buffer, including a terminating null character + * \param format A string that specifies the format of the output + * \return The number of characters that are WRITTEN into the buffer, not counting the terminating null character + * If the formatted string is truncated the buffer size (count) is returned + */ +#define snprintf snprintf_ +#define vsnprintf vsnprintf_ +int snprintf_(char* buffer, size_t count, const char* format, ...); +int vsnprintf_(char* buffer, size_t count, const char* format, va_list va); + + +/** + * printf with output function + * You may use this as dynamic alternative to printf() with its fixed _putchar() output + * \param out An output function which takes one character and an argument pointer + * \param arg An argument pointer for user data passed to output function + * \param format A string that specifies the format of the output + * \return The number of characters that are sent to the output function, not counting the terminating null character + */ +int fctprintf(void (*out)(char character, void* arg), void* arg, const char* format, ...); + + +#ifdef __cplusplus +} +#endif + + +#endif // _PRINTF_H_ \ No newline at end of file diff --git a/sw/banshee/tests/runtime/runtime.h b/sw/banshee/tests/runtime/runtime.h new file mode 100644 index 00000000..4f4377fe --- /dev/null +++ b/sw/banshee/tests/runtime/runtime.h @@ -0,0 +1,178 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 + +#pragma once +#include <stddef.h> +#include <stdint.h> + +#include "encoding.h" + +#define PULP_NOINLINE __attribute__((noinline)) + +extern uint64_t l1_alloc_base; +extern uint32_t atomic_barrier; +extern uint32_t wake_up_reg; + +typedef uint32_t pulp_id_t; +typedef uint32_t pulp_timer_t; + +/// Obtain the number of cores in the current cluster. +static inline pulp_id_t pulp_get_core_count() { + extern uint32_t nr_cores_address_reg; + return nr_cores_address_reg; +} + +/// Obtain the ID of the current core. +static inline pulp_id_t pulp_get_core_id() { + pulp_id_t r; + asm volatile("csrr %0, mhartid" : "=r"(r)); + return r; +} + +/// Obtain a monotonically increasing cycle count. +static inline pulp_timer_t pulp_get_timer() { return read_csr(mcycle); } + +/// A cluster-local barrier. +static inline void pulp_barrier() { + // // The following is a software-only barrier using AMOs. + // uint32_t core_id = pulp_get_core_id(); + // uint32_t core_count = pulp_get_core_count(); + // uint32_t mask = 1 << core_id; + // uint32_t others = ((1 << core_count) - 1) ^ mask; + // if (core_id == 0) { + // while ((__atomic_load_n(&atomic_barrier, __ATOMIC_RELAXED) & others) + // != others); + // __atomic_or_fetch(&atomic_barrier, mask, __ATOMIC_RELAXED); + // while ((__atomic_load_n(&atomic_barrier, __ATOMIC_RELAXED) & others) + // != 0); + // __atomic_and_fetch(&atomic_barrier, ~mask, __ATOMIC_RELAXED); + // } else { + // while ((__atomic_load_n(&atomic_barrier, __ATOMIC_RELAXED) & 1) != + // 0); + // __atomic_or_fetch(&atomic_barrier, mask, __ATOMIC_RELAXED); + // while ((__atomic_load_n(&atomic_barrier, __ATOMIC_RELAXED) & 1) != + // 1); + // __atomic_and_fetch(&atomic_barrier, ~mask, __ATOMIC_RELAXED); + // } + + // The following uses the hardware barrier.
+ extern uint32_t barrier_reg; + uint32_t tmp; + asm volatile( + "lw %[tmp], 0(%[addr]) \n" + "mv zero, %[tmp] \n" + : [ tmp ] "=r"(tmp) + : [ addr ] "r"(&barrier_reg) : "memory"); // "memory" clobber: the compiler must not move loads/stores across the barrier +} + +/// The different SSR data movers. +enum ssr_dm { SSR_DM0 = 0, SSR_DM1 = 1 }; + +/// The different dimensions. +enum ssr_dim { + SSR_1D = 0, + SSR_2D = 1, + SSR_3D = 2, + SSR_4D = 3, +}; + +/// The SSR configuration registers. +typedef union { + uint32_t value __attribute__((aligned(8))); +} ssr_reg32_t; +typedef struct { + ssr_reg32_t status; + ssr_reg32_t repeat; + ssr_reg32_t bounds[4]; + ssr_reg32_t stride[4]; + ssr_reg32_t _reserved4[14]; + ssr_reg32_t rptr[4]; + ssr_reg32_t wptr[4]; +} ssr_cfg_t; +// extern volatile ssr_cfg_t ssr_config_reg[2]; // linker-provided address +static volatile ssr_cfg_t *const ssr_config_reg = (void *)0x204800; + +// Configure an SSR data mover for a 1D loop nest. +static inline void pulp_ssr_loop_1d(enum ssr_dm dm, uint16_t b0, uint16_t i0) { + --b0; + ssr_config_reg[dm].bounds[0].value = b0; + uint16_t a = 0; + ssr_config_reg[dm].stride[0].value = i0 - a; + a += i0 * b0; +} + +// Configure an SSR data mover for a 2D loop nest. +static inline void pulp_ssr_loop_2d(enum ssr_dm dm, uint16_t b0, uint16_t b1, + uint16_t i0, uint16_t i1) { + --b0; + --b1; + ssr_config_reg[dm].bounds[0].value = b0; + ssr_config_reg[dm].bounds[1].value = b1; + uint16_t a = 0; + ssr_config_reg[dm].stride[0].value = i0 - a; + a += i0 * b0; + ssr_config_reg[dm].stride[1].value = i1 - a; + a += i1 * b1; +} + +// Configure an SSR data mover for a 3D loop nest.
+//
+// dm:       data mover to program.
+// b0..b2:   iteration counts, innermost first; bound register d receives
+//           b_d - 1.
+// i0..i2:   increments, innermost first. Stride register d receives i_d
+//           minus the offset accumulated by all inner dimensions (the sum of
+//           i_k * (b_k - 1) for k < d, kept modulo 2^16).
+static inline void pulp_ssr_loop_3d(enum ssr_dm dm, uint16_t b0, uint16_t b1,
+                                    uint16_t b2, uint16_t i0, uint16_t i1,
+                                    uint16_t i2) {
+  volatile ssr_cfg_t *const cfg = &ssr_config_reg[dm];
+  --b0;
+  --b1;
+  --b2;
+  cfg->bounds[0].value = b0;
+  cfg->bounds[1].value = b1;
+  cfg->bounds[2].value = b2;
+  uint16_t a = 0;
+  cfg->stride[0].value = i0 - a;
+  // Multiply as unsigned 32 bit: two uint16_t operands would otherwise be
+  // promoted to signed int, whose overflow is undefined behaviour.
+  a += (uint32_t)i0 * b0;
+  cfg->stride[1].value = i1 - a;
+  a += (uint32_t)i1 * b1;
+  cfg->stride[2].value = i2 - a;
+}
+
+// Configure an SSR data mover for a 4D loop nest.
+//
+// dm:       data mover to program.
+// b0..b3:   iteration counts, innermost first; bound register d receives
+//           b_d - 1.
+// i0..i3:   increments, innermost first. Stride register d receives i_d
+//           minus the offset accumulated by all inner dimensions (the sum of
+//           i_k * (b_k - 1) for k < d, kept modulo 2^16).
+static inline void pulp_ssr_loop_4d(enum ssr_dm dm, uint16_t b0, uint16_t b1,
+                                    uint16_t b2, uint16_t b3, uint16_t i0,
+                                    uint16_t i1, uint16_t i2, uint16_t i3) {
+  volatile ssr_cfg_t *const cfg = &ssr_config_reg[dm];
+  --b0;
+  --b1;
+  --b2;
+  --b3;
+  cfg->bounds[0].value = b0;
+  cfg->bounds[1].value = b1;
+  cfg->bounds[2].value = b2;
+  cfg->bounds[3].value = b3;
+  uint16_t a = 0;
+  cfg->stride[0].value = i0 - a;
+  // See pulp_ssr_loop_3d for why the products are computed as uint32_t.
+  a += (uint32_t)i0 * b0;
+  cfg->stride[1].value = i1 - a;
+  a += (uint32_t)i1 * b1;
+  cfg->stride[2].value = i2 - a;
+  a += (uint32_t)i2 * b2;
+  cfg->stride[3].value = i3 - a;
+}
+
+/// Enable SSR.
+///
+/// Sets bit 0 of CSR 0x7C0.
+static inline void pulp_ssr_enable(void) { asm volatile("csrsi 0x7C0, 1"); }
+
+/// Disable SSR.
+///
+/// Clears bit 0 of CSR 0x7C0.
+static inline void pulp_ssr_disable(void) { asm volatile("csrci 0x7C0, 1"); }
+
+/// Start a streaming read.
+///
+/// Writes the address `ptr` to read-pointer register `dim` of data mover
+/// `dm`. The address is truncated to 32 bit.
+static inline void pulp_ssr_read(enum ssr_dm dm, enum ssr_dim dim, void *ptr) {
+  ssr_config_reg[dm].rptr[dim].value = (uint32_t)(uintptr_t)ptr;
+}
+
+/// Start a streaming write.
+///
+/// Writes the address `ptr` to write-pointer register `dim` of data mover
+/// `dm`. The address is truncated to 32 bit.
+static inline void pulp_ssr_write(enum ssr_dm dm, enum ssr_dim dim, void *ptr) {
+  ssr_config_reg[dm].wptr[dim].value = (uint32_t)(uintptr_t)ptr;
+}
+
+/// Synchronize the integer and float pipelines.
+///
+/// Executes `fmv.x.w zero, fa0`, i.e. moves a float register into the integer
+/// register file and discards it. Presumably the integer pipeline has to wait
+/// for the FPU to deliver the value (confirm against the core documentation).
+/// The "memory" clobber additionally keeps the compiler from moving loads and
+/// stores across the fence.
+static inline void fpu_fence(void) {
+  asm volatile("fmv.x.w zero, fa0" ::: "memory");
+}
diff --git a/sw/banshee/tests/runtime/runtime.mk b/sw/banshee/tests/runtime/runtime.mk
new file mode 100644
index 00000000..1d9126bd
--- /dev/null
+++ b/sw/banshee/tests/runtime/runtime.mk
@@ -0,0 +1,34 @@
+# Copyright 2020 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+RISCV_XLEN    ?= 32
+RISCV_ABI     ?= rv$(RISCV_XLEN)imafd
+RISCV_PREFIX  ?= riscv$(RISCV_XLEN)-unknown-elf-
+RISCV_CC      ?= $(RISCV_PREFIX)gcc
+RISCV_CXX     ?= $(RISCV_PREFIX)g++
+RISCV_OBJDUMP ?= $(RISCV_PREFIX)objdump
+RISCV_OBJCOPY ?= $(RISCV_PREFIX)objcopy
+RISCV_AS      ?= $(RISCV_PREFIX)as
+RISCV_AR      ?= $(RISCV_PREFIX)ar
+RISCV_LD      ?= $(RISCV_PREFIX)ld
+RISCV_STRIP   ?= $(RISCV_PREFIX)strip
+
+RISCV_FLAGS    ?= -march=$(RISCV_ABI) -mno-fdiv -mcmodel=medany -static -g -std=gnu99 -O3 -ffast-math -fno-common -fno-builtin-printf -Iruntime -DITERATIONS=10
+RISCV_CCFLAGS  ?= $(RISCV_FLAGS)
+RISCV_CXXFLAGS ?= $(RISCV_FLAGS)
+RISCV_LDFLAGS  ?= -static -nostartfiles -lm -lgcc $(RISCV_FLAGS)
+
+PYTHON ?= python3
+
+RUNTIME ?= runtime/crt0.S.o runtime/printf.c.o runtime/string.c.o runtime/serial.c.o
+HDR ?= runtime/runtime.h runtime/libsdma.h
+
+%.S.o: %.S
+	$(RISCV_CC) -Iinclude $(RISCV_CCFLAGS) -c $< -o $@
+
+%.c.o: %.c
+	$(RISCV_CC) -Iinclude $(RISCV_CCFLAGS) -c $< -o $@
+
+%.cpp.o: %.cpp
+	$(RISCV_CXX) $(RISCV_CXXFLAGS) -c $< -o $@
diff --git a/sw/banshee/tests/runtime/serial.c b/sw/banshee/tests/runtime/serial.c
new file mode 100644
index 00000000..029bffb0
--- /dev/null
+++ b/sw/banshee/tests/runtime/serial.c
@@ -0,0 +1,12 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#include <stdint.h>  // NOTE(review): include target lost in this copy; assumed
+
+extern volatile char fake_uart;  // address assigned by the linker script
+
+void _putchar(char character) {
+  // Send one character to the console; volatile keeps every store in place.
+  fake_uart = character;
+}
diff --git a/sw/banshee/tests/runtime/serial.c.o b/sw/banshee/tests/runtime/serial.c.o
new file mode 100644
index 00000000..3f79ed4b
Binary files /dev/null and b/sw/banshee/tests/runtime/serial.c.o differ
diff --git a/sw/banshee/tests/runtime/snitch.ld b/sw/banshee/tests/runtime/snitch.ld
new file mode 100644
index 00000000..7ebf93ae
--- /dev/null
+++ b/sw/banshee/tests/runtime/snitch.ld
@@ -0,0 +1,23 @@
+/* Copyright 2020 ETH Zurich and University of Bologna. */
+/* Licensed under the Apache License, Version 2.0, see LICENSE for details. */
+/* SPDX-License-Identifier: Apache-2.0 */
+
+SECTIONS
+{
+  ROM_BASE = 0x80000000; /* ... but actually position independent */
+  . = 0x0;
+  .l1 : { *(.l1) }
+  l1_alloc_base = ALIGN(0x10);
+  tcdm_start_address_reg = 0x40000000;
+  tcdm_end_address_reg = 0x40000008;
+  nr_cores_address_reg = 0x40000010;
+  fetch_enable_reg = 0x40000018;
+  scratch_reg = 0x40000020;
+  wake_up_reg = 0x40000028;
+  cycle_count_reg = 0x40000030;
+  barrier_reg = 0x40000038;
+  ssr_config_reg = 0x204800;
+  fake_uart = 0xC0000000;
+  . = 0xD0000000;
+  .eoc_address (NOLOAD): { *(.eoc_address) }
+}
diff --git a/sw/banshee/tests/runtime/string.c b/sw/banshee/tests/runtime/string.c
new file mode 100644
index 00000000..3f766814
--- /dev/null
+++ b/sw/banshee/tests/runtime/string.c
@@ -0,0 +1,104 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// NOTE(review): the targets of the three system includes are missing from
+// this copy of the patch. <stddef.h> (size_t) and <stdint.h> (uintptr_t) are
+// needed by the code below; <string.h> is an assumption. Confirm against the
+// tree.
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+// Copy `len` bytes from `src` to `dest` and return `dest`.
+//
+// When both pointers and the length are multiples of the machine word size
+// the copy runs word by word, otherwise byte by byte. Copies front to back.
+void* memcpy(void* dest, const void* src, size_t len) {
+  const uintptr_t word_mask = sizeof(uintptr_t) - 1;
+  if ((((uintptr_t)dest | (uintptr_t)src | len) & word_mask) == 0) {
+    const uintptr_t* s = src;
+    uintptr_t* d = dest;
+    uintptr_t* const end = d + len / sizeof(uintptr_t);
+    while (d < end) *d++ = *s++;
+  } else {
+    const char* s = src;
+    char* d = dest;
+    char* const end = d + len;
+    while (d < end) *d++ = *s++;
+  }
+  return dest;
+}
+
+// Fill `len` bytes at `dest` with the low 8 bits of `byte`; return `dest`.
+//
+// When the pointer and the length are multiples of the machine word size the
+// fill runs word by word, otherwise byte by byte.
+void* memset(void* dest, int byte, size_t len) {
+  if ((((uintptr_t)dest | len) & (sizeof(uintptr_t) - 1)) == 0) {
+    // Replicate the byte into every byte lane of a word.
+    uintptr_t word = byte & 0xFF;
+    word |= word << 8;
+    word |= word << 16;
+    // Two shifts by 16 instead of one by 32: a single shift by the full
+    // width of a 32-bit uintptr_t would be undefined.
+    word |= word << 16 << 16;
+
+    uintptr_t* d = dest;
+    uintptr_t* const end = d + len / sizeof(uintptr_t);
+    while (d < end) *d++ = word;
+  } else {
+    char* d = dest;
+    char* const end = d + len;
+    while (d < end) *d++ = byte;
+  }
+  return dest;
+}
+
+// Return the number of characters before the terminating NUL of `s`.
+size_t strlen(const char* s) {
+  const char* p = s;
+  while (*p) p++;
+  return p - s;
+}
+
+// Compare two NUL-terminated strings as sequences of unsigned char.
+//
+// Returns the difference of the first pair of characters that differ, or 0
+// when both strings are equal.
+int strcmp(const char* s1, const char* s2) {
+  unsigned char c1, c2;
+
+  do {
+    c1 = *s1++;
+    c2 = *s2++;
+  } while (c1 != 0 && c1 == c2);
+
+  return c1 - c2;
+}
+
+// Compare the first `n` bytes of `s1` and `s2` as unsigned char.
+//
+// Returns the difference of the first pair of bytes that differ, or 0 when
+// all `n` bytes are equal.
+int memcmp(const void* s1, const void* s2, size_t n) {
+  const unsigned char* p1 = s1;
+  const unsigned char* p2 = s2;
+
+  if ((((uintptr_t)s1 | (uintptr_t)s2) & (sizeof(uintptr_t) - 1)) == 0) {
+    // Skip over the leading run of equal words; the byte loop below then
+    // locates the exact mismatch (or handles the tail).
+    const uintptr_t* u1 = s1;
+    const uintptr_t* u2 = s2;
+    const uintptr_t* end = u1 + (n / sizeof(uintptr_t));
+    while (u1 < end && *u1 == *u2) {
+      u1++;
+      u2++;
+    }
+    n -= (size_t)((const unsigned char*)u1 - p1);
+    p1 = (const unsigned char*)u1;
+    p2 = (const unsigned char*)u2;
+  }
+
+  while (n--) {
+    unsigned char c1 = *p1++;
+    unsigned char c2 = *p2++;
+    if (c1 != c2) return c1 - c2;
+  }
+
+  return 0;
+}
+
+// Copy the string `src`, including its terminating NUL, to `dest`; return
+// `dest`.
+char* strcpy(char* dest, const char* src) {
+  char* d = dest;
+  while ((*d++ = *src++))
+    ;
+  return dest;
+}
+
+// Convert the leading decimal number in `str` to a long.
+//
+// Skips leading white space, accepts one optional '+' or '-' sign and then
+// consumes digits up to the first non-digit character. Returns 0 when no
+// digit is found. Overflow is not detected.
+long atol(const char* str) {
+  long res = 0;
+  int sign = 0;
+
+  // ' ' plus the ASCII range '\t'..'\r' ('\t', '\n', '\v', '\f', '\r').
+  while (*str == ' ' || (*str >= '\t' && *str <= '\r')) str++;
+
+  if (*str == '-' || *str == '+') {
+    sign = *str == '-';
+    str++;
+  }
+
+  // Stop at the first non-digit instead of folding arbitrary characters
+  // into the result.
+  while (*str >= '0' && *str <= '9') {
+    res *= 10;
+    res += *str++ - '0';
+  }
+
+  return sign ? -res : res;
+}
diff --git a/sw/banshee/tests/runtime/string.c.o b/sw/banshee/tests/runtime/string.c.o
new file mode 100644
index 00000000..4dc39c91
Binary files /dev/null and b/sw/banshee/tests/runtime/string.c.o differ
diff --git a/util/licence-checker.hjson b/util/licence-checker.hjson
index 524c08ef..865762ac 100644
--- a/util/licence-checker.hjson
+++ b/util/licence-checker.hjson
@@ -14,6 +14,7 @@
   exclude_paths: [
     # Exclude anything in vendored directories
     '*/vendor/*',
-    'util/lowrisc_misc-linters/*'
+    'util/lowrisc_misc-linters/*',
+    'sw/banshee/tests/runtime/printf.*'
   ],
 }