From 13ad800cc90c2131245a9bf6e2eb6c3b01fc909e Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 23 Oct 2023 15:06:12 -0700 Subject: [PATCH 01/17] spatz_cluster: Parameterize the number of FPUs and IPUs --- hw/ip/spatz_cc/src/spatz_cc.sv | 2 +- hw/system/spatz_cluster/src/spatz_cluster.sv | 7 ++++++- hw/system/spatz_cluster/src/spatz_cluster_wrapper.sv.tpl | 4 ++++ util/Makefrag | 4 ++-- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/hw/ip/spatz_cc/src/spatz_cc.sv b/hw/ip/spatz_cc/src/spatz_cc.sv index d5119c85..02790f83 100644 --- a/hw/ip/spatz_cc/src/spatz_cc.sv +++ b/hw/ip/spatz_cc/src/spatz_cc.sv @@ -70,7 +70,7 @@ module spatz_cc parameter int unsigned NumSpatzOutstandingLoads = 0, // Enable V Extension parameter bit RVV = 1, - // Spatz paramaters + // Spatz parameters parameter int unsigned NumSpatzFPUs = 4, parameter int unsigned NumSpatzIPUs = 1, /// Add isochronous clock-domain crossings e.g., make it possible to operate diff --git a/hw/system/spatz_cluster/src/spatz_cluster.sv b/hw/system/spatz_cluster/src/spatz_cluster.sv index 381dc299..d2440ee6 100644 --- a/hw/system/spatz_cluster/src/spatz_cluster.sv +++ b/hw/system/spatz_cluster/src/spatz_cluster.sv @@ -69,6 +69,9 @@ module spatz_cluster parameter int unsigned NumIntOutstandingMem [NrCores] = '{default: '0}, /// Per-core Spatz outstanding loads parameter int unsigned NumSpatzOutstandingLoads [NrCores] = '{default: '0}, + // Spatz parameters + parameter int unsigned NumSpatzFPUs [NrCores] = '{default: '0}, + parameter int unsigned NumSpatzIPUs [NrCores] = '{default: '0}, /// ## Timing Tuning Parameters /// Insert Pipeline registers into off-loading path (response) parameter bit RegisterOffloadRsp = 1'b0, @@ -736,6 +739,8 @@ module spatz_cluster .XF8 (1'b1 ), .XF8ALT (1'b1 ), .IsoCrossing (1'b0 ), + .NumSpatzFPUs (NumSpatzFPUs[i] ), + .NumSpatzIPUs (NumSpatzIPUs[i] ), .NumIntOutstandingLoads (NumIntOutstandingLoads[i] ), .NumIntOutstandingMem (NumIntOutstandingMem[i] ), 
.NumSpatzOutstandingLoads(NumSpatzOutstandingLoads[i]), @@ -777,7 +782,7 @@ module spatz_cluster assign wide_axi_mst_req[SDMAMst] = axi_dma_req; assign axi_dma_res = wide_axi_mst_rsp[SDMAMst]; assign dma_events = dma_core_events; - end else begin: gen_no_dma_connection + end else begin: gen_no_dma_connection assign axi_dma_res = '0; end end diff --git a/hw/system/spatz_cluster/src/spatz_cluster_wrapper.sv.tpl b/hw/system/spatz_cluster/src/spatz_cluster_wrapper.sv.tpl index 4fb6fb87..b7ab6460 100644 --- a/hw/system/spatz_cluster/src/spatz_cluster_wrapper.sv.tpl +++ b/hw/system/spatz_cluster/src/spatz_cluster_wrapper.sv.tpl @@ -311,6 +311,8 @@ module ${cfg['name']}_wrapper localparam int unsigned NumIntOutstandingLoads [NumCores] = '{${core_cfg('num_int_outstanding_loads')}}; localparam int unsigned NumIntOutstandingMem [NumCores] = '{${core_cfg('num_int_outstanding_mem')}}; localparam int unsigned NumSpatzOutstandingLoads [NumCores] = '{${core_cfg('num_spatz_outstanding_loads')}}; + localparam int unsigned NumSpatzFPUs [NumCores] = '{default: ${cfg['n_fpu']}}; + localparam int unsigned NumSpatzIPUs [NumCores] = '{default: ${cfg['n_ipu']}}; spatz_cluster_pkg::spatz_axi_iwc_out_req_t axi_from_cluster_iwc_req; spatz_cluster_pkg::spatz_axi_iwc_out_resp_t axi_from_cluster_iwc_resp; @@ -507,6 +509,8 @@ module ${cfg['name']}_wrapper .NumIntOutstandingLoads (NumIntOutstandingLoads), .NumIntOutstandingMem (NumIntOutstandingMem), .NumSpatzOutstandingLoads (NumSpatzOutstandingLoads), + .NumSpatzFPUs (NumSpatzFPUs), + .NumSpatzIPUs (NumSpatzIPUs), .axi_in_req_t (axi_in_req_t), .axi_in_resp_t (axi_in_resp_t), .axi_out_req_t (spatz_cluster_pkg::spatz_axi_iwc_out_req_t), diff --git a/util/Makefrag b/util/Makefrag index c0b1ca3d..974e0007 100644 --- a/util/Makefrag +++ b/util/Makefrag @@ -6,8 +6,8 @@ # Root SHELL = /usr/bin/env bash -SPATZ_DIR := $(shell git rev-parse --show-toplevel
2>/dev/null || echo $$SPATZ_DIR) +ROOT_DIR ?= ${SPATZ_DIR} # Binaries INSTALL_PREFIX ?= install From ce49bd5a4cb4a64f125430fe06015e9d1eb644ce Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Tue, 24 Oct 2023 16:20:33 -0700 Subject: [PATCH 02/17] tc-llvm: Compile LLVM with a generic gcc --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 189ce0e1..5dc34c54 100644 --- a/Makefile +++ b/Makefile @@ -81,8 +81,8 @@ tc-llvm: sw/toolchain/llvm-project cd sw/toolchain/llvm-project && mkdir -p build && cd build; \ $(CMAKE) \ -DCMAKE_INSTALL_PREFIX=$(LLVM_INSTALL_DIR) \ - -DCMAKE_CXX_COMPILER=g++-8.2.0 \ - -DCMAKE_C_COMPILER=gcc-8.2.0 \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_C_COMPILER=${CC} \ -DLLVM_OPTIMIZED_TABLEGEN=True \ -DLLVM_ENABLE_PROJECTS="clang;lld" \ -DLLVM_TARGETS_TO_BUILD="RISCV" \ From 33198b5aca9fd9faaeeccee920e534ccc19d3555 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 25 Oct 2023 13:53:24 -0700 Subject: [PATCH 03/17] spatz: Parameterize DRAM start address and length --- hw/ip/snitch_test/src/tb_lib.hh | 12 +- hw/system/spatz_cluster/Makefile | 8 +- .../test/bootdata_bootrom.cc.tpl | 12 +- hw/system/spatz_cluster/test/bootrom.S | 10 +- hw/system/spatz_cluster/test/bootrom.bin | Bin 128 -> 148 bytes hw/system/spatz_cluster/test/bootrom.dump | 174 ++++++++---------- hw/system/spatz_cluster/test/bootrom.elf | Bin 5240 -> 5184 bytes sw/snRuntime/CMakeLists.txt | 9 +- sw/snRuntime/link/common.ld.in | 3 +- .../src/platforms/shared/start_snitch.S | 12 +- 10 files changed, 114 insertions(+), 126 deletions(-) diff --git a/hw/ip/snitch_test/src/tb_lib.hh b/hw/ip/snitch_test/src/tb_lib.hh index f06b77bc..b35d0408 100644 --- a/hw/ip/snitch_test/src/tb_lib.hh +++ b/hw/ip/snitch_test/src/tb_lib.hh @@ -117,12 +117,12 @@ extern GlobalMemory MEM; // The boot data generated along with the system RTL. 
struct BootData { - uint32_t boot_addr; - uint32_t core_count; - uint32_t hartid_base; - uint32_t tcdm_start; - uint32_t tcdm_size; - uint32_t tcdm_offset; + uint64_t boot_addr; + uint64_t core_count; + uint64_t hartid_base; + uint64_t tcdm_start; + uint64_t tcdm_size; + uint64_t tcdm_offset; uint64_t global_mem_start; uint64_t global_mem_end; }; diff --git a/hw/system/spatz_cluster/Makefile b/hw/system/spatz_cluster/Makefile index 9977422c..d9f5bd8f 100644 --- a/hw/system/spatz_cluster/Makefile +++ b/hw/system/spatz_cluster/Makefile @@ -62,10 +62,10 @@ VLT_COBJ += $(VLT_BUILDDIR)/vlt/verilated_vcd_c.o # Prerequisites # ################# -test/bootrom.elf test/bootrom.dump test/bootrom.bin: src/generated/spatz_cluster_wrapper.sv test/bootrom.S test/bootrom.ld Makefile - riscv -riscv64-gcc-9.5.0 riscv64-unknown-elf-gcc -mabi=ilp32 -march=rv32imaf -static -nostartfiles -Ttest/bootrom.ld test/bootrom.S test/bootdata_bootrom.cc -I../../ip/snitch_test/src -I../../../sw/snRuntime/include -o test/bootrom.elf - riscv -riscv64-gcc-9.5.0 riscv64-unknown-elf-objdump -D test/bootrom.elf > test/bootrom.dump - riscv -riscv64-gcc-9.5.0 riscv64-unknown-elf-objcopy -O binary test/bootrom.elf test/bootrom.bin +test/bootrom.elf test/bootrom.dump test/bootrom.bin: src/generated/spatz_cluster_wrapper.sv test/bootrom.S test/bootrom.ld test/bootdata_bootrom.cc Makefile + ${GCC_INSTALL_DIR}/bin/riscv32-unknown-elf-gcc -mabi=ilp32 -march=rv32imaf -static -nostartfiles -Ttest/bootrom.ld test/bootrom.S test/bootdata_bootrom.cc -I../../ip/snitch_test/src -I../../../sw/snRuntime/include -I../../../sw/toolchain/riscv-opcodes/ -o test/bootrom.elf + ${GCC_INSTALL_DIR}/bin/riscv32-unknown-elf-objdump -D test/bootrom.elf > test/bootrom.dump + ${GCC_INSTALL_DIR}/bin/riscv32-unknown-elf-objcopy -O binary test/bootrom.elf test/bootrom.bin .PHONY: bootrom bootrom: $(CURDIR)/src/generated/bootrom.sv diff --git a/hw/system/spatz_cluster/test/bootdata_bootrom.cc.tpl 
b/hw/system/spatz_cluster/test/bootdata_bootrom.cc.tpl index 77aa6363..6d630baa 100644 --- a/hw/system/spatz_cluster/test/bootdata_bootrom.cc.tpl +++ b/hw/system/spatz_cluster/test/bootdata_bootrom.cc.tpl @@ -6,12 +6,12 @@ // The boot data generated along with the system RTL. struct BootData { - uint32_t boot_addr; - uint32_t core_count; - uint32_t hartid_base; - uint32_t tcdm_start; - uint32_t tcdm_size; - uint32_t tcdm_offset; + uint64_t boot_addr; + uint64_t core_count; + uint64_t hartid_base; + uint64_t tcdm_start; + uint64_t tcdm_size; + uint64_t tcdm_offset; uint64_t global_mem_start; uint64_t global_mem_end; }; diff --git a/hw/system/spatz_cluster/test/bootrom.S b/hw/system/spatz_cluster/test/bootrom.S index 726ea13a..bf608fe3 100644 --- a/hw/system/spatz_cluster/test/bootrom.S +++ b/hw/system/spatz_cluster/test/bootrom.S @@ -3,6 +3,7 @@ // SPDX-License-Identifier: SHL-0.51 #include +#include .global _start .global BOOTDATA @@ -12,13 +13,18 @@ _start: csrw mtvec, t1 csrr a0, mhartid la a1, BOOTDATA + + // Activate MEIP + li t1, MIP_MEIP + csrw mie, t1 + // Wait for the wakeup interrupt wfi // Load the start address of the TCDM - lw t2, 12(a1) + lw t2, 24(a1) // Load the size of the TCDM - lw t3, 16(a1) + lw t3, 32(a1) // Final address of the TCDM add t2, t2, t3 diff --git a/hw/system/spatz_cluster/test/bootrom.bin b/hw/system/spatz_cluster/test/bootrom.bin index 913f65653e0d05c12b68f0293e79d1cf5b98a3cf..30047bccdbbee597681560616608f08c9dee388e 100755 GIT binary patch literal 148 zcmWe*pm}jCBl9{|rp?WV z87DV4vrcYiW@ui_%#hx|%m9+l|8W050|*F!Nd`s;g+xLn7?_a6z)S^*01KFQ0V)Fk Dg9jC( literal 128 zcmWesT2#Hy>u4+}zAMxtW=v vc`-9XdIK{9NIw6={r?OC3=B*RFaYErcnvU7Mj*oiBn71%fb9Q30OA7xF^3rQ diff --git a/hw/system/spatz_cluster/test/bootrom.dump b/hw/system/spatz_cluster/test/bootrom.dump index 53c34fc5..47f07df1 100644 --- a/hw/system/spatz_cluster/test/bootrom.dump +++ b/hw/system/spatz_cluster/test/bootrom.dump @@ -6,115 +6,103 @@ Disassembly of section .text: 00001000 
<_start>: 1000: 00000317 auipc t1,0x0 - 1004: 07032303 lw t1,112(t1) # 1070 <_GLOBAL_OFFSET_TABLE_+0x4> + 1004: 04430313 addi t1,t1,68 # 1044 1008: 30531073 csrw mtvec,t1 100c: f1402573 csrr a0,mhartid 1010: 00000597 auipc a1,0x0 - 1014: 0645a583 lw a1,100(a1) # 1074 <_GLOBAL_OFFSET_TABLE_+0x8> - 1018: 10500073 wfi - 101c: 00c5a383 lw t2,12(a1) - 1020: 0105ae03 lw t3,16(a1) - 1024: 01c383b3 add t2,t2,t3 - 1028: 05838393 addi t2,t2,88 - 102c: 00038393 mv t2,t2 - 1030: 0003a383 lw t2,0(t2) - 1034: 00038067 jr t2 - -00001038 : - 1038: 10500073 wfi - 103c: ffdff06f j 1038 + 1014: 04058593 addi a1,a1,64 # 1050 + 1018: 00001337 lui t1,0x1 + 101c: 80030313 addi t1,t1,-2048 # 800 <_start-0x800> + 1020: 30431073 csrw 0x304,t1 + 1024: 10500073 wfi + 1028: 0185a383 lw t2,24(a1) + 102c: 0205ae03 lw t3,32(a1) + 1030: 01c383b3 add t2,t2,t3 + 1034: 05838393 addi t2,t2,88 + 1038: 00038393 mv t2,t2 + 103c: 0003a383 lw t2,0(t2) + 1040: 00038067 jr t2 + +00001044 : + 1044: 10500073 wfi + 1048: ffdff06f j 1044 Disassembly of section .rodata: -00001040 : - 1040: 1000 .2byte 0x1000 - 1042: 0000 .2byte 0x0 - 1044: 0002 .2byte 0x2 +00001050 : + 1050: 1000 addi s0,sp,32 + 1052: 0000 unimp + 1054: 0000 unimp + 1056: 0000 unimp + 1058: 0001 nop ... - 104e: 0010 .2byte 0x10 - 1050: 0000 .2byte 0x0 - 1052: 0002 .2byte 0x2 - 1054: 0000 .2byte 0x0 - 1056: 0000 .2byte 0x0 - 1058: 0000 .2byte 0x0 - 105a: 8000 .2byte 0x8000 + 106a: 0010 addi a2,sp,0 + 106c: 0000 unimp + 106e: 0000 unimp + 1070: 0000 unimp + 1072: 0002 0x2 ... - 1064: 0001 .2byte 0x1 + 1080: 0000 unimp + 1082: 2000 fld fs0,0(s0) + 1084: 0000 unimp + 1086: 0000 unimp + 1088: 0000 unimp + 108a: 2004 fld fs1,0(s0) + 108c: 0000 unimp ... Disassembly of section .boot_section: -00001068 : - 1068: 1038 .2byte 0x1038 +00001090 : + 1090: 1044 addi s1,sp,36 ... 
-Disassembly of section .got: - -0000106c <_GLOBAL_OFFSET_TABLE_>: - 106c: 0000 .2byte 0x0 - 106e: 0000 .2byte 0x0 - 1070: 1038 .2byte 0x1038 - 1072: 0000 .2byte 0x0 - 1074: 1040 .2byte 0x1040 +Disassembly of section .Pulp_Chip.Info: + +00000000 <.Pulp_Chip.Info>: + 0: 000a 0xa + 2: 0000 unimp + 4: 0038 addi a4,sp,8 + 6: 0000 unimp + 8: 0001 nop + a: 0000 unimp + c: 7550 flw fa2,44(a0) + e: 706c flw fa1,100(s0) + 10: 495f 666e 006f 0x6f666e495f + 16: 70696863 bltu s2,t1,726 <_start-0x8da> + 1a: 6e3d lui t3,0xf + 1c: 20656e6f jal t3,56222 + 20: 3d757063 bleu s7,a0,3e0 <_start-0xc20> + 24: 6f6e flw ft10,216(sp) + 26: 656e flw fa0,216(sp) + 28: 7020 flw fs0,96(s0) + 2a: 3d65 jal fffffee2 + 2c: 312d jal fffffc56 + 2e: 6620 flw fs0,72(a2) + 30: 312d3d63 p.bneimm s10,-14,34a <_start-0xcb6> + 34: 6c20 flw fs0,88(s0) + 36: 3d32 fld fs10,296(sp) + 38: 312d jal fffffc62 + 3a: 6c20 flw fs0,88(s0) + 3c: 6331 lui t1,0xc + 3e: 3d6c fld fa1,248(a0) + 40: 312d jal fffffc6a + 42: 6c20 flw fs0,88(s0) + 44: 6631 lui a2,0xc + 46: 312d3d63 p.bneimm s10,-14,360 <_start-0xca0> + 4a: 0000 unimp ... -Disassembly of section .got.plt: - -00001078 <.got.plt>: - 1078: ffff .2byte 0xffff - 107a: ffff .2byte 0xffff - 107c: 0000 .2byte 0x0 - ... 
- -Disassembly of section .riscv.attributes: - -00000000 <.riscv.attributes>: - 0: 4341 .2byte 0x4341 - 2: 0000 .2byte 0x0 - 4: 7200 .2byte 0x7200 - 6: 7369 .2byte 0x7369 - 8: 01007663 bgeu zero,a6,14 <_start-0xfec> - c: 0039 .2byte 0x39 - e: 0000 .2byte 0x0 - 10: 1004 .2byte 0x1004 - 12: 7205 .2byte 0x7205 - 14: 3376 .2byte 0x3376 - 16: 6932 .2byte 0x6932 - 18: 7032 .2byte 0x7032 - 1a: 5f31 .2byte 0x5f31 - 1c: 326d .2byte 0x326d - 1e: 3070 .2byte 0x3070 - 20: 615f 7032 5f31 .byte 0x5f, 0x61, 0x32, 0x70, 0x31, 0x5f - 26: 3266 .2byte 0x3266 - 28: 3270 .2byte 0x3270 - 2a: 7a5f 6369 7273 .byte 0x5f, 0x7a, 0x69, 0x63, 0x73, 0x72 - 30: 7032 .2byte 0x7032 - 32: 5f30 .2byte 0x5f30 - 34: 697a .2byte 0x697a - 36: 6566 .2byte 0x6566 - 38: 636e .2byte 0x636e - 3a: 6965 .2byte 0x6965 - 3c: 7032 .2byte 0x7032 - 3e: 0030 .2byte 0x30 - 40: 0108 .2byte 0x108 - 42: 0b0a .2byte 0xb0a - Disassembly of section .comment: 00000000 <.comment>: - 0: 3a434347 .4byte 0x3a434347 - 4: 2820 .2byte 0x2820 - 6: 736f7263 bgeu t5,s6,72a <_start-0x8d6> - a: 6f6f7473 csrrci s0,0x6f6,30 - e: 2d6c .2byte 0x2d6c - 10: 474e .2byte 0x474e - 12: 3120 .2byte 0x3120 - 14: 322e .2byte 0x322e - 16: 2e35 .2byte 0x2e35 - 18: 2e30 .2byte 0x2e30 - 1a: 3538 .2byte 0x3538 - 1c: 365f 6331 6334 .byte 0x5f, 0x36, 0x31, 0x63, 0x34, 0x63 - 22: 20296163 bltu s2,sp,224 <_start-0xddc> - 26: 2e39 .2byte 0x2e39 - 28: 2e35 .2byte 0x2e35 - 2a: 0030 .2byte 0x30 + 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm + 4: 2820 fld fs0,80(s0) + 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm + a: 3720 fld fs0,104(a4) + c: 312e fld ft2,232(sp) + e: 312e fld ft2,232(sp) + 10: 3220 fld fs0,96(a2) + 12: 3130 fld fa2,96(a0) + 14: 30353037 lui zero,0x30353 + 18: 0039 c.addi zero,14 diff --git a/hw/system/spatz_cluster/test/bootrom.elf b/hw/system/spatz_cluster/test/bootrom.elf index 7d6ff8b6f8706095566fb450ab5fe072990b26d0..887443af217f8f3404d1d9659b0b7d8b5bb005a3 100755 GIT binary patch literal 5184 
zcmeHLO=}ZD7=AaKXcZ4zdl2-p9+dXrb`v7eLZNlrhC*lqtv6vwH`_LlWS89yw0N+v z6t6-G{(#=S_!ImS-UNSv#j6nCXJ#jcc=70AUYLE}k0-M;&%ArcyxVT=7=}U0O7xRN zWD(Ul`Srf6)XC=jGTottX`PsZam9df8&u@9X$7m zKr8THDe%A~S~9(IY8|-glZ~5?6{7KFrQBFT-n>XH&rPXq<=2;n`KeO+oPROKd0rXk zCgs;Aom`lNdi3MlL~N}1|NrMkL2XxE43tztp)H-Vz%8cT<-`jfn4>-F$Nui{IHKUR z7q1VaVQ2^ONIG%2{;X;r2YB{t;#m#)5=E9Lpr+@o*~`tn*DH3zsXA4==2jc-bN3~R ze_OFkQ{Hm$&LqVox$Y0sn1!1$lgcv@kx7+e2AM9?&18{X>`$6OkJwT3;TKO;i4puY z#t9qJ7RTOP+>!-6xo*$_<1GKpTztYY_Orpb$oq-s>a>~-;?*G7?f2@9krPpP7KCxu zi-r^qv*g_Gbh`;1MN!u6WE~$)5)GUnpctp*w`zsc-X=fII!RW~%W*c8i3^fNRsKFXMl#lp|d3$gQ zQ?(|?@ZBQ+op}!nAAp(~vtQOP=2=|8ogBU=tbtQ}e<#<*AnYFB@4Zz4Bj$pkS7$r? MJ@eY4A|HzP8yWab6n?IowzNp3Y!G4-M3GtvMrqnYi&!+x-zGFj4Psa0Ox!k7Qd{;=(r$?4 zBXq$5DzGnA z7zPXjh5^HXVZbn882E1t+~J>j`++?h<@buw!|kgNIimPuZ6>0Z`S{{H*`IRO=lH92 z9>=-!*rxcxro)j_zf~tSxq@ z^SSV3t<)<8tInVl6df1dK`AIXXFVB(NS*Z#ykqGxrbu(v{6eErc~)4Kp&v!c_xq1` z8--O-+7Lytx#2unmFrTvD}^n=8MORUvbEXi%>`k75IcL9qjAh+vGm6=Zaj#F@9iACq^i!c)iXc0=m;zUsQlb>M{lKuAeW zquJUiH=S0!zE^8I?eb2u=8zL9H&nFKYPGB7c9}>-rvv47*s3tKN7_@~P>~3I%uE7P zJCP@o-r~^b54*#W}_wDNuv9l z01l@ZJZY{pZZ30H*aC+Wf&GE%IRHTo@|0EMurJVL&y#982IHPj;BcOxx)-3pKx?er zGhma*h;daNu+@EEXMQoOePi6C-*CV}$6qY_myFAE@&@}ji|EOzBu-C12N1p&+)usa RsrxQ+G>O}1+?&7PegUR-lgj`A diff --git a/sw/snRuntime/CMakeLists.txt b/sw/snRuntime/CMakeLists.txt index 1603f5f5..8daa4a7f 100644 --- a/sw/snRuntime/CMakeLists.txt +++ b/sw/snRuntime/CMakeLists.txt @@ -25,13 +25,8 @@ endif() add_compile_options(-O3 -g -ffunction-sections) # Default memory regions -if(SNITCH_RUNTIME STREQUAL "snRuntime-cluster") - set(MEM_DRAM_ORIGIN "0x80000000" CACHE STRING "Base address of external memory") - set(MEM_DRAM_SIZE "0x80000000" CACHE STRING "Size of external memory") -else() - set(MEM_DRAM_ORIGIN "0x80000000" CACHE STRING "Base address of external memory") - set(MEM_DRAM_SIZE "256M" CACHE STRING "Size of external memory") -endif() +set(MEM_DRAM_ORIGIN "0x80000000" CACHE STRING "Base address of external memory") +set(MEM_DRAM_SIZE 
"0x80000000" CACHE STRING "Size of external memory") configure_file(${CMAKE_CURRENT_SOURCE_DIR}/link/common.ld.in common.ld @ONLY) set(LINKER_SCRIPT ${CMAKE_CURRENT_BINARY_DIR}/common.ld CACHE PATH "") diff --git a/sw/snRuntime/link/common.ld.in b/sw/snRuntime/link/common.ld.in index 41ec54d8..f51ff94a 100644 --- a/sw/snRuntime/link/common.ld.in +++ b/sw/snRuntime/link/common.ld.in @@ -16,11 +16,10 @@ SECTIONS .text : { . = ALIGN(4); - *(.text.init) + *(.init) *(.text.startup) *(.text) *(.text*) - *(.text) . = ALIGN(4); _etext = .; } >DRAM diff --git a/sw/snRuntime/src/platforms/shared/start_snitch.S b/sw/snRuntime/src/platforms/shared/start_snitch.S index a60e5040..d48ab060 100644 --- a/sw/snRuntime/src/platforms/shared/start_snitch.S +++ b/sw/snRuntime/src/platforms/shared/start_snitch.S @@ -14,11 +14,11 @@ _snrt_init_core_info: .globl _snrt_init_core_info mv a4, a1 - lw a1, 4(a4) # load the number of cores per cluster - lw t0, 8(a4) # load cluster's hartid offset - lw a2, 12(a4) # start address of the TCDM - lw t1, 16(a4) # size of TCDM address space - lw t2, 20(a4) # offset between cluster TCDMs + lw a1, 8(a4) # load the number of cores per cluster + lw t0, 16(a4) # load cluster's hartid offset + lw a2, 24(a4) # start address of the TCDM + lw t1, 32(a4) # size of TCDM address space + lw t2, 40(a4) # offset between cluster TCDMs sub a0, a0, t0 div t3, a0, a1 # calculate cluster index mul t4, t3, t2 # calculate cluster TCDM offset @@ -45,5 +45,5 @@ _snrt_cluster_barrier: # a0 = bootdata->hartid_base _snrt_get_base_hartid: .globl _snrt_get_base_hartid - lw a0, 8(a0) # load cluster's hartid offset + lw a0, 16(a0) # load cluster's hartid offset ret From 5d81c8cf81a158e311adf749684ba4650e3b1c90 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 25 Oct 2023 15:35:53 -0700 Subject: [PATCH 04/17] sw: Allow externally-defined platforms --- sw/snRuntime/CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git 
a/sw/snRuntime/CMakeLists.txt b/sw/snRuntime/CMakeLists.txt index 8daa4a7f..6940d2da 100644 --- a/sw/snRuntime/CMakeLists.txt +++ b/sw/snRuntime/CMakeLists.txt @@ -24,6 +24,9 @@ endif() add_compile_options(-O3 -g -ffunction-sections) +# Platform sources +set(PLATFORM_SOURCE_FOLDER "src/platforms/standalone" CACHE STRING "Path to the platform-specific sources") + # Default memory regions set(MEM_DRAM_ORIGIN "0x80000000" CACHE STRING "Base address of external memory") set(MEM_DRAM_SIZE "0x80000000" CACHE STRING "Size of external memory") @@ -62,8 +65,8 @@ set(sources # platform specific sources set(standalone_snitch_sources - src/platforms/standalone/start_snitch.S - src/platforms/standalone/putchar.c + ${PLATFORM_SOURCE_FOLDER}/start_snitch.S + ${PLATFORM_SOURCE_FOLDER}/putchar.c ) # Sources only compatible with the LLVM toolchain From 3833e19bfec0e3bfd3a54ddc7e92569c7dd3a07d Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Tue, 7 Nov 2023 15:24:15 -0800 Subject: [PATCH 05/17] sw: Fix platform's `bootdata` struct --- sw/snRuntime/src/platforms/shared/start.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/sw/snRuntime/src/platforms/shared/start.c b/sw/snRuntime/src/platforms/shared/start.c index e7ab2cfb..1ca0b7b3 100644 --- a/sw/snRuntime/src/platforms/shared/start.c +++ b/sw/snRuntime/src/platforms/shared/start.c @@ -8,19 +8,18 @@ extern const uint32_t _snrt_cluster_cluster_core_num; extern const uint32_t _snrt_cluster_cluster_base_hartid; extern const uint32_t _snrt_cluster_cluster_id; -void *const _snrt_cluster_global_offset = (void *)0x10000000; const uint32_t snrt_stack_size __attribute__((weak, section(".rodata"))) = 10; // The boot data generated along with the system RTL. // See `ip/test/src/tb_lib.hh` for details. 
struct snrt_cluster_bootdata { - uint32_t boot_addr; - uint32_t core_count; - uint32_t hartid_base; - uint32_t tcdm_start; - uint32_t tcdm_size; - uint32_t tcdm_offset; + uint64_t boot_addr; + uint64_t core_count; + uint64_t hartid_base; + uint64_t tcdm_start; + uint64_t tcdm_size; + uint64_t tcdm_offset; uint64_t global_mem_start; uint64_t global_mem_end; }; @@ -51,8 +50,7 @@ void _snrt_init_team(uint32_t cluster_core_id, uint32_t cluster_core_num, team->cluster_num = 0; team->cluster_core_base_hartid = bootdata->hartid_base; team->cluster_core_num = cluster_core_num; - team->global_mem.start = - (uint64_t)(bootdata->global_mem_start + _snrt_cluster_global_offset); + team->global_mem.start = (uint64_t)bootdata->global_mem_start; team->global_mem.end = (uint64_t)bootdata->global_mem_end; team->cluster_mem.start = (uint64_t)spm_start; team->cluster_mem.end = (uint64_t)spm_start + bootdata->tcdm_size; From f2f1939b517e9e980b874bf95cf3376ed1370970 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Tue, 14 Nov 2023 10:12:07 -0800 Subject: [PATCH 06/17] sw: Increase the length multiplier --- sw/riscvTests/isa/rv64uv/vadd.c | 236 +++++++++--------- sw/riscvTests/isa/rv64uv/vand.c | 232 +++++++++--------- sw/riscvTests/isa/rv64uv/vdiv.c | 160 ++++++------ sw/riscvTests/isa/rv64uv/vdivu.c | 160 ++++++------ sw/riscvTests/isa/rv64uv/vfadd.c | 236 +++++++++--------- sw/riscvTests/isa/rv64uv/vfcvt.c | 324 ++++++++++++------------- sw/riscvTests/isa/rv64uv/vfmacc.c | 132 +++++----- sw/riscvTests/isa/rv64uv/vfmadd.c | 132 +++++----- sw/riscvTests/isa/rv64uv/vfmax.c | 120 ++++----- sw/riscvTests/isa/rv64uv/vfmin.c | 120 ++++----- sw/riscvTests/isa/rv64uv/vfmsac.c | 132 +++++----- sw/riscvTests/isa/rv64uv/vfmsub.c | 132 +++++----- sw/riscvTests/isa/rv64uv/vfmul.c | 120 ++++----- sw/riscvTests/isa/rv64uv/vfmv.c | 24 +- sw/riscvTests/isa/rv64uv/vfncvt.c | 128 +++++----- sw/riscvTests/isa/rv64uv/vfnmacc.c | 132 +++++----- sw/riscvTests/isa/rv64uv/vfnmadd.c | 132 +++++----- 
sw/riscvTests/isa/rv64uv/vfnmsac.c | 132 +++++----- sw/riscvTests/isa/rv64uv/vfnmsub.c | 132 +++++----- sw/riscvTests/isa/rv64uv/vfrsub.c | 54 ++--- sw/riscvTests/isa/rv64uv/vfsgnj.c | 150 ++++++------ sw/riscvTests/isa/rv64uv/vfsgnjn.c | 120 ++++----- sw/riscvTests/isa/rv64uv/vfsgnjx.c | 120 ++++----- sw/riscvTests/isa/rv64uv/vfsub.c | 120 ++++----- sw/riscvTests/isa/rv64uv/vfwadd.c | 128 +++++----- sw/riscvTests/isa/rv64uv/vfwmacc.c | 72 +++--- sw/riscvTests/isa/rv64uv/vfwmsac.c | 72 +++--- sw/riscvTests/isa/rv64uv/vfwmul.c | 64 ++--- sw/riscvTests/isa/rv64uv/vfwnmacc.c | 72 +++--- sw/riscvTests/isa/rv64uv/vfwnmsac.c | 72 +++--- sw/riscvTests/isa/rv64uv/vfwsub.c | 128 +++++----- sw/riscvTests/isa/rv64uv/vmacc.c | 176 +++++++------- sw/riscvTests/isa/rv64uv/vmadd.c | 176 +++++++------- sw/riscvTests/isa/rv64uv/vmax.c | 138 +++++------ sw/riscvTests/isa/rv64uv/vmaxu.c | 138 +++++------ sw/riscvTests/isa/rv64uv/vmin.c | 138 +++++------ sw/riscvTests/isa/rv64uv/vminu.c | 138 +++++------ sw/riscvTests/isa/rv64uv/vmul.c | 160 ++++++------ sw/riscvTests/isa/rv64uv/vmulh.c | 160 ++++++------ sw/riscvTests/isa/rv64uv/vmulhsu.c | 160 ++++++------ sw/riscvTests/isa/rv64uv/vmulhu.c | 160 ++++++------ sw/riscvTests/isa/rv64uv/vmv.c | 96 ++++---- sw/riscvTests/isa/rv64uv/vnmsac.c | 176 +++++++------- sw/riscvTests/isa/rv64uv/vnmsub.c | 176 +++++++------- sw/riscvTests/isa/rv64uv/vor.c | 232 +++++++++--------- sw/riscvTests/isa/rv64uv/vredand.c | 72 +++--- sw/riscvTests/isa/rv64uv/vredmax.c | 72 +++--- sw/riscvTests/isa/rv64uv/vredmaxu.c | 104 ++++---- sw/riscvTests/isa/rv64uv/vredmin.c | 72 +++--- sw/riscvTests/isa/rv64uv/vredminu.c | 72 +++--- sw/riscvTests/isa/rv64uv/vredor.c | 72 +++--- sw/riscvTests/isa/rv64uv/vredsum.c | 152 ++++++------ sw/riscvTests/isa/rv64uv/vredxor.c | 32 +-- sw/riscvTests/isa/rv64uv/vrem.c | 160 ++++++------ sw/riscvTests/isa/rv64uv/vremu.c | 160 ++++++------ sw/riscvTests/isa/rv64uv/vrsub.c | 144 +++++------ sw/riscvTests/isa/rv64uv/vslide1down.c | 
112 ++++----- sw/riscvTests/isa/rv64uv/vslide1up.c | 80 +++--- sw/riscvTests/isa/rv64uv/vslidedown.c | 64 ++--- sw/riscvTests/isa/rv64uv/vslideup.c | 164 ++++++------- sw/riscvTests/isa/rv64uv/vsll.c | 160 ++++++------ sw/riscvTests/isa/rv64uv/vsra.c | 160 ++++++------ sw/riscvTests/isa/rv64uv/vsrl.c | 160 ++++++------ sw/riscvTests/isa/rv64uv/vsub.c | 164 ++++++------- sw/riscvTests/isa/rv64uv/vwmacc.c | 144 +++++------ sw/riscvTests/isa/rv64uv/vwmaccsu.c | 144 +++++------ sw/riscvTests/isa/rv64uv/vwmaccu.c | 144 +++++------ sw/riscvTests/isa/rv64uv/vwmaccus.c | 72 +++--- sw/riscvTests/isa/rv64uv/vxor.c | 232 +++++++++--------- 69 files changed, 4612 insertions(+), 4612 deletions(-) diff --git a/sw/riscvTests/isa/rv64uv/vadd.c b/sw/riscvTests/isa/rv64uv/vadd.c index b2353d3c..cfe9eac0 100644 --- a/sw/riscvTests/isa/rv64uv/vadd.c +++ b/sw/riscvTests/isa/rv64uv/vadd.c @@ -8,182 +8,182 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vadd.vv v6, v2, v4"); - VCMP_U8(1, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); - - VSET(16, e16, m2); - VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vadd.vv v6, v2, v4"); - VCMP_U16(2, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); - - VSET(16, e32, m2); - VLOAD_32(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vadd.vv v6, v2, v4"); - VCMP_U32(3, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + VSET(16, e8, m8); + VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vv v24, v8, v16"); + VCMP_U8(1, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); 
+ + VSET(16, e16, m8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vv v24, v8, v16"); + VCMP_U16(2, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e32, m8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vv v24, v8, v16"); + VCMP_U32(3, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vadd.vv v6, v2, v4"); - VCMP_U64(4, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vv v24, v8, v16"); + VCMP_U64(4, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); #endif } void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e8, m8); + VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vv v6, v2, v4, v0.t"); - VCMP_U8(5, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + VCLEAR(v24); + asm volatile("vadd.vv v24, v8, v16, v0.t"); + VCMP_U8(5, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); - VSET(16, e16, m2); - VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e16, m8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); 
VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vv v6, v2, v4, v0.t"); - VCMP_U16(6, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + VCLEAR(v24); + asm volatile("vadd.vv v24, v8, v16, v0.t"); + VCMP_U16(6, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); - VSET(16, e32, m2); - VLOAD_32(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e32, m8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vv v6, v2, v4, v0.t"); - VCMP_U32(7, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + VCLEAR(v24); + asm volatile("vadd.vv v24, v8, v16, v0.t"); + VCMP_U32(7, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vv v6, v2, v4, v0.t"); - VCMP_U64(8, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + VCLEAR(v24); + asm volatile("vadd.vv v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); #endif } void TEST_CASE3(void) { - VSET(16, e8, m2); - VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vadd.vi v6, v2, 5"); - VCMP_U8(9, v6, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); + VSET(16, e8, m8); + VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vi v24, v8, 5"); + VCMP_U8(9, v24, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); - VSET(16, e16, m2); - VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm 
volatile("vadd.vi v6, v2, 5"); - VCMP_U16(10, v6, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); + VSET(16, e16, m8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vi v24, v8, 5"); + VCMP_U16(10, v24, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); - VSET(16, e32, m2); - VLOAD_32(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vadd.vi v6, v2, 5"); - VCMP_U32(11, v6, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); + VSET(16, e32, m8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vi v24, v8, 5"); + VCMP_U32(11, v24, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vadd.vi v6, v2, 5"); - VCMP_U64(12, v6, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vi v24, v8, 5"); + VCMP_U64(12, v24, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); #endif } void TEST_CASE4(void) { - VSET(16, e8, m2); - VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e8, m8); + VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vi v6, v2, 5, v0.t"); - VCMP_U8(13, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + VCLEAR(v24); + asm volatile("vadd.vi v24, v8, 5, v0.t"); + VCMP_U8(13, v24, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); - VSET(16, e16, m2); - VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e16, m8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vi v6, v2, 5, v0.t"); - VCMP_U16(14, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + VCLEAR(v24); + asm volatile("vadd.vi v24, v8, 5, v0.t"); + VCMP_U16(14, 
v24, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); - VSET(16, e32, m2); - VLOAD_32(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e32, m8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vi v6, v2, 5, v0.t"); - VCMP_U32(15, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + VCLEAR(v24); + asm volatile("vadd.vi v24, v8, 5, v0.t"); + VCMP_U32(15, v24, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vi v6, v2, 5, v0.t"); - VCMP_U64(16, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + VCLEAR(v24); + asm volatile("vadd.vi v24, v8, 5, v0.t"); + VCMP_U64(16, v24, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); #endif } void TEST_CASE5(void) { const uint32_t scalar = 5; - VSET(16, e8, m2); - VLOAD_8(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); - asm volatile("vadd.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U8(17, v6, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + VSET(16, e8, m8); + VLOAD_8(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vadd.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U8(17, v24, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); - VSET(16, e16, m2); - VLOAD_16(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); - asm volatile("vadd.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U16(18, v6, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + VSET(16, e16, m8); + VLOAD_16(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vadd.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U16(18, v24, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); 
- VSET(16, e32, m2); - VLOAD_32(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); - asm volatile("vadd.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U32(19, v6, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + VSET(16, e32, m8); + VLOAD_32(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vadd.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(19, v24, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); - asm volatile("vadd.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U64(20, v6, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + VSET(16, e64, m8); + VLOAD_64(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vadd.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(20, v24, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); #endif } void TEST_CASE6(void) { const uint32_t scalar = 5; - VSET(16, e8, m2); - VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e8, m8); + VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(21, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + VCLEAR(v24); + asm volatile("vadd.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(21, v24, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); - VSET(16, e16, m2); - VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e16, m8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U16(22, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + VCLEAR(v24); + asm volatile("vadd.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(22, v24, 0, 7, 0, 9, 0, 11, 0, 13, 
0, 7, 0, 9, 0, 11, 0, 13); - VSET(16, e32, m2); - VLOAD_32(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e32, m8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(23, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + VCLEAR(v24); + asm volatile("vadd.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(23, v24, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vadd.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(24, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + VCLEAR(v24); + asm volatile("vadd.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(24, v24, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vand.c b/sw/riscvTests/isa/rv64uv/vand.c index b50a5c08..99c76169 100644 --- a/sw/riscvTests/isa/rv64uv/vand.c +++ b/sw/riscvTests/isa/rv64uv/vand.c @@ -8,48 +8,48 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - VLOAD_8(v6, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + VLOAD_8(v24, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0); - asm volatile("vand.vv v2, v4, v6"); - VCMP_U8(1, v2, 0xf0, 0x01, 0xf0, 0xf0, 0x01, 0xf0, 0xf0, 0x01, 0xf0, 0xf0, + asm volatile("vand.vv v8, v16, v24"); + VCMP_U8(1, v8, 0xf0, 0x01, 0xf0, 0xf0, 0x01, 0xf0, 0xf0, 0x01, 0xf0, 0xf0, 0x01, 0xf0); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 
0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + VLOAD_16(v24, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0); - asm volatile("vand.vv v2, v4, v6"); - VCMP_U16(2, v2, 0xff00, 0x0001, 0xf0f0, 0xff00, 0x0001, 0xf0f0, 0xff00, + asm volatile("vand.vv v8, v16, v24"); + VCMP_U16(2, v8, 0xff00, 0x0001, 0xf0f0, 0xff00, 0x0001, 0xf0f0, 0xff00, 0x0001, 0xf0f0, 0xff00, 0x0001, 0xf0f0); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - VLOAD_32(v6, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + VLOAD_32(v24, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0); - asm volatile("vand.vv v2, v4, v6"); - VCMP_U32(3, v2, 0xffff0000, 0x00000001, 0xf0f0f0f0, 0xffff0000, 0x00000001, + asm volatile("vand.vv v8, v16, v24"); + VCMP_U32(3, v8, 0xffff0000, 0x00000001, 0xf0f0f0f0, 0xffff0000, 0x00000001, 0xf0f0f0f0, 0xffff0000, 0x00000001, 0xf0f0f0f0, 0xffff0000, 0x00000001, 0xf0f0f0f0); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - VLOAD_64(v6, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 
0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); - asm volatile("vand.vv v2, v4, v6"); - VCMP_U64(4, v2, 0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + asm volatile("vand.vv v8, v16, v24"); + VCMP_U64(4, v8, 0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); @@ -57,63 +57,63 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - VLOAD_8(v6, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + VLOAD_8(v24, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vand.vv v2, v4, v6, v0.t"); - VCMP_U8(5, v2, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, + asm volatile("vand.vv v8, v16, v24, v0.t"); + VCMP_U8(5, v8, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + VLOAD_16(v24, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 
0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vand.vv v2, v4, v6, v0.t"); - VCMP_U16(6, v2, 0xff00, 0xbeef, 0xf0f0, 0xff00, 0xbeef, 0xf0f0, 0xff00, + asm volatile("vand.vv v8, v16, v24, v0.t"); + VCMP_U16(6, v8, 0xff00, 0xbeef, 0xf0f0, 0xff00, 0xbeef, 0xf0f0, 0xff00, 0xbeef, 0xf0f0, 0xff00, 0xbeef, 0xf0f0); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - VLOAD_32(v6, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + VLOAD_32(v24, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vand.vv v2, v4, v6, v0.t"); - VCMP_U32(7, v2, 0xffff0000, 0xdeadbeef, 0xf0f0f0f0, 0xffff0000, 0xdeadbeef, + asm volatile("vand.vv v8, v16, v24, v0.t"); + VCMP_U32(7, v8, 0xffff0000, 0xdeadbeef, 0xf0f0f0f0, 0xffff0000, 0xdeadbeef, 0xf0f0f0f0, 0xffff0000, 0xdeadbeef, 0xf0f0f0f0, 0xffff0000, 0xdeadbeef, 0xf0f0f0f0); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - VLOAD_64(v6, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + VLOAD_64(v24, 
0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vand.vv v2, v4, v6, v0.t"); - VCMP_U64(8, v2, 0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, + asm volatile("vand.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0); @@ -123,37 +123,37 @@ void TEST_CASE2() { void TEST_CASE3() { const uint32_t scalar = 0x0ff00ff0; - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - asm volatile("vand.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U8(9, v2, 0xf0, 0x00, 0xf0, 0xf0, 0x00, 0xf0, 0xf0, 0x00, 0xf0, 0xf0, + asm volatile("vand.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v8, 0xf0, 0x00, 0xf0, 0xf0, 0x00, 0xf0, 0xf0, 0x00, 0xf0, 0xf0, 0x00, 0xf0); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - asm volatile("vand.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U16(10, v2, 0x0ff0, 0x0000, 0x00f0, 0x0ff0, 0x0000, 0x00f0, 0x0ff0, + asm 
volatile("vand.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v8, 0x0ff0, 0x0000, 0x00f0, 0x0ff0, 0x0000, 0x00f0, 0x0ff0, 0x0000, 0x00f0, 0x0ff0, 0x0000, 0x00f0); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - asm volatile("vand.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U32(11, v2, 0x0ff00ff0, 0x00000000, 0x00f000f0, 0x0ff00ff0, 0x00000000, + asm volatile("vand.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0x0ff00ff0, 0x00000000, 0x00f000f0, 0x0ff00ff0, 0x00000000, 0x00f000f0, 0x0ff00ff0, 0x00000000, 0x00f000f0, 0x0ff00ff0, 0x00000000, 0x00f000f0); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - asm volatile("vand.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U64(12, v2, 0x000000000ff00ff0, 0x0000000000000000, 0x0000000000f000f0, + asm volatile("vand.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0x000000000ff00ff0, 0x0000000000000000, 0x0000000000f000f0, 0x000000000ff00ff0, 0x0000000000000000, 0x0000000000f000f0, 0x000000000ff00ff0, 0x0000000000000000, 0x0000000000f000f0, 0x000000000ff00ff0, 0x0000000000000000, 0x0000000000f000f0); @@ -163,52 +163,52 @@ void TEST_CASE3() { void TEST_CASE4() { const uint32_t scalar = 0x0ff00ff0; - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_8(v2, 
0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vand.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(13, v2, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, + asm volatile("vand.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v8, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vand.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U16(14, v2, 0x0ff0, 0xbeef, 0x00f0, 0x0ff0, 0xbeef, 0x00f0, 0x0ff0, + asm volatile("vand.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v8, 0x0ff0, 0xbeef, 0x00f0, 0x0ff0, 0xbeef, 0x00f0, 0x0ff0, 0xbeef, 0x00f0, 0x0ff0, 0xbeef, 0x00f0); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vand.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(15, v2, 0x0ff00ff0, 0xdeadbeef, 0x00f000f0, 0x0ff00ff0, 0xdeadbeef, + asm volatile("vand.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0x0ff00ff0, 
0xdeadbeef, 0x00f000f0, 0x0ff00ff0, 0xdeadbeef, 0x00f000f0, 0x0ff00ff0, 0xdeadbeef, 0x00f000f0, 0x0ff00ff0, 0xdeadbeef, 0x00f000f0); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vand.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(16, v2, 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0, + asm volatile("vand.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0, 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0, 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0, 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0); @@ -216,37 +216,37 @@ void TEST_CASE4() { } void TEST_CASE5() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - asm volatile("vand.vi v2, v4, 15"); - VCMP_U8(17, v2, 0x0f, 0x01, 0x00, 0x0f, 0x01, 0x00, 0x0f, 0x01, 0x00, 0x0f, + asm volatile("vand.vi v8, v16, 15"); + VCMP_U8(17, v8, 0x0f, 0x01, 0x00, 0x0f, 0x01, 0x00, 0x0f, 0x01, 0x00, 0x0f, 0x01, 0x00); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + 
VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - asm volatile("vand.vi v2, v4, 15"); - VCMP_U16(18, v2, 0x000f, 0x0001, 0x0000, 0x000f, 0x0001, 0x0000, 0x000f, + asm volatile("vand.vi v8, v16, 15"); + VCMP_U16(18, v8, 0x000f, 0x0001, 0x0000, 0x000f, 0x0001, 0x0000, 0x000f, 0x0001, 0x0000, 0x000f, 0x0001, 0x0000); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - asm volatile("vand.vi v2, v4, 15"); - VCMP_U32(19, v2, 0x0000000f, 0x00000001, 0x00000000, 0x0000000f, 0x00000001, + asm volatile("vand.vi v8, v16, 15"); + VCMP_U32(19, v8, 0x0000000f, 0x00000001, 0x00000000, 0x0000000f, 0x00000001, 0x00000000, 0x0000000f, 0x00000001, 0x00000000, 0x0000000f, 0x00000001, 0x00000000); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - asm volatile("vand.vi v2, v4, 15"); - VCMP_U64(20, v2, 0x000000000000000f, 0x0000000000000001, 0x0000000000000000, + asm volatile("vand.vi v8, v16, 15"); + VCMP_U64(20, v8, 0x000000000000000f, 0x0000000000000001, 0x0000000000000000, 0x000000000000000f, 0x0000000000000001, 0x0000000000000000, 0x000000000000000f, 0x0000000000000001, 0x0000000000000000, 0x000000000000000f, 0x0000000000000001, 0x0000000000000000); @@ -254,52 +254,52 @@ void TEST_CASE5() { } void TEST_CASE6() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 
0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vand.vi v2, v4, 15, v0.t"); - VCMP_U8(21, v2, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, + asm volatile("vand.vi v8, v16, 15, v0.t"); + VCMP_U8(21, v8, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vand.vi v2, v4, 15, v0.t"); - VCMP_U16(22, v2, 0x000f, 0xbeef, 0x0000, 0x000f, 0xbeef, 0x0000, 0x000f, + asm volatile("vand.vi v8, v16, 15, v0.t"); + VCMP_U16(22, v8, 0x000f, 0xbeef, 0x0000, 0x000f, 0xbeef, 0x0000, 0x000f, 0xbeef, 0x0000, 0x000f, 0xbeef, 0x0000); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vand.vi v2, v4, 15, v0.t"); - VCMP_U32(23, v2, 0x0000000f, 0xdeadbeef, 0x00000000, 0x0000000f, 0xdeadbeef, + asm volatile("vand.vi v8, v16, 15, v0.t"); + VCMP_U32(23, v8, 0x0000000f, 0xdeadbeef, 0x00000000, 
0x0000000f, 0xdeadbeef, 0x00000000, 0x0000000f, 0xdeadbeef, 0x00000000, 0x0000000f, 0xdeadbeef, 0x00000000); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vand.vi v2, v4, 15, v0.t"); - VCMP_U64(24, v2, 0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000, + asm volatile("vand.vi v8, v16, 15, v0.t"); + VCMP_U64(24, v8, 0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000, 0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000, 0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000, 0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000); diff --git a/sw/riscvTests/isa/rv64uv/vdiv.c b/sw/riscvTests/isa/rv64uv/vdiv.c index 70d30359..03012cd9 100644 --- a/sw/riscvTests/isa/rv64uv/vdiv.c +++ b/sw/riscvTests/isa/rv64uv/vdiv.c @@ -8,56 +8,56 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0xcd, 0x42, 0x2c, 0xc4, 0x7a, 0x7b, 0xd1, 0x21, 0x38, 0x1d, 0x2e, + VSET(16, e8, m8); + VLOAD_8(v16, 0xcd, 0x42, 0x2c, 0xc4, 0x7a, 0x7b, 0xd1, 0x21, 0x38, 0x1d, 0x2e, 0x4f, 0xe7, 0x3d, 0x63, 0xd8); - VLOAD_8(v6, 0x11, 0xa1, 0x7c, 0xde, 0x02, 0x38, 0x4e, 0x03, 0x1e, 0xc6, 0x16, + VLOAD_8(v24, 0x11, 0xa1, 0x7c, 0xde, 0x02, 0x38, 0x4e, 0x03, 0x1e, 0xc6, 0x16, 0xa0, 0xca, 0x83, 0x54, 0x90); - asm volatile("vdiv.vv v2, v4, v6"); 
- VCMP_I8(1, v2, 0xfd, 0x00, 0x00, 0x01, 0x3d, 0x02, 0x00, 0x0b, 0x01, 0x00, + asm volatile("vdiv.vv v8, v16, v24"); + VCMP_I8(1, v8, 0xfd, 0x00, 0x00, 0x01, 0x3d, 0x02, 0x00, 0x0b, 0x01, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00); - VSET(16, e16, m2); - VLOAD_16(v4, 0x9e55, 0xf806, 0xa137, 0xa5fc, 0x38ae, 0x58c3, 0x2a66, 0x6bd7, + VSET(16, e16, m8); + VLOAD_16(v16, 0x9e55, 0xf806, 0xa137, 0xa5fc, 0x38ae, 0x58c3, 0x2a66, 0x6bd7, 0x74e7, 0xa845, 0x2052, 0x6f9a, 0x6d88, 0x2861, 0xdaea, 0x2075); - VLOAD_16(v6, 0x5e64, 0x0a44, 0xdde5, 0x813f, 0x78b9, 0x29be, 0x28b4, 0x1b2f, + VLOAD_16(v24, 0x5e64, 0x0a44, 0xdde5, 0x813f, 0x78b9, 0x29be, 0x28b4, 0x1b2f, 0xc4a3, 0x4a05, 0x5501, 0x49bb, 0xe5f8, 0xfa20, 0x4edf, 0xf892); - asm volatile("vdiv.vv v2, v4, v6"); - VCMP_I16(2, v2, 0xffff, 0x0000, 0x0002, 0x0000, 0x0000, 0x0002, 0x0001, + asm volatile("vdiv.vv v8, v16, v24"); + VCMP_I16(2, v8, 0xffff, 0x0000, 0x0002, 0x0000, 0x0000, 0x0002, 0x0001, 0x0003, 0xffff, 0xffff, 0x0000, 0x0001, 0xfffc, 0xfffa, 0x0000, 0xfffc); - VSET(16, e32, m2); - VLOAD_32(v4, 0xed578a38, 0xba7b1912, 0xb99934ef, 0x21a85df9, 0xb01c09f2, + VSET(16, e32, m8); + VLOAD_32(v16, 0xed578a38, 0xba7b1912, 0xb99934ef, 0x21a85df9, 0xb01c09f2, 0xd0cb54fc, 0x9b617331, 0xd32cf029, 0xaea08daa, 0xd76f06e2, 0x99b8e084, 0x9fdc6bfc, 0x3103b573, 0xaf1e96b4, 0x54fe9ea1, 0x0ceff9c0); - VLOAD_32(v6, 0x0000002d, 0x0000001e, 0x0000003e, 0xffffffe0, 0x0000004b, + VLOAD_32(v24, 0x0000002d, 0x0000001e, 0x0000003e, 0xffffffe0, 0x0000004b, 0x00000064, 0xffffff88, 0x0000003b, 0x00000011, 0xffffffc3, 0xffffffa2, 0x0000004b, 0xffffffcc, 0xffffffb1, 0xffffff9d, 0xffffffba); - asm volatile("vdiv.vv v2, v4, v6"); - VCMP_I32(3, v2, 0xff95db40, 0xfdaec51b, 0xfedd4f4f, 0xfef2bd11, 0xfeef4ea4, + asm volatile("vdiv.vv v8, v16, v24"); + VCMP_I32(3, v8, 0xff95db40, 0xfdaec51b, 0xfedd4f4f, 0xfef2bd11, 0xfeef4ea4, 0xff872740, 0x00d6a792, 0xff3d81e7, 0xfb369eec, 0x00aa3ed6, 0x01168b3a, 0xfeb7d87b, 0xff0eb2ab, 0x01061804, 0xff24374a, 0xffd0afa2); 
#if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x7954dd7fc5e0687c, 0xfcf310f44f869aa0, 0x375fcbcf6aae0cac, + VSET(16, e64, m8); + VLOAD_64(v16, 0x7954dd7fc5e0687c, 0xfcf310f44f869aa0, 0x375fcbcf6aae0cac, 0x6038c1825cfb5a72, 0x59046c6b324e7fb3, 0xbac247d729fb18e6, 0x0cecf04984784b3f, 0x2cd8e8ea5be0c201, 0xcab2cf17c48b57cb, 0xcb53d1b55097656d, 0xbeeb18a6fab9af8d, 0xbf94f3a0fa74670b, 0x817c1dfb5ab3bfd7, 0x40951ef6459642d1, 0x4b5f994556f6ba42, 0x63a8eaa417e6d29c); - VLOAD_64(v6, 0xffffff9bd8e00c88, 0x0000002057c200e4, 0x0000004c4c93640e, + VLOAD_64(v24, 0xffffff9bd8e00c88, 0x0000002057c200e4, 0x0000004c4c93640e, 0x000000497b7bfcdd, 0x00000001feebe76a, 0xffffffb93a2c242c, 0xffffff8523c47d2a, 0x00000019f01c4433, 0xffffffd940862ecc, 0x00000016ac4df9c8, 0x0000001e9f15d00c, 0xffffffd0af22d791, 0xffffff9feca249bc, 0xffffffac5eae7985, 0x0000003567fe8027, 0x000000175355cab3); - asm volatile("vdiv.vv v2, v4, v6"); - VCMP_I64(4, v2, 0xfffffffffec9dd87, 0xffffffffffe7dac0, 0x0000000000b9cad9, + asm volatile("vdiv.vv v8, v16, v24"); + VCMP_I64(4, v8, 0xfffffffffec9dd87, 0xffffffffffe7dac0, 0x0000000000b9cad9, 0x00000000014f3850, 0x000000002c9a4382, 0x0000000000fa75a9, 0xffffffffffe51146, 0x0000000001baa14f, 0x000000000160270e, 0xfffffffffdad470e, 0xfffffffffddfe832, 0x00000000015c87ee, @@ -67,99 +67,99 @@ void TEST_CASE1(void) { }; void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0xcd, 0x42, 0x2c, 0xc4, 0x7a, 0x7b, 0xd1, 0x21, 0x38, 0x1d, 0x2e, + VSET(16, e8, m8); + VLOAD_8(v16, 0xcd, 0x42, 0x2c, 0xc4, 0x7a, 0x7b, 0xd1, 0x21, 0x38, 0x1d, 0x2e, 0x4f, 0xe7, 0x3d, 0x63, 0xd8); - VLOAD_8(v6, 0x11, 0xa1, 0x7c, 0xde, 0x02, 0x38, 0x4e, 0x03, 0x1e, 0xc6, 0x16, + VLOAD_8(v24, 0x11, 0xa1, 0x7c, 0xde, 0x02, 0x38, 0x4e, 0x03, 0x1e, 0xc6, 0x16, 0xa0, 0xca, 0x83, 0x54, 0x90); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdiv.vv v2, v4, v6, v0.t"); - VCMP_I8(5, v2, 0, 0x00, 0, 0x01, 0, 0x02, 0, 0x0b, 0, 0x00, 0, 0x00, 0, 0x00, + VCLEAR(v8); + asm volatile("vdiv.vv v8, v16, 
v24, v0.t"); + VCMP_I8(5, v8, 0, 0x00, 0, 0x01, 0, 0x02, 0, 0x0b, 0, 0x00, 0, 0x00, 0, 0x00, 0, 0x00); - VSET(16, e16, m2); - VLOAD_16(v4, 0x9e55, 0xf806, 0xa137, 0xa5fc, 0x38ae, 0x58c3, 0x2a66, 0x6bd7, + VSET(16, e16, m8); + VLOAD_16(v16, 0x9e55, 0xf806, 0xa137, 0xa5fc, 0x38ae, 0x58c3, 0x2a66, 0x6bd7, 0x74e7, 0xa845, 0x2052, 0x6f9a, 0x6d88, 0x2861, 0xdaea, 0x2075); - VLOAD_16(v6, 0x5e64, 0x0a44, 0xdde5, 0x813f, 0x78b9, 0x29be, 0x28b4, 0x1b2f, + VLOAD_16(v24, 0x5e64, 0x0a44, 0xdde5, 0x813f, 0x78b9, 0x29be, 0x28b4, 0x1b2f, 0xc4a3, 0x4a05, 0x5501, 0x49bb, 0xe5f8, 0xfa20, 0x4edf, 0xf892); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdiv.vv v2, v4, v6, v0.t"); - VCMP_I16(6, v2, 0, 0x0000, 0, 0x0000, 0, 0x0002, 0, 0x0003, 0, 0xffff, 0, + VCLEAR(v8); + asm volatile("vdiv.vv v8, v16, v24, v0.t"); + VCMP_I16(6, v8, 0, 0x0000, 0, 0x0000, 0, 0x0002, 0, 0x0003, 0, 0xffff, 0, 0x0001, 0, 0xfffa, 0, 0xfffc); - VSET(16, e32, m2); - VLOAD_32(v4, 0xed578a38, 0xba7b1912, 0xb99934ef, 0x21a85df9, 0xb01c09f2, + VSET(16, e32, m8); + VLOAD_32(v16, 0xed578a38, 0xba7b1912, 0xb99934ef, 0x21a85df9, 0xb01c09f2, 0xd0cb54fc, 0x9b617331, 0xd32cf029, 0xaea08daa, 0xd76f06e2, 0x99b8e084, 0x9fdc6bfc, 0x3103b573, 0xaf1e96b4, 0x54fe9ea1, 0x0ceff9c0); - VLOAD_32(v6, 0x0000002d, 0x0000001e, 0x0000003e, 0xffffffe0, 0x0000004b, + VLOAD_32(v24, 0x0000002d, 0x0000001e, 0x0000003e, 0xffffffe0, 0x0000004b, 0x00000064, 0xffffff88, 0x0000003b, 0x00000011, 0xffffffc3, 0xffffffa2, 0x0000004b, 0xffffffcc, 0xffffffb1, 0xffffff9d, 0xffffffba); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdiv.vv v2, v4, v6, v0.t"); - VCMP_I32(7, v2, 0, 0xfdaec51b, 0, 0xfef2bd11, 0, 0xff872740, 0, 0xff3d81e7, 0, + VCLEAR(v8); + asm volatile("vdiv.vv v8, v16, v24, v0.t"); + VCMP_I32(7, v8, 0, 0xfdaec51b, 0, 0xfef2bd11, 0, 0xff872740, 0, 0xff3d81e7, 0, 0x00aa3ed6, 0, 0xfeb7d87b, 0, 0x01061804, 0, 0xffd0afa2); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x7954dd7fc5e0687c, 0xfcf310f44f869aa0, 
0x375fcbcf6aae0cac, + VSET(16, e64, m8); + VLOAD_64(v16, 0x7954dd7fc5e0687c, 0xfcf310f44f869aa0, 0x375fcbcf6aae0cac, 0x6038c1825cfb5a72, 0x59046c6b324e7fb3, 0xbac247d729fb18e6, 0x0cecf04984784b3f, 0x2cd8e8ea5be0c201, 0xcab2cf17c48b57cb, 0xcb53d1b55097656d, 0xbeeb18a6fab9af8d, 0xbf94f3a0fa74670b, 0x817c1dfb5ab3bfd7, 0x40951ef6459642d1, 0x4b5f994556f6ba42, 0x63a8eaa417e6d29c); - VLOAD_64(v6, 0xffffff9bd8e00c88, 0x0000002057c200e4, 0x0000004c4c93640e, + VLOAD_64(v24, 0xffffff9bd8e00c88, 0x0000002057c200e4, 0x0000004c4c93640e, 0x000000497b7bfcdd, 0x00000001feebe76a, 0xffffffb93a2c242c, 0xffffff8523c47d2a, 0x00000019f01c4433, 0xffffffd940862ecc, 0x00000016ac4df9c8, 0x0000001e9f15d00c, 0xffffffd0af22d791, 0xffffff9feca249bc, 0xffffffac5eae7985, 0x0000003567fe8027, 0x000000175355cab3); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdiv.vv v2, v4, v6, v0.t"); - VCMP_I64(8, v2, 0, 0xffffffffffe7dac0, 0, 0x00000000014f3850, 0, + VCLEAR(v8); + asm volatile("vdiv.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0xffffffffffe7dac0, 0, 0x00000000014f3850, 0, 0x0000000000fa75a9, 0, 0x0000000001baa14f, 0, 0xfffffffffdad470e, 0, 0x00000000015c87ee, 0, 0xffffffffff3a4e84, 0, 0x000000000445c6cf); #endif }; void TEST_CASE3(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x14, 0xab, 0x4d, 0xcd, 0xc3, 0x7c, 0xb5, 0xf0, 0xc1, 0x90, 0x14, + VSET(16, e8, m8); + VLOAD_8(v16, 0x14, 0xab, 0x4d, 0xcd, 0xc3, 0x7c, 0xb5, 0xf0, 0xc1, 0x90, 0x14, 0x59, 0x98, 0xda, 0x76, 0x84); int64_t scalar = 5; - asm volatile("vdiv.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I8(9, v2, 0x04, 0xef, 0x0f, 0xf6, 0xf4, 0x18, 0xf1, 0xfd, 0xf4, 0xea, + asm volatile("vdiv.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v8, 0x04, 0xef, 0x0f, 0xf6, 0xf4, 0x18, 0xf1, 0xfd, 0xf4, 0xea, 0x04, 0x11, 0xec, 0xf9, 0x17, 0xe8); - VSET(16, e16, m2); - VLOAD_16(v4, 0x19c9, 0x865a, 0x3063, 0xd5c2, 0xbe39, 0x98c7, 0x1ca7, 0x5d1e, + VSET(16, e16, m8); + VLOAD_16(v16, 0x19c9, 0x865a, 0x3063, 0xd5c2, 0xbe39, 0x98c7, 0x1ca7, 0x5d1e, 
0x8fdc, 0x3396, 0x9442, 0xee77, 0x7da8, 0xf200, 0xaba3, 0x4cd6); scalar = -538; - asm volatile("vdiv.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I16(10, v2, 0xfff4, 0x0039, 0xffe9, 0x0014, 0x001f, 0x0031, 0xfff3, + asm volatile("vdiv.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v8, 0xfff4, 0x0039, 0xffe9, 0x0014, 0x001f, 0x0031, 0xfff3, 0xffd4, 0x0035, 0xffe8, 0x0033, 0x0008, 0xffc5, 0x0006, 0x0028, 0xffdc); - VSET(16, e32, m2); - VLOAD_32(v4, 0x49dd393c, 0xfc1d701e, 0x7670b541, 0x5ef6c28f, 0x60da5cab, + VSET(16, e32, m8); + VLOAD_32(v16, 0x49dd393c, 0xfc1d701e, 0x7670b541, 0x5ef6c28f, 0x60da5cab, 0x6be56bc4, 0x6f629cde, 0xf1ab595a, 0x3d99363b, 0xb8a7840e, 0x84071026, 0x6697d435, 0x3768cf44, 0x82f1a5a1, 0xf5d4f40e, 0xcda97e6d); scalar = 649; - asm volatile("vdiv.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I32(11, v2, 0x001d22cc, 0xfffe77b2, 0x002eb818, 0x00257573, 0x00263435, + asm volatile("vdiv.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v8, 0x001d22cc, 0xfffe77b2, 0x002eb818, 0x00257573, 0x00263435, 0x002a8f5a, 0x002befac, 0xfffa58e4, 0x00184c36, 0xffe3db85, 0xffcf1946, 0x002877d8, 0x0015db3d, 0xffceabd8, 0xfffbfd39, 0xffec24e2); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0xeea1bad034de2c3e, 0x5acd284816152166, 0x52a24c3b98af20f9, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0xeea1bad034de2c3e, 0x5acd284816152166, 0x52a24c3b98af20f9, */ /* 0x843d3c2e39d3221e, 0xda6c4bef77118459, 0x8c7e991a77cc3ddc, */ /* 0x58f56c82eceafc72, 0xb4b1bac0a66d4984, 0x126283c905985ab8, */ @@ -167,8 +167,8 @@ void TEST_CASE3(void) { /* 0xde91f735ce81d174, 0x3d254eb16d0c87f4, 0xc06ebbe7936e6774, */ /* 0xb17ccbc475c8724e); */ /* scalar = -59223; */ - /* asm volatile("vdiv.vx v2, v4, %[A]" ::[A] "r"(scalar)); */ - /* VCMP_I64(12, v2, 0x000013383ad25844, 0xffff9b84f9ef594c, + /* asm volatile("vdiv.vx v8, v16, %[A]" ::[A] "r"(scalar)); */ + /* VCMP_I64(12, v8, 0x000013383ad25844, 0xffff9b84f9ef594c, * 0xffffa48eb726f738, */ /* 0x000088f40e45bbd1, 
0x0000299522c72a62, 0x00007fd16a16b1db, */ /* 0xffff9d8efec5cf15, 0x000053554738ae55, 0xffffeba7c8cdd664, */ @@ -179,41 +179,41 @@ void TEST_CASE3(void) { }; void TEST_CASE4(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x14, 0xab, 0x4d, 0xcd, 0xc3, 0x7c, 0xb5, 0xf0, 0xc1, 0x90, 0x14, + VSET(16, e8, m8); + VLOAD_8(v16, 0x14, 0xab, 0x4d, 0xcd, 0xc3, 0x7c, 0xb5, 0xf0, 0xc1, 0x90, 0x14, 0x59, 0x98, 0xda, 0x76, 0x84); int64_t scalar = 5; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdiv.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I8(13, v2, 0, 0xef, 0, 0xf6, 0, 0x18, 0, 0xfd, 0, 0xea, 0, 0x11, 0, 0xf9, + VCLEAR(v8); + asm volatile("vdiv.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v8, 0, 0xef, 0, 0xf6, 0, 0x18, 0, 0xfd, 0, 0xea, 0, 0x11, 0, 0xf9, 0, 0xe8); - VSET(16, e16, m2); - VLOAD_16(v4, 0x19c9, 0x865a, 0x3063, 0xd5c2, 0xbe39, 0x98c7, 0x1ca7, 0x5d1e, + VSET(16, e16, m8); + VLOAD_16(v16, 0x19c9, 0x865a, 0x3063, 0xd5c2, 0xbe39, 0x98c7, 0x1ca7, 0x5d1e, 0x8fdc, 0x3396, 0x9442, 0xee77, 0x7da8, 0xf200, 0xaba3, 0x4cd6); scalar = -538; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdiv.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I16(14, v2, 0, 0x0039, 0, 0x0014, 0, 0x0031, 0, 0xffd4, 0, 0xffe8, 0, + VCLEAR(v8); + asm volatile("vdiv.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v8, 0, 0x0039, 0, 0x0014, 0, 0x0031, 0, 0xffd4, 0, 0xffe8, 0, 0x0008, 0, 0x0006, 0, 0xffdc); - VSET(16, e32, m2); - VLOAD_32(v4, 0x49dd393c, 0xfc1d701e, 0x7670b541, 0x5ef6c28f, 0x60da5cab, + VSET(16, e32, m8); + VLOAD_32(v16, 0x49dd393c, 0xfc1d701e, 0x7670b541, 0x5ef6c28f, 0x60da5cab, 0x6be56bc4, 0x6f629cde, 0xf1ab595a, 0x3d99363b, 0xb8a7840e, 0x84071026, 0x6697d435, 0x3768cf44, 0x82f1a5a1, 0xf5d4f40e, 0xcda97e6d); scalar = 649; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdiv.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I32(15, v2, 0, 0xfffe77b2, 0, 0x00257573, 0, 0x002a8f5a, 0, 0xfffa58e4, + VCLEAR(v8); + asm volatile("vdiv.vx v8, 
v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v8, 0, 0xfffe77b2, 0, 0x00257573, 0, 0x002a8f5a, 0, 0xfffa58e4, 0, 0xffe3db85, 0, 0x002877d8, 0, 0xffceabd8, 0, 0xffec24e2); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0xeea1bad034de2c3e, 0x5acd284816152166, 0x52a24c3b98af20f9, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0xeea1bad034de2c3e, 0x5acd284816152166, 0x52a24c3b98af20f9, */ /* 0x843d3c2e39d3221e, 0xda6c4bef77118459, 0x8c7e991a77cc3ddc, */ /* 0x58f56c82eceafc72, 0xb4b1bac0a66d4984, 0x126283c905985ab8, */ @@ -222,9 +222,9 @@ void TEST_CASE4(void) { /* 0xb17ccbc475c8724e); */ /* scalar = -59223; */ /* VLOAD_8(v0, 0xAA, 0xAA); */ - /* VCLEAR(v2); */ - /* asm volatile("vdiv.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); */ - /* VCMP_I64(16, v2, 0, 0xffff9b84f9ef594c, 0, 0x000088f40e45bbd1, 0, */ + /* VCLEAR(v8); */ + /* asm volatile("vdiv.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); */ + /* VCMP_I64(16, v8, 0, 0xffff9b84f9ef594c, 0, 0x000088f40e45bbd1, 0, */ /* 0x00007fd16a16b1db, 0, 0x000053554738ae55, 0, * 0xffffbf3d66c69bdf, 0, */ /* 0xffffface02d012e3, 0, 0xffffbc56186f1f3d, 0, diff --git a/sw/riscvTests/isa/rv64uv/vdivu.c b/sw/riscvTests/isa/rv64uv/vdivu.c index fb6e4429..0b6cb55b 100644 --- a/sw/riscvTests/isa/rv64uv/vdivu.c +++ b/sw/riscvTests/isa/rv64uv/vdivu.c @@ -8,56 +8,56 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x85, 0x1d, 0x9f, 0x31, 0x8c, 0x4c, 0x4c, 0xf2, 0x11, 0xfc, 0xc8, + VSET(16, e8, m8); + VLOAD_8(v16, 0x85, 0x1d, 0x9f, 0x31, 0x8c, 0x4c, 0x4c, 0xf2, 0x11, 0xfc, 0xc8, 0xab, 0xc2, 0xff, 0xf5, 0xc2); - VLOAD_8(v6, 0x3d, 0x06, 0x32, 0x36, 0x02, 0x0f, 0x27, 0x35, 0x1e, 0x0f, 0x36, + VLOAD_8(v24, 0x3d, 0x06, 0x32, 0x36, 0x02, 0x0f, 0x27, 0x35, 0x1e, 0x0f, 0x36, 0x1c, 0x24, 0x1a, 0x22, 0x01); - asm volatile("vdivu.vv v2, v4, v6"); - VCMP_I8(1, v2, 0x02, 0x04, 0x03, 0x00, 0x46, 0x05, 0x01, 0x04, 0x00, 0x10, + asm volatile("vdivu.vv v8, v16, v24"); + VCMP_I8(1, v8, 0x02, 0x04, 0x03, 
0x00, 0x46, 0x05, 0x01, 0x04, 0x00, 0x10, 0x03, 0x06, 0x05, 0x09, 0x07, 0xc2); - VSET(16, e16, m2); - VLOAD_16(v4, 0xd200, 0xa047, 0x7af8, 0x453d, 0xd6eb, 0xfabb, 0x604a, 0xea35, + VSET(16, e16, m8); + VLOAD_16(v16, 0xd200, 0xa047, 0x7af8, 0x453d, 0xd6eb, 0xfabb, 0x604a, 0xea35, 0xbc2d, 0x45e7, 0x8407, 0x3845, 0x1495, 0x8ee6, 0x7da4, 0xf34a); - VLOAD_16(v6, 0x03ad, 0x00b8, 0x001b, 0x0353, 0x013f, 0x008c, 0x015e, 0x01e6, + VLOAD_16(v24, 0x03ad, 0x00b8, 0x001b, 0x0353, 0x013f, 0x008c, 0x015e, 0x01e6, 0x00cd, 0x0093, 0x00ba, 0x03d0, 0x0117, 0x009d, 0x007b, 0x02cf); - asm volatile("vdivu.vv v2, v4, v6"); - VCMP_I16(2, v2, 0x0039, 0x00de, 0x048d, 0x0014, 0x00ac, 0x01ca, 0x0046, + asm volatile("vdivu.vv v8, v16, v24"); + VCMP_I16(2, v8, 0x0039, 0x00de, 0x048d, 0x0014, 0x00ac, 0x01ca, 0x0046, 0x007b, 0x00ea, 0x0079, 0x00b5, 0x000e, 0x0012, 0x00e9, 0x0105, 0x0056); - VSET(16, e32, m2); - VLOAD_32(v4, 0xd56bd03a, 0x1036c5ff, 0xaa01847f, 0x988dc35d, 0x4d8615dc, + VSET(16, e32, m8); + VLOAD_32(v16, 0xd56bd03a, 0x1036c5ff, 0xaa01847f, 0x988dc35d, 0x4d8615dc, 0xb62269e2, 0xe842ba75, 0x02fecbf9, 0xe536c712, 0xe93e2160, 0x9ba34297, 0x554d290d, 0x319f668c, 0x0d6c2fbb, 0x6a7eb54a, 0x3fa1cc84); - VLOAD_32(v6, 0x00000025, 0x00000057, 0x0000002e, 0x0000004c, 0x00000052, + VLOAD_32(v24, 0x00000025, 0x00000057, 0x0000002e, 0x0000004c, 0x00000052, 0x00000021, 0x0000001d, 0x0000002f, 0x00000029, 0x00000008, 0x00000015, 0x00000029, 0x00000048, 0x00000051, 0x0000003f, 0x00000007); - asm volatile("vdivu.vv v2, v4, v6"); - VCMP_I32(3, v2, 0x05c4a4c3, 0x002fb5c5, 0x03b21eb4, 0x0201dd84, 0x00f20682, + asm volatile("vdivu.vv v8, v16, v24"); + VCMP_I32(3, v8, 0x05c4a4c3, 0x002fb5c5, 0x03b21eb4, 0x0201dd84, 0x00f20682, 0x0584ebef, 0x08024d0c, 0x00105098, 0x05973090, 0x1d27c42c, 0x07694c50, 0x02149d19, 0x00b06fa5, 0x002a6c0b, 0x01b0bdcc, 0x09171d37); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0xf251717441f02108, 0x2668b5d7f073b9d1, 0x4708b49fd356a60a, + VSET(16, e64, m8); + VLOAD_64(v16, 
0xf251717441f02108, 0x2668b5d7f073b9d1, 0x4708b49fd356a60a, 0x195bedcb9ce5956b, 0x41ce2b35b4280568, 0x32e5b89eed21de3b, 0x73f17f3d0e1f243f, 0x0d0aa119af3b0e95, 0x024cbba6ba662213, 0xf7df98f44da5e55f, 0x3cf2951aa7e4c0a9, 0x0ed8987446e84f30, 0xd983a7a0d4c648b4, 0x60eb8249069801a5, 0x529967e8b06df477, 0x776410b4b0cc22ad); - VLOAD_64(v6, 0x000000000000695b, 0x0000000000007420, 0x000000000001850c, + VLOAD_64(v24, 0x000000000000695b, 0x0000000000007420, 0x000000000001850c, 0x000000000000b46f, 0x000000000000e92c, 0x0000000000024e72, 0x0000000000032774, 0x000000000001c36f, 0x00000000000063d2, 0x0000000000037bb4, 0x000000000003692c, 0x000000000001d60c, 0x000000000002cf7b, 0x0000000000037899, 0x0000000000038bcf, 0x000000000003d0e4); - asm volatile("vdivu.vv v2, v4, v6"); - VCMP_I64(4, v2, 0x00024ccd25dd5faf, 0x000054ac6a930494, 0x00002ebddee9df57, + asm volatile("vdivu.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x00024ccd25dd5faf, 0x000054ac6a930494, 0x00002ebddee9df57, 0x000023fac7321f85, 0x0000483f73b2e3e2, 0x000016114f5d8a9e, 0x000024c26869df0e, 0x00000765470f410f, 0x000005e5de9b769d, 0x0000472988fa89c3, 0x000011de6d57a394, 0x00000815e7b8df73, @@ -67,107 +67,107 @@ void TEST_CASE1(void) { }; void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x85, 0x1d, 0x9f, 0x31, 0x8c, 0x4c, 0x4c, 0xf2, 0x11, 0xfc, 0xc8, + VSET(16, e8, m8); + VLOAD_8(v16, 0x85, 0x1d, 0x9f, 0x31, 0x8c, 0x4c, 0x4c, 0xf2, 0x11, 0xfc, 0xc8, 0xab, 0xc2, 0xff, 0xf5, 0xc2); - VLOAD_8(v6, 0x3d, 0x06, 0x32, 0x36, 0x02, 0x0f, 0x27, 0x35, 0x1e, 0x0f, 0x36, + VLOAD_8(v24, 0x3d, 0x06, 0x32, 0x36, 0x02, 0x0f, 0x27, 0x35, 0x1e, 0x0f, 0x36, 0x1c, 0x24, 0x1a, 0x22, 0x01); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdivu.vv v2, v4, v6, v0.t"); - VCMP_I8(5, v2, 0, 0x04, 0, 0x00, 0, 0x05, 0, 0x04, 0, 0x10, 0, 0x06, 0, 0x09, + VCLEAR(v8); + asm volatile("vdivu.vv v8, v16, v24, v0.t"); + VCMP_I8(5, v8, 0, 0x04, 0, 0x00, 0, 0x05, 0, 0x04, 0, 0x10, 0, 0x06, 0, 0x09, 0, 0xc2); - VSET(16, e16, m2); - VLOAD_16(v4, 
0xd200, 0xa047, 0x7af8, 0x453d, 0xd6eb, 0xfabb, 0x604a, 0xea35, + VSET(16, e16, m8); + VLOAD_16(v16, 0xd200, 0xa047, 0x7af8, 0x453d, 0xd6eb, 0xfabb, 0x604a, 0xea35, 0xbc2d, 0x45e7, 0x8407, 0x3845, 0x1495, 0x8ee6, 0x7da4, 0xf34a); - VLOAD_16(v6, 0x03ad, 0x00b8, 0x001b, 0x0353, 0x013f, 0x008c, 0x015e, 0x01e6, + VLOAD_16(v24, 0x03ad, 0x00b8, 0x001b, 0x0353, 0x013f, 0x008c, 0x015e, 0x01e6, 0x00cd, 0x0093, 0x00ba, 0x03d0, 0x0117, 0x009d, 0x007b, 0x02cf); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdivu.vv v2, v4, v6, v0.t"); - VCMP_I16(6, v2, 0, 0x00de, 0, 0x0014, 0, 0x01ca, 0, 0x007b, 0, 0x0079, 0, + VCLEAR(v8); + asm volatile("vdivu.vv v8, v16, v24, v0.t"); + VCMP_I16(6, v8, 0, 0x00de, 0, 0x0014, 0, 0x01ca, 0, 0x007b, 0, 0x0079, 0, 0x000e, 0, 0x00e9, 0, 0x0056); - VSET(16, e32, m2); - VLOAD_32(v4, 0xd56bd03a, 0x1036c5ff, 0xaa01847f, 0x988dc35d, 0x4d8615dc, + VSET(16, e32, m8); + VLOAD_32(v16, 0xd56bd03a, 0x1036c5ff, 0xaa01847f, 0x988dc35d, 0x4d8615dc, 0xb62269e2, 0xe842ba75, 0x02fecbf9, 0xe536c712, 0xe93e2160, 0x9ba34297, 0x554d290d, 0x319f668c, 0x0d6c2fbb, 0x6a7eb54a, 0x3fa1cc84); - VLOAD_32(v6, 0x00000025, 0x00000057, 0x0000002e, 0x0000004c, 0x00000052, + VLOAD_32(v24, 0x00000025, 0x00000057, 0x0000002e, 0x0000004c, 0x00000052, 0x00000021, 0x0000001d, 0x0000002f, 0x00000029, 0x00000008, 0x00000015, 0x00000029, 0x00000048, 0x00000051, 0x0000003f, 0x00000007); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdivu.vv v2, v4, v6, v0.t"); - VCMP_I32(7, v2, 0, 0x002fb5c5, 0, 0x0201dd84, 0, 0x0584ebef, 0, 0x00105098, 0, + VCLEAR(v8); + asm volatile("vdivu.vv v8, v16, v24, v0.t"); + VCMP_I32(7, v8, 0, 0x002fb5c5, 0, 0x0201dd84, 0, 0x0584ebef, 0, 0x00105098, 0, 0x1d27c42c, 0, 0x02149d19, 0, 0x002a6c0b, 0, 0x09171d37); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0xf251717441f02108, 0x2668b5d7f073b9d1, 0x4708b49fd356a60a, + VSET(16, e64, m8); + VLOAD_64(v16, 0xf251717441f02108, 0x2668b5d7f073b9d1, 0x4708b49fd356a60a, 0x195bedcb9ce5956b, 
0x41ce2b35b4280568, 0x32e5b89eed21de3b, 0x73f17f3d0e1f243f, 0x0d0aa119af3b0e95, 0x024cbba6ba662213, 0xf7df98f44da5e55f, 0x3cf2951aa7e4c0a9, 0x0ed8987446e84f30, 0xd983a7a0d4c648b4, 0x60eb8249069801a5, 0x529967e8b06df477, 0x776410b4b0cc22ad); - VLOAD_64(v6, 0x000000000000695b, 0x0000000000007420, 0x000000000001850c, + VLOAD_64(v24, 0x000000000000695b, 0x0000000000007420, 0x000000000001850c, 0x000000000000b46f, 0x000000000000e92c, 0x0000000000024e72, 0x0000000000032774, 0x000000000001c36f, 0x00000000000063d2, 0x0000000000037bb4, 0x000000000003692c, 0x000000000001d60c, 0x000000000002cf7b, 0x0000000000037899, 0x0000000000038bcf, 0x000000000003d0e4); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdivu.vv v2, v4, v6, v0.t"); - VCMP_I64(8, v2, 0, 0x000054ac6a930494, 0, 0x000023fac7321f85, 0, + VCLEAR(v8); + asm volatile("vdivu.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x000054ac6a930494, 0, 0x000023fac7321f85, 0, 0x000016114f5d8a9e, 0, 0x00000765470f410f, 0, 0x0000472988fa89c3, 0, 0x00000815e7b8df73, 0, 0x00001bec0e79307a, 0, 0x00001f497ec0ff30); #endif }; void TEST_CASE3(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x0f, 0xbd, 0x0a, 0x58, 0x8e, 0x09, 0xa7, 0x02, 0x4b, 0xe8, 0xd2, + VSET(16, e8, m8); + VLOAD_8(v16, 0x0f, 0xbd, 0x0a, 0x58, 0x8e, 0x09, 0xa7, 0x02, 0x4b, 0xe8, 0xd2, 0xfc, 0xa9, 0x8e, 0x67, 0x49); uint64_t scalar = 5; - asm volatile("vdivu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I8(9, v2, 0x03, 0x25, 0x02, 0x11, 0x1c, 0x01, 0x21, 0x00, 0x0f, 0x2e, + asm volatile("vdivu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v8, 0x03, 0x25, 0x02, 0x11, 0x1c, 0x01, 0x21, 0x00, 0x0f, 0x2e, 0x2a, 0x32, 0x21, 0x1c, 0x14, 0x0e); - VSET(16, e16, m2); - VLOAD_16(v4, 0xcf2f, 0x0c02, 0x1417, 0x1747, 0x5e43, 0x9552, 0xe03e, 0x5367, + VSET(16, e16, m8); + VLOAD_16(v16, 0xcf2f, 0x0c02, 0x1417, 0x1747, 0x5e43, 0x9552, 0xe03e, 0x5367, 0xb2f9, 0x09d8, 0x3b19, 0x8ed0, 0x4740, 0xa628, 0x5560, 0x713b); scalar = 538; - asm volatile("vdivu.vx v2, v4, %[A]" ::[A] 
"r"(scalar)); - VCMP_I16(10, v2, 0x0062, 0x0005, 0x0009, 0x000b, 0x002c, 0x0047, 0x006a, + asm volatile("vdivu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v8, 0x0062, 0x0005, 0x0009, 0x000b, 0x002c, 0x0047, 0x006a, 0x0027, 0x0055, 0x0004, 0x001c, 0x0043, 0x0021, 0x004f, 0x0028, 0x0035); - VSET(16, e32, m2); - VLOAD_32(v4, 0x643498d4, 0xe1e4c6d4, 0x2fef6b6f, 0xe68ef651, 0x9943599a, + VSET(16, e32, m8); + VLOAD_32(v16, 0x643498d4, 0xe1e4c6d4, 0x2fef6b6f, 0xe68ef651, 0x9943599a, 0x68af922e, 0x09a3beb2, 0x117ff561, 0x86a1a3f7, 0x03566f4f, 0xc3c0c7de, 0x8cb524f8, 0x532e1652, 0xb0c26bf2, 0x886d0b1c, 0xf94d6b63); scalar = 649; - asm volatile("vdivu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I32(11, v2, 0x002786be, 0x00591abc, 0x0012e87f, 0x005af1c8, 0x003c7480, + asm volatile("vdivu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v8, 0x002786be, 0x00591abc, 0x0012e87f, 0x005af1c8, 0x003c7480, 0x00294b2b, 0x0003cd68, 0x0006e722, 0x00351b13, 0x00015108, 0x004d3723, 0x003780a6, 0x0020cf84, 0x0045b92a, 0x0035d049, 0x0062568c); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x751ea878eaf9f85f, 0x9843aba4c3e313af, 0xc2f56d78083dc0f2, + VSET(16, e64, m8); + VLOAD_64(v16, 0x751ea878eaf9f85f, 0x9843aba4c3e313af, 0xc2f56d78083dc0f2, 0x4fcb920a2a4ebc8d, 0x33a71e7364643a7c, 0x76f96f8403af4ad7, 0xcdbbb2002ea6ac93, 0xc380d0b6a5182bcc, 0x93b79fcc64af88cf, 0x85d32b075e613f6c, 0x4f1f75bfa6d8f319, 0xd2a34a8d9a02d7f1, 0x8679a27b237a032e, 0x7e0881a487bbb235, 0x17d97d9849271cec, 0x1c85ac87ba3c7d1e); scalar = 9223; - asm volatile("vdivu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I64(12, v2, 0x000340383152452b, 0x000439f263aaf34a, 0x000569521e089c7c, + asm volatile("vdivu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0x000340383152452b, 0x000439f263aaf34a, 0x000569521e089c7c, 0x0002370079144c76, 0x00016f07b37c5546, 0x00034d65d36c535c, 0x0005b5e194247d88, 0x00056d3090f69ef0, 0x000419a3026cfde7, 0x0003b6ebd974c870, 0x000232398140d5dd, 0x0005d8bb7bec2e99, @@ -177,41 
+177,41 @@ void TEST_CASE3(void) { }; void TEST_CASE4(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x0f, 0xbd, 0x0a, 0x58, 0x8e, 0x09, 0xa7, 0x02, 0x4b, 0xe8, 0xd2, + VSET(16, e8, m8); + VLOAD_8(v16, 0x0f, 0xbd, 0x0a, 0x58, 0x8e, 0x09, 0xa7, 0x02, 0x4b, 0xe8, 0xd2, 0xfc, 0xa9, 0x8e, 0x67, 0x49); uint64_t scalar = 5; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdivu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I8(13, v2, 0, 0x25, 0, 0x11, 0, 0x01, 0, 0x00, 0, 0x2e, 0, 0x32, 0, 0x1c, + VCLEAR(v8); + asm volatile("vdivu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v8, 0, 0x25, 0, 0x11, 0, 0x01, 0, 0x00, 0, 0x2e, 0, 0x32, 0, 0x1c, 0, 0x0e); - VSET(16, e16, m2); - VLOAD_16(v4, 0xcf2f, 0x0c02, 0x1417, 0x1747, 0x5e43, 0x9552, 0xe03e, 0x5367, + VSET(16, e16, m8); + VLOAD_16(v16, 0xcf2f, 0x0c02, 0x1417, 0x1747, 0x5e43, 0x9552, 0xe03e, 0x5367, 0xb2f9, 0x09d8, 0x3b19, 0x8ed0, 0x4740, 0xa628, 0x5560, 0x713b); scalar = 538; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdivu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I16(14, v2, 0, 0x0005, 0, 0x000b, 0, 0x0047, 0, 0x0027, 0, 0x0004, 0, + VCLEAR(v8); + asm volatile("vdivu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v8, 0, 0x0005, 0, 0x000b, 0, 0x0047, 0, 0x0027, 0, 0x0004, 0, 0x0043, 0, 0x004f, 0, 0x0035); - VSET(16, e32, m2); - VLOAD_32(v4, 0x643498d4, 0xe1e4c6d4, 0x2fef6b6f, 0xe68ef651, 0x9943599a, + VSET(16, e32, m8); + VLOAD_32(v16, 0x643498d4, 0xe1e4c6d4, 0x2fef6b6f, 0xe68ef651, 0x9943599a, 0x68af922e, 0x09a3beb2, 0x117ff561, 0x86a1a3f7, 0x03566f4f, 0xc3c0c7de, 0x8cb524f8, 0x532e1652, 0xb0c26bf2, 0x886d0b1c, 0xf94d6b63); scalar = 649; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdivu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I32(15, v2, 0, 0x00591abc, 0, 0x005af1c8, 0, 0x00294b2b, 0, 0x0006e722, + VCLEAR(v8); + asm volatile("vdivu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v8, 0, 0x00591abc, 0, 0x005af1c8, 0, 0x00294b2b, 0, 
0x0006e722, 0, 0x00015108, 0, 0x003780a6, 0, 0x0045b92a, 0, 0x0062568c); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x751ea878eaf9f85f, 0x9843aba4c3e313af, 0xc2f56d78083dc0f2, + VSET(16, e64, m8); + VLOAD_64(v16, 0x751ea878eaf9f85f, 0x9843aba4c3e313af, 0xc2f56d78083dc0f2, 0x4fcb920a2a4ebc8d, 0x33a71e7364643a7c, 0x76f96f8403af4ad7, 0xcdbbb2002ea6ac93, 0xc380d0b6a5182bcc, 0x93b79fcc64af88cf, 0x85d32b075e613f6c, 0x4f1f75bfa6d8f319, 0xd2a34a8d9a02d7f1, @@ -219,9 +219,9 @@ void TEST_CASE4(void) { 0x1c85ac87ba3c7d1e); scalar = 9223; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vdivu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I64(16, v2, 0, 0x000439f263aaf34a, 0, 0x0002370079144c76, 0, + VCLEAR(v8); + asm volatile("vdivu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x000439f263aaf34a, 0, 0x0002370079144c76, 0, 0x00034d65d36c535c, 0, 0x00056d3090f69ef0, 0, 0x0003b6ebd974c870, 0, 0x0005d8bb7bec2e99, 0, 0x00037f8e5aab0783, 0, 0x0000caab9b4a8885); #endif diff --git a/sw/riscvTests/isa/rv64uv/vfadd.c b/sw/riscvTests/isa/rv64uv/vfadd.c index e3c361a6..0cfac68a 100644 --- a/sw/riscvTests/isa/rv64uv/vfadd.c +++ b/sw/riscvTests/isa/rv64uv/vfadd.c @@ -14,31 +14,31 @@ // Simple random test with similar values + 1 subnormal void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.8896, -0.3406, 0.7324, -0.6846, -0.2969, -0.7739, 0.5737, // 0.4331, 0.8940, -0.4900, 0.4219, 0.4639, 0.6694, 0.4382, // 0.1356, 0.5337 - VLOAD_16(v4, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + VLOAD_16(v16, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); // -0.8164, 0.6533, -0.4685, 0.6284, 0.1666, 0.9438, 0.0445, // -0.1342, -0.8071, -0.3167, -0.8350, 0.2178, -0.0896, -0.3057, // -0.3064, 0.2073 - VLOAD_16(v6, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + VLOAD_16(v24, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 
0xb04b, 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); - asm volatile("vfadd.vv v2, v4, v6"); + asm volatile("vfadd.vv v8, v16, v24"); // -1.7061, 0.3127, 0.2639, -0.0562, -0.1302, 0.1699, 0.6182, // 0.2988, 0.0869, -0.8066, -0.4131, 0.6816, 0.5801, 0.1326, // -0.1708, 0.7412 - VCMP_U16(1, v2, 0xbed3, 0x3501, 0x3439, 0xab30, 0xb02b, 0x3170, 0x38f2, + VCMP_U16(1, v8, 0xbed3, 0x3501, 0x3439, 0xab30, 0xb02b, 0x3170, 0x38f2, 0x34c8, 0x2d90, 0xba74, 0xb69c, 0x3974, 0x38a4, 0x303e, 0xb177, 0x800f); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.28968573, 0.40292332, 0.33936000, 0.53889370, 0.39942014, // -0.27004066, 0.78120714, -0.15632398, -0.49984047, // -0.69259918, -0.03384063, -0.62385744, 0.00338853, 0.33711585, // -0.34673852, 0.11450682 - VLOAD_32(v4, 0xbe9451b0, 0x3ece4bf7, 0x3eadc098, 0x3f09f4f0, 0x3ecc80cc, + VLOAD_32(v16, 0xbe9451b0, 0x3ece4bf7, 0x3eadc098, 0x3f09f4f0, 0x3ecc80cc, 0xbe8a42c5, 0x3f47fd31, 0xbe201365, 0xbeffeb17, 0xbf314e2e, 0xbd0a9c78, 0xbf1fb51f, 0x3b5e1209, 0x3eac9a73, 0xbeb187b6, 0x3dea828d); @@ -46,29 +46,29 @@ void TEST_CASE1(void) { // -0.78492641, -0.44331804, -0.84668529, 0.13981950, 0.84909225, // 0.23569171, 0.34283128, 0.56619811, 0.22596644, 0.55843508, // 0.53194439, 0.02510819 - VLOAD_32(v6, 0xbf1f15f7, 0x3f221093, 0x3e87bc88, 0x3f5b7dc5, 0xbf48f0f0, + VLOAD_32(v24, 0xbf1f15f7, 0x3f221093, 0x3e87bc88, 0x3f5b7dc5, 0xbf48f0f0, 0xbee2fa95, 0xbf58c05e, 0x3e0f2cd8, 0x3f595e1c, 0x3e71592b, 0x3eaf8795, 0x3f10f25c, 0x3e6763bf, 0x3f0ef59a, 0x3f082d82, 0x3ccdafb0); - asm volatile("vfadd.vv v2, v4, v6"); + asm volatile("vfadd.vv v8, v16, v24"); // -0.91111463, 1.03598869, 0.60447025, 1.39628148, // -0.38550627, -0.71335870, -0.06547815, -0.01650448, 0.34925178, // -0.45690745, 0.30899066, -0.05765933, 0.22935496, 0.89555097, // 0.18520588, 0.13961500 - VCMP_U32(2, v2, 0xbf693ecf, 0x3f849b47, 0x3f1abe90, 0x3fb2b95a, 0xbec56114, + VCMP_U32(2, v8, 0xbf693ecf, 0x3f849b47, 0x3f1abe90, 0x3fb2b95a, 0xbec56114, 0xbf369ead, 
0xbd861968, 0xbc873468, 0x3eb2d121, 0xbee9efc6, 0x3e9e3406, 0xbd6c2c30, 0x3e6adc07, 0x3f6542d4, 0x3e3da69c, 0x3e0ef73c); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.1192486190170796, 0.7099687505713703, -0.6001652243371716, // -0.9559723926483070, 0.7987976623002717, -0.3314459653039117, // 0.7678805321182058, -0.3118871679402779, -0.7580588930783800, // 0.5940681950113129, 0.6471754222100761, 0.4175915562917139, // -0.3690504607938143, 0.0740574148132984, -0.1493616685664843, // 0.3560295367616439 - VLOAD_64(v4, 0xbfbe8713d6c58260, 0x3fe6b810629c5a40, 0xbfe3348db3573060, + VLOAD_64(v16, 0xbfbe8713d6c58260, 0x3fe6b810629c5a40, 0xbfe3348db3573060, 0xbfee97536a49b50a, 0x3fe98fc01d766dee, 0xbfd536692357c5dc, 0x3fe8927a3195d944, 0xbfd3f5f598961d8c, 0xbfe84204b946d5d6, 0x3fe3029b4da55ad8, 0x3fe4b5a93b255a44, 0x3fdab9d1ef56f430, @@ -80,20 +80,20 @@ void TEST_CASE1(void) { // -0.7130864084314152, -0.5516927493459973, -0.4203081001100177, // 0.6487326796833275, -0.5631384800254344, -0.0996872955425372, // -0.4382844162164241 - VLOAD_64(v6, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + VLOAD_64(v24, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, 0xbfdc0cda147fbe5c); - asm volatile("vfadd.vv v2, v4, v6"); + asm volatile("vfadd.vv v8, v16, v24"); // -0.8958389485335123, 1.1295177182420593, -0.9913066367769980, // -0.2637694069859826, 1.3652718395291317, -0.4727280086528298, // 0.5830864096285984, -0.8026007761935372, -1.6726749808525929, // -0.1190182134201023, 0.0954826728640787, -0.0027165438183039, // 0.2796822188895132, -0.4890810652121360, -0.2490489641090214, // -0.0822548794547802 - VCMP_U64(3, v2, 0xbfecaab6714de71e, 0x3ff212812bc1a28f, 0xbfefb8c8b2287a88, + VCMP_U64(3, v8, 
0xbfecaab6714de71e, 0x3ff212812bc1a28f, 0xbfefb8c8b2287a88, 0xbfd0e199142c2ac0, 0x3ff5d82748ced68d, 0xbfde412cfa444b84, 0x3fe2a8a4d48319b8, 0xbfe9aee7d2afdaca, 0xbffac346d73996c9, 0xbfbe77fa46448730, 0x3fb8718d6d492fc0, 0xbf6641015b72d200, @@ -105,50 +105,50 @@ void TEST_CASE1(void) { // Simple random test with similar values + 1 subnormal (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); - VLOAD_16(v4, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + VSET(16, e16, m8); + VLOAD_16(v16, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); - VLOAD_16(v6, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + VLOAD_16(v24, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfadd.vv v2, v4, v6, v0.t"); - VCMP_U16(4, v2, 0, 0x3501, 0, 0xab30, 0, 0x3170, 0, 0x34c8, 0, 0xba74, 0, + VCLEAR(v8); + asm volatile("vfadd.vv v8, v16, v24, v0.t"); + VCMP_U16(4, v8, 0, 0x3501, 0, 0xab30, 0, 0x3170, 0, 0x34c8, 0, 0xba74, 0, 0x3974, 0, 0x303e, 0, 0x800f); - VSET(16, e32, m2); - VLOAD_32(v4, 0xbe9451b0, 0x3ece4bf7, 0x3eadc098, 0x3f09f4f0, 0x3ecc80cc, + VSET(16, e32, m8); + VLOAD_32(v16, 0xbe9451b0, 0x3ece4bf7, 0x3eadc098, 0x3f09f4f0, 0x3ecc80cc, 0xbe8a42c5, 0x3f47fd31, 0xbe201365, 0xbeffeb17, 0xbf314e2e, 0xbd0a9c78, 0xbf1fb51f, 0x3b5e1209, 0x3eac9a73, 0xbeb187b6, 0x3dea828d); - VLOAD_32(v6, 0xbf1f15f7, 0x3f221093, 0x3e87bc88, 0x3f5b7dc5, 0xbf48f0f0, + VLOAD_32(v24, 0xbf1f15f7, 0x3f221093, 0x3e87bc88, 0x3f5b7dc5, 0xbf48f0f0, 0xbee2fa95, 0xbf58c05e, 0x3e0f2cd8, 0x3f595e1c, 0x3e71592b, 0x3eaf8795, 0x3f10f25c, 0x3e6763bf, 0x3f0ef59a, 0x3f082d82, 0x3ccdafb0); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfadd.vv v2, v4, v6, v0.t"); - VCMP_U32(5, v2, 0, 0x3f849b47, 0, 0x3fb2b95a, 0, 0xbf369ead, 0, 
0xbc873468, 0, + VCLEAR(v8); + asm volatile("vfadd.vv v8, v16, v24, v0.t"); + VCMP_U32(5, v8, 0, 0x3f849b47, 0, 0x3fb2b95a, 0, 0xbf369ead, 0, 0xbc873468, 0, 0xbee9efc6, 0, 0xbd6c2c30, 0, 0x3f6542d4, 0, 0x3e0ef73c); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0xbfbe8713d6c58260, 0x3fe6b810629c5a40, 0xbfe3348db3573060, + VSET(16, e64, m8); + VLOAD_64(v16, 0xbfbe8713d6c58260, 0x3fe6b810629c5a40, 0xbfe3348db3573060, 0xbfee97536a49b50a, 0x3fe98fc01d766dee, 0xbfd536692357c5dc, 0x3fe8927a3195d944, 0xbfd3f5f598961d8c, 0xbfe84204b946d5d6, 0x3fe3029b4da55ad8, 0x3fe4b5a93b255a44, 0x3fdab9d1ef56f430, 0xbfd79e85d2ebb8f0, 0x3fb2f56d3ea64090, 0xbfc31e487ce26ff0, 0x3fd6c9301c334858); - VLOAD_64(v6, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + VLOAD_64(v24, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, 0xbfdc0cda147fbe5c); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfadd.vv v2, v4, v6, v0.t"); - VCMP_U64(6, v2, 0, 0x3ff212812bc1a28f, 0, 0xbfd0e199142c2ac0, 0, + VCLEAR(v8); + asm volatile("vfadd.vv v8, v16, v24, v0.t"); + VCMP_U64(6, v8, 0, 0x3ff212812bc1a28f, 0, 0xbfd0e199142c2ac0, 0, 0xbfde412cfa444b84, 0, 0xbfe9aee7d2afdaca, 0, 0xbfbe77fa46448730, 0, 0xbf6641015b72d200, 0, 0xbfdf4d1aab0b7434, 0, 0xbfb50ea7e131d810); #endif @@ -156,33 +156,33 @@ void TEST_CASE2(void) { // Edge-case tests void TEST_CASE3(void) { - VSET(16, e16, m2); - VLOAD_16(v4, pInfh, pInfh, mInfh, qNaNh, pMaxh, pMaxh, pZero, mZeroh, pZero, + VSET(16, e16, m8); + VLOAD_16(v16, pInfh, pInfh, mInfh, qNaNh, pMaxh, pMaxh, pZero, mZeroh, pZero, pMaxh, pZero, qNaNh, mInfh, pInfh, qNaNh, qNaNh); - VLOAD_16(v6, mInfh, pInfh, mInfh, pZero, pMaxh, mMaxh, pZero, mZeroh, mZeroh, + VLOAD_16(v24, mInfh, pInfh, mInfh, pZero, pMaxh, mMaxh, 
pZero, mZeroh, mZeroh, mZeroh, mMaxh, 0x1, 0xba88, pZero, qNaNh, 0xba88); - asm volatile("vfadd.vv v2, v4, v6"); - VCMP_U16(7, v2, qNaNh, pInfh, mInfh, qNaNh, pInfh, pZero, pZero, mZeroh, + asm volatile("vfadd.vv v8, v16, v24"); + VCMP_U16(7, v8, qNaNh, pInfh, mInfh, qNaNh, pInfh, pZero, pZero, mZeroh, pZero, pMaxh, mMaxh, qNaNh, mInfh, pInfh, qNaNh, qNaNh); - VSET(16, e32, m2); - VLOAD_32(v4, pInff, pInff, mInff, qNaNf, pMaxf, pMaxf, pZero, mZerof, pZero, + VSET(16, e32, m8); + VLOAD_32(v16, pInff, pInff, mInff, qNaNf, pMaxf, pMaxf, pZero, mZerof, pZero, pMaxf, pZero, qNaNf, mInff, pInff, qNaNf, qNaNf); - VLOAD_32(v6, mInff, pInff, mInff, pZero, pMaxf, mMaxf, pZero, mZerof, mZerof, + VLOAD_32(v24, mInff, pInff, mInff, pZero, pMaxf, mMaxf, pZero, mZerof, mZerof, mZerof, mMaxf, 0x1, 0xbf48f0f0, pZero, qNaNf, 0xbf48f0f0); - asm volatile("vfadd.vv v2, v4, v6"); - VCMP_U32(8, v2, qNaNf, pInff, mInff, qNaNf, pInff, pZero, pZero, mZerof, + asm volatile("vfadd.vv v8, v16, v24"); + VCMP_U32(8, v8, qNaNf, pInff, mInff, qNaNf, pInff, pZero, pZero, mZerof, pZero, pMaxf, mMaxf, qNaNf, mInff, pInff, qNaNf, qNaNf); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, pInfd, pInfd, mInfd, qNaNd, pMaxd, pMaxd, pZero, mZerod, pZero, + VSET(16, e64, m8); + VLOAD_64(v16, pInfd, pInfd, mInfd, qNaNd, pMaxd, pMaxd, pZero, mZerod, pZero, pMaxd, pZero, qNaNd, mInfd, pInfd, qNaNd, qNaNd); - VLOAD_64(v6, mInfd, pInfd, mInfd, pZero, pMaxd, mMaxd, pZero, mZerod, mZerod, + VLOAD_64(v24, mInfd, pInfd, mInfd, pZero, pMaxd, mMaxd, pZero, mZerod, mZerod, mZerod, mMaxd, 0x1, 0xbfd90875fda29450, pZero, qNaNd, 0xbfd90875fda29450); - asm volatile("vfadd.vv v2, v4, v6"); - VCMP_U64(9, v2, qNaNd, pInfd, mInfd, qNaNd, pInfd, pZero, pZero, mZerod, + asm volatile("vfadd.vv v8, v16, v24"); + VCMP_U64(9, v8, qNaNd, pInfd, mInfd, qNaNd, pInfd, pZero, pZero, mZerod, pZero, pMaxd, mMaxd, qNaNd, mInfd, pInfd, qNaNd, qNaNd); #endif }; @@ -193,39 +193,39 @@ void TEST_CASE3(void) { void TEST_CASE4(void) { // Overflow 
+ Inexact CLEAR_FFLAGS; - VSET(16, e16, m2); + VSET(16, e16, m8); CHECK_FFLAGS(0); - VLOAD_16(v4, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, + VLOAD_16(v16, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh); - VLOAD_16(v6, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, + VLOAD_16(v24, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh); - asm volatile("vfadd.vv v2, v4, v6"); - VCMP_U16(10, v2, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, + asm volatile("vfadd.vv v8, v16, v24"); + VCMP_U16(10, v8, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh); // Invalid operation, overflow CLEAR_FFLAGS; - VSET(16, e32, m2); + VSET(16, e32, m8); CHECK_FFLAGS(0); - VLOAD_32(v4, pInff, pInff, pInff, pInff, pInff, pInff, pInff, pInff, pInff, + VLOAD_32(v16, pInff, pInff, pInff, pInff, pInff, pInff, pInff, pInff, pInff, pInff, pInff, pInff, pInff, pInff, pInff, pInff); - VLOAD_32(v6, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, + VLOAD_32(v24, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff); - asm volatile("vfadd.vv v2, v4, v6"); - VCMP_U32(11, v2, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, + asm volatile("vfadd.vv v8, v16, v24"); + VCMP_U32(11, v8, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf); #if ELEN == 64 // Invalid operation, overflow, inexact CLEAR_FFLAGS; - VSET(16, e64, m2); + VSET(16, e64, m8); CHECK_FFLAGS(0); - VLOAD_64(v4, pMaxd, pInfd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, + VLOAD_64(v16, pMaxd, pInfd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd); - VLOAD_64(v6, pMaxd, mInfd, 8000000000000001, pMaxd, pMaxd, pMaxd, pMaxd, 
+ VLOAD_64(v24, pMaxd, mInfd, 8000000000000001, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd); - asm volatile("vfadd.vv v2, v4, v6"); - VCMP_U64(12, v2, pInfd, qNaNd, pMaxd, pInfd, pInfd, pInfd, pInfd, pInfd, + asm volatile("vfadd.vv v8, v16, v24"); + VCMP_U64(12, v8, pInfd, qNaNd, pMaxd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd); #endif }; @@ -233,97 +233,97 @@ void TEST_CASE4(void) { // Different rounding-mode + Back-to-back rm change and vfp operation // Index 12 (starting from 0) rounds differently for RNE and RTZ void TEST_CASE5(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.8896, -0.3406, 0.7324, -0.6846, -0.2969, -0.7739, 0.5737, // 0.4331, 0.8940, -0.4900, 0.4219, 0.4639, 0.6694, 0.4382, // 0.1356, 0.5337 - VLOAD_16(v4, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + VLOAD_16(v16, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); // -0.8164, 0.6533, -0.4685, 0.6284, 0.1666, 0.9438, 0.0445, // -0.1342, -0.8071, -0.3167, -0.8350, 0.2178, -0.0896, -0.3057, // -0.3064, 0.2073 - VLOAD_16(v6, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + VLOAD_16(v24, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); CHANGE_RM(RM_RTZ); - asm volatile("vfadd.vv v2, v4, v6"); + asm volatile("vfadd.vv v8, v16, v24"); // -1.7061, 0.3127, 0.2639, -0.0562, -0.1302, 0.1699, 0.6182, // 0.2988, 0.0869, -0.8066, -0.4131, 0.6816, 0.5801, 0.1326, // -0.1708, 0.7412 - VCMP_U16(13, v2, 0xbed3, 0x3501, 0x3439, 0xab30, 0xb02b, 0x3170, 0x38f2, + VCMP_U16(13, v8, 0xbed3, 0x3501, 0x3439, 0xab30, 0xb02b, 0x3170, 0x38f2, 0x34c8, 0x2d90, 0xba74, 0xb69c, 0x3974, 0x38a3, 0x303e, 0xb177, 0x800f); - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.8896, -0.3406, 0.7324, -0.6846, -0.2969, -0.7739, 0.5737, // 0.4331, 
0.8940, -0.4900, 0.4219, 0.4639, 0.6694, 0.4382, // 0.1356, 0.5337 - VLOAD_16(v4, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + VLOAD_16(v16, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); // -0.8164, 0.6533, -0.4685, 0.6284, 0.1666, 0.9438, 0.0445, // -0.1342, -0.8071, -0.3167, -0.8350, 0.2178, -0.0896, -0.3057, // -0.3064, 0.2073 - VLOAD_16(v6, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + VLOAD_16(v24, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); CHANGE_RM(RM_RNE); - asm volatile("vfadd.vv v2, v4, v6"); + asm volatile("vfadd.vv v8, v16, v24"); // -1.7061, 0.3127, 0.2639, -0.0562, -0.1302, 0.1699, 0.6182, // 0.2988, 0.0869, -0.8066, -0.4131, 0.6816, 0.5801, 0.1326, // -0.1708, 0.7412 - VCMP_U16(14, v2, 0xbed3, 0x3501, 0x3439, 0xab30, 0xb02b, 0x3170, 0x38f2, + VCMP_U16(14, v8, 0xbed3, 0x3501, 0x3439, 0xab30, 0xb02b, 0x3170, 0x38f2, 0x34c8, 0x2d90, 0xba74, 0xb69c, 0x3974, 0x38a4, 0x303e, 0xb177, 0x800f); }; // Simple random test with similar values (vector-scalar) void TEST_CASE6(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.1481, -0.1797, -0.5454, 0.3228, 0.3237, -0.7212, -0.5195, // -0.4500, 0.2681, 0.7300, 0.5059, 0.5830, 0.3198, -0.1713, // -0.6431, 0.4841 - VLOAD_16(v4, 0xb0bd, 0xb1c0, 0xb85d, 0x352a, 0x352e, 0xb9c5, 0xb828, 0xb733, + VLOAD_16(v16, 0xb0bd, 0xb1c0, 0xb85d, 0x352a, 0x352e, 0xb9c5, 0xb828, 0xb733, 0x344a, 0x39d7, 0x380c, 0x38aa, 0x351e, 0xb17b, 0xb925, 0x37bf); float fscalar_16; // -0.9380 BOX_HALF_IN_FLOAT(fscalar_16, 0xbb81); - asm volatile("vfadd.vf v2, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfadd.vf v8, v16, %[A]" ::[A] "f"(fscalar_16)); // -1.0859, -1.1172, -1.4834, -0.6152, -0.6143, -1.6592, // -1.4570, -1.3877, -0.6699, -0.2080, -0.4321, -0.3550, // -0.6182, -1.1094, -1.5811, -0.4539 - VCMP_U16(15, v2, 0xbc58, 
0xbc78, 0xbdef, 0xb8ec, 0xb8ea, 0xbea3, 0xbdd4, + VCMP_U16(15, v8, 0xbc58, 0xbc78, 0xbdef, 0xb8ec, 0xb8ea, 0xbea3, 0xbdd4, 0xbd8d, 0xb95c, 0xb2a8, 0xb6ea, 0xb5ae, 0xb8f2, 0xbc70, 0xbe53, 0xb743); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.86539453, -0.53925377, -0.47128764, 0.99265540, // 0.32128176, -0.47335613, -0.30028856, 0.44394016, // -0.72540921, -0.26464799, 0.77351445, -0.21725702, // -0.25191557, -0.53123665, 0.80404943, 0.81841671 - VLOAD_32(v4, 0x3f5d8a7f, 0xbf0a0c89, 0xbef14c9d, 0x3f7e1eaa, 0x3ea47f0b, + VLOAD_32(v16, 0x3f5d8a7f, 0xbf0a0c89, 0xbef14c9d, 0x3f7e1eaa, 0x3ea47f0b, 0xbef25bbc, 0xbe99bf6c, 0x3ee34c20, 0xbf39b46b, 0xbe877ff1, 0x3f46050b, 0xbe5e78a0, 0xbe80fb14, 0xbf07ff20, 0x3f4dd62f, 0x3f5183c2); float fscalar_32; // -0.96056187 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbf75e762); - asm volatile("vfadd.vf v2, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfadd.vf v8, v16, %[A]" ::[A] "f"(fscalar_32)); // -0.09516734, -1.49981570, -1.43184948, 0.03209352, // -0.63928008, -1.43391800, -1.26085043, -0.51662171, // -1.68597102, -1.22520983, -0.18704742, -1.17781889, // -1.21247745, -1.49179852, -0.15651244, -0.14214516 - VCMP_U32(16, v2, 0xbdc2e718, 0xbfbff9f6, 0xbfb746d8, 0x3d037480, 0xbf23a7dc, + VCMP_U32(16, v8, 0xbdc2e718, 0xbfbff9f6, 0xbfb746d8, 0x3d037480, 0xbf23a7dc, 0xbfb78aa0, 0xbfa1638c, 0xbf044152, 0xbfd7cde6, 0xbf9cd3ad, 0xbe3f895c, 0xbf96c2c5, 0xbf9b3276, 0xbfbef341, 0xbe2044cc, 0xbe118e80); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.3488917150781869, -0.4501495513738740, 0.8731197104152684, // 0.3256432550932964, 0.6502591178769535, -0.3169358689246526, // -0.5396694979141685, -0.5417807430937591, // -0.7971574213160249, -0.1764794100111047, 0.3564275916066595, // -0.3754449946313438, 0.6580947137446858, // -0.3328857144699515, 0.1761214464164236, 0.1429774118511240 - VLOAD_64(v4, 0xbfd6543dea86cb60, 0xbfdccf40105d6e5c, 0x3febf098bf37400c, + VLOAD_64(v16, 0xbfd6543dea86cb60, 0xbfdccf40105d6e5c, 
0x3febf098bf37400c, 0x3fd4d756ceb279f4, 0x3fe4ceec35a6a266, 0xbfd448ad61fd7c88, 0xbfe144f8f7861540, 0xbfe1564491a616b8, 0xbfe9825047ca1cd6, 0xbfc696e097352100, 0x3fd6cfb5ac55edec, 0xbfd8074a7158dd78, @@ -332,14 +332,14 @@ void TEST_CASE6(void) { double dscalar_64; // 0.9108707261227378 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); - asm volatile("vfadd.vf v2, v4, %[A]" ::[A] "f"(dscalar_64)); + asm volatile("vfadd.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); // 0.5619790110445508, 0.4607211747488638, 1.7839904365380062, // 1.2365139812160342, 1.5611298439996912, 0.5939348571980851, // 0.3712012282085693, 0.3690899830289787, 0.1137133048067129, // 0.7343913161116331, 1.2672983177293973, 0.5354257314913939, // 1.5689654398674235, 0.5779850116527863, 1.0869921725391614, // 1.0538481379738618 - VCMP_U64(17, v2, 0x3fe1fbbb682f314e, 0x3fdd7c74aa87bfa0, 0x3ffc8b398e54eb85, + VCMP_U64(17, v8, 0x3fe1fbbb682f314e, 0x3fdd7c74aa87bfa0, 0x3ffc8b398e54eb85, 0x3ff3c8c2e265e9fc, 0x3ff8fa63498c9cb2, 0x3fe30183ac73d8ba, 0x3fd7c1c2cbd9037c, 0x3fd79f2b9799008c, 0x3fbd1c50ad43d140, 0x3fe7802237a54ebe, 0x3ff446da99cec6fa, 0x3fe1223524c62842, @@ -350,33 +350,33 @@ void TEST_CASE6(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE7(void) { - VSET(16, e16, m2); - VLOAD_16(v4, 0xb0bd, 0xb1c0, 0xb85d, 0x352a, 0x352e, 0xb9c5, 0xb828, 0xb733, + VSET(16, e16, m8); + VLOAD_16(v16, 0xb0bd, 0xb1c0, 0xb85d, 0x352a, 0x352e, 0xb9c5, 0xb828, 0xb733, 0x344a, 0x39d7, 0x380c, 0x38aa, 0x351e, 0xb17b, 0xb925, 0x37bf); float fscalar_16; BOX_HALF_IN_FLOAT(fscalar_16, 0xbb81); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfadd.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); - VCMP_U16(18, v2, 0, 0xbc78, 0, 0xb8ec, 0, 0xbea3, 0, 0xbd8d, 0, 0xb2a8, 0, + VCLEAR(v8); + asm volatile("vfadd.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCMP_U16(18, v8, 0, 0xbc78, 0, 0xb8ec, 0, 0xbea3, 0, 0xbd8d, 0, 0xb2a8, 0, 0xb5ae, 0, 0xbc70, 0, 0xb743); - VSET(16, e32, 
m2); - VLOAD_32(v4, 0x3f5d8a7f, 0xbf0a0c89, 0xbef14c9d, 0x3f7e1eaa, 0x3ea47f0b, + VSET(16, e32, m8); + VLOAD_32(v16, 0x3f5d8a7f, 0xbf0a0c89, 0xbef14c9d, 0x3f7e1eaa, 0x3ea47f0b, 0xbef25bbc, 0xbe99bf6c, 0x3ee34c20, 0xbf39b46b, 0xbe877ff1, 0x3f46050b, 0xbe5e78a0, 0xbe80fb14, 0xbf07ff20, 0x3f4dd62f, 0x3f5183c2); float fscalar_32; BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbf75e762); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfadd.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); - VCMP_U32(19, v2, 0, 0xbfbff9f6, 0, 0x3d037480, 0, 0xbfb78aa0, 0, 0xbf044152, + VCLEAR(v8); + asm volatile("vfadd.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCMP_U32(19, v8, 0, 0xbfbff9f6, 0, 0x3d037480, 0, 0xbfb78aa0, 0, 0xbf044152, 0, 0xbf9cd3ad, 0, 0xbf96c2c5, 0, 0xbfbef341, 0, 0xbe118e80); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0xbfd6543dea86cb60, 0xbfdccf40105d6e5c, 0x3febf098bf37400c, + VSET(16, e64, m8); + VLOAD_64(v16, 0xbfd6543dea86cb60, 0xbfdccf40105d6e5c, 0x3febf098bf37400c, 0x3fd4d756ceb279f4, 0x3fe4ceec35a6a266, 0xbfd448ad61fd7c88, 0xbfe144f8f7861540, 0xbfe1564491a616b8, 0xbfe9825047ca1cd6, 0xbfc696e097352100, 0x3fd6cfb5ac55edec, 0xbfd8074a7158dd78, @@ -385,9 +385,9 @@ void TEST_CASE7(void) { double dscalar_64; BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfadd.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_64)); - VCMP_U64(20, v2, 0, 0x3fdd7c74aa87bfa0, 0, 0x3ff3c8c2e265e9fc, 0, + VCLEAR(v8); + asm volatile("vfadd.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCMP_U64(20, v8, 0, 0x3fdd7c74aa87bfa0, 0, 0x3ff3c8c2e265e9fc, 0, 0x3fe30183ac73d8ba, 0, 0x3fd79f2b9799008c, 0, 0x3fe7802237a54ebe, 0, 0x3fe1223524c62842, 0, 0x3fe27eda6c540f88, 0, 0x3ff0dc8fdd78c58f); #endif @@ -397,45 +397,45 @@ void TEST_CASE7(void) { void TEST_CASE8(void) { // Overflow and Inexact. Invalid operation should not be raised. 
CLEAR_FFLAGS; - VSET(16, e16, m2); + VSET(16, e16, m8); CHECK_FFLAGS(0); - VLOAD_16(v4, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, + VLOAD_16(v16, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh); - VLOAD_16(v6, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, + VLOAD_16(v24, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfadd.vv v2, v4, v6, v0.t"); - VCMP_U16(21, v2, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, + VCLEAR(v8); + asm volatile("vfadd.vv v8, v16, v24, v0.t"); + VCMP_U16(21, v8, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh); // Invalid operation. Overflow and Inexact should not be raised. CLEAR_FFLAGS; - VSET(16, e32, m2); + VSET(16, e32, m8); CHECK_FFLAGS(0); - VLOAD_32(v4, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, + VLOAD_32(v16, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff); - VLOAD_32(v6, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, + VLOAD_32(v24, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfadd.vv v2, v4, v6, v0.t"); - VCMP_U32(22, v2, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, + VCLEAR(v8); + asm volatile("vfadd.vv v8, v16, v24, v0.t"); + VCMP_U32(22, v8, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf); #if ELEN == 64 // No exception should be raised CLEAR_FFLAGS; - VSET(16, e64, m2); + VSET(16, e64, m8); CHECK_FFLAGS(0); - VLOAD_64(v4, pMaxd, 0, pInfd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, + VLOAD_64(v16, pMaxd, 0, pInfd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0); - VLOAD_64(v6, 
pMaxd, 0, mInfd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, + VLOAD_64(v24, pMaxd, 0, mInfd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfadd.vv v2, v4, v6, v0.t"); - VCMP_U64(23, v2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + VCLEAR(v8); + asm volatile("vfadd.vv v8, v16, v24, v0.t"); + VCMP_U64(23, v8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); #endif }; diff --git a/sw/riscvTests/isa/rv64uv/vfcvt.c b/sw/riscvTests/isa/rv64uv/vfcvt.c index f4c89e8a..61f7949a 100644 --- a/sw/riscvTests/isa/rv64uv/vfcvt.c +++ b/sw/riscvTests/isa/rv64uv/vfcvt.c @@ -15,59 +15,59 @@ //////////////// void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -2372.000, 2978.000, -5132.000, -3426.000, -3878.000, // 9680.000, 76.000, -8128.000, -2314.000, -4660.000, 8672.000, // 8824.000, -5732.000, -1557.000, -2302.000, -407.250 - VLOAD_16(v4, 0xe8a2, 0x69d1, 0xed03, 0xeab1, 0xeb93, 0x70ba, 0x54c0, 0xeff0, + VLOAD_16(v8, 0xe8a2, 0x69d1, 0xed03, 0xeab1, 0xeb93, 0x70ba, 0x54c0, 0xeff0, 0xe885, 0xec8d, 0x703c, 0x704f, 0xed99, 0xe615, 0xe87f, 0xde5d); - asm volatile("vfcvt.xu.f.v v6, v4"); + asm volatile("vfcvt.xu.f.v v16, v8"); // 0, 2978, 0, 0, 0, // 9680, 76, 0, 0, 0, // 8672, 8824, 0, 0, 0, // 0 - VCMP_U16(1, v6, 0x0000, 0x0ba2, 0x0000, 0x0000, 0x0000, 0x25d0, 0x004c, + VCMP_U16(1, v16, 0x0000, 0x0ba2, 0x0000, 0x0000, 0x0000, 0x25d0, 0x004c, 0x0000, 0x0000, 0x0000, 0x21e0, 0x2278, 0x0000, 0x0000, 0x0000, 0x0000); - VSET(16, e32, m2); + VSET(16, e32, m8); // -82436.352, -5427.481, -30119.082, 53784.066, 76500.719, // 65152.020, -94151.375, 71894.320, -20547.545, 95485.906, // 92834.711, -28081.711, -9716.506, 62508.508, 90410.883, // 42708.285 - VLOAD_32(v4, 0xc7a1022d, 0xc5a99bd9, 0xc6eb4e2a, 0x47521811, 0x47956a5c, + VLOAD_32(v8, 0xc7a1022d, 0xc5a99bd9, 0xc6eb4e2a, 0x47521811, 0x47956a5c, 0x477e8005, 0xc7b7e3b0, 0x478c6b29, 0xc6a08717, 0x47ba7ef4, 0x47b5515b, 0xc6db636c, 0xc617d206, 
0x47742c82, 0x47b09571, 0x4726d449); - asm volatile("vfcvt.xu.f.v v6, v4"); + asm volatile("vfcvt.xu.f.v v16, v8"); // 0, 0, 0, 53784, 76501, // 65152, 0, 71894, 0, 95486, // 92835, 0, 0, 62509, // 90411, 42708 - VCMP_U32(2, v6, 0x00000000, 0x00000000, 0x00000000, 0x0000d218, 0x00012ad5, + VCMP_U32(2, v16, 0x00000000, 0x00000000, 0x00000000, 0x0000d218, 0x00012ad5, 0x0000fe80, 0x00000000, 0x000118d6, 0x00000000, 0x000174fe, 0x00016aa3, 0x00000000, 0x00000000, 0x0000f42d, 0x0001612b, 0x0000a6d4); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 3554390.405, 3670449.443, 3880983.535, 3452087.537, // -5447847.496, 498812.179, 9535291.051, 113884.868, // 2124622.198, -2164534.614, 1377445.305, -2114478.485, // -4704971.356, -7866057.432, 7002504.380, -2981734.692 - VLOAD_64(v4, 0x414b1e2b33d13be4, 0x414c00d8b8b34d48, 0x414d9c0bc4751d78, + VLOAD_64(v8, 0x414b1e2b33d13be4, 0x414c00d8b8b34d48, 0x414d9c0bc4751d78, 0x414a565bc4adf2d0, 0xc154c829dfc2d9f6, 0x411e71f0b7161c00, 0x41622fe7619e55e2, 0x40fbcdcde34f1a00, 0x414035a7194d9794, 0xc140839b4e886550, 0x413504a54df56888, 0xc14021d73e1606dc, 0xc151f2b2d6cc57c8, 0xc15e01b25baceaba, 0x415ab6621850fa94, 0xc146bfb358869da2); - asm volatile("vfcvt.xu.f.v v6, v4"); + asm volatile("vfcvt.xu.f.v v16, v8"); // 3554390, 3670449, 3880984, // 3452088, 0, 498812, // 9535291, 113885, 2124622, 0, // 1377445, 0, 0, 0, 7002504, // 0 - VCMP_U64(3, v6, 0x0000000000363c56, 0x00000000003801b1, 0x00000000003b3818, + VCMP_U64(3, v16, 0x0000000000363c56, 0x00000000003801b1, 0x00000000003b3818, 0x000000000034acb8, 0x0000000000000000, 0x0000000000079c7c, 0x0000000000917f3b, 0x000000000001bcdd, 0x0000000000206b4e, 0x0000000000000000, 0x00000000001504a5, 0x0000000000000000, @@ -79,63 +79,63 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -2372.000, 2978.000, -5132.000, -3426.000, -3878.000, // 
9680.000, 76.000, -8128.000, -2314.000, -4660.000, 8672.000, // 8824.000, -5732.000, -1557.000, -2302.000, -407.250 - VLOAD_16(v4, 0xe8a2, 0x69d1, 0xed03, 0xeab1, 0xeb93, 0x70ba, 0x54c0, 0xeff0, + VLOAD_16(v8, 0xe8a2, 0x69d1, 0xed03, 0xeab1, 0xeb93, 0x70ba, 0x54c0, 0xeff0, 0xe885, 0xec8d, 0x703c, 0x704f, 0xed99, 0xe615, 0xe87f, 0xde5d); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.xu.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.xu.f.v v16, v8, v0.t"); // 0, 2978, 0, 0, 0, // 9680, 0, 0, 0, 0, // 0, 8824, 0, 0, 0, 0 - VCMP_U16(4, v6, 0x0000, 0x0ba2, 0x0000, 0x0000, 0x0000, 0x25d0, 0x0000, + VCMP_U16(4, v16, 0x0000, 0x0ba2, 0x0000, 0x0000, 0x0000, 0x25d0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2278, 0x0000, 0x0000, 0x0000, 0x0000); - VSET(16, e32, m2); + VSET(16, e32, m8); // -82436.352, -5427.481, -30119.082, 53784.066, 76500.719, // 65152.020, -94151.375, 71894.320, -20547.545, 95485.906, // 92834.711, -28081.711, -9716.506, 62508.508, 90410.883, // 42708.285 - VLOAD_32(v4, 0xc7a1022d, 0xc5a99bd9, 0xc6eb4e2a, 0x47521811, 0x47956a5c, + VLOAD_32(v8, 0xc7a1022d, 0xc5a99bd9, 0xc6eb4e2a, 0x47521811, 0x47956a5c, 0x477e8005, 0xc7b7e3b0, 0x478c6b29, 0xc6a08717, 0x47ba7ef4, 0x47b5515b, 0xc6db636c, 0xc617d206, 0x47742c82, 0x47b09571, 0x4726d449); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.xu.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.xu.f.v v16, v8, v0.t"); // 0, 0, 0, 53784, 0, // 65152, 0, 71894, 0, 95486, // 0, 0, 0, 62509, 0, // 42708 - VCMP_U32(5, v6, 0x00000000, 0x00000000, 0x00000000, 0x0000d218, 0x00000000, + VCMP_U32(5, v16, 0x00000000, 0x00000000, 0x00000000, 0x0000d218, 0x00000000, 0x0000fe80, 0x00000000, 0x000118d6, 0x00000000, 0x000174fe, 0x00000000, 0x00000000, 0x00000000, 0x0000f42d, 0x00000000, 0x0000a6d4); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 3554390.405, 3670449.443, 3880983.535, 3452087.537, // -5447847.496, 498812.179, 9535291.051, 113884.868, // 2124622.198, 
-2164534.614, 1377445.305, -2114478.485, // -4704971.356, -7866057.432, 7002504.380, -2981734.692 - VLOAD_64(v4, 0x414b1e2b33d13be4, 0x414c00d8b8b34d48, 0x414d9c0bc4751d78, + VLOAD_64(v8, 0x414b1e2b33d13be4, 0x414c00d8b8b34d48, 0x414d9c0bc4751d78, 0x414a565bc4adf2d0, 0xc154c829dfc2d9f6, 0x411e71f0b7161c00, 0x41622fe7619e55e2, 0x40fbcdcde34f1a00, 0x414035a7194d9794, 0xc140839b4e886550, 0x413504a54df56888, 0xc14021d73e1606dc, 0xc151f2b2d6cc57c8, 0xc15e01b25baceaba, 0x415ab6621850fa94, 0xc146bfb358869da2); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.xu.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.xu.f.v v16, v8, v0.t"); // 0, 3670449, 0, 3452088, 0, // 498812, 0, 113885, // 0, 0, 0, 0, 0, 0, 0, // 0 - VCMP_U64(6, v6, 0x0000000000000000, 0x00000000003801b1, 0x0000000000000000, + VCMP_U64(6, v16, 0x0000000000000000, 0x00000000003801b1, 0x0000000000000000, 0x000000000034acb8, 0x0000000000000000, 0x0000000000079c7c, 0x0000000000000000, 0x000000000001bcdd, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, @@ -150,63 +150,63 @@ void TEST_CASE2(void) { // Unmasked vfcvt.x.f.c void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 4144.000, -861.500, -8680.000, 3792.000, 8800.000, // -2330.000, -3066.000, -6148.000, 4776.000, 7360.000, // -7880.000, -1843.000, -7896.000, -6520.000, -7092.000, // -8344.000 - VLOAD_16(v4, 0x6c0c, 0xe2bb, 0xf03d, 0x6b68, 0x704c, 0xe88d, 0xe9fd, 0xee01, + VLOAD_16(v8, 0x6c0c, 0xe2bb, 0xf03d, 0x6b68, 0x704c, 0xe88d, 0xe9fd, 0xee01, 0x6caa, 0x6f30, 0xefb2, 0xe733, 0xefb6, 0xee5e, 0xeeed, 0xf013); - asm volatile("vfcvt.x.f.v v6, v4"); + asm volatile("vfcvt.x.f.v v16, v8"); // 4144, -862, -8680, 3792, 8800, // -2330, -3066, -6148, 4776, 7360, // -7880, -1843, -7896, -6520, -7092, // -8344 - VCMP_U16(7, v6, 0x1030, 0xfca2, 0xde18, 0x0ed0, 0x2260, 0xf6e6, 0xf406, + VCMP_U16(7, v16, 0x1030, 0xfca2, 0xde18, 0x0ed0, 0x2260, 0xf6e6, 0xf406, 0xe7fc, 0x12a8, 0x1cc0, 0xe138, 0xf8cd, 
0xe128, 0xe688, 0xe44c, 0xdf68); - VSET(16, e32, m2); + VSET(16, e32, m8); // -28075.818, -5455.616, 6106.086, -11952.592, -50887.914, // -23028.832, -9221.246, -71657.047, -6655.005, -21208.561, // -30018.096, -19766.838, 48541.953, -62313.625, 13515.192, // -83224.820 - VLOAD_32(v4, 0xc6db57a3, 0xc5aa7ced, 0x45bed0b1, 0xc63ac25e, 0xc746c7ea, + VLOAD_32(v8, 0xc6db57a3, 0xc5aa7ced, 0x45bed0b1, 0xc63ac25e, 0xc746c7ea, 0xc6b3e9aa, 0xc61014fc, 0xc78bf486, 0xc5cff80a, 0xc6a5b11f, 0xc6ea8431, 0xc69a6dad, 0x473d9df4, 0xc77369a0, 0x46532cc5, 0xc7a28c69); - asm volatile("vfcvt.x.f.v v6, v4"); + asm volatile("vfcvt.x.f.v v16, v8"); // -28076, -5456, 6106, -11953, // -50888, -23029, -9221, -71657, -6655, // -21209, -30018, -19767, 48542, -62314, // 13515, -83225 - VCMP_U32(8, v6, 0xffff9254, 0xffffeab0, 0x000017da, 0xffffd14f, 0xffff3938, + VCMP_U32(8, v16, 0xffff9254, 0xffffeab0, 0x000017da, 0xffffd14f, 0xffff3938, 0xffffa60b, 0xffffdbfb, 0xfffee817, 0xffffe601, 0xffffad27, 0xffff8abe, 0xffffb2c9, 0x0000bd9e, 0xffff0c96, 0x000034cb, 0xfffebae7); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 3087905.033, -2534011.630, 7824302.813, // -9294206.521, 6436555.847, 6645117.193, // 1358075.867, 5694551.012, -9840938.636, // 4621816.383, 2584370.751, 5569558.860, // 495487.041, 4759865.418, -6831172.669, // 8371055.296 - VLOAD_64(v4, 0x41478f10842c8b9c, 0xc143553dd0971c82, 0x415dd8ebb40e1fe0, + VLOAD_64(v8, 0x41478f10842c8b9c, 0xc143553dd0971c82, 0x415dd8ebb40e1fe0, 0xc161ba2fd0a8a593, 0x41588db2f632700c, 0x4159595f4c588b60, 0x4134b8fbde131210, 0x4155b915c0cb4294, 0xc162c52554566300, 0x4151a17e187d1aa8, 0x4143b7996029dc68, 0x41553f05b70b6824, 0x411e3dfc2a598ba0, 0x4152284e5ac4da5a, 0xc15a0f112acbf258, 0x415feedbd2ed6038); - asm volatile("vfcvt.x.f.v v6, v4"); + asm volatile("vfcvt.x.f.v v16, v8"); // 3087905, -2534012, 7824303, // -9294207, 6436556, 6645117, // 1358076, 5694551, -9840939, // 4621816, 2584371, 5569559, // 495487, 4759865, -6831173, // 8371055 - 
VCMP_U64(9, v6, 0x00000000002f1e21, 0xffffffffffd95584, 0x00000000007763af, + VCMP_U64(9, v16, 0x00000000002f1e21, 0xffffffffffd95584, 0x00000000007763af, 0xffffffffff722e81, 0x00000000006236cc, 0x000000000065657d, 0x000000000014b8fc, 0x000000000056e457, 0xffffffffff69d6d5, 0x00000000004685f8, 0x0000000000276f33, 0x000000000054fc17, @@ -217,64 +217,64 @@ void TEST_CASE3(void) { // Masked vfcvt.x.f.c void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 4144.000, -861.500, -8680.000, 3792.000, 8800.000, // -2330.000, -3066.000, -6148.000, 4776.000, 7360.000, // -7880.000, -1843.000, -7896.000, -6520.000, -7092.000, // -8344.000 - VLOAD_16(v4, 0x6c0c, 0xe2bb, 0xf03d, 0x6b68, 0x704c, 0xe88d, 0xe9fd, 0xee01, + VLOAD_16(v8, 0x6c0c, 0xe2bb, 0xf03d, 0x6b68, 0x704c, 0xe88d, 0xe9fd, 0xee01, 0x6caa, 0x6f30, 0xefb2, 0xe733, 0xefb6, 0xee5e, 0xeeed, 0xf013); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.x.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.x.f.v v16, v8, v0.t"); // 0, -862, 0, 3792, 0, // -2330, 0, -6148, 0, 7360, 0, // -1843, 0, -6520, 0, -8344 - VCMP_U16(10, v6, 0x0000, 0xfca2, 0x0000, 0x0ed0, 0x0000, 0xf6e6, 0x0000, + VCMP_U16(10, v16, 0x0000, 0xfca2, 0x0000, 0x0ed0, 0x0000, 0xf6e6, 0x0000, 0xe7fc, 0x0000, 0x1cc0, 0x0000, 0xf8cd, 0x0000, 0xe688, 0x0000, 0xdf68); - VSET(16, e32, m2); + VSET(16, e32, m8); // -28075.818, -5455.616, 6106.086, -11952.592, // -50887.914, -23028.832, -9221.246, -71657.047, // -6655.005, -21208.561, -30018.096, -19766.838, 48541.953, // -62313.625, 13515.192, -83224.820 - VLOAD_32(v4, 0xc6db57a3, 0xc5aa7ced, 0x45bed0b1, 0xc63ac25e, 0xc746c7ea, + VLOAD_32(v8, 0xc6db57a3, 0xc5aa7ced, 0x45bed0b1, 0xc63ac25e, 0xc746c7ea, 0xc6b3e9aa, 0xc61014fc, 0xc78bf486, 0xc5cff80a, 0xc6a5b11f, 0xc6ea8431, 0xc69a6dad, 0x473d9df4, 0xc77369a0, 0x46532cc5, 0xc7a28c69); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.x.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.x.f.v v16, v8, v0.t"); // 0, 
-5456, 0, -11953, 0, // -23029, 0, -71657, 0, // -21209, 0, -19767, 0, // -62314, 0, -83225 - VCMP_U32(11, v6, 0x00000000, 0xffffeab0, 0x00000000, 0xffffd14f, 0x00000000, + VCMP_U32(11, v16, 0x00000000, 0xffffeab0, 0x00000000, 0xffffd14f, 0x00000000, 0xffffa60b, 0x00000000, 0xfffee817, 0x00000000, 0xffffad27, 0x00000000, 0xffffb2c9, 0x00000000, 0xffff0c96, 0x00000000, 0xfffebae7); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 3087905.033, -2534011.630, 7824302.813, -9294206.521, // 6436555.847, 6645117.193, 1358075.867, 5694551.012, // -9840938.636, 4621816.383, 2584370.751, 5569558.860, // 495487.041, 4759865.418, -6831172.669, 8371055.296 - VLOAD_64(v4, 0x41478f10842c8b9c, 0xc143553dd0971c82, 0x415dd8ebb40e1fe0, + VLOAD_64(v8, 0x41478f10842c8b9c, 0xc143553dd0971c82, 0x415dd8ebb40e1fe0, 0xc161ba2fd0a8a593, 0x41588db2f632700c, 0x4159595f4c588b60, 0x4134b8fbde131210, 0x4155b915c0cb4294, 0xc162c52554566300, 0x4151a17e187d1aa8, 0x4143b7996029dc68, 0x41553f05b70b6824, 0x411e3dfc2a598ba0, 0x4152284e5ac4da5a, 0xc15a0f112acbf258, 0x415feedbd2ed6038); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.x.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.x.f.v v16, v8, v0.t"); // 0, -2534012, 0, -9294207, // 0, 6645117, 0, 5694551, 0, // 4621816, 0, 5569559, // 0, 4759865, 0, 8371055 - VCMP_U64(12, v6, 0x0000000000000000, 0xffffffffffd95584, 0x0000000000000000, + VCMP_U64(12, v16, 0x0000000000000000, 0xffffffffffd95584, 0x0000000000000000, 0xffffffffff722e81, 0x0000000000000000, 0x000000000065657d, 0x0000000000000000, 0x000000000056e457, 0x0000000000000000, 0x00000000004685f8, 0x0000000000000000, 0x000000000054fc17, @@ -289,59 +289,59 @@ void TEST_CASE4(void) { // Simple random test with similar values void TEST_CASE5(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 6996.000, -7512.000, -4792.000, 7240.000, 8336.000, // 6332.000, -277.750, -4074.000, 9352.000, 8832.000, // -65.000, 5860.000, 6892.000, 2944.000, 9608.000, // 4608.000 - 
VLOAD_16(v4, 0x6ed5, 0xef56, 0xecae, 0x6f12, 0x7012, 0x6e2f, 0xdc57, 0xebf5, + VLOAD_16(v8, 0x6ed5, 0xef56, 0xecae, 0x6f12, 0x7012, 0x6e2f, 0xdc57, 0xebf5, 0x7091, 0x7050, 0xd410, 0x6db9, 0x6ebb, 0x69c0, 0x70b1, 0x6c80); - asm volatile("vfcvt.rtz.xu.f.v v6, v4"); + asm volatile("vfcvt.rtz.xu.f.v v16, v8"); // 6996, 0, 0, 7240, 8336, // 6332, 0, 0, 9352, 8832, 0, // 5860, 6892, 2944, 9608, 4608 - VCMP_U16(13, v6, 0x1b54, 0x0000, 0x0000, 0x1c48, 0x2090, 0x18bc, 0x0000, + VCMP_U16(13, v16, 0x1b54, 0x0000, 0x0000, 0x1c48, 0x2090, 0x18bc, 0x0000, 0x0000, 0x2488, 0x2280, 0x0000, 0x16e4, 0x1aec, 0x0b80, 0x2588, 0x1200); - VSET(16, e32, m2); + VSET(16, e32, m8); // 85074.883, -2035.769, 67397.633, -57745.480, 82113.172, // 18415.770, 57859.465, 83291.773, -83693.375, 43321.199, // 94626.156, -53520.090, 9604.658, -5764.834, 94299.633, // 57572.980 - VLOAD_32(v4, 0x47a62971, 0xc4fe789f, 0x4783a2d1, 0xc761917b, 0x47a06096, + VLOAD_32(v8, 0x47a62971, 0xc4fe789f, 0x4783a2d1, 0xc761917b, 0x47a06096, 0x468fdf8a, 0x47620377, 0x47a2ade3, 0xc7a376b0, 0x47293933, 0x47b8d114, 0xc7511017, 0x461612a2, 0xc5b426ac, 0x47b82dd1, 0x4760e4fb); - asm volatile("vfcvt.rtz.xu.f.v v6, v4"); + asm volatile("vfcvt.rtz.xu.f.v v16, v8"); // 85074, 0, 67397, 0, 82113, // 18415, 57859, 83291, 0, 43321, // 94626, 0, 9604, 0, 94299, // 57572 - VCMP_U32(14, v6, 0x00014c52, 0x00000000, 0x00010745, 0x00000000, 0x000140c1, + VCMP_U32(14, v16, 0x00014c52, 0x00000000, 0x00010745, 0x00000000, 0x000140c1, 0x000047ef, 0x0000e203, 0x0001455b, 0x00000000, 0x0000a939, 0x000171a2, 0x00000000, 0x00002584, 0x00000000, 0x0001705b, 0x0000e0e4); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -5386285.220, -9081004.335, -9603879.062, -4621060.923, // 2017661.058, 1106405.978, -2095853.299, 1911589.313, // 4833261.528, 1291127.404, -9941577.120, 9259799.184, // -8569693.727, 4926687.920, -7537625.130, -6328586.289 - VLOAD_64(v4, 0xc1548c0b4e12be63, 0xc16152158ab92a41, 0xc1625164e1fd6af4, + VLOAD_64(v8, 
0xc1548c0b4e12be63, 0xc16152158ab92a41, 0xc1625164e1fd6af4, 0xc151a0c13b0c041c, 0x413ec97d0edd7a68, 0x4130e1e5fa5c8120, 0xc13ffaed4c78fc7c, 0x413d2b2550357b50, 0x41526ffb61d23f42, 0x4133b377675b6328, 0xc162f64923d5cce3, 0x4161a962e5e3a1e8, 0xc160586bb74734b0, 0x4152cb37fae70f80, 0xc15cc0f6484f174c, 0xc1582442928257b8); - asm volatile("vfcvt.rtz.xu.f.v v6, v4"); + asm volatile("vfcvt.rtz.xu.f.v v16, v8"); // 0, 0, 0, 0, 2017661, // 1106405, 0, 1911589, // 4833261, 1291127, 0, // 9259799, 0, 4926687, // 0, 0 - VCMP_U64(15, v6, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + VCMP_U64(15, v16, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x00000000001ec97d, 0x000000000010e1e5, 0x0000000000000000, 0x00000000001d2b25, 0x000000000049bfed, 0x000000000013b377, 0x0000000000000000, 0x00000000008d4b17, @@ -353,64 +353,64 @@ void TEST_CASE5(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE6(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 6996.000, -7512.000, -4792.000, 7240.000, 8336.000, // 6332.000, -277.750, -4074.000, 9352.000, 8832.000, // -65.000, 5860.000, 6892.000, 2944.000, 9608.000, // 4608.000 - VLOAD_16(v4, 0x6ed5, 0xef56, 0xecae, 0x6f12, 0x7012, 0x6e2f, 0xdc57, 0xebf5, + VLOAD_16(v8, 0x6ed5, 0xef56, 0xecae, 0x6f12, 0x7012, 0x6e2f, 0xdc57, 0xebf5, 0x7091, 0x7050, 0xd410, 0x6db9, 0x6ebb, 0x69c0, 0x70b1, 0x6c80); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.rtz.xu.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.rtz.xu.f.v v16, v8, v0.t"); // 0, 0, 0, 7240, 0, // 6332, 0, 0, 0, 8832, // 0, 5860, 0, 2944, 0, // 4608 - VCMP_U16(16, v6, 0x0000, 0x0000, 0x0000, 0x1c48, 0x0000, 0x18bc, 0x0000, + VCMP_U16(16, v16, 0x0000, 0x0000, 0x0000, 0x1c48, 0x0000, 0x18bc, 0x0000, 0x0000, 0x0000, 0x2280, 0x0000, 0x16e4, 0x0000, 0x0b80, 0x0000, 0x1200); - VSET(16, e32, m2); + VSET(16, e32, m8); // 85074.883, -2035.769, 67397.633, -57745.480, 
82113.172, // 18415.770, 57859.465, 83291.773, -83693.375, 43321.199, // 94626.156, -53520.090, 9604.658, -5764.834, 94299.633, // 57572.980 - VLOAD_32(v4, 0x47a62971, 0xc4fe789f, 0x4783a2d1, 0xc761917b, 0x47a06096, + VLOAD_32(v8, 0x47a62971, 0xc4fe789f, 0x4783a2d1, 0xc761917b, 0x47a06096, 0x468fdf8a, 0x47620377, 0x47a2ade3, 0xc7a376b0, 0x47293933, 0x47b8d114, 0xc7511017, 0x461612a2, 0xc5b426ac, 0x47b82dd1, 0x4760e4fb); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.rtz.xu.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.rtz.xu.f.v v16, v8, v0.t"); // 0, 0, 0, 0, 0, 18415, // 0, 83291, 0, 43321, 0, 0, // 0, 0, 0, 57572 - VCMP_U32(17, v6, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + VCMP_U32(17, v16, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000047ef, 0x00000000, 0x0001455b, 0x00000000, 0x0000a939, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000e0e4); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -5386285.220, -9081004.335, -9603879.062, -4621060.923, // 2017661.058, 1106405.978, -2095853.299, 1911589.313, // 4833261.528, 1291127.404, -9941577.120, 9259799.184, // -8569693.727, 4926687.920, -7537625.130, -6328586.289 - VLOAD_64(v4, 0xc1548c0b4e12be63, 0xc16152158ab92a41, 0xc1625164e1fd6af4, + VLOAD_64(v8, 0xc1548c0b4e12be63, 0xc16152158ab92a41, 0xc1625164e1fd6af4, 0xc151a0c13b0c041c, 0x413ec97d0edd7a68, 0x4130e1e5fa5c8120, 0xc13ffaed4c78fc7c, 0x413d2b2550357b50, 0x41526ffb61d23f42, 0x4133b377675b6328, 0xc162f64923d5cce3, 0x4161a962e5e3a1e8, 0xc160586bb74734b0, 0x4152cb37fae70f80, 0xc15cc0f6484f174c, 0xc1582442928257b8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.rtz.xu.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.rtz.xu.f.v v16, v8, v0.t"); // 0, 0, 0, 0, 0, // 1106405, 0, 1911589, // 0, 1291127, 0, 9259799, 0, // 4926687, 0, 0 - VCMP_U64(18, v6, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + VCMP_U64(18, v16, 0x0000000000000000, 
0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x000000000010e1e5, 0x0000000000000000, 0x00000000001d2b25, 0x0000000000000000, 0x000000000013b377, 0x0000000000000000, 0x00000000008d4b17, @@ -425,61 +425,61 @@ void TEST_CASE6(void) { // Simple random test with similar values void TEST_CASE7(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 5116.000, 4640.000, 5720.000, 1316.000, 8104.000, // 9952.000, 9400.000, -4120.000, -9368.000, 6076.000, // 1782.000, -5332.000, -4284.000, -2878.000, -2752.000, // 3714.000 - VLOAD_16(v4, 0x6cff, 0x6c88, 0x6d96, 0x6524, 0x6fea, 0x70dc, 0x7097, 0xec06, + VLOAD_16(v8, 0x6cff, 0x6c88, 0x6d96, 0x6524, 0x6fea, 0x70dc, 0x7097, 0xec06, 0xf093, 0x6def, 0x66f6, 0xed35, 0xec2f, 0xe99f, 0xe960, 0x6b41); - asm volatile("vfcvt.rtz.x.f.v v6, v4"); + asm volatile("vfcvt.rtz.x.f.v v16, v8"); // 5116, 4640, 5720, 1316, 8104, // 9952, 9400, -4120, -9368, 6076, // 1782, -5332, -4284, -2878, -2752, // 3714 - VCMP_U16(19, v6, 0x13fc, 0x1220, 0x1658, 0x0524, 0x1fa8, 0x26e0, 0x24b8, + VCMP_U16(19, v16, 0x13fc, 0x1220, 0x1658, 0x0524, 0x1fa8, 0x26e0, 0x24b8, 0xefe8, 0xdb68, 0x17bc, 0x06f6, 0xeb2c, 0xef44, 0xf4c2, 0xf540, 0x0e82); - VSET(16, e32, m2); + VSET(16, e32, m8); // -31395.312, 38407.539, 39625.664, -19419.770, -77414.898, // -96104.727, -8227.330, -45789.250, -74805.781, 78266.945, // 1635.832, -33150.762, 17428.920, -93694.898, 93592.562, // -83328.680 - VLOAD_32(v4, 0xc6f546a0, 0x4716078a, 0x471ac9aa, 0xc697b78a, 0xc7973373, + VLOAD_32(v8, 0xc6f546a0, 0x4716078a, 0x471ac9aa, 0xc697b78a, 0xc7973373, 0xc7bbb45d, 0xc6008d52, 0xc732dd40, 0xc7921ae4, 0x4798dd79, 0x44cc7aa0, 0xc7017ec3, 0x468829d7, 0xc7b6ff73, 0x47b6cc48, 0xc7a2c057); - asm volatile("vfcvt.rtz.x.f.v v6, v4"); + asm volatile("vfcvt.rtz.x.f.v v16, v8"); // -31395, 38407, 39625, -19419, // -77414, -96104, -8227, -45789, // -74805, 78266, 1635, -33150, 17428, // -93694, 93592, -83328 - VCMP_U32(20, v6, 0xffff855d, 0x00009607, 0x00009ac9, 0xffffb425, 
0xfffed19a, + VCMP_U32(20, v16, 0xffff855d, 0x00009607, 0x00009ac9, 0xffffb425, 0xfffed19a, 0xfffe8898, 0xffffdfdd, 0xffff4d23, 0xfffedbcb, 0x000131ba, 0x00000663, 0xffff7e82, 0x00004414, 0xfffe9202, 0x00016d98, 0xfffeba80); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 1347922.217, 7326256.926, 2532328.150, -4365139.352, // -3892733.643, -3401324.772, -2109243.969, 61221.157, // -307581.498, -6001564.901, -1299579.664, -2048360.900, // 3486773.936, -5491246.977, -2222467.648, 1432204.815 - VLOAD_64(v4, 0x413491523797bd28, 0x415bf28c3b410560, 0x414351f41339c8f8, + VLOAD_64(v8, 0x413491523797bd28, 0x415bf28c3b410560, 0x414351f41339c8f8, 0xc150a6d4d6864763, 0xc14db2fed245a01c, 0xc149f33662d1f60e, 0xc140179dfc15a4ac, 0x40ede4a503831a00, 0xc112c5f5fdac3c80, 0xc156e4e739a40168, 0xc133d47ba9e7da00, 0xc13f4168e650cc0c, 0x414a9a1af7c5dda0, 0xc154f28bbe844db6, 0xc140f4c1d2e7a21a, 0x4135da8cd09570f8); - asm volatile("vfcvt.rtz.x.f.v v6, v4"); + asm volatile("vfcvt.rtz.x.f.v v16, v8"); // 1347922, 7326256, 2532328, // -4365139, -3892733, -3401324, // -2109243, 61221, -307581, // -6001564, -1299579, -2048360, // 3486773, -5491246, -2222467, // 1432204 - VCMP_U64(21, v6, 0x0000000000149152, 0x00000000006fca30, 0x000000000026a3e8, + VCMP_U64(21, v16, 0x0000000000149152, 0x00000000006fca30, 0x000000000026a3e8, 0xffffffffffbd64ad, 0xffffffffffc49a03, 0xffffffffffcc1994, 0xffffffffffdfd0c5, 0x000000000000ef25, 0xfffffffffffb4e83, 0xffffffffffa46c64, 0xffffffffffec2b85, 0xffffffffffe0be98, @@ -491,66 +491,66 @@ void TEST_CASE7(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE8(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 5116.000, 4640.000, 5720.000, 1316.000, 8104.000, // 9952.000, 9400.000, -4120.000, -9368.000, 6076.000, // 1782.000, -5332.000, -4284.000, -2878.000, -2752.000, // 3714.000 - VLOAD_16(v4, 0x6cff, 0x6c88, 0x6d96, 0x6524, 0x6fea, 0x70dc, 0x7097, 0xec06, + VLOAD_16(v8, 0x6cff, 
0x6c88, 0x6d96, 0x6524, 0x6fea, 0x70dc, 0x7097, 0xec06, 0xf093, 0x6def, 0x66f6, 0xed35, 0xec2f, 0xe99f, 0xe960, 0x6b41); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.rtz.x.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.rtz.x.f.v v16, v8, v0.t"); // 0, 4640, 0, 1316, 0, // 9952, 0, -4120, 0, 6076, // 0, -5332, 0, -2878, 0, // 3714 - VCMP_U16(22, v6, 0x0000, 0x1220, 0x0000, 0x0524, 0x0000, 0x26e0, 0x0000, + VCMP_U16(22, v16, 0x0000, 0x1220, 0x0000, 0x0524, 0x0000, 0x26e0, 0x0000, 0xefe8, 0x0000, 0x17bc, 0x0000, 0xeb2c, 0x0000, 0xf4c2, 0x0000, 0x0e82); - VSET(16, e32, m2); + VSET(16, e32, m8); // -31395.312, 38407.539, 39625.664, -19419.770, -77414.898, // -96104.727, -8227.330, -45789.250, -74805.781, 78266.945, // 1635.832, -33150.762, 17428.920, -93694.898, 93592.562, // -83328.680 - VLOAD_32(v4, 0xc6f546a0, 0x4716078a, 0x471ac9aa, 0xc697b78a, 0xc7973373, + VLOAD_32(v8, 0xc6f546a0, 0x4716078a, 0x471ac9aa, 0xc697b78a, 0xc7973373, 0xc7bbb45d, 0xc6008d52, 0xc732dd40, 0xc7921ae4, 0x4798dd79, 0x44cc7aa0, 0xc7017ec3, 0x468829d7, 0xc7b6ff73, 0x47b6cc48, 0xc7a2c057); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.rtz.x.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.rtz.x.f.v v16, v8, v0.t"); // 0, 38407, 0, -19419, 0, // -96104, 0, -45789, 0, 78266, // 0, -33150, 0, -93694, 0, // -83328 - VCMP_U32(23, v6, 0x00000000, 0x00009607, 0x00000000, 0xffffb425, 0x00000000, + VCMP_U32(23, v16, 0x00000000, 0x00009607, 0x00000000, 0xffffb425, 0x00000000, 0xfffe8898, 0x00000000, 0xffff4d23, 0x00000000, 0x000131ba, 0x00000000, 0xffff7e82, 0x00000000, 0xfffe9202, 0x00000000, 0xfffeba80); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 1347922.217, 7326256.926, 2532328.150, -4365139.352, // -3892733.643, -3401324.772, -2109243.969, 61221.157, // -307581.498, -6001564.901, -1299579.664, -2048360.900, // 3486773.936, -5491246.977, -2222467.648, 1432204.815 - VLOAD_64(v4, 0x413491523797bd28, 0x415bf28c3b410560, 0x414351f41339c8f8, 
+ VLOAD_64(v8, 0x413491523797bd28, 0x415bf28c3b410560, 0x414351f41339c8f8, 0xc150a6d4d6864763, 0xc14db2fed245a01c, 0xc149f33662d1f60e, 0xc140179dfc15a4ac, 0x40ede4a503831a00, 0xc112c5f5fdac3c80, 0xc156e4e739a40168, 0xc133d47ba9e7da00, 0xc13f4168e650cc0c, 0x414a9a1af7c5dda0, 0xc154f28bbe844db6, 0xc140f4c1d2e7a21a, 0x4135da8cd09570f8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.rtz.x.f.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.rtz.x.f.v v16, v8, v0.t"); // 0, 7326256, 0, -4365139, // 0, -3401324, 0, 61221, 0, // -6001564, 0, // -2048360, 0, // -5491246, 0, 1432204 - VCMP_U64(24, v6, 0x0000000000000000, 0x00000000006fca30, 0x0000000000000000, + VCMP_U64(24, v16, 0x0000000000000000, 0x00000000006fca30, 0x0000000000000000, 0xffffffffffbd64ad, 0x0000000000000000, 0xffffffffffcc1994, 0x0000000000000000, 0x000000000000ef25, 0x0000000000000000, 0xffffffffffa46c64, 0x0000000000000000, 0xffffffffffe0be98, @@ -564,56 +564,56 @@ void TEST_CASE8(void) { //////////////// void TEST_CASE9(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 55973, 61786, 64322, 55940, 55857, // 3425, 1068, 4246, 57901, 7342, // 8693, 60988, 9047, 63358, 58389, // 8076 - VLOAD_16(v4, 0xdaa5, 0xf15a, 0xfb42, 0xda84, 0xda31, 0x0d61, 0x042c, 0x1096, + VLOAD_16(v8, 0xdaa5, 0xf15a, 0xfb42, 0xda84, 0xda31, 0x0d61, 0x042c, 0x1096, 0xe22d, 0x1cae, 0x21f5, 0xee3c, 0x2357, 0xf77e, 0xe415, 0x1f8c); - asm volatile("vfcvt.f.xu.v v6, v4"); + asm volatile("vfcvt.f.xu.v v16, v8"); // 55968.000, 61792.000, 64320.000, 55936.000, 55872.000, // 3424.000, 1068.000, 4248.000, 57888.000, 7344.000, // 8696.000, 60992.000, 9048.000, 63360.000, 58400.000, // 8076.000 - VCMP_U16(25, v6, 0x7ad5, 0x7b8b, 0x7bda, 0x7ad4, 0x7ad2, 0x6ab0, 0x642c, + VCMP_U16(25, v16, 0x7ad5, 0x7b8b, 0x7bda, 0x7ad4, 0x7ad2, 0x6ab0, 0x642c, 0x6c26, 0x7b11, 0x6f2c, 0x703f, 0x7b72, 0x706b, 0x7bbc, 0x7b21, 0x6fe3); - VSET(16, e32, m2); + VSET(16, e32, m8); // 72473, 4294949057, 50975, 4294915723, // 4294876584, 4294895088, 
24967, 34761, // 83805, 68361, 49397, 51562, 24877, // 4294942241, 4294909502, 42562 - VLOAD_32(v4, 0x00011b19, 0xffffb8c1, 0x0000c71f, 0xffff368b, 0xfffe9da8, + VLOAD_32(v8, 0x00011b19, 0xffffb8c1, 0x0000c71f, 0xffff368b, 0xfffe9da8, 0xfffee5f0, 0x00006187, 0x000087c9, 0x0001475d, 0x00010b09, 0x0000c0f5, 0x0000c96a, 0x0000612d, 0xffff9e21, 0xffff1e3e, 0x0000a642); - asm volatile("vfcvt.f.xu.v v6, v4"); + asm volatile("vfcvt.f.xu.v v16, v8"); // 72473.000, 4294949120.000, 50975.000, 4294915840.000, // 4294876672.000, 4294895104.000, 24967.000, 34761.000, // 83805.000, 68361.000, 49397.000, 51562.000, 24877.000, // 4294942208.000, 4294909440.000, 42562.000 - VCMP_U32(26, v6, 0x478d8c80, 0x4f7fffb9, 0x47471f00, 0x4f7fff37, 0x4f7ffe9e, + VCMP_U32(26, v16, 0x478d8c80, 0x4f7fffb9, 0x47471f00, 0x4f7fff37, 0x4f7ffe9e, 0x4f7ffee6, 0x46c30e00, 0x4707c900, 0x47a3ae80, 0x47858480, 0x4740f500, 0x47496a00, 0x46c25a00, 0x4f7fff9e, 0x4f7fff1e, 0x47264200); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 18446744073707704187, 18446744073702261660, 4325496, // 3834488, 18446744073707063867, 18446744073706356425, // 5215660, 18446744073707545423, 69532, // 18446744073707444829, 4236283, 3402850, // 18446744073708706866, 275183, 4230347, // 18446744073704794800 - VLOAD_64(v4, 0xffffffffffe3cf7b, 0xffffffffff90c39c, 0x0000000000420078, + VLOAD_64(v8, 0xffffffffffe3cf7b, 0xffffffffff90c39c, 0x0000000000420078, 0x00000000003a8278, 0xffffffffffda0a3b, 0xffffffffffcf3ec9, 0x00000000004f95ac, 0xffffffffffe1634f, 0x0000000000010f9c, 0xffffffffffdfda5d, 0x000000000040a3fb, 0x000000000033ec62, 0xfffffffffff31c32, 0x00000000000432ef, 0x0000000000408ccb, 0xffffffffffb76ab0); - asm volatile("vfcvt.f.xu.v v6, v4"); + asm volatile("vfcvt.f.xu.v v16, v8"); // 18446744073707704320.000, 18446744073702260736.000, // 4325496.000, 3834488.000, 18446744073707063296.000, // 18446744073706356736.000, 5215660.000, @@ -621,7 +621,7 @@ void TEST_CASE9(void) { // 18446744073707444224.000, 
4236283.000, 3402850.000, // 18446744073708707840.000, 275183.000, 4230347.000, // 18446744073704794112.000 - VCMP_U64(27, v6, 0x43effffffffffc7a, 0x43effffffffff218, 0x4150801e00000000, + VCMP_U64(27, v16, 0x43effffffffffc7a, 0x43effffffffff218, 0x4150801e00000000, 0x414d413c00000000, 0x43effffffffffb41, 0x43effffffffff9e8, 0x4153e56b00000000, 0x43effffffffffc2c, 0x40f0f9c000000000, 0x43effffffffffbfb, 0x415028fec0000000, 0x4149f63100000000, @@ -633,65 +633,65 @@ void TEST_CASE9(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE10(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 55973, 61786, 64322, 55940, 55857, // 3425, 1068, 4246, 57901, 7342, // 8693, 60988, 9047, 63358, 58389, // 8076 - VLOAD_16(v4, 0xdaa5, 0xf15a, 0xfb42, 0xda84, 0xda31, 0x0d61, 0x042c, 0x1096, + VLOAD_16(v8, 0xdaa5, 0xf15a, 0xfb42, 0xda84, 0xda31, 0x0d61, 0x042c, 0x1096, 0xe22d, 0x1cae, 0x21f5, 0xee3c, 0x2357, 0xf77e, 0xe415, 0x1f8c); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.f.xu.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.f.xu.v v16, v8, v0.t"); // 0.000, 61792.000, 0.000, 55936.000, 0.000, 3424.000, // 0.000, 4248.000, 0.000, 7344.000, 0.000, 60992.000, // 0.000, 63360.000, 0.000, 8076.000 - VCMP_U16(28, v6, 0x0, 0x7b8b, 0x0, 0x7ad4, 0x0, 0x6ab0, 0x0, 0x6c26, 0x0, + VCMP_U16(28, v16, 0x0, 0x7b8b, 0x0, 0x7ad4, 0x0, 0x6ab0, 0x0, 0x6c26, 0x0, 0x6f2c, 0x0, 0x7b72, 0x0, 0x7bbc, 0x0, 0x6fe3); - VSET(16, e32, m2); + VSET(16, e32, m8); // 72473, 4294949057, 50975, 4294915723, // 4294876584, 4294895088, 24967, 34761, // 83805, 68361, 49397, 51562, 24877, // 4294942241, 4294909502, 42562 - VLOAD_32(v4, 0x00011b19, 0xffffb8c1, 0x0000c71f, 0xffff368b, 0xfffe9da8, + VLOAD_32(v8, 0x00011b19, 0xffffb8c1, 0x0000c71f, 0xffff368b, 0xfffe9da8, 0xfffee5f0, 0x00006187, 0x000087c9, 0x0001475d, 0x00010b09, 0x0000c0f5, 0x0000c96a, 0x0000612d, 0xffff9e21, 0xffff1e3e, 0x0000a642); VLOAD_8(v0, 0xAA, 0xAA); - 
VCLEAR(v6); - asm volatile("vfcvt.f.xu.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.f.xu.v v16, v8, v0.t"); // 0.000, 4294949120.000, 0.000, 4294915840.000, 0.000, // 4294895104.000, 0.000, 34761.000, 0.000, 68361.000, // 0.000, 51562.000, 0.000, 4294942208.000, 0.000, // 42562.000 - VCMP_U32(29, v6, 0x0, 0x4f7fffb9, 0x0, 0x4f7fff37, 0x0, 0x4f7ffee6, 0x0, + VCMP_U32(29, v16, 0x0, 0x4f7fffb9, 0x0, 0x4f7fff37, 0x0, 0x4f7ffee6, 0x0, 0x4707c900, 0x0, 0x47858480, 0x0, 0x47496a00, 0x0, 0x4f7fff9e, 0x0, 0x47264200); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 18446744073707704187, 18446744073702261660, 4325496, // 3834488, 18446744073707063867, 18446744073706356425, // 5215660, 18446744073707545423, 69532, // 18446744073707444829, 4236283, 3402850, // 18446744073708706866, 275183, 4230347, // 18446744073704794800 - VLOAD_64(v4, 0xffffffffffe3cf7b, 0xffffffffff90c39c, 0x0000000000420078, + VLOAD_64(v8, 0xffffffffffe3cf7b, 0xffffffffff90c39c, 0x0000000000420078, 0x00000000003a8278, 0xffffffffffda0a3b, 0xffffffffffcf3ec9, 0x00000000004f95ac, 0xffffffffffe1634f, 0x0000000000010f9c, 0xffffffffffdfda5d, 0x000000000040a3fb, 0x000000000033ec62, 0xfffffffffff31c32, 0x00000000000432ef, 0x0000000000408ccb, 0xffffffffffb76ab0); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.f.xu.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.f.xu.v v16, v8, v0.t"); // 0.000, 18446744073702260736.000, 0.000, 3834488.000, // 0.000, 18446744073706356736.000, 0.000, // 18446744073707544576.000, 0.000, 18446744073707444224.000, // 0.000, 3402850.000, 0.000, 275183.000, 0.000, // 18446744073704794112.000 - VCMP_U64(30, v6, 0x0, 0x43effffffffff218, 0x0, 0x414d413c00000000, 0x0, + VCMP_U64(30, v16, 0x0, 0x43effffffffff218, 0x0, 0x414d413c00000000, 0x0, 0x43effffffffff9e8, 0x0, 0x43effffffffffc2c, 0x0, 0x43effffffffffbfb, 0x0, 0x4149f63100000000, 0x0, 0x4110cbbc00000000, 0x0, 0x43effffffffff6ed); @@ -703,61 +703,61 @@ void TEST_CASE10(void) { /////////////// 
void TEST_CASE11(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -4779, 465, 9893, -6763, -4072, // 1612, -9552, 2426, 325, 7561, // -8581, -1741, -8518, -4699, 3653, // 9937 - VLOAD_16(v4, 0xed55, 0x01d1, 0x26a5, 0xe595, 0xf018, 0x064c, 0xdab0, 0x097a, + VLOAD_16(v8, 0xed55, 0x01d1, 0x26a5, 0xe595, 0xf018, 0x064c, 0xdab0, 0x097a, 0x0145, 0x1d89, 0xde7b, 0xf933, 0xdeba, 0xeda5, 0x0e45, 0x26d1); - asm volatile("vfcvt.f.x.v v6, v4"); + asm volatile("vfcvt.f.x.v v16, v8"); // -4780.000, 465.000, 9896.000, -6764.000, -4072.000, // 1612.000, -9552.000, 2426.000, 325.000, 7560.000, // -8584.000, -1741.000, -8520.000, -4700.000, 3652.000, // 9936.000 - VCMP_U16(31, v6, 0xecab, 0x5f44, 0x70d5, 0xee9b, 0xebf4, 0x664c, 0xf0aa, + VCMP_U16(31, v16, 0xecab, 0x5f44, 0x70d5, 0xee9b, 0xebf4, 0x664c, 0xf0aa, 0x68bd, 0x5d14, 0x6f62, 0xf031, 0xe6cd, 0xf029, 0xec97, 0x6b22, 0x70da); - VSET(16, e32, m2); + VSET(16, e32, m8); // -39422, 54262, 12833, -40266, // -64918, 28317, 89178, 54320, -99922, // -73005, 95070, -24716, 60663, 59516, // 14865, 26328 - VLOAD_32(v4, 0xffff6602, 0x0000d3f6, 0x00003221, 0xffff62b6, 0xffff026a, + VLOAD_32(v8, 0xffff6602, 0x0000d3f6, 0x00003221, 0xffff62b6, 0xffff026a, 0x00006e9d, 0x00015c5a, 0x0000d430, 0xfffe79ae, 0xfffee2d3, 0x0001735e, 0xffff9f74, 0x0000ecf7, 0x0000e87c, 0x00003a11, 0x000066d8); - asm volatile("vfcvt.f.x.v v6, v4"); + asm volatile("vfcvt.f.x.v v16, v8"); // -39422.000, 54262.000, 12833.000, -40266.000, -64918.000, // 28317.000, 89178.000, 54320.000, -99922.000, -73005.000, // 95070.000, -24716.000, 60663.000, 59516.000, 14865.000, // 26328.000 - VCMP_U32(32, v6, 0xc719fe00, 0x4753f600, 0x46488400, 0xc71d4a00, 0xc77d9600, + VCMP_U32(32, v16, 0xc719fe00, 0x4753f600, 0x46488400, 0xc71d4a00, 0xc77d9600, 0x46dd3a00, 0x47ae2d00, 0x47543000, 0xc7c32900, 0xc78e9680, 0x47b9af00, 0xc6c11800, 0x476cf700, 0x47687c00, 0x46684400, 0x46cdb000); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -8860682, 8064547, -5636078, // -3712253, 
8492493, 9839246, // -8271278, -6234598, -4538479, // 8807688, 5640899, 3839761, // -1394518, -6118355, 1783927, // 5819812 - VLOAD_64(v4, 0xffffffffff78cbf6, 0x00000000007b0e23, 0xffffffffffaa0012, + VLOAD_64(v8, 0xffffffffff78cbf6, 0x00000000007b0e23, 0xffffffffffaa0012, 0xffffffffffc75b03, 0x00000000008195cd, 0x000000000096228e, 0xffffffffff81ca52, 0xffffffffffa0de1a, 0xffffffffffbabf91, 0x0000000000866508, 0x00000000005612c3, 0x00000000003a9711, 0xffffffffffeab8aa, 0xffffffffffa2a42d, 0x00000000001b3877, 0x000000000058cda4); - asm volatile("vfcvt.f.x.v v6, v4"); + asm volatile("vfcvt.f.x.v v16, v8"); // -8860682.000, 8064547.000, -5636078.000, -3712253.000, // 8492493.000, 9839246.000, -8271278.000, -6234598.000, // -4538479.000, 8807688.000, 5640899.000, 3839761.000, // -1394518.000, -6118355.000, 1783927.000, 5819812.000 - VCMP_U64(33, v6, 0xc160e68140000000, 0x415ec388c0000000, 0xc1557ffb80000000, + VCMP_U64(33, v16, 0xc160e68140000000, 0x415ec388c0000000, 0xc1557ffb80000000, 0xc14c527e80000000, 0x416032b9a0000000, 0x4162c451c0000000, 0xc15f8d6b80000000, 0xc157c87980000000, 0xc151501bc0000000, 0x4160cca100000000, 0x415584b0c0000000, 0x414d4b8880000000, @@ -769,63 +769,63 @@ void TEST_CASE11(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE12(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -4779, 465, 9893, -6763, -4072, // 1612, -9552, 2426, 325, 7561, // -8581, -1741, -8518, -4699, 3653, // 9937 - VLOAD_16(v4, 0xed55, 0x01d1, 0x26a5, 0xe595, 0xf018, 0x064c, 0xdab0, 0x097a, + VLOAD_16(v8, 0xed55, 0x01d1, 0x26a5, 0xe595, 0xf018, 0x064c, 0xdab0, 0x097a, 0x0145, 0x1d89, 0xde7b, 0xf933, 0xdeba, 0xeda5, 0x0e45, 0x26d1); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.f.x.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.f.x.v v16, v8, v0.t"); // 0.000, 465.000, 0.000, -6764.000, 0.000, 1612.000, // 0.000, 2426.000, 0.000, 7560.000, 0.000, -1741.000, // 0.000, -4700.000, 
0.000, 9936.000 - VCMP_U16(34, v6, 0x0, 0x5f44, 0x0, 0xee9b, 0x0, 0x664c, 0x0, 0x68bd, 0x0, + VCMP_U16(34, v16, 0x0, 0x5f44, 0x0, 0xee9b, 0x0, 0x664c, 0x0, 0x68bd, 0x0, 0x6f62, 0x0, 0xe6cd, 0x0, 0xec97, 0x0, 0x70da); - VSET(16, e32, m2); + VSET(16, e32, m8); // -39422, 54262, 12833, -40266, // -64918, 28317, 89178, 54320, -99922, // -73005, 95070, -24716, 60663, 59516, // 14865, 26328 - VLOAD_32(v4, 0xffff6602, 0x0000d3f6, 0x00003221, 0xffff62b6, 0xffff026a, + VLOAD_32(v8, 0xffff6602, 0x0000d3f6, 0x00003221, 0xffff62b6, 0xffff026a, 0x00006e9d, 0x00015c5a, 0x0000d430, 0xfffe79ae, 0xfffee2d3, 0x0001735e, 0xffff9f74, 0x0000ecf7, 0x0000e87c, 0x00003a11, 0x000066d8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.f.x.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.f.x.v v16, v8, v0.t"); // 0.000, 54262.000, 0.000, -40266.000, 0.000, 28317.000, // 0.000, 54320.000, 0.000, -73005.000, 0.000, // -24716.000, 0.000, 59516.000, 0.000, 26328.000 - VCMP_U32(35, v6, 0x0, 0x4753f600, 0x0, 0xc71d4a00, 0x0, 0x46dd3a00, 0x0, + VCMP_U32(35, v16, 0x0, 0x4753f600, 0x0, 0xc71d4a00, 0x0, 0x46dd3a00, 0x0, 0x47543000, 0x0, 0xc78e9680, 0x0, 0xc6c11800, 0x0, 0x47687c00, 0x0, 0x46cdb000); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -8860682, 8064547, -5636078, // -3712253, 8492493, 9839246, // -8271278, -6234598, -4538479, // 8807688, 5640899, 3839761, // -1394518, -6118355, 1783927, // 5819812 - VLOAD_64(v4, 0xffffffffff78cbf6, 0x00000000007b0e23, 0xffffffffffaa0012, + VLOAD_64(v8, 0xffffffffff78cbf6, 0x00000000007b0e23, 0xffffffffffaa0012, 0xffffffffffc75b03, 0x00000000008195cd, 0x000000000096228e, 0xffffffffff81ca52, 0xffffffffffa0de1a, 0xffffffffffbabf91, 0x0000000000866508, 0x00000000005612c3, 0x00000000003a9711, 0xffffffffffeab8aa, 0xffffffffffa2a42d, 0x00000000001b3877, 0x000000000058cda4); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vfcvt.f.x.v v6, v4, v0.t"); + VCLEAR(v16); + asm volatile("vfcvt.f.x.v v16, v8, v0.t"); // 0.000, 
8064547.000, 0.000, -3712253.000, 0.000, // 9839246.000, 0.000, -6234598.000, 0.000, 8807688.000, // 0.000, 3839761.000, 0.000, -6118355.000, 0.000, // 5819812.000 - VCMP_U64(36, v6, 0x0, 0x415ec388c0000000, 0x0, 0xc14c527e80000000, 0x0, + VCMP_U64(36, v16, 0x0, 0x415ec388c0000000, 0x0, 0xc14c527e80000000, 0x0, 0x4162c451c0000000, 0x0, 0xc157c87980000000, 0x0, 0x4160cca100000000, 0x0, 0x414d4b8880000000, 0x0, 0xc15756f4c0000000, 0x0, 0x4156336900000000); diff --git a/sw/riscvTests/isa/rv64uv/vfmacc.c b/sw/riscvTests/isa/rv64uv/vfmacc.c index 795e55d9..1ceda34e 100644 --- a/sw/riscvTests/isa/rv64uv/vfmacc.c +++ b/sw/riscvTests/isa/rv64uv/vfmacc.c @@ -10,36 +10,36 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.6353, -0.2290, 0.6870, -0.1031, 0.5410, 0.4211, -0.4939, // -0.8779, -0.3213, -0.6846, 0.9229, 0.0103, -0.5068, 0.8706, // 0.6309, -0.3054 - VLOAD_16(v4, 0xb915, 0xb354, 0x397f, 0xae9a, 0x3854, 0x36bd, 0xb7e7, 0xbb06, + VLOAD_16(v16, 0xb915, 0xb354, 0x397f, 0xae9a, 0x3854, 0x36bd, 0xb7e7, 0xbb06, 0xb524, 0xb97a, 0x3b62, 0x2142, 0xb80e, 0x3af7, 0x390c, 0xb4e3); // -0.8042, -0.9463, 0.4431, 0.3757, -0.5259, -0.1290, 0.4697, // 0.0952, -0.9995, 0.8823, -0.6128, -0.5010, -0.9976, 0.0081, // 0.9746, -0.7734 - VLOAD_16(v6, 0xba6f, 0xbb92, 0x3717, 0x3603, 0xb835, 0xb021, 0x3784, 0x2e17, + VLOAD_16(v24, 0xba6f, 0xbb92, 0x3717, 0x3603, 0xb835, 0xb021, 0x3784, 0x2e17, 0xbbff, 0x3b0f, 0xb8e7, 0xb802, 0xbbfb, 0x2022, 0x3bcc, 0xba30); // 0.6509, 0.3452, 0.9360, 0.3616, -0.4258, -0.0945, -0.7295, // -0.7734, 0.3411, -0.1519, -0.3557, 0.6060, 0.2598, // -0.0171, -0.8042, -0.4419 - VLOAD_16(v2, 0x3935, 0x3586, 0x3b7d, 0x35c9, 0xb6d0, 0xae0d, 0xb9d6, 0xba30, + VLOAD_16(v8, 0x3935, 0x3586, 0x3b7d, 0x35c9, 0xb6d0, 0xae0d, 0xb9d6, 0xba30, 0x3575, 0xb0dc, 0xb5b1, 0x38d9, 0x3428, 0xa45e, 0xba6f, 0xb712); - asm volatile("vfmacc.vv v2, v4, v6"); + asm volatile("vfmacc.vv v8, v16, v24"); // 1.1621, 0.5620, 1.2402, 
0.3228, -0.7100, -0.1489, -0.9614, // -0.8569, 0.6621, -0.7559, -0.9209, 0.6006, 0.7651, // -0.0100, -0.1895, -0.2057 - VCMP_U16(1, v2, 0x3ca6, 0x387f, 0x3cf6, 0x352a, 0xb9af, 0xb0c4, 0xbbb1, + VCMP_U16(1, v8, 0x3ca6, 0x387f, 0x3cf6, 0x352a, 0xb9af, 0xb0c4, 0xbbb1, 0xbadb, 0x394c, 0xba0c, 0xbb5f, 0x38ce, 0x3a1f, 0xa123, 0xb20f, 0xb295); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.72754014, 0.34003398, 0.70107144, -0.41727209, // -0.52331781, -0.11821542, -0.16069038, 0.30835113, // -0.59407759, -0.53240144, -0.92390168, 0.33251825, // -0.45979658, 0.32465541, -0.99342769, -0.16221718 - VLOAD_32(v4, 0x3f3a4012, 0x3eae18ef, 0x3f33796b, 0xbed5a4b0, 0xbf05f828, + VLOAD_32(v16, 0x3f3a4012, 0x3eae18ef, 0x3f33796b, 0xbed5a4b0, 0xbf05f828, 0xbdf21aed, 0xbe248c05, 0x3e9de033, 0xbf181578, 0xbf084b76, 0xbf6c84d2, 0x3eaa3fd5, 0xbeeb6a75, 0x3ea6393c, 0xbf7e5147, 0xbe261c43); @@ -47,7 +47,7 @@ void TEST_CASE1(void) { // 0.93318671, -0.32301557, 0.41414812, 0.81797487, // -0.21847244, -0.00211347, -0.72070456, -0.58624452, // 0.07381243, -0.16745377, 0.55389816, -0.23427610 - VLOAD_32(v6, 0x3f7377f9, 0xbded11e6, 0x3e07f41b, 0x3e694fdb, 0x3f6ee553, + VLOAD_32(v24, 0x3f7377f9, 0xbded11e6, 0x3e07f41b, 0x3e694fdb, 0x3f6ee553, 0xbea5624c, 0x3ed40b39, 0x3f5166cd, 0xbe5fb73d, 0xbb0a8224, 0xbf388018, 0xbf16141f, 0x3d972af9, 0xbe2b7900, 0x3f0dcc45, 0xbe6fe613); @@ -55,29 +55,29 @@ void TEST_CASE1(void) { // 0.24971253, 0.97819000, 0.55116856, -0.97427863, 0.61764765, // 0.86367106, 0.48787504, -0.26353455, -0.22228357, 0.40454853, // 0.64000225, -0.51787829 - VLOAD_32(v2, 0xbd98c591, 0xbb97273a, 0xbf79fed5, 0x3f71a618, 0x3e7fb4a4, + VLOAD_32(v8, 0xbd98c591, 0xbb97273a, 0xbf79fed5, 0x3f71a618, 0x3e7fb4a4, 0x3f7a6aa9, 0x3f0d1962, 0xbf796a53, 0x3f1e1e28, 0x3f5d198c, 0x3ef9cac2, 0xbe86ee00, 0xbe639e4e, 0x3ecf20fc, 0x3f23d730, 0xbf0493ac); - asm volatile("vfmacc.vv v2, v4, v6"); + asm volatile("vfmacc.vv v8, v16, v24"); // 0.61733103, -0.04397407, -0.88346541, 0.84886783, // -0.23864070, 
1.01637542, 0.48461893, -0.72205520, // 0.74743724, 0.86479628, 1.15373516, -0.45847154, // -0.25622228, 0.35018376, 0.08974451, -0.47987467 - VCMP_U32(2, v2, 0x3f1e0968, 0xbd341e29, 0xbf622aca, 0x3f594f67, 0xbe745e3a, + VCMP_U32(2, v8, 0x3f1e0968, 0xbd341e29, 0xbf622aca, 0x3f594f67, 0xbe745e3a, 0x3f821897, 0x3ef81ff9, 0xbf38d89b, 0x3f3f580c, 0x3f5d634a, 0x3f93ad98, 0xbeeabcc8, 0xbe832f91, 0x3eb34b49, 0x3db7cbf5, 0xbef5b222); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.8992497708533775, 0.5795977429472710, -0.9421852470430045, // 0.3407052467776674, -0.1137141395145149, 0.3284679540868891, // 0.9781857174570949, 0.6033619236526551, -0.1287683269222892, // 0.6555379481826638, 0.6785468173738887, 0.6923267883951645, // 0.2185923779321672, -0.1310544396012536, -0.7596952716763763, // -0.4011231994121780, - VLOAD_64(v4, 0xbfecc6a774980626, 0x3fe28c1090d967fc, 0xbfee2661acda592c, + VLOAD_64(v16, 0xbfecc6a774980626, 0x3fe28c1090d967fc, 0xbfee2661acda592c, 0x3fd5ce1d611f1590, 0xbfbd1c5eae4ec060, 0x3fd5059e742594fc, 0x3fef4d4c223c8f84, 0x3fe34ebdaa37ac76, 0xbfc07b7b047228c0, 0x3fe4fa2ab8176850, 0x3fe5b6a7d0ad9fa2, 0x3fe6278a8249a986, @@ -89,7 +89,7 @@ void TEST_CASE1(void) { // 0.1775335284298603, -0.7021940272509897, 0.9279338928738479, // -0.7358371767028979, 0.2529700403354449, // -0.8333759771774525, -0.4016540133317048, - VLOAD_64(v6, 0x3fd36160c2769da4, 0x3fe00d350479c3ea, 0x3fc852de63fd6e08, + VLOAD_64(v24, 0x3fd36160c2769da4, 0x3fe00d350479c3ea, 0x3fc852de63fd6e08, 0x3fd51548a8a19488, 0x3fe306781d37ea9a, 0xbfec06bc7e604fb8, 0x3fd7ce88a1b60584, 0x3fed4f57e864d750, 0x3fb4a00b38c069f0, 0x3fc6b96b2d465dc0, 0xbfe6785f9bcfaa42, 0x3fedb1a26b57c7d6, @@ -101,20 +101,20 @@ void TEST_CASE1(void) { // 0.0322804237161178, -0.8345203693668675, 0.7175251091228996, // -0.7419013213335950, -0.2977694001417877, 0.4556506623709609, // -0.7832443836668095, - VLOAD_64(v2, 0xbfb0ffef54d0f220, 0xbfe5937e2c0e5202, 0x3fdba8604ddf0d80, + VLOAD_64(v8, 0xbfb0ffef54d0f220, 
0xbfe5937e2c0e5202, 0x3fdba8604ddf0d80, 0xbfc4d508600804d8, 0x3f93c690cdf47e40, 0xbfd6835fd0838044, 0xbfdef17840e363cc, 0x3feaa6ceed574e1a, 0x3f9b1871c270c340, 0x3fa0870f4852d0c0, 0xbfeab4640fc8d962, 0x3fe6f5f737b7bbe2, 0xbfe7bda7d6ff9552, 0xbfd30ea762d6f1ec, 0x3fdd296165522d4c, 0xbfe910568693fcea); - asm volatile("vfmacc.vv v2, v4, v6"); + asm volatile("vfmacc.vv v8, v16, v24"); // -0.3387147047225807, -0.3835212035574087, 0.2531093914663254, // -0.0505147363757267, -0.0482954147100367, -0.6394480093239447, // -0.1196218202565150, 1.3855029309732363, 0.0160859479159849, // 0.1486603886766570, -1.3109918917369803, 1.3599586010192732, // -0.9027497195599737, -0.3309222470138559, 1.0887624517613512, // -0.6221316407824544, - VCMP_U64(3, v2, 0xbfd5ad8070dd4c48, 0xbfd88b9c84a68118, 0x3fd032f1bbaa2211, + VCMP_U64(3, v8, 0xbfd5ad8070dd4c48, 0xbfd88b9c84a68118, 0x3fd032f1bbaa2211, 0xbfa9dd1149664d37, 0xbfa8ba2d3573e621, 0xbfe4765babf13c96, 0xbfbe9f891de3c4d6, 0x3ff62b051f10acd5, 0x3f9078d5b0e5b2ba, 0x3fc3074db9c9d78e, 0xbff4f9d2a2454dd5, 0x3ff5c263f334aac4, @@ -126,62 +126,62 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked, the numbers are taken from // TEST_CASE1) void TEST_CASE2(void) { - VSET(16, e16, m2); - VLOAD_16(v4, 0xb915, 0xb354, 0x397f, 0xae9a, 0x3854, 0x36bd, 0xb7e7, 0xbb06, + VSET(16, e16, m8); + VLOAD_16(v16, 0xb915, 0xb354, 0x397f, 0xae9a, 0x3854, 0x36bd, 0xb7e7, 0xbb06, 0xb524, 0xb97a, 0x3b62, 0x2142, 0xb80e, 0x3af7, 0x390c, 0xb4e3); - VLOAD_16(v6, 0xba6f, 0xbb92, 0x3717, 0x3603, 0xb835, 0xb021, 0x3784, 0x2e17, + VLOAD_16(v24, 0xba6f, 0xbb92, 0x3717, 0x3603, 0xb835, 0xb021, 0x3784, 0x2e17, 0xbbff, 0x3b0f, 0xb8e7, 0xb802, 0xbbfb, 0x2022, 0x3bcc, 0xba30); - VLOAD_16(v2, 0x3935, 0x3586, 0x3b7d, 0x35c9, 0xb6d0, 0xae0d, 0xb9d6, 0xba30, + VLOAD_16(v8, 0x3935, 0x3586, 0x3b7d, 0x35c9, 0xb6d0, 0xae0d, 0xb9d6, 0xba30, 0x3575, 0xb0dc, 0xb5b1, 0x38d9, 0x3428, 0xa45e, 0xba6f, 0xb712); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vfmacc.vv v2, 
v4, v6, v0.t"); - VCMP_U16(4, v2, 0x3935, 0x387f, 0x3b7d, 0x352a, 0xb6d0, 0xb0c4, 0xb9d6, + asm volatile("vfmacc.vv v8, v16, v24, v0.t"); + VCMP_U16(4, v8, 0x3935, 0x387f, 0x3b7d, 0x352a, 0xb6d0, 0xb0c4, 0xb9d6, 0xbadb, 0x3575, 0xba0c, 0xb5b1, 0x38ce, 0x3428, 0xa123, 0xba6f, 0xb295); - VSET(16, e32, m2); - VLOAD_32(v4, 0x3f3a4012, 0x3eae18ef, 0x3f33796b, 0xbed5a4b0, 0xbf05f828, + VSET(16, e32, m8); + VLOAD_32(v16, 0x3f3a4012, 0x3eae18ef, 0x3f33796b, 0xbed5a4b0, 0xbf05f828, 0xbdf21aed, 0xbe248c05, 0x3e9de033, 0xbf181578, 0xbf084b76, 0xbf6c84d2, 0x3eaa3fd5, 0xbeeb6a75, 0x3ea6393c, 0xbf7e5147, 0xbe261c43); - VLOAD_32(v6, 0x3f7377f9, 0xbded11e6, 0x3e07f41b, 0x3e694fdb, 0x3f6ee553, + VLOAD_32(v24, 0x3f7377f9, 0xbded11e6, 0x3e07f41b, 0x3e694fdb, 0x3f6ee553, 0xbea5624c, 0x3ed40b39, 0x3f5166cd, 0xbe5fb73d, 0xbb0a8224, 0xbf388018, 0xbf16141f, 0x3d972af9, 0xbe2b7900, 0x3f0dcc45, 0xbe6fe613); - VLOAD_32(v2, 0xbd98c591, 0xbb97273a, 0xbf79fed5, 0x3f71a618, 0x3e7fb4a4, + VLOAD_32(v8, 0xbd98c591, 0xbb97273a, 0xbf79fed5, 0x3f71a618, 0x3e7fb4a4, 0x3f7a6aa9, 0x3f0d1962, 0xbf796a53, 0x3f1e1e28, 0x3f5d198c, 0x3ef9cac2, 0xbe86ee00, 0xbe639e4e, 0x3ecf20fc, 0x3f23d730, 0xbf0493ac); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vfmacc.vv v2, v4, v6, v0.t"); - VCMP_U32(5, v2, 0xbd98c591, 0xbd341e29, 0xbf79fed5, 0x3f594f67, 0x3e7fb4a4, + asm volatile("vfmacc.vv v8, v16, v24, v0.t"); + VCMP_U32(5, v8, 0xbd98c591, 0xbd341e29, 0xbf79fed5, 0x3f594f67, 0x3e7fb4a4, 0x3f821897, 0x3f0d1962, 0xbf38d89b, 0x3f1e1e28, 0x3f5d634a, 0x3ef9cac2, 0xbeeabcc8, 0xbe639e4e, 0x3eb34b49, 0x3f23d730, 0xbef5b222); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0xbfecc6a774980626, 0x3fe28c1090d967fc, 0xbfee2661acda592c, + VSET(16, e64, m8); + VLOAD_64(v16, 0xbfecc6a774980626, 0x3fe28c1090d967fc, 0xbfee2661acda592c, 0x3fd5ce1d611f1590, 0xbfbd1c5eae4ec060, 0x3fd5059e742594fc, 0x3fef4d4c223c8f84, 0x3fe34ebdaa37ac76, 0xbfc07b7b047228c0, 0x3fe4fa2ab8176850, 0x3fe5b6a7d0ad9fa2, 0x3fe6278a8249a986, 0x3fcbfad5c52fcfd8, 
0xbfc0c664520a9f78, 0xbfe84f6c7558d3f0, 0xbfd9ac00a3c919a8); - VLOAD_64(v6, 0x3fd36160c2769da4, 0x3fe00d350479c3ea, 0x3fc852de63fd6e08, + VLOAD_64(v24, 0x3fd36160c2769da4, 0x3fe00d350479c3ea, 0x3fc852de63fd6e08, 0x3fd51548a8a19488, 0x3fe306781d37ea9a, 0xbfec06bc7e604fb8, 0x3fd7ce88a1b60584, 0x3fed4f57e864d750, 0x3fb4a00b38c069f0, 0x3fc6b96b2d465dc0, 0xbfe6785f9bcfaa42, 0x3fedb1a26b57c7d6, 0xbfe78bfa6823d662, 0x3fd030a94086f244, 0xbfeaab0418e7f974, 0xbfd9b4b308e446c8); - VLOAD_64(v2, 0xbfb0ffef54d0f220, 0xbfe5937e2c0e5202, 0x3fdba8604ddf0d80, + VLOAD_64(v8, 0xbfb0ffef54d0f220, 0xbfe5937e2c0e5202, 0x3fdba8604ddf0d80, 0xbfc4d508600804d8, 0x3f93c690cdf47e40, 0xbfd6835fd0838044, 0xbfdef17840e363cc, 0x3feaa6ceed574e1a, 0x3f9b1871c270c340, 0x3fa0870f4852d0c0, 0xbfeab4640fc8d962, 0x3fe6f5f737b7bbe2, 0xbfe7bda7d6ff9552, 0xbfd30ea762d6f1ec, 0x3fdd296165522d4c, 0xbfe910568693fcea); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vfmacc.vv v2, v4, v6, v0.t"); - VCMP_U64(6, v2, 0xbfb0ffef54d0f220, 0xbfd88b9c84a68118, 0x3fdba8604ddf0d80, + asm volatile("vfmacc.vv v8, v16, v24, v0.t"); + VCMP_U64(6, v8, 0xbfb0ffef54d0f220, 0xbfd88b9c84a68118, 0x3fdba8604ddf0d80, 0xbfa9dd1149664d37, 0x3f93c690cdf47e40, 0xbfe4765babf13c96, 0xbfdef17840e363cc, 0x3ff62b051f10acd5, 0x3f9b1871c270c340, 0x3fc3074db9c9d78e, 0xbfeab4640fc8d962, 0x3ff5c263f334aac4, @@ -192,11 +192,11 @@ void TEST_CASE2(void) { // Simple random test with similar values (with scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.7407, -0.1365, 0.0000, -0.8525, -0.0812, 0.9609, -0.3740, // 0.2800, 0.9692, 0.4045, 0.0205, -0.5503, 0.6499, 0.4470, // -0.9360, -0.4426 - VLOAD_16(v6, 0x39ed, 0xb05e, 0x0000, 0xbad2, 0xad33, 0x3bb0, 0xb5fc, 0x347b, + VLOAD_16(v24, 0x39ed, 0xb05e, 0x0000, 0xbad2, 0xad33, 0x3bb0, 0xb5fc, 0x347b, 0x3bc1, 0x3679, 0x253e, 0xb867, 0x3933, 0x3727, 0xbb7d, 0xb715); float fscalar_16; // 0.5757 @@ -204,22 +204,22 @@ void TEST_CASE3(void) { // -0.1472, -0.8906, 0.2247, 0.6118, -0.0908, 
-0.6450, -0.5415, // 0.0505, -0.4595, 0.1157, -0.3494, 0.6670, -0.9658, -0.2944, // -0.8096, -0.3364 - VLOAD_16(v2, 0xb0b6, 0xbb20, 0x3331, 0x38e5, 0xadcf, 0xb929, 0xb855, 0x2a77, + VLOAD_16(v8, 0xb0b6, 0xbb20, 0x3331, 0x38e5, 0xadcf, 0xb929, 0xb855, 0x2a77, 0xb75a, 0x2f68, 0xb597, 0x3956, 0xbbba, 0xb4b6, 0xba7a, 0xb562); - asm volatile("vfmacc.vf v2, %[A], v6" ::[A] "f"(fscalar_16)); + asm volatile("vfmacc.vf v8, %[A], v24" ::[A] "f"(fscalar_16)); // 0.2793, -0.9692, 0.2247, 0.1210, -0.1375, -0.0918, -0.7568, // 0.2118, 0.0986, 0.3486, -0.3376, 0.3501, -0.5918, -0.0371, // -1.3486, -0.5913 - VCMP_U16(7, v2, 0x3478, 0xbbc1, 0x3331, 0x2fbf, 0xb067, 0xade0, 0xba0e, + VCMP_U16(7, v8, 0x3478, 0xbbc1, 0x3331, 0x2fbf, 0xb067, 0xade0, 0xba0e, 0x32c6, 0x2e4e, 0x3594, 0xb567, 0x359a, 0xb8bc, 0xa8bf, 0xbd65, 0xb8bb); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.79164708, -0.13258822, -0.94492996, -0.93729085, // 0.80344391, 0.77393818, 0.31253836, -0.42539355, // -0.20085664, -0.63946086, 0.24876182, -0.45639724, // 0.92842573, 0.39117134, -0.70563781, 0.13946204 - VLOAD_32(v6, 0xbf4aa962, 0xbe07c535, 0xbf71e6ee, 0xbf6ff24b, 0x3f4dae80, + VLOAD_32(v24, 0xbf4aa962, 0xbe07c535, 0xbf71e6ee, 0xbf6ff24b, 0x3f4dae80, 0x3f4620d0, 0x3ea00507, 0xbed9cd2f, 0xbe4dad5d, 0xbf23b3b5, 0x3e7ebb6b, 0xbee9ace6, 0x3f6dad4f, 0x3ec8479c, 0xbf34a4ae, 0x3e0ecf23); @@ -230,29 +230,29 @@ void TEST_CASE3(void) { // 0.34759882, 0.65410614, 0.99296939, -0.31534156, // -0.89647168, 0.47623411, -0.68185741, 0.77072626, // 0.19827089, -0.16254151, 0.81625229, -0.24369264 - VLOAD_32(v2, 0xbee017a1, 0xbf3e0603, 0x3f1a339a, 0x3edf5b43, 0x3eb1f879, + VLOAD_32(v8, 0xbee017a1, 0xbf3e0603, 0x3f1a339a, 0x3edf5b43, 0x3eb1f879, 0x3f277380, 0x3f7e333e, 0xbea17473, 0xbf657f2b, 0x3ef3d4f5, 0xbf2e8e35, 0x3f454e51, 0x3e4b0786, 0xbe267148, 0x3f50f5e9, 0xbe798a90); - asm volatile("vfmacc.vf v2, %[A], v6" ::[A] "f"(fscalar_32)); + asm volatile("vfmacc.vf v8, %[A], v24" ::[A] "f"(fscalar_32)); // -1.21056581, 
-0.87172520, -0.32018578, // -0.47883448, 1.13200164, 1.40970242, 1.29810071, // -0.73065352, -1.09256816, -0.14807191, -0.43899110, // 0.32514536, 1.10469353, 0.21935931, 0.12733769, -0.10753576 - VCMP_U32(8, v2, 0xbf9af3d2, 0xbf5f2962, 0xbea3ef65, 0xbef529cb, 0x3f90e56e, + VCMP_U32(8, v8, 0xbf9af3d2, 0xbf5f2962, 0xbea3ef65, 0xbef529cb, 0x3f90e56e, 0x3fb47121, 0x3fa6282b, 0xbf3b0c1c, 0xbf8bd946, 0xbe17a02a, 0xbee0c371, 0x3ea67974, 0x3f8d6699, 0x3e609fb9, 0x3e0264cf, 0xbddc3bb6); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.1981785436218435, 0.2324321764718080, 0.3529425082887112, // -0.4889737836823891, 0.1335009259637479, -0.7964186221277452, // -0.2707335519445100, 0.8070543770008602, -0.1237072120160827, // -0.2357903062216291, -0.0812498320849093, 0.8656662449573254, // 0.7178262144151533, -0.3106178959409680, -0.1410836751949509, // 0.6904294937898030 - VLOAD_64(v6, 0xbfc95dea1dcff710, 0x3fcdc0566a3e04a0, 0x3fd6969c2c9df760, + VLOAD_64(v24, 0xbfc95dea1dcff710, 0x3fcdc0566a3e04a0, 0x3fd6969c2c9df760, 0xbfdf4b58b2611a74, 0x3fc1168eef800078, 0xbfe97c42e7fed97a, 0xbfd153b2d1e20588, 0x3fe9d363b369fec4, 0xbfbfab469de36f10, 0xbfce2e6072f7c5c0, 0xbfb4ccc9fb9c3490, 0x3febb389b26af886, @@ -267,20 +267,20 @@ void TEST_CASE3(void) { // 0.7665070398551490, -0.7817863527411446, -0.2155326059803253, // -0.7807395886866346, 0.2528540140694266, -0.1740695080779533, // 0.7247829241803623 - VLOAD_64(v2, 0xbfee2ffe0122d3b6, 0xbfb1d468c9a80310, 0xbfea2623e6043a6c, + VLOAD_64(v8, 0xbfee2ffe0122d3b6, 0xbfb1d468c9a80310, 0xbfea2623e6043a6c, 0xbfee62d76bc21ae2, 0xbfe454d179c08866, 0x3fea1c44af53fb1a, 0x3f9d95d7dd994d80, 0xbfe34777e1831e42, 0xbfeeaa7676c316f0, 0x3fe88739c58a9cbe, 0xbfe90464d02f6f4c, 0xbfcb96928af41d88, 0xbfe8fbd197034034, 0x3fd02ec29a45caf0, 0xbfc647e8de367aa0, 0x3fe7316bf581b994); - asm volatile("vfmacc.vf v2, %[A], v6" ::[A] "f"(dscalar_64)); + asm volatile("vfmacc.vf v8, %[A], v24" ::[A] "f"(dscalar_64)); // -1.1165434690834197, 
0.1334713860074080, -0.5087250014486948, // -1.3768719457941574, -0.5186894774163083, 0.1199732804009315, // -0.2076977828175325, 0.1027976993173292, -1.0664141864137089, // 0.5604536790898100, -0.8527892757662274, 0.5409592190351925, // -0.1534427913930433, -0.0185899752875187, -0.2973602653990804, // 1.3281381674327271 - VCMP_U64(9, v2, 0xbff1dd5caf44692a, 0x3fc1159722ed4311, 0xbfe04779a77c2679, + VCMP_U64(9, v8, 0xbff1dd5caf44692a, 0x3fc1159722ed4311, 0xbfe04779a77c2679, 0xbff607aae09f73e1, 0xbfe0991aacc90937, 0x3fbeb691a3b74133, 0xbfca95d7485395ec, 0x3fba50f334ac0644, 0xbff11008526a327e, 0x3fe1ef3c8dd3a2b9, 0xbfeb4a0cbc397482, 0x3fe14f89b5473a2f, @@ -292,40 +292,40 @@ void TEST_CASE3(void) { // Simple random test with similar values (masked with scalar, values taken from // TEST_CASE3) void TEST_CASE4(void) { - VSET(16, e16, m2); - VLOAD_16(v6, 0x39ed, 0xb05e, 0x0000, 0xbad2, 0xad33, 0x3bb0, 0xb5fc, 0x347b, + VSET(16, e16, m8); + VLOAD_16(v24, 0x39ed, 0xb05e, 0x0000, 0xbad2, 0xad33, 0x3bb0, 0xb5fc, 0x347b, 0x3bc1, 0x3679, 0x253e, 0xb867, 0x3933, 0x3727, 0xbb7d, 0xb715); float fscalar_16; BOX_HALF_IN_FLOAT(fscalar_16, 0x389b); - VLOAD_16(v2, 0xb0b6, 0xbb20, 0x3331, 0x38e5, 0xadcf, 0xb929, 0xb855, 0x2a77, + VLOAD_16(v8, 0xb0b6, 0xbb20, 0x3331, 0x38e5, 0xadcf, 0xb929, 0xb855, 0x2a77, 0xb75a, 0x2f68, 0xb597, 0x3956, 0xbbba, 0xb4b6, 0xba7a, 0xb562); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vfmacc.vf v2, %[A], v6, v0.t" ::[A] "f"(fscalar_16)); - VCMP_U16(10, v2, 0xb0b6, 0xbbc1, 0x3331, 0x2fbf, 0xadcf, 0xade0, 0xb855, + asm volatile("vfmacc.vf v8, %[A], v24, v0.t" ::[A] "f"(fscalar_16)); + VCMP_U16(10, v8, 0xb0b6, 0xbbc1, 0x3331, 0x2fbf, 0xadcf, 0xade0, 0xb855, 0x32c6, 0xb75a, 0x3594, 0xb597, 0x359a, 0xbbba, 0xa8bf, 0xba7a, 0xb8bb); - VSET(16, e32, m2); - VLOAD_32(v6, 0xbf4aa962, 0xbe07c535, 0xbf71e6ee, 0xbf6ff24b, 0x3f4dae80, + VSET(16, e32, m8); + VLOAD_32(v24, 0xbf4aa962, 0xbe07c535, 0xbf71e6ee, 0xbf6ff24b, 0x3f4dae80, 0x3f4620d0, 0x3ea00507, 0xbed9cd2f, 
0xbe4dad5d, 0xbf23b3b5, 0x3e7ebb6b, 0xbee9ace6, 0x3f6dad4f, 0x3ec8479c, 0xbf34a4ae, 0x3e0ecf23); float fscalar_32; BOX_FLOAT_IN_FLOAT(fscalar_32, 0x3f79eed6); - VLOAD_32(v2, 0xbee017a1, 0xbf3e0603, 0x3f1a339a, 0x3edf5b43, 0x3eb1f879, + VLOAD_32(v8, 0xbee017a1, 0xbf3e0603, 0x3f1a339a, 0x3edf5b43, 0x3eb1f879, 0x3f277380, 0x3f7e333e, 0xbea17473, 0xbf657f2b, 0x3ef3d4f5, 0xbf2e8e35, 0x3f454e51, 0x3e4b0786, 0xbe267148, 0x3f50f5e9, 0xbe798a90); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vfmacc.vf v2, %[A], v6, v0.t" ::[A] "f"(fscalar_32)); - VCMP_U32(11, v2, 0xbee017a1, 0xbf5f2962, 0x3f1a339a, 0xbef529cb, 0x3eb1f879, + asm volatile("vfmacc.vf v8, %[A], v24, v0.t" ::[A] "f"(fscalar_32)); + VCMP_U32(11, v8, 0xbee017a1, 0xbf5f2962, 0x3f1a339a, 0xbef529cb, 0x3eb1f879, 0x3fb47121, 0x3f7e333e, 0xbf3b0c1c, 0xbf657f2b, 0xbe17a02a, 0xbf2e8e35, 0x3ea67974, 0x3e4b0786, 0x3e609fb9, 0x3f50f5e9, 0xbddc3bb6); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0xbfc95dea1dcff710, 0x3fcdc0566a3e04a0, 0x3fd6969c2c9df760, + VSET(16, e64, m8); + VLOAD_64(v24, 0xbfc95dea1dcff710, 0x3fcdc0566a3e04a0, 0x3fd6969c2c9df760, 0xbfdf4b58b2611a74, 0x3fc1168eef800078, 0xbfe97c42e7fed97a, 0xbfd153b2d1e20588, 0x3fe9d363b369fec4, 0xbfbfab469de36f10, 0xbfce2e6072f7c5c0, 0xbfb4ccc9fb9c3490, 0x3febb389b26af886, @@ -333,15 +333,15 @@ void TEST_CASE4(void) { 0x3fe617ff9800ac5a); double dscalar_64; BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3febf6db7175e482); - VLOAD_64(v2, 0xbfee2ffe0122d3b6, 0xbfb1d468c9a80310, 0xbfea2623e6043a6c, + VLOAD_64(v8, 0xbfee2ffe0122d3b6, 0xbfb1d468c9a80310, 0xbfea2623e6043a6c, 0xbfee62d76bc21ae2, 0xbfe454d179c08866, 0x3fea1c44af53fb1a, 0x3f9d95d7dd994d80, 0xbfe34777e1831e42, 0xbfeeaa7676c316f0, 0x3fe88739c58a9cbe, 0xbfe90464d02f6f4c, 0xbfcb96928af41d88, 0xbfe8fbd197034034, 0x3fd02ec29a45caf0, 0xbfc647e8de367aa0, 0x3fe7316bf581b994); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vfmacc.vf v2, %[A], v6, v0.t" ::[A] "f"(dscalar_64)); - VCMP_U64(12, v2, 0xbfee2ffe0122d3b6, 0x3fc1159722ed4311, 
0xbfea2623e6043a6c, + asm volatile("vfmacc.vf v8, %[A], v24, v0.t" ::[A] "f"(dscalar_64)); + VCMP_U64(12, v8, 0xbfee2ffe0122d3b6, 0x3fc1159722ed4311, 0xbfea2623e6043a6c, 0xbff607aae09f73e1, 0xbfe454d179c08866, 0x3fbeb691a3b74133, 0x3f9d95d7dd994d80, 0x3fba50f334ac0644, 0xbfeeaa7676c316f0, 0x3fe1ef3c8dd3a2b9, 0xbfe90464d02f6f4c, 0x3fe14f89b5473a2f, diff --git a/sw/riscvTests/isa/rv64uv/vfmadd.c b/sw/riscvTests/isa/rv64uv/vfmadd.c index e1737b22..3f83835e 100644 --- a/sw/riscvTests/isa/rv64uv/vfmadd.c +++ b/sw/riscvTests/isa/rv64uv/vfmadd.c @@ -10,36 +10,36 @@ // Simple random test with similar values + 1 subnormal void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.3501, -0.3289, -0.8853, -0.4082, -0.4346, -0.2659, 0.9316, // 0.5444, -0.0538, 0.7686, 0.8203, -0.8623, 0.3059, 0.0372, // 0.5337, -0.5815 - VLOAD_16(v4, 0x359a, 0xb543, 0xbb15, 0xb688, 0xb6f4, 0xb441, 0x3b74, 0x385b, + VLOAD_16(v16, 0x359a, 0xb543, 0xbb15, 0xb688, 0xb6f4, 0xb441, 0x3b74, 0x385b, 0xaae4, 0x3a26, 0x3a90, 0xbae6, 0x34e5, 0x28c4, 0x3845, 0xb8a7); // -0.8105, 0.5000, -0.8374, -0.8394, 0.3098, 0.1328, -0.2864, // -0.4041, -0.1729, 0.0196, 0.2739, 0.8071, -0.1553, 0.2815, // -0.9067, -0.2495 - VLOAD_16(v6, 0xba7c, 0x3800, 0xbab3, 0xbab7, 0x34f5, 0x3040, 0xb495, 0xb677, + VLOAD_16(v24, 0xba7c, 0x3800, 0xbab3, 0xbab7, 0x34f5, 0x3040, 0xb495, 0xb677, 0xb188, 0x2502, 0x3462, 0x3a75, 0xb0f8, 0x3481, 0xbb41, 0xb3fc); // -0.6558, -0.1006, 0.4558, -0.0784, 0.1539, 0.6748, 0.3347, // -0.3416, 0.0614, 0.2289, -0.0829, 0.3838, -0.6348, 0.0843, // -0.6890, -0.2598 - VLOAD_16(v2, 0xb93f, 0xae71, 0x374b, 0xad05, 0x30ed, 0x3966, 0x355b, 0xb577, + VLOAD_16(v8, 0xb93f, 0xae71, 0x374b, 0xad05, 0x30ed, 0x3966, 0x355b, 0xb577, 0x2bdc, 0x3353, 0xad4f, 0x3624, 0xb914, 0x2d65, 0xb983, 0xb428); - asm volatile("vfmadd.vv v2, v4, v6"); + asm volatile("vfmadd.vv v8, v16, v24"); // -1.0400, 0.5332, -1.2412, -0.8071, 0.2429, -0.0466, 0.0254, // -0.5898, -0.1761, 0.1954, 0.2058, 0.4761, -0.3496, 0.2847, 
// -1.2744, -0.0984 - VCMP_U16(1, v2, 0xbc29, 0x3844, 0xbcf7, 0xba75, 0x33c6, 0xa9f7, 0x2684, + VCMP_U16(1, v8, 0xbc29, 0x3844, 0xbcf7, 0xba75, 0x33c6, 0xa9f7, 0x2684, 0xb8b8, 0xb1a3, 0x3241, 0x3297, 0x379e, 0xb597, 0x348e, 0xbd19, 0xae4d); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.20637949, -0.63321692, 0.40850523, 0.58702314, // -0.25534528, -0.22053087, 0.96057665, 0.85530519, // 0.74252450, -0.87175107, -0.00987994, -0.52556008, 0.26113954, // -0.71307814, 0.78942811, 0.48685852 - VLOAD_32(v4, 0xbe535525, 0xbf221a81, 0x3ed12799, 0x3f164726, 0xbe82bc9e, + VLOAD_32(v16, 0xbe535525, 0xbf221a81, 0x3ed12799, 0x3f164726, 0xbe82bc9e, 0xbe61d2d8, 0x3f75e85a, 0x3f5af548, 0x3f3e1616, 0xbf5f2b14, 0xbc21df78, 0xbf068b1b, 0x3e85b415, 0xbf368c4a, 0x3f4a17f6, 0x3ef94585); @@ -47,7 +47,7 @@ void TEST_CASE1(void) { // -0.96695948, 0.71368766, 0.23281342, -0.67807233, // 0.79363507, 0.62817359, 0.37205252, 0.27726358, // -0.85021532, -0.16634122, -0.58148408, 0.06963744 - VLOAD_32(v6, 0xbe20e41a, 0x3f54b4d4, 0x3f133971, 0x3f5a88f6, 0xbf778aa8, + VLOAD_32(v24, 0xbe20e41a, 0x3f54b4d4, 0x3f133971, 0x3f5a88f6, 0xbf778aa8, 0x3f36b43c, 0x3e6e66a4, 0xbf2d9626, 0x3f4b2bab, 0x3f20cffc, 0x3ebe7dab, 0x3e8df57e, 0xbf59a7b6, 0xbe2a555a, 0xbf14dc24, 0x3d8e9e13); @@ -55,29 +55,29 @@ void TEST_CASE1(void) { // -0.84276563, 0.00681775, 0.30881208, 0.27571887, // 0.12349209, 0.29805747, -0.55497122, -0.52685922, 0.82809180, // -0.83231467, 0.20959182, 0.15603130 - VLOAD_32(v2, 0xbf216ffb, 0x3f139101, 0x3da7e970, 0xbd76f8fa, 0xbf57bf7d, + VLOAD_32(v8, 0xbf216ffb, 0x3f139101, 0x3da7e970, 0xbd76f8fa, 0xbf57bf7d, 0x3bdf676d, 0x3e9e1c9e, 0x3e8d2b06, 0x3dfce96c, 0x3e989afd, 0xbf0e1298, 0xbf06e03f, 0x3f53fdd3, 0xbf551293, 0x3e569f3d, 0x3e1fc6ab); - asm volatile("vfmadd.vv v2, v4, v6"); + asm volatile("vfmadd.vv v8, v16, v24"); // -0.02697416, 0.46587816, 0.60858786, 0.81825721, // -0.75176322, 0.71218413, 0.52945113, -0.44224855, // 0.88533098, 0.36834168, 0.37753561, 0.55415976, // -0.63396782, 
0.42716417, -0.41602641, 0.14560261 - VCMP_U32(2, v2, 0xbcdcf8e5, 0x3eee8795, 0x3f1bcc6a, 0x3f51794e, 0xbf40738e, + VCMP_U32(2, v8, 0xbcdcf8e5, 0x3eee8795, 0x3f1bcc6a, 0x3f51794e, 0xbf40738e, 0x3f3651b3, 0x3f078a1b, 0xbee26e67, 0x3f62a50d, 0x3ebc9748, 0x3ec14c59, 0x3f0ddd6a, 0xbf224bb7, 0x3edab544, 0xbed5016a, 0x3e1518d9); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 0.0308264568094008, 0.5865382185158325, 0.4543411851187289, // 0.0036656924511687, -0.3103508259554966, 0.9658177901158624, // -0.3381631341283657, -0.2003719333831677, 0.8989532087589025, // -0.8054516243685412, 0.8701363884969631, // -0.3585976675814562, 0.4150155349314333, // -0.6908185611649824, 0.8412555125501906, -0.3357469205066645 - VLOAD_64(v4, 0x3f9f90f87f644880, 0x3fe2c4ebcc4c25b4, 0x3fdd13ed0cd3e484, + VLOAD_64(v16, 0x3f9f90f87f644880, 0x3fe2c4ebcc4c25b4, 0x3fdd13ed0cd3e484, 0x3f6e0783a63d2400, 0xbfd3dcc9b5f0fd10, 0x3feee7fab5ce29f4, 0xbfd5a476fc72d40c, 0xbfc9a5c99a756020, 0x3fecc43985081eb2, 0xbfe9c6427c2588e6, 0x3febd8284474eda0, 0xbfd6f343a1abca7c, @@ -89,7 +89,7 @@ void TEST_CASE1(void) { // -0.2540075520951832, 0.6661048539265222, 0.3013290199421905, // -0.0367795249610035, -0.7178804756969177, 0.1577316726139908, // -0.1242681642824526, -0.9006297759672148 - VLOAD_64(v6, 0x3fe2d21c0f5cd922, 0xbfef9fc912e0ce28, 0xbfeb260b1d5f82be, + VLOAD_64(v24, 0x3fe2d21c0f5cd922, 0xbfef9fc912e0ce28, 0xbfeb260b1d5f82be, 0xbfeefdd73b960c5a, 0xbfe594790988a396, 0x3fdbfc3f615edda8, 0xbfeab01520204008, 0xbfc3723035a012c8, 0xbfd041a8e44be49c, 0x3fe550bb206a47d8, 0x3fd348f9837f3238, 0xbfa2d4c411bd66e0, @@ -101,20 +101,20 @@ void TEST_CASE1(void) { // -0.3946645040604191, 0.6818539464440989, 0.9719861381061521, // -0.8471643748461517, 0.8077493118513845, 0.2789872574353331, // 0.7073875082318823 - VLOAD_64(v2, 0xbfeab3e8dee4061e, 0x3fe6a634071f1b28, 0xbfc81712d5195ee0, + VLOAD_64(v8, 0xbfeab3e8dee4061e, 0x3fe6a634071f1b28, 0xbfc81712d5195ee0, 0xbfd19c3bdc149f5c, 0x3fcdee73f7748a88, 
0xbfefcd75c2393d96, 0xbfe11b1e209897fa, 0x3fb6b66250fca870, 0x3fd37d4be2d9c9a4, 0xbfd9422ee8753844, 0x3fe5d1bf5e1407b4, 0x3fef1a82ac6a99b4, 0xbfeb1bf876899dc0, 0x3fe9d91515b8951c, 0x3fd1daed5eabdf0c, 0x3fe6a2eb20ae8e42); - asm volatile("vfmadd.vv v2, v4, v6"); + asm volatile("vfmadd.vv v8, v16, v24"); // 0.5624246503668447, -0.5731100645801621, -0.9339028680308291, // -0.9694950747455855, -0.7469464057195535, // -0.5225882703620045, -0.6532258740745487, // -0.1696993409682697, 0.0197439149088051, 0.9839880198025914, // 0.8946349503834604, -0.3853314870073767, -1.0694668518985466, // -0.4002765447811873, 0.1104314039662806, -1.1381329534609521 - VCMP_U64(3, v2, 0x3fe1ff61faf9464f, 0xbfe256eaeb0c2af6, 0xbfede288447aa80f, + VCMP_U64(3, v8, 0x3fe1ff61faf9464f, 0xbfe256eaeb0c2af6, 0xbfede288447aa80f, 0xbfef061a88f54aac, 0xbfe7e6fc260dc471, 0xbfe0b90b094f4be7, 0xbfe4e739f2c1a370, 0xbfc5b8b53fce44b1, 0x3f9437bfb3503463, 0x3fef7cd47196a75e, 0x3feca0d979b82d6d, 0xbfd8a94565b434f4, @@ -126,34 +126,34 @@ void TEST_CASE1(void) { // Simple random test with similar values + 1 subnormal (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.3501, -0.3289, -0.8853, -0.4082, -0.4346, -0.2659, 0.9316, // 0.5444, -0.0538, 0.7686, 0.8203, -0.8623, 0.3059, 0.0372, // 0.5337, -0.5815 - VLOAD_16(v4, 0x359a, 0xb543, 0xbb15, 0xb688, 0xb6f4, 0xb441, 0x3b74, 0x385b, + VLOAD_16(v16, 0x359a, 0xb543, 0xbb15, 0xb688, 0xb6f4, 0xb441, 0x3b74, 0x385b, 0xaae4, 0x3a26, 0x3a90, 0xbae6, 0x34e5, 0x28c4, 0x3845, 0xb8a7); // -0.8105, 0.5000, -0.8374, -0.8394, 0.3098, 0.1328, -0.2864, // -0.4041, -0.1729, 0.0196, 0.2739, 0.8071, -0.1553, 0.2815, // -0.9067, -0.2495 - VLOAD_16(v6, 0xba7c, 0x3800, 0xbab3, 0xbab7, 0x34f5, 0x3040, 0xb495, 0xb677, + VLOAD_16(v24, 0xba7c, 0x3800, 0xbab3, 0xbab7, 0x34f5, 0x3040, 0xb495, 0xb677, 0xb188, 0x2502, 0x3462, 0x3a75, 0xb0f8, 0x3481, 0xbb41, 0xb3fc); VLOAD_8(v0, 0xAA, 0xAA); // -0.6558, -0.1006, 0.4558, 
-0.0784, 0.1539, 0.6748, 0.3347, // -0.3416, 0.0614, 0.2289, -0.0829, 0.3838, -0.6348, 0.0843, // -0.6890, -0.2598 - VLOAD_16(v2, 0xb93f, 0xae71, 0x374b, 0xad05, 0x30ed, 0x3966, 0x355b, 0xb577, + VLOAD_16(v8, 0xb93f, 0xae71, 0x374b, 0xad05, 0x30ed, 0x3966, 0x355b, 0xb577, 0x2bdc, 0x3353, 0xad4f, 0x3624, 0xb914, 0x2d65, 0xb983, 0xb428); - asm volatile("vfmadd.vv v2, v4, v6, v0.t"); - VCMP_U16(4, v2, 0xb93f, 0x3844, 0x374b, 0xba75, 0x30ed, 0xa9f7, 0x355b, + asm volatile("vfmadd.vv v8, v16, v24, v0.t"); + VCMP_U16(4, v8, 0xb93f, 0x3844, 0x374b, 0xba75, 0x30ed, 0xa9f7, 0x355b, 0xb8b8, 0x2bdc, 0x3241, 0xad4f, 0x379e, 0xb914, 0x348e, 0xb983, 0xae4d); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.20637949, -0.63321692, 0.40850523, 0.58702314, // -0.25534528, -0.22053087, 0.96057665, 0.85530519, // 0.74252450, -0.87175107, -0.00987994, -0.52556008, 0.26113954, // -0.71307814, 0.78942811, 0.48685852 - VLOAD_32(v4, 0xbe535525, 0xbf221a81, 0x3ed12799, 0x3f164726, 0xbe82bc9e, + VLOAD_32(v16, 0xbe535525, 0xbf221a81, 0x3ed12799, 0x3f164726, 0xbe82bc9e, 0xbe61d2d8, 0x3f75e85a, 0x3f5af548, 0x3f3e1616, 0xbf5f2b14, 0xbc21df78, 0xbf068b1b, 0x3e85b415, 0xbf368c4a, 0x3f4a17f6, 0x3ef94585); @@ -161,7 +161,7 @@ void TEST_CASE2(void) { // -0.96695948, 0.71368766, 0.23281342, -0.67807233, // 0.79363507, 0.62817359, 0.37205252, 0.27726358, // -0.85021532, -0.16634122, -0.58148408, 0.06963744 - VLOAD_32(v6, 0xbe20e41a, 0x3f54b4d4, 0x3f133971, 0x3f5a88f6, 0xbf778aa8, + VLOAD_32(v24, 0xbe20e41a, 0x3f54b4d4, 0x3f133971, 0x3f5a88f6, 0xbf778aa8, 0x3f36b43c, 0x3e6e66a4, 0xbf2d9626, 0x3f4b2bab, 0x3f20cffc, 0x3ebe7dab, 0x3e8df57e, 0xbf59a7b6, 0xbe2a555a, 0xbf14dc24, 0x3d8e9e13); @@ -170,25 +170,25 @@ void TEST_CASE2(void) { // -0.84276563, 0.00681775, 0.30881208, 0.27571887, // 0.12349209, 0.29805747, -0.55497122, -0.52685922, 0.82809180, // -0.83231467, 0.20959182, 0.15603130 - VLOAD_32(v2, 0xbf216ffb, 0x3f139101, 0x3da7e970, 0xbd76f8fa, 0xbf57bf7d, + VLOAD_32(v8, 0xbf216ffb, 0x3f139101, 
0x3da7e970, 0xbd76f8fa, 0xbf57bf7d, 0x3bdf676d, 0x3e9e1c9e, 0x3e8d2b06, 0x3dfce96c, 0x3e989afd, 0xbf0e1298, 0xbf06e03f, 0x3f53fdd3, 0xbf551293, 0x3e569f3d, 0x3e1fc6ab); - asm volatile("vfmadd.vv v2, v4, v6, v0.t"); - VCMP_U32(5, v2, 0xbf216ffb, 0x3eee8795, 0x3da7e970, 0x3f51794e, 0xbf57bf7d, + asm volatile("vfmadd.vv v8, v16, v24, v0.t"); + VCMP_U32(5, v8, 0xbf216ffb, 0x3eee8795, 0x3da7e970, 0x3f51794e, 0xbf57bf7d, 0x3f3651b3, 0x3e9e1c9e, 0xbee26e67, 0x3dfce96c, 0x3ebc9748, 0xbf0e1298, 0x3f0ddd6a, 0x3f53fdd3, 0x3edab544, 0x3e569f3d, 0x3e1518d9); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 0.0308264568094008, 0.5865382185158325, 0.4543411851187289, // 0.0036656924511687, -0.3103508259554966, 0.9658177901158624, // -0.3381631341283657, -0.2003719333831677, 0.8989532087589025, // -0.8054516243685412, 0.8701363884969631, // -0.3585976675814562, 0.4150155349314333, // -0.6908185611649824, 0.8412555125501906, -0.3357469205066645 - VLOAD_64(v4, 0x3f9f90f87f644880, 0x3fe2c4ebcc4c25b4, 0x3fdd13ed0cd3e484, + VLOAD_64(v16, 0x3f9f90f87f644880, 0x3fe2c4ebcc4c25b4, 0x3fdd13ed0cd3e484, 0x3f6e0783a63d2400, 0xbfd3dcc9b5f0fd10, 0x3feee7fab5ce29f4, 0xbfd5a476fc72d40c, 0xbfc9a5c99a756020, 0x3fecc43985081eb2, 0xbfe9c6427c2588e6, 0x3febd8284474eda0, 0xbfd6f343a1abca7c, @@ -200,7 +200,7 @@ void TEST_CASE2(void) { // -0.2540075520951832, 0.6661048539265222, 0.3013290199421905, // -0.0367795249610035, -0.7178804756969177, 0.1577316726139908, // -0.1242681642824526, -0.9006297759672148 - VLOAD_64(v6, 0x3fe2d21c0f5cd922, 0xbfef9fc912e0ce28, 0xbfeb260b1d5f82be, + VLOAD_64(v24, 0x3fe2d21c0f5cd922, 0xbfef9fc912e0ce28, 0xbfeb260b1d5f82be, 0xbfeefdd73b960c5a, 0xbfe594790988a396, 0x3fdbfc3f615edda8, 0xbfeab01520204008, 0xbfc3723035a012c8, 0xbfd041a8e44be49c, 0x3fe550bb206a47d8, 0x3fd348f9837f3238, 0xbfa2d4c411bd66e0, @@ -213,20 +213,20 @@ void TEST_CASE2(void) { // -0.3946645040604191, 0.6818539464440989, 0.9719861381061521, // -0.8471643748461517, 0.8077493118513845, 
0.2789872574353331, // 0.7073875082318823 - VLOAD_64(v2, 0xbfeab3e8dee4061e, 0x3fe6a634071f1b28, 0xbfc81712d5195ee0, + VLOAD_64(v8, 0xbfeab3e8dee4061e, 0x3fe6a634071f1b28, 0xbfc81712d5195ee0, 0xbfd19c3bdc149f5c, 0x3fcdee73f7748a88, 0xbfefcd75c2393d96, 0xbfe11b1e209897fa, 0x3fb6b66250fca870, 0x3fd37d4be2d9c9a4, 0xbfd9422ee8753844, 0x3fe5d1bf5e1407b4, 0x3fef1a82ac6a99b4, 0xbfeb1bf876899dc0, 0x3fe9d91515b8951c, 0x3fd1daed5eabdf0c, 0x3fe6a2eb20ae8e42); - asm volatile("vfmadd.vv v2, v4, v6, v0.t"); + asm volatile("vfmadd.vv v8, v16, v24, v0.t"); // 0.0000000000000000, -0.5731100645801621, 0.0000000000000000, // -0.9694950747455855, 0.0000000000000000, // -0.5225882703620045, 0.0000000000000000, // -0.1696993409682697, 0.0000000000000000, 0.9839880198025914, // 0.0000000000000000, -0.3853314870073767, 0.0000000000000000, // -0.4002765447811873, 0.0000000000000000, -1.1381329534609521 - VCMP_U64(6, v2, 0xbfeab3e8dee4061e, 0xbfe256eaeb0c2af6, 0xbfc81712d5195ee0, + VCMP_U64(6, v8, 0xbfeab3e8dee4061e, 0xbfe256eaeb0c2af6, 0xbfc81712d5195ee0, 0xbfef061a88f54aac, 0x3fcdee73f7748a88, 0xbfe0b90b094f4be7, 0xbfe11b1e209897fa, 0xbfc5b8b53fce44b1, 0x3fd37d4be2d9c9a4, 0x3fef7cd47196a75e, 0x3fe5d1bf5e1407b4, 0xbfd8a94565b434f4, @@ -237,29 +237,29 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.6299 BOX_HALF_IN_FLOAT(fscalar_16, 0x390a); // -0.5352, 0.1115, 0.9541, -0.8857, -0.4143, 0.4045, 0.2949, // -0.5479, 0.6733, 0.8965, 0.8882, 0.6294, 0.7568, 0.8735, // -0.8569, 0.8271 - VLOAD_16(v4, 0xb848, 0x2f23, 0x3ba2, 0xbb16, 0xb6a1, 0x3679, 0x34b8, 0xb862, + VLOAD_16(v16, 0xb848, 0x2f23, 0x3ba2, 0xbb16, 0xb6a1, 0x3679, 0x34b8, 0xb862, 0x3963, 0x3b2c, 0x3b1b, 0x3909, 0x3a0e, 0x3afd, 0xbadb, 0x3a9e); // 0.2844, 0.1008, 0.3777, 0.9790, -0.8613, 0.4951, 0.4126, // 0.5518, -0.6680, -0.8340, 0.2094, 0.5884, -0.6509, -0.9360, // -0.1609, -0.2527 - VLOAD_16(v2, 0x348d, 
0x2e74, 0x360b, 0x3bd5, 0xbae4, 0x37ec, 0x369a, 0x386a, + VLOAD_16(v8, 0x348d, 0x2e74, 0x360b, 0x3bd5, 0xbae4, 0x37ec, 0x369a, 0x386a, 0xb958, 0xbaac, 0x32b3, 0x38b5, 0xb935, 0xbb7d, 0xb126, 0xb40b); - asm volatile("vfmadd.vf v2, %[A], v4" ::[A] "f"(fscalar_16)); + asm volatile("vfmadd.vf v8, %[A], v16" ::[A] "f"(fscalar_16)); // -0.3560, 0.1750, 1.1924, -0.2690, -0.9570, 0.7163, 0.5547, // -0.2002, 0.2527, 0.3711, 1.0195, 1.0000, 0.3469, 0.2842, // -0.9580, 0.6680 - VCMP_U16(7, v2, 0xb5b2, 0x319a, 0x3cc5, 0xb44e, 0xbba8, 0x39bb, 0x3870, + VCMP_U16(7, v8, 0xb5b2, 0x319a, 0x3cc5, 0xb44e, 0xbba8, 0x39bb, 0x3870, 0xb269, 0x340b, 0x35f0, 0x3c15, 0x3c00, 0x358d, 0x348b, 0xbbab, 0x3958); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // 0.80368215 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x3f4dbe1d); @@ -267,7 +267,7 @@ void TEST_CASE3(void) { // -0.69910282, -0.90573430, 0.86481184, 0.33341369, // 0.30657578, -0.90526944, -0.97891974, -0.50830764, // 0.79750061, 0.96885878, 0.48752418, 0.64305341 - VLOAD_32(v4, 0x3e05db98, 0xbe4a2639, 0x3dbfe79e, 0x3ed22c6d, 0xbf32f867, + VLOAD_32(v16, 0x3e05db98, 0xbe4a2639, 0x3dbfe79e, 0x3ed22c6d, 0xbf32f867, 0xbf67de34, 0x3f5d644f, 0x3eaab533, 0x3e9cf780, 0xbf67bfbd, 0xbf7a9a7c, 0xbf022073, 0x3f4c2900, 0x3f780721, 0x3ef99cc5, 0x3f249f26); @@ -275,22 +275,22 @@ void TEST_CASE3(void) { // -0.84296966, 0.50125730, 0.96147668, 0.65802389, // 0.19629262, -0.73197508, -0.06948850, -0.60436314, // -0.80817568, 0.72047287, -0.78180677, -0.40237895 - VLOAD_32(v2, 0xbf1c7638, 0x3f515a0a, 0xbeece360, 0xbe9e3276, 0xbf57ccdc, + VLOAD_32(v8, 0xbf1c7638, 0x3f515a0a, 0xbeece360, 0xbe9e3276, 0xbf57ccdc, 0x3f005266, 0x3f762356, 0x3f287441, 0x3e4900ef, 0xbf3b62b8, 0xbd8e4ffc, 0xbf1ab78b, 0xbf4ee49a, 0x3f3870e9, 0xbf48247d, 0xbece049d); - asm volatile("vfmadd.vf v2, %[A], v4" ::[A] "f"(fscalar_32)); + asm volatile("vfmadd.vf v8, %[A], v16" ::[A] "f"(fscalar_32)); // -0.36047307, 0.45982391, -0.27813792, 0.16217449, // -1.37658250, -0.50288272, 
1.63753343, 0.86225569, // 0.46433264, -1.49354482, -1.03476644, -0.99402350, 0.14798427, // 1.54788995, -0.14079997, 0.31966865 - VCMP_U32(8, v2, 0xbeb88fed, 0x3eeb6e09, 0xbe8e6818, 0x3e261112, 0xbfb033db, + VCMP_U32(8, v8, 0xbeb88fed, 0x3eeb6e09, 0xbe8e6818, 0x3e261112, 0xbfb033db, 0xbf00bced, 0x3fd19ab2, 0x3f5cbccb, 0x3eedbd02, 0xbfbf2c79, 0xbf84733a, 0xbf7e7853, 0x3e17892e, 0x3fc62142, 0xbe102ddd, 0x3ea3ab9c); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.5717861827636179 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe24c128968c808); @@ -301,7 +301,7 @@ void TEST_CASE3(void) { // -0.6941474840922310, 0.4809970389518419, // -0.4671263490725479, 0.5176516826232249, // -0.9714116214357187, 0.0212574845134876 - VLOAD_64(v4, 0xbfb90ff83cc58160, 0xbfe9d0157c220eae, 0x3fb9a21bd6239ad0, + VLOAD_64(v16, 0xbfb90ff83cc58160, 0xbfe9d0157c220eae, 0x3fb9a21bd6239ad0, 0x3fe3c985e1ec6d14, 0xbfe6b113bcd49f88, 0x3fde2a4914f71f28, 0xbfedf64fbb356b82, 0x3fe581cf0bb1e7c4, 0xbfc5905a8722a398, 0xbf952cb38782ee00, 0xbfe63674c8d8dba6, 0x3fdec8a7cdf1580c, @@ -314,20 +314,20 @@ void TEST_CASE3(void) { // -0.8379133193505066, -0.6497652150347000, // -0.4444119628309799, -0.8810041425660891, 0.4421772814931029, // 0.0606105644967410 - VLOAD_64(v2, 0xbfeb84f2357b2242, 0xbfeb8d061ebc48de, 0xbfe20e9f1cee50cc, + VLOAD_64(v8, 0xbfeb84f2357b2242, 0xbfeb8d061ebc48de, 0xbfe20e9f1cee50cc, 0x3fecaacc26c2f0d0, 0x3fd76ecbc40b5864, 0x3fba0eeb244baff0, 0x3fcd6d36a4399740, 0xbfdfd005f440f21c, 0xbfe2c1342d3e9986, 0xbfe50f78d644befe, 0xbfead02f97efc88a, 0xbfe4cae06b94e0d2, 0xbfdc713edf93dac0, 0xbfec312f997ec7ac, 0x3fdc4ca1f0c30314, 0x3faf0859109c77c0); - asm volatile("vfmadd.vf v2, %[A], v4" ::[A] "f"(dscalar_64)); + asm volatile("vfmadd.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); // -0.5896239092143964, -1.2989386647705283, -0.2225204504323405, // 1.1305842246079476, -0.4997635227612305, 0.5295330954311120, // -0.8048662854791286, 0.3878770491466992, -0.5035828135241305, // 
-0.3969929346554276, -1.1732547424504496, 0.1094702669545694, // -0.7212349688741608, 0.0139056869464266, -0.7185807615459836, // 0.0559137678222272 - VCMP_U64(9, v2, 0xbfe2de32f5e07f06, 0xbff4c873e8cb3071, 0xbfcc7b8cd4a627de, + VCMP_U64(9, v8, 0xbfe2de32f5e07f06, 0xbff4c873e8cb3071, 0xbfcc7b8cd4a627de, 0x3ff216df7be108d1, 0xbfdffc20247f9130, 0x3fe0f1ef63e0d73b, 0xbfe9c176f0b925be, 0x3fd8d2fa423d0f48, 0xbfe01d59b45d3de9, 0xbfd968550dc5a3de, 0xbff2c5a6c3cb39b0, 0x3fbc063e50744ab8, @@ -338,27 +338,27 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.6299 BOX_HALF_IN_FLOAT(fscalar_16, 0x390a); // -0.5352, 0.1115, 0.9541, -0.8857, -0.4143, 0.4045, 0.2949, // -0.5479, 0.6733, 0.8965, 0.8882, 0.6294, 0.7568, 0.8735, // -0.8569, 0.8271 - VLOAD_16(v4, 0xb848, 0x2f23, 0x3ba2, 0xbb16, 0xb6a1, 0x3679, 0x34b8, 0xb862, + VLOAD_16(v16, 0xb848, 0x2f23, 0x3ba2, 0xbb16, 0xb6a1, 0x3679, 0x34b8, 0xb862, 0x3963, 0x3b2c, 0x3b1b, 0x3909, 0x3a0e, 0x3afd, 0xbadb, 0x3a9e); VLOAD_8(v0, 0xAA, 0xAA); // 0.2844, 0.1008, 0.3777, 0.9790, -0.8613, 0.4951, 0.4126, // 0.5518, -0.6680, -0.8340, 0.2094, 0.5884, -0.6509, // -0.9360, -0.1609, -0.2527 - VLOAD_16(v2, 0x348d, 0x2e74, 0x360b, 0x3bd5, 0xbae4, 0x37ec, 0x369a, 0x386a, + VLOAD_16(v8, 0x348d, 0x2e74, 0x360b, 0x3bd5, 0xbae4, 0x37ec, 0x369a, 0x386a, 0xb958, 0xbaac, 0x32b3, 0x38b5, 0xb935, 0xbb7d, 0xb126, 0xb40b); - asm volatile("vfmadd.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); - VCMP_U16(10, v2, 0x348d, 0x319a, 0x360b, 0xb44e, 0xbae4, 0x39bb, 0x369a, + asm volatile("vfmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_16)); + VCMP_U16(10, v8, 0x348d, 0x319a, 0x360b, 0xb44e, 0xbae4, 0x39bb, 0x369a, 0xb269, 0xb958, 0x35f0, 0x32b3, 0x3c00, 0xb935, 0x348b, 0xb126, 0x3958); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // 0.80368215 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x3f4dbe1d); @@ -366,7 +366,7 @@ void 
TEST_CASE4(void) { // -0.69910282, -0.90573430, 0.86481184, 0.33341369, // 0.30657578, -0.90526944, -0.97891974, -0.50830764, // 0.79750061, 0.96885878, 0.48752418, 0.64305341 - VLOAD_32(v4, 0x3e05db98, 0xbe4a2639, 0x3dbfe79e, 0x3ed22c6d, 0xbf32f867, + VLOAD_32(v16, 0x3e05db98, 0xbe4a2639, 0x3dbfe79e, 0x3ed22c6d, 0xbf32f867, 0xbf67de34, 0x3f5d644f, 0x3eaab533, 0x3e9cf780, 0xbf67bfbd, 0xbf7a9a7c, 0xbf022073, 0x3f4c2900, 0x3f780721, 0x3ef99cc5, 0x3f249f26); @@ -375,18 +375,18 @@ void TEST_CASE4(void) { // -0.84296966, 0.50125730, 0.96147668, 0.65802389, // 0.19629262, -0.73197508, -0.06948850, -0.60436314, // -0.80817568, 0.72047287, -0.78180677, -0.40237895 - VLOAD_32(v2, 0xbf1c7638, 0x3f515a0a, 0xbeece360, 0xbe9e3276, 0xbf57ccdc, + VLOAD_32(v8, 0xbf1c7638, 0x3f515a0a, 0xbeece360, 0xbe9e3276, 0xbf57ccdc, 0x3f005266, 0x3f762356, 0x3f287441, 0x3e4900ef, 0xbf3b62b8, 0xbd8e4ffc, 0xbf1ab78b, 0xbf4ee49a, 0x3f3870e9, 0xbf48247d, 0xbece049d); - asm volatile("vfmadd.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); - VCMP_U32(11, v2, 0xbf1c7638, 0x3eeb6e09, 0xbeece360, 0x3e261112, 0xbf57ccdc, + asm volatile("vfmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_32)); + VCMP_U32(11, v8, 0xbf1c7638, 0x3eeb6e09, 0xbeece360, 0x3e261112, 0xbf57ccdc, 0xbf00bced, 0x3f762356, 0x3f5cbccb, 0x3e4900ef, 0xbfbf2c79, 0xbd8e4ffc, 0xbf7e7853, 0xbf4ee49a, 0x3fc62142, 0xbf48247d, 0x3ea3ab9c); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.5717861827636179 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe24c128968c808); @@ -398,7 +398,7 @@ void TEST_CASE4(void) { // -0.6941474840922310, 0.4809970389518419, // -0.4671263490725479, 0.5176516826232249, // -0.9714116214357187, 0.0212574845134876 - VLOAD_64(v4, 0xbfb90ff83cc58160, 0xbfe9d0157c220eae, 0x3fb9a21bd6239ad0, + VLOAD_64(v16, 0xbfb90ff83cc58160, 0xbfe9d0157c220eae, 0x3fb9a21bd6239ad0, 0x3fe3c985e1ec6d14, 0xbfe6b113bcd49f88, 0x3fde2a4914f71f28, 0xbfedf64fbb356b82, 0x3fe581cf0bb1e7c4, 0xbfc5905a8722a398, 0xbf952cb38782ee00, 
0xbfe63674c8d8dba6, 0x3fdec8a7cdf1580c, @@ -412,14 +412,14 @@ void TEST_CASE4(void) { // -0.6581386742527398, -0.8379133193505066, // -0.6497652150347000, -0.4444119628309799, // -0.8810041425660891, 0.4421772814931029, 0.0606105644967410 - VLOAD_64(v2, 0xbfeb84f2357b2242, 0xbfeb8d061ebc48de, 0xbfe20e9f1cee50cc, + VLOAD_64(v8, 0xbfeb84f2357b2242, 0xbfeb8d061ebc48de, 0xbfe20e9f1cee50cc, 0x3fecaacc26c2f0d0, 0x3fd76ecbc40b5864, 0x3fba0eeb244baff0, 0x3fcd6d36a4399740, 0xbfdfd005f440f21c, 0xbfe2c1342d3e9986, 0xbfe50f78d644befe, 0xbfead02f97efc88a, 0xbfe4cae06b94e0d2, 0xbfdc713edf93dac0, 0xbfec312f997ec7ac, 0x3fdc4ca1f0c30314, 0x3faf0859109c77c0); - asm volatile("vfmadd.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_64)); - VCMP_U64(12, v2, 0xbfeb84f2357b2242, 0xbff4c873e8cb3071, 0xbfe20e9f1cee50cc, + asm volatile("vfmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + VCMP_U64(12, v8, 0xbfeb84f2357b2242, 0xbff4c873e8cb3071, 0xbfe20e9f1cee50cc, 0x3ff216df7be108d1, 0x3fd76ecbc40b5864, 0x3fe0f1ef63e0d73b, 0x3fcd6d36a4399740, 0x3fd8d2fa423d0f48, 0xbfe2c1342d3e9986, 0xbfd968550dc5a3de, 0xbfead02f97efc88a, 0x3fbc063e50744ab8, diff --git a/sw/riscvTests/isa/rv64uv/vfmax.c b/sw/riscvTests/isa/rv64uv/vfmax.c index a65c16d8..e4faf6b1 100644 --- a/sw/riscvTests/isa/rv64uv/vfmax.c +++ b/sw/riscvTests/isa/rv64uv/vfmax.c @@ -10,31 +10,31 @@ // Simple random test with similar values + 1 subnormal void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.0445, -0.4812, 0.5732, 0.0634, 0.2072, -0.6416, 0.7759, // -0.0042, 0.6138, 0.7847, -0.5337, 0.3455, 0.0304, 0.7920, // 0.8179, -0.5659 - VLOAD_16(v4, 0x29b3, 0xb7b3, 0x3896, 0x2c0f, 0x32a1, 0xb922, 0x3a35, 0x9c4d, + VLOAD_16(v16, 0x29b3, 0xb7b3, 0x3896, 0x2c0f, 0x32a1, 0xb922, 0x3a35, 0x9c4d, 0x38e9, 0x3a47, 0xb845, 0x3587, 0x27ca, 0x3a56, 0x3a8b, 0xb887); // 0.6426, -0.4099, -0.1183, 0.2915, 0.5972, -0.1932, -0.0265, // -0.5913, -0.8560, 0.5029, -0.8975, -0.7373, 0.3701, 0.9546, // -0.2671, -0.6855 - VLOAD_16(v6, 0x3924, 
0xb68f, 0xaf93, 0x34aa, 0x38c7, 0xb22f, 0xa6c7, 0xb8bb, + VLOAD_16(v24, 0x3924, 0xb68f, 0xaf93, 0x34aa, 0x38c7, 0xb22f, 0xa6c7, 0xb8bb, 0xbad9, 0x3806, 0xbb2e, 0xb9e6, 0x35ec, 0x3ba3, 0xb446, 0xb97c); - asm volatile("vfmax.vv v2, v4, v6"); + asm volatile("vfmax.vv v8, v16, v24"); // 0.6426, -0.4099, 0.5732, 0.2915, 0.5972, -0.1932, 0.7759, // -0.0042, 0.6138, 0.7847, -0.5337, 0.3455, 0.3701, 0.9546, // 0.8179, -0.5659 - VCMP_U16(1, v2, 0x3924, 0xb68f, 0x3896, 0x34aa, 0x38c7, 0xb22f, 0x3a35, + VCMP_U16(1, v8, 0x3924, 0xb68f, 0x3896, 0x34aa, 0x38c7, 0xb22f, 0x3a35, 0x9c4d, 0x38e9, 0x3a47, 0xb845, 0x3587, 0x35ec, 0x3ba3, 0x3a8b, 0xb887); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.19589283, 0.64597517, -0.09556163, 0.96582597, // 0.93413597, 0.78331935, -0.18831402, -0.29520443, 0.09486515, // 0.96548969, 0.74523991, 0.81442171, 0.25644442, // -0.92091519, 0.25139943, -0.77403748 - VLOAD_32(v4, 0xbe489821, 0x3f255ea1, 0xbdc3b5d1, 0x3f77405f, 0x3f6f2389, + VLOAD_32(v16, 0xbe489821, 0x3f255ea1, 0xbdc3b5d1, 0x3f77405f, 0x3f6f2389, 0x3f48879e, 0xbe40d564, 0xbe972509, 0x3dc248a9, 0x3f772a55, 0x3f3ec80b, 0x3f507df1, 0x3e834caf, 0xbf6bc119, 0x3e80b76d, 0xbf462752); @@ -42,29 +42,29 @@ void TEST_CASE1(void) { // 0.15872470, -0.17028977, -0.99863762, -0.02739566, // -0.08060763, 0.73060948, 0.62843031, 0.68798363, // -0.35207590, 0.01353026, 0.25345275, -0.93635505 - VLOAD_32(v6, 0xbf16d6e0, 0x3f318640, 0x3f25eebb, 0xbc0e6e1c, 0x3e2288ba, + VLOAD_32(v24, 0xbf16d6e0, 0x3f318640, 0x3f25eebb, 0xbc0e6e1c, 0x3e2288ba, 0xbe2e6071, 0xbf7fa6b7, 0xbce06cdd, 0xbda5159d, 0x3f3b0939, 0x3f20e0cf, 0x3f301fb2, 0xbeb4434b, 0x3c5dae02, 0x3e81c48f, 0xbf6fb4f7); - asm volatile("vfmax.vv v2, v4, v6"); + asm volatile("vfmax.vv v8, v16, v24"); // -0.19589283, 0.69345474, 0.64817399, 0.96582597, // 0.93413597, 0.78331935, -0.18831402, -0.02739566, 0.09486515, // 0.96548969, 0.74523991, 0.81442171, 0.25644442, 0.01353026, // 0.25345275, -0.77403748 - VCMP_U32(2, v2, 0xbe489821, 0x3f318640, 
0x3f25eebb, 0x3f77405f, 0x3f6f2389, + VCMP_U32(2, v8, 0xbe489821, 0x3f318640, 0x3f25eebb, 0x3f77405f, 0x3f6f2389, 0x3f48879e, 0xbe40d564, 0xbce06cdd, 0x3dc248a9, 0x3f772a55, 0x3f3ec80b, 0x3f507df1, 0x3e834caf, 0x3c5dae02, 0x3e81c48f, 0xbf462752); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.4061329687298849, -0.2985478109200665, 0.0070087316277823, // -0.2169778494878496, -0.8530745559533048, -0.1247477743553222, // 0.5680045000966327, 0.9515829310663801, -0.9797693611753244, // 0.0055288881366042, 0.3717566019240965, 0.0982171502328268, // -0.1563664923399100, 0.9555697921812856, 0.4810293698835877, // -0.1835757691555060 - VLOAD_64(v4, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, + VLOAD_64(v16, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, 0xbfcbc5ee1fc0dc58, 0xbfeb4c6302dbd036, 0xbfbfef785b1ada80, 0x3fe22d17c5fcaaf0, 0x3fee735e0c0b94e4, 0xbfef5a45467bddd8, 0x3f76a5759bade800, 0x3fd7cadc33d5826c, 0x3fb924c2582803f0, @@ -76,20 +76,20 @@ void TEST_CASE1(void) { // -0.5415645292681506, 0.0094485111801912, -0.2151605186231076, // -0.0785069829906857, 0.6345480854408712, 0.4658290296396683, // -0.5143497066150833 - VLOAD_64(v6, 0xbfe17a53f1e9e958, 0xbfdc5ce6e7f43e14, 0xbfe7ddf3ea78a228, + VLOAD_64(v24, 0xbfe17a53f1e9e958, 0xbfdc5ce6e7f43e14, 0xbfe7ddf3ea78a228, 0xbfeb8eca710827f8, 0x3fe0ec9e8632f518, 0x3fdef4edc443ec94, 0xbfe3046a59846530, 0x3fe0eb6249006ebc, 0x3fd3cc0765615f4c, 0xbfe1547f22bc2bc2, 0x3f8359bdb41e5580, 0xbfcb8a613f7035f0, 0xbfb419089c73df20, 0x3fe44e37c956a792, 0x3fddd0248ff51b48, 0xbfe0758d8413ceaa); - asm volatile("vfmax.vv v2, v4, v6"); + asm volatile("vfmax.vv v8, v16, v24"); // -0.4061329687298849, -0.2985478109200665, 0.0070087316277823, // -0.2169778494878496, 0.5288841839862100, 0.4836992661145783, // 0.5680045000966327, 0.9515829310663801, 0.3093279352228719, // 0.0055288881366042, 0.3717566019240965, 0.0982171502328268, // -0.0785069829906857, 0.9555697921812856, 0.4810293698835877, // 
-0.1835757691555060 - VCMP_U64(3, v2, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, + VCMP_U64(3, v8, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, 0xbfcbc5ee1fc0dc58, 0x3fe0ec9e8632f518, 0x3fdef4edc443ec94, 0x3fe22d17c5fcaaf0, 0x3fee735e0c0b94e4, 0x3fd3cc0765615f4c, 0x3f76a5759bade800, 0x3fd7cadc33d5826c, 0x3fb924c2582803f0, @@ -101,32 +101,32 @@ void TEST_CASE1(void) { // Simple random test with similar values + 1 subnormal (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.0445, -0.4812, 0.5732, 0.0634, 0.2072, -0.6416, 0.7759, // -0.0042, 0.6138, 0.7847, -0.5337, 0.3455, 0.0304, 0.7920, // 0.8179, -0.5659 - VLOAD_16(v4, 0x29b3, 0xb7b3, 0x3896, 0x2c0f, 0x32a1, 0xb922, 0x3a35, 0x9c4d, + VLOAD_16(v16, 0x29b3, 0xb7b3, 0x3896, 0x2c0f, 0x32a1, 0xb922, 0x3a35, 0x9c4d, 0x38e9, 0x3a47, 0xb845, 0x3587, 0x27ca, 0x3a56, 0x3a8b, 0xb887); // 0.6426, -0.4099, -0.1183, 0.2915, 0.5972, -0.1932, -0.0265, // -0.5913, -0.8560, 0.5029, -0.8975, -0.7373, 0.3701, 0.9546, // -0.2671, -0.6855 - VLOAD_16(v6, 0x3924, 0xb68f, 0xaf93, 0x34aa, 0x38c7, 0xb22f, 0xa6c7, 0xb8bb, + VLOAD_16(v24, 0x3924, 0xb68f, 0xaf93, 0x34aa, 0x38c7, 0xb22f, 0xa6c7, 0xb8bb, 0xbad9, 0x3806, 0xbb2e, 0xb9e6, 0x35ec, 0x3ba3, 0xb446, 0xb97c); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmax.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfmax.vv v8, v16, v24, v0.t"); // 0.0000, -0.4099, 0.0000, 0.2915, 0.0000, -0.1932, 0.0000, // -0.0042, 0.0000, 0.7847, 0.0000, 0.3455, 0.0000, 0.9546, // 0.0000, -0.5659 - VCMP_U16(4, v2, 0x0, 0xb68f, 0x0, 0x34aa, 0x0, 0xb22f, 0x0, 0x9c4d, 0x0, + VCMP_U16(4, v8, 0x0, 0xb68f, 0x0, 0x34aa, 0x0, 0xb22f, 0x0, 0x9c4d, 0x0, 0x3a47, 0x0, 0x3587, 0x0, 0x3ba3, 0x0, 0xb887); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.19589283, 0.64597517, -0.09556163, 0.96582597, // 0.93413597, 0.78331935, -0.18831402, -0.29520443, 0.09486515, // 0.96548969, 0.74523991, 0.81442171, 
0.25644442, // -0.92091519, 0.25139943, -0.77403748 - VLOAD_32(v4, 0xbe489821, 0x3f255ea1, 0xbdc3b5d1, 0x3f77405f, 0x3f6f2389, + VLOAD_32(v16, 0xbe489821, 0x3f255ea1, 0xbdc3b5d1, 0x3f77405f, 0x3f6f2389, 0x3f48879e, 0xbe40d564, 0xbe972509, 0x3dc248a9, 0x3f772a55, 0x3f3ec80b, 0x3f507df1, 0x3e834caf, 0xbf6bc119, 0x3e80b76d, 0xbf462752); @@ -134,30 +134,30 @@ void TEST_CASE2(void) { // 0.15872470, -0.17028977, -0.99863762, -0.02739566, // -0.08060763, 0.73060948, 0.62843031, 0.68798363, // -0.35207590, 0.01353026, 0.25345275, -0.93635505 - VLOAD_32(v6, 0xbf16d6e0, 0x3f318640, 0x3f25eebb, 0xbc0e6e1c, 0x3e2288ba, + VLOAD_32(v24, 0xbf16d6e0, 0x3f318640, 0x3f25eebb, 0xbc0e6e1c, 0x3e2288ba, 0xbe2e6071, 0xbf7fa6b7, 0xbce06cdd, 0xbda5159d, 0x3f3b0939, 0x3f20e0cf, 0x3f301fb2, 0xbeb4434b, 0x3c5dae02, 0x3e81c48f, 0xbf6fb4f7); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmax.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfmax.vv v8, v16, v24, v0.t"); // 0.00000000, 0.69345474, 0.00000000, 0.96582597, // 0.00000000, 0.78331935, 0.00000000, -0.02739566, // 0.00000000, 0.96548969, 0.00000000, 0.81442171, // 0.00000000, 0.01353026, 0.00000000, -0.77403748 - VCMP_U32(5, v2, 0x0, 0x3f318640, 0x0, 0x3f77405f, 0x0, 0x3f48879e, 0x0, + VCMP_U32(5, v8, 0x0, 0x3f318640, 0x0, 0x3f77405f, 0x0, 0x3f48879e, 0x0, 0xbce06cdd, 0x0, 0x3f772a55, 0x0, 0x3f507df1, 0x0, 0x3c5dae02, 0x0, 0xbf462752); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.4061329687298849, -0.2985478109200665, 0.0070087316277823, // -0.2169778494878496, -0.8530745559533048, -0.1247477743553222, // 0.5680045000966327, 0.9515829310663801, -0.9797693611753244, // 0.0055288881366042, 0.3717566019240965, 0.0982171502328268, // -0.1563664923399100, 0.9555697921812856, 0.4810293698835877, // -0.1835757691555060 - VLOAD_64(v4, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, + VLOAD_64(v16, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, 0xbfcbc5ee1fc0dc58, 0xbfeb4c6302dbd036, 
0xbfbfef785b1ada80, 0x3fe22d17c5fcaaf0, 0x3fee735e0c0b94e4, 0xbfef5a45467bddd8, 0x3f76a5759bade800, 0x3fd7cadc33d5826c, 0x3fb924c2582803f0, @@ -169,22 +169,22 @@ void TEST_CASE2(void) { // -0.5415645292681506, 0.0094485111801912, -0.2151605186231076, // -0.0785069829906857, 0.6345480854408712, 0.4658290296396683, // -0.5143497066150833 - VLOAD_64(v6, 0xbfe17a53f1e9e958, 0xbfdc5ce6e7f43e14, 0xbfe7ddf3ea78a228, + VLOAD_64(v24, 0xbfe17a53f1e9e958, 0xbfdc5ce6e7f43e14, 0xbfe7ddf3ea78a228, 0xbfeb8eca710827f8, 0x3fe0ec9e8632f518, 0x3fdef4edc443ec94, 0xbfe3046a59846530, 0x3fe0eb6249006ebc, 0x3fd3cc0765615f4c, 0xbfe1547f22bc2bc2, 0x3f8359bdb41e5580, 0xbfcb8a613f7035f0, 0xbfb419089c73df20, 0x3fe44e37c956a792, 0x3fddd0248ff51b48, 0xbfe0758d8413ceaa); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmax.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfmax.vv v8, v16, v24, v0.t"); // 0.0000000000000000, -0.2985478109200665, 0.0000000000000000, // -0.2169778494878496, 0.0000000000000000, 0.4836992661145783, // 0.0000000000000000, 0.9515829310663801, 0.0000000000000000, // 0.0055288881366042, 0.0000000000000000, 0.0982171502328268, // 0.0000000000000000, 0.9555697921812856, 0.0000000000000000, // -0.1835757691555060 - VCMP_U64(6, v2, 0x0, 0xbfd31b68470c6bc4, 0x0, 0xbfcbc5ee1fc0dc58, 0x0, + VCMP_U64(6, v8, 0x0, 0xbfd31b68470c6bc4, 0x0, 0xbfcbc5ee1fc0dc58, 0x0, 0x3fdef4edc443ec94, 0x0, 0x3fee735e0c0b94e4, 0x0, 0x3f76a5759bade800, 0x0, 0x3fb924c2582803f0, 0x0, 0x3fee940719ceda38, 0x0, 0xbfc77f692a6e3368); @@ -193,24 +193,24 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.0368 BOX_HALF_IN_FLOAT(fscalar_16, 0x28b5); // -0.5518, 0.6772, 0.2756, 0.4421, 0.2081, 0.6250, 0.4136, // 0.8203, -0.3535, -0.1597, -0.5244, 0.8696, 0.1744, 0.0793, // -0.2445, -0.4031 - VLOAD_16(v4, 0xb86a, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, 0x3a90, + 
VLOAD_16(v16, 0xb86a, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, 0x3a90, 0xb5a8, 0xb11c, 0xb832, 0x3af5, 0x3195, 0x2d14, 0xb3d3, 0xb673); - asm volatile("vfmax.vf v2, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfmax.vf v8, v16, %[A]" ::[A] "f"(fscalar_16)); // 0.0368, 0.6772, 0.2756, 0.4421, 0.2081, 0.6250, 0.4136, // 0.8203, 0.0368, 0.0368, 0.0368, 0.8696, 0.1744, 0.0793, // 0.0368, 0.0368 - VCMP_U16(7, v2, 0x28b5, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, + VCMP_U16(7, v8, 0x28b5, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, 0x3a90, 0x28b5, 0x28b5, 0x28b5, 0x3af5, 0x3195, 0x2d14, 0x28b5, 0x28b5); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.94383347 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbf719f12); @@ -218,22 +218,22 @@ void TEST_CASE3(void) { // 0.11194360, -0.33637357, 0.83680850, 0.95792335, // 0.41251704, 0.27496886, -0.06774041, -0.19357064, // -0.48802575, -0.53921199, 0.32722279, 0.28428423 - VLOAD_32(v4, 0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, + VLOAD_32(v16, 0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, 0xbeac3928, 0x3f563915, 0x3f753a77, 0x3ed3356f, 0x3e8cc8b8, 0xbd8abb7c, 0xbe463762, 0xbef9de83, 0xbf0a09cc, 0x3ea789bf, 0x3e918db4); - asm volatile("vfmax.vf v2, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfmax.vf v8, v16, %[A]" ::[A] "f"(fscalar_32)); // 0.51733643, 0.31252080, 0.47358772, 0.13738893, // 0.11194360, -0.33637357, 0.83680850, 0.95792335, // 0.41251704, 0.27496886, -0.06774041, -0.19357064, // -0.48802575, -0.53921199, 0.32722279, 0.28428423 - VCMP_U32(8, v2, 0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, + VCMP_U32(8, v8, 0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, 0xbeac3928, 0x3f563915, 0x3f753a77, 0x3ed3356f, 0x3e8cc8b8, 0xbd8abb7c, 0xbe463762, 0xbef9de83, 0xbf0a09cc, 0x3ea789bf, 0x3e918db4); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // -0.8274885128397702 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 
0xbfea7ac9308eccb6); @@ -245,20 +245,20 @@ void TEST_CASE3(void) { // 0.1886883982201846, 0.9268486384654282, // -0.9639662652720637, -0.2101071651393955, // 0.0859470276611187, -0.7001184217853196 - VLOAD_64(v4, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, + VLOAD_64(v16, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, 0xbfb807daf023fbb0, 0x3fd601252797bdcc, 0x3fd2ce29819fd630, 0xbfd716dac57e4298, 0x3fe5e4a85818c992, 0xbfe6455756bf47f8, 0x3fde2865724428b0, 0x3fc826f101bec2b8, 0x3feda8be79d1a2f4, 0xbfeed8cfc7f94e06, 0xbfcae4caa576e8a8, 0x3fb6009fd8fe2f80, 0xbfe6675ebf9ca482); - asm volatile("vfmax.vf v2, v4, %[A]" ::[A] "f"(dscalar_64)); + asm volatile("vfmax.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); // 0.9632225672084347, 0.4671677538923853, -0.1749283847947720, // -0.0938698612480795, 0.3438198935172891, 0.2938331380713377, // -0.3607699326176230, 0.6841623039857032, // -0.6959644979744999, 0.4712155929452235, 0.1886883982201846, // 0.9268486384654282, -0.8274885128397702, -0.2101071651393955, // 0.0859470276611187, -0.7001184217853196 - VCMP_U64(9, v2, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, + VCMP_U64(9, v8, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, 0xbfb807daf023fbb0, 0x3fd601252797bdcc, 0x3fd2ce29819fd630, 0xbfd716dac57e4298, 0x3fe5e4a85818c992, 0xbfe6455756bf47f8, 0x3fde2865724428b0, 0x3fc826f101bec2b8, 0x3feda8be79d1a2f4, @@ -269,25 +269,25 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.0368 BOX_HALF_IN_FLOAT(fscalar_16, 0x28b5); // -0.5518, 0.6772, 0.2756, 0.4421, 0.2081, 0.6250, 0.4136, // 0.8203, -0.3535, -0.1597, -0.5244, 0.8696, 0.1744, 0.0793, // -0.2445, -0.4031 - VLOAD_16(v4, 0xb86a, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, 0x3a90, + VLOAD_16(v16, 0xb86a, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, 0x3a90, 0xb5a8, 0xb11c, 0xb832, 0x3af5, 
0x3195, 0x2d14, 0xb3d3, 0xb673); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmax.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v8); + asm volatile("vfmax.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.0000, 0.6772, 0.0000, 0.4421, 0.0000, 0.6250, 0.0000, // 0.8203, 0.0000, 0.0368, 0.0000, 0.8696, 0.0000, 0.0793, // 0.0000, 0.0368 - VCMP_U16(10, v2, 0x0, 0x396b, 0x0, 0x3713, 0x0, 0x3900, 0x0, 0x3a90, 0x0, + VCMP_U16(10, v8, 0x0, 0x396b, 0x0, 0x3713, 0x0, 0x3900, 0x0, 0x3a90, 0x0, 0x28b5, 0x0, 0x3af5, 0x0, 0x2d14, 0x0, 0x28b5); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.94383347 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbf719f12); @@ -295,23 +295,23 @@ void TEST_CASE4(void) { // 0.11194360, -0.33637357, 0.83680850, 0.95792335, // 0.41251704, 0.27496886, -0.06774041, -0.19357064, // -0.48802575, -0.53921199, 0.32722279, 0.28428423 - VLOAD_32(v4, 0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, + VLOAD_32(v16, 0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, 0xbeac3928, 0x3f563915, 0x3f753a77, 0x3ed3356f, 0x3e8cc8b8, 0xbd8abb7c, 0xbe463762, 0xbef9de83, 0xbf0a09cc, 0x3ea789bf, 0x3e918db4); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmax.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v8); + asm volatile("vfmax.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.00000000, 0.31252080, 0.00000000, 0.13738893, // 0.00000000, -0.33637357, 0.00000000, 0.95792335, // 0.00000000, 0.27496886, 0.00000000, -0.19357064, // 0.00000000, -0.53921199, 0.00000000, 0.28428423 - VCMP_U32(11, v2, 0x0, 0x3ea002ba, 0x0, 0x3e0cafaf, 0x0, 0xbeac3928, 0x0, + VCMP_U32(11, v8, 0x0, 0x3ea002ba, 0x0, 0x3e0cafaf, 0x0, 0xbeac3928, 0x0, 0x3f753a77, 0x0, 0x3e8cc8b8, 0x0, 0xbe463762, 0x0, 0xbf0a09cc, 0x0, 0x3e918db4); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // -0.8274885128397702 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfea7ac9308eccb6); @@ -323,15 +323,15 @@ void TEST_CASE4(void) { // 
0.1886883982201846, 0.9268486384654282, // -0.9639662652720637, -0.2101071651393955, // 0.0859470276611187, -0.7001184217853196 - VLOAD_64(v4, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, + VLOAD_64(v16, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, 0xbfb807daf023fbb0, 0x3fd601252797bdcc, 0x3fd2ce29819fd630, 0xbfd716dac57e4298, 0x3fe5e4a85818c992, 0xbfe6455756bf47f8, 0x3fde2865724428b0, 0x3fc826f101bec2b8, 0x3feda8be79d1a2f4, 0xbfeed8cfc7f94e06, 0xbfcae4caa576e8a8, 0x3fb6009fd8fe2f80, 0xbfe6675ebf9ca482); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmax.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCLEAR(v8); + asm volatile("vfmax.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); // 0.0000000000000000, 0.4671677538923853, 0.0000000000000000, // -0.0938698612480795, 0.0000000000000000, // 0.2938331380713377, 0.0000000000000000, 0.6841623039857032, @@ -339,7 +339,7 @@ void TEST_CASE4(void) { // 0.9268486384654282, 0.0000000000000000, // -0.2101071651393955, 0.0000000000000000, // -0.7001184217853196 - VCMP_U64(12, v2, 0x0, 0x3fdde613942dab28, 0x0, 0xbfb807daf023fbb0, 0x0, + VCMP_U64(12, v8, 0x0, 0x3fdde613942dab28, 0x0, 0xbfb807daf023fbb0, 0x0, 0x3fd2ce29819fd630, 0x0, 0x3fe5e4a85818c992, 0x0, 0x3fde2865724428b0, 0x0, 0x3feda8be79d1a2f4, 0x0, 0xbfcae4caa576e8a8, 0x0, 0xbfe6675ebf9ca482); diff --git a/sw/riscvTests/isa/rv64uv/vfmin.c b/sw/riscvTests/isa/rv64uv/vfmin.c index b21e6768..9841b175 100644 --- a/sw/riscvTests/isa/rv64uv/vfmin.c +++ b/sw/riscvTests/isa/rv64uv/vfmin.c @@ -10,31 +10,31 @@ // Simple random test with similar values + 1 subnormal void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.9390, 0.9619, 0.9121, 0.6265, 0.1193, -0.4492, -0.3562, // 0.2365, -0.9897, 0.8638, -0.0379, -0.6201, 0.1809, 0.9824, // -0.9922, -0.6851 - VLOAD_16(v4, 0x3b83, 0x3bb2, 0x3b4c, 0x3903, 0x2fa2, 0xb730, 0xb5b3, 0x3391, + VLOAD_16(v16, 0x3b83, 0x3bb2, 0x3b4c, 0x3903, 0x2fa2, 0xb730, 0xb5b3, 0x3391, 0xbbeb, 
0x3ae9, 0xa8da, 0xb8f6, 0x31ca, 0x3bdc, 0xbbf0, 0xb97b); // 0.9795, -0.1069, 0.7070, -0.7305, 0.0516, -0.1321, 0.3828, // 0.0230, -0.9424, -0.8652, -0.3865, -0.1719, -0.7021, 0.1664, // 0.7026, -0.8535 - VLOAD_16(v6, 0x3bd6, 0xaed8, 0x39a8, 0xb9d8, 0x2a9a, 0xb03a, 0x3620, 0x25e2, + VLOAD_16(v24, 0x3bd6, 0xaed8, 0x39a8, 0xb9d8, 0x2a9a, 0xb03a, 0x3620, 0x25e2, 0xbb8a, 0xbaec, 0xb62f, 0xb180, 0xb99e, 0x3153, 0x399f, 0xbad4); - asm volatile("vfmin.vv v2, v4, v6"); + asm volatile("vfmin.vv v8, v16, v24"); // 0.9390, -0.1069, 0.7070, -0.7305, 0.0516, -0.4492, -0.3562, // 0.0230, -0.9897, -0.8652, -0.3865, -0.6201, -0.7021, 0.1664, // -0.9922, -0.8535 - VCMP_U16(1, v2, 0x3b83, 0xaed8, 0x39a8, 0xb9d8, 0x2a9a, 0xb730, 0xb5b3, + VCMP_U16(1, v8, 0x3b83, 0xaed8, 0x39a8, 0xb9d8, 0x2a9a, 0xb730, 0xb5b3, 0x25e2, 0xbbeb, 0xbaec, 0xb62f, 0xb8f6, 0xb99e, 0x3153, 0xbbf0, 0xbad4); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.33477312, -0.14129849, -0.94871885, 0.83600986, // -0.28163233, -0.47814348, 0.77408481, -0.54823470, // -0.72419900, 0.27495387, -0.76835793, 0.71516198, // 0.32305571, -0.76598656, -0.36499983, -0.52954155 - VLOAD_32(v4, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, + VLOAD_32(v16, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, 0xbef4cf39, 0x3f462a6c, 0xbf0c591c, 0xbf39651b, 0x3e8cc6c1, 0xbf44b31b, 0x3f3714db, 0x3ea5678f, 0xbf4417b2, 0xbebae142, 0xbf079009); @@ -42,29 +42,29 @@ void TEST_CASE1(void) { // 0.74085194, -0.99458516, -0.73125440, -0.46319291, // -0.76140571, -0.82557100, 0.15205561, 0.39971715, // -0.32876521, -0.53106725, 0.84727478, 0.21940185 - VLOAD_32(v6, 0x3f5f314f, 0x3eccd36f, 0x3ecd67c7, 0x3f7946a1, 0x3f3da879, + VLOAD_32(v24, 0x3f5f314f, 0x3eccd36f, 0x3ecd67c7, 0x3f7946a1, 0x3f3da879, 0xbf7e9d22, 0xbf3b337d, 0xbeed279f, 0xbf42eb7c, 0xbf53589f, 0x3e1bb477, 0x3ecca7ba, 0xbea853ea, 0xbf07f406, 0x3f58e700, 0x3e60aae1); - asm volatile("vfmin.vv v2, v4, v6"); + asm volatile("vfmin.vv v8, v16, v24"); // 0.33477312, 
-0.14129849, -0.94871885, 0.83600986, // -0.28163233, -0.99458516, -0.73125440, -0.54823470, // -0.76140571, -0.82557100, -0.76835793, 0.39971715, // -0.32876521, -0.76598656, -0.36499983, -0.52954155 - VCMP_U32(2, v2, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, + VCMP_U32(2, v8, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, 0xbf7e9d22, 0xbf3b337d, 0xbf0c591c, 0xbf42eb7c, 0xbf53589f, 0xbf44b31b, 0x3ecca7ba, 0xbea853ea, 0xbf4417b2, 0xbebae142, 0xbf079009); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 0.9387726994461698, 0.8517969615002949, -0.8864275043807637, // 0.3621349692771021, 0.5392486258321831, -0.1288714247798126, // -0.9149173505741688, -0.9378576380992047, // -0.2263428385339852, 0.1016628884386184, 0.4783549203499486, // 0.5394596797016060, 0.7861587828590215, 0.0194772848204161, // -0.9126826319328591, 0.3997583898469530 - VLOAD_64(v4, 0x3fee0a6d0b4ff74a, 0x3feb41ebb38f3ae2, 0xbfec5d9d36b2e38c, + VLOAD_64(v16, 0x3fee0a6d0b4ff74a, 0x3feb41ebb38f3ae2, 0xbfec5d9d36b2e38c, 0x3fd72d3826721e9c, 0x3fe14186558b96e0, 0xbfc07edbdd68bb68, 0xbfed4700c06849e8, 0xbfee02ee057e1390, 0xbfccf8cd5897f8a0, 0x3fba06943d0f8e20, 0x3fde9d5df4b22860, 0x3fe14340f23a8770, @@ -76,20 +76,20 @@ void TEST_CASE1(void) { // 0.1176441554686278, 0.4929731878752270, 0.0942028280153233, // 0.9496420237972776, -0.4549651855719854, -0.9663401540020158, // 0.4114236885680320 - VLOAD_64(v6, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, + VLOAD_64(v24, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, 0x3fe7790b32975e1a, 0x3fef5cd4d43cbc4e, 0xbfe5fecd00a37bfa, 0xbfe6b0de55ba0314, 0x3fe900abee2f95f8, 0x3fe5caf19e1f4324, 0x3fbe1ded684da4d0, 0x3fdf8cdf69eea758, 0x3fb81dad31843b10, 0x3fee6377ab63bade, 0xbfdd1e264c366a78, 0xbfeeec422fc80224, 0x3fda54c405ccc2c0); - asm volatile("vfmin.vv v2, v4, v6"); + asm volatile("vfmin.vv v8, v16, v24"); // 0.4808082103120717, 0.7218925128932789, -0.9454618185734458, // 0.3621349692771021, 
0.5392486258321831, -0.6873536121819364, // -0.9149173505741688, -0.9378576380992047, // -0.2263428385339852, 0.1016628884386184, 0.4783549203499486, // 0.0942028280153233, 0.7861587828590215, -0.4549651855719854, // -0.9663401540020158, 0.3997583898469530 - VCMP_U64(3, v2, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, + VCMP_U64(3, v8, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, 0x3fd72d3826721e9c, 0x3fe14186558b96e0, 0xbfe5fecd00a37bfa, 0xbfed4700c06849e8, 0xbfee02ee057e1390, 0xbfccf8cd5897f8a0, 0x3fba06943d0f8e20, 0x3fde9d5df4b22860, 0x3fb81dad31843b10, @@ -101,32 +101,32 @@ void TEST_CASE1(void) { // Simple random test with similar values + 1 subnormal (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.9390, 0.9619, 0.9121, 0.6265, 0.1193, -0.4492, -0.3562, // 0.2365, -0.9897, 0.8638, -0.0379, -0.6201, 0.1809, 0.9824, // -0.9922, -0.6851 - VLOAD_16(v4, 0x3b83, 0x3bb2, 0x3b4c, 0x3903, 0x2fa2, 0xb730, 0xb5b3, 0x3391, + VLOAD_16(v16, 0x3b83, 0x3bb2, 0x3b4c, 0x3903, 0x2fa2, 0xb730, 0xb5b3, 0x3391, 0xbbeb, 0x3ae9, 0xa8da, 0xb8f6, 0x31ca, 0x3bdc, 0xbbf0, 0xb97b); // 0.9795, -0.1069, 0.7070, -0.7305, 0.0516, -0.1321, 0.3828, // 0.0230, -0.9424, -0.8652, -0.3865, -0.1719, -0.7021, 0.1664, // 0.7026, -0.8535 - VLOAD_16(v6, 0x3bd6, 0xaed8, 0x39a8, 0xb9d8, 0x2a9a, 0xb03a, 0x3620, 0x25e2, + VLOAD_16(v24, 0x3bd6, 0xaed8, 0x39a8, 0xb9d8, 0x2a9a, 0xb03a, 0x3620, 0x25e2, 0xbb8a, 0xbaec, 0xb62f, 0xb180, 0xb99e, 0x3153, 0x399f, 0xbad4); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmin.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfmin.vv v8, v16, v24, v0.t"); // 0.0000, -0.1069, 0.0000, -0.7305, 0.0000, -0.4492, 0.0000, // 0.0230, 0.0000, -0.8652, 0.0000, -0.6201, 0.0000, 0.1664, // 0.0000, -0.8535 - VCMP_U16(4, v2, 0x0, 0xaed8, 0x0, 0xb9d8, 0x0, 0xb730, 0x0, 0x25e2, 0x0, + VCMP_U16(4, v8, 0x0, 0xaed8, 0x0, 0xb9d8, 0x0, 0xb730, 0x0, 0x25e2, 0x0, 0xbaec, 0x0, 
0xb8f6, 0x0, 0x3153, 0x0, 0xbad4); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.33477312, -0.14129849, -0.94871885, 0.83600986, // -0.28163233, -0.47814348, 0.77408481, -0.54823470, // -0.72419900, 0.27495387, -0.76835793, 0.71516198, // 0.32305571, -0.76598656, -0.36499983, -0.52954155 - VLOAD_32(v4, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, + VLOAD_32(v16, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, 0xbef4cf39, 0x3f462a6c, 0xbf0c591c, 0xbf39651b, 0x3e8cc6c1, 0xbf44b31b, 0x3f3714db, 0x3ea5678f, 0xbf4417b2, 0xbebae142, 0xbf079009); @@ -134,30 +134,30 @@ void TEST_CASE2(void) { // 0.74085194, -0.99458516, -0.73125440, -0.46319291, // -0.76140571, -0.82557100, 0.15205561, 0.39971715, // -0.32876521, -0.53106725, 0.84727478, 0.21940185 - VLOAD_32(v6, 0x3f5f314f, 0x3eccd36f, 0x3ecd67c7, 0x3f7946a1, 0x3f3da879, + VLOAD_32(v24, 0x3f5f314f, 0x3eccd36f, 0x3ecd67c7, 0x3f7946a1, 0x3f3da879, 0xbf7e9d22, 0xbf3b337d, 0xbeed279f, 0xbf42eb7c, 0xbf53589f, 0x3e1bb477, 0x3ecca7ba, 0xbea853ea, 0xbf07f406, 0x3f58e700, 0x3e60aae1); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmin.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfmin.vv v8, v16, v24, v0.t"); // 0.00000000, -0.14129849, 0.00000000, 0.83600986, // 0.00000000, -0.99458516, 0.00000000, -0.54823470, // 0.00000000, -0.82557100, 0.00000000, 0.39971715, // 0.00000000, -0.76598656, 0.00000000, -0.52954155 - VCMP_U32(5, v2, 0x0, 0xbe10b08d, 0x0, 0x3f5604be, 0x0, 0xbf7e9d22, 0x0, + VCMP_U32(5, v8, 0x0, 0xbe10b08d, 0x0, 0x3f5604be, 0x0, 0xbf7e9d22, 0x0, 0xbf0c591c, 0x0, 0xbf53589f, 0x0, 0x3ecca7ba, 0x0, 0xbf4417b2, 0x0, 0xbf079009); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 0.9387726994461698, 0.8517969615002949, -0.8864275043807637, // 0.3621349692771021, 0.5392486258321831, -0.1288714247798126, // -0.9149173505741688, -0.9378576380992047, // -0.2263428385339852, 0.1016628884386184, 0.4783549203499486, // 0.5394596797016060, 0.7861587828590215, 
0.0194772848204161, // -0.9126826319328591, 0.3997583898469530 - VLOAD_64(v4, 0x3fee0a6d0b4ff74a, 0x3feb41ebb38f3ae2, 0xbfec5d9d36b2e38c, + VLOAD_64(v16, 0x3fee0a6d0b4ff74a, 0x3feb41ebb38f3ae2, 0xbfec5d9d36b2e38c, 0x3fd72d3826721e9c, 0x3fe14186558b96e0, 0xbfc07edbdd68bb68, 0xbfed4700c06849e8, 0xbfee02ee057e1390, 0xbfccf8cd5897f8a0, 0x3fba06943d0f8e20, 0x3fde9d5df4b22860, 0x3fe14340f23a8770, @@ -169,22 +169,22 @@ void TEST_CASE2(void) { // 0.1176441554686278, 0.4929731878752270, 0.0942028280153233, // 0.9496420237972776, -0.4549651855719854, -0.9663401540020158, // 0.4114236885680320 - VLOAD_64(v6, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, + VLOAD_64(v24, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, 0x3fe7790b32975e1a, 0x3fef5cd4d43cbc4e, 0xbfe5fecd00a37bfa, 0xbfe6b0de55ba0314, 0x3fe900abee2f95f8, 0x3fe5caf19e1f4324, 0x3fbe1ded684da4d0, 0x3fdf8cdf69eea758, 0x3fb81dad31843b10, 0x3fee6377ab63bade, 0xbfdd1e264c366a78, 0xbfeeec422fc80224, 0x3fda54c405ccc2c0); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmin.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfmin.vv v8, v16, v24, v0.t"); // 0.0000000000000000, 0.7218925128932789, 0.0000000000000000, // 0.3621349692771021, 0.0000000000000000, -0.6873536121819364, // 0.0000000000000000, -0.9378576380992047, 0.0000000000000000, // 0.1016628884386184, 0.0000000000000000, 0.0942028280153233, // 0.0000000000000000, -0.4549651855719854, 0.0000000000000000, // 0.3997583898469530 - VCMP_U64(6, v2, 0x0, 0x3fe719be53c35314, 0x0, 0x3fd72d3826721e9c, 0x0, + VCMP_U64(6, v8, 0x0, 0x3fe719be53c35314, 0x0, 0x3fd72d3826721e9c, 0x0, 0xbfe5fecd00a37bfa, 0x0, 0xbfee02ee057e1390, 0x0, 0x3fba06943d0f8e20, 0x0, 0x3fb81dad31843b10, 0x0, 0xbfdd1e264c366a78, 0x0, 0x3fd995a436ac6f1c); @@ -193,24 +193,24 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.4434 
BOX_HALF_IN_FLOAT(fscalar_16, 0x3718); // -0.2537, 0.5449, 0.2070, -0.5752, -0.3008, 0.0165, -0.8447, // 0.6279, 0.6802, 0.7300, 0.7720, -0.8525, 0.5264, -0.5249, // -0.9839, 0.4875 - VLOAD_16(v4, 0xb40f, 0x385c, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, 0x3906, + VLOAD_16(v16, 0xb40f, 0x385c, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, 0x3906, 0x3971, 0x39d7, 0x3a2d, 0xbad2, 0x3836, 0xb833, 0xbbdf, 0x37cd); - asm volatile("vfmin.vf v2, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfmin.vf v8, v16, %[A]" ::[A] "f"(fscalar_16)); // -0.2537, 0.4434, 0.2070, -0.5752, -0.3008, 0.0165, -0.8447, // 0.4434, 0.4434, 0.4434, 0.4434, -0.8525, 0.4434, -0.5249, // -0.9839, 0.4434 - VCMP_U16(7, v2, 0xb40f, 0x3718, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, + VCMP_U16(7, v8, 0xb40f, 0x3718, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, 0x3718, 0x3718, 0x3718, 0x3718, 0xbad2, 0x3718, 0xb833, 0xbbdf, 0x3718); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // 0.59499639 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x3f1851af); @@ -218,22 +218,22 @@ void TEST_CASE3(void) { // 0.22003150, -0.67564118, -0.90376341, 0.16465612, // -0.15494362, -0.01763406, 0.97777683, -0.91671157, // 0.81712914, -0.10151604, 0.03442690, -0.14597759 - VLOAD_32(v4, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 0x3e614ff0, + VLOAD_32(v16, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 0x3e614ff0, 0xbf2cf6d2, 0xbf675d0a, 0x3e289b9d, 0xbe1ea98a, 0xbc90754e, 0x3f7a4f95, 0xbf6aad9c, 0x3f512f60, 0xbdcfe7a4, 0x3d0d0338, 0xbe157b26); - asm volatile("vfmin.vf v2, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfmin.vf v8, v16, %[A]" ::[A] "f"(fscalar_32)); // -0.94244474, -0.52559608, -0.72424960, -0.67824948, // 0.22003150, -0.67564118, -0.90376341, 0.16465612, // -0.15494362, -0.01763406, 0.59499639, -0.91671157, // 0.59499639, -0.10151604, 0.03442690, -0.14597759 - VCMP_U32(8, v2, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 0x3e614ff0, + VCMP_U32(8, v8, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 
0x3e614ff0, 0xbf2cf6d2, 0xbf675d0a, 0x3e289b9d, 0xbe1ea98a, 0xbc90754e, 0x3f1851af, 0xbf6aad9c, 0x3f1851af, 0xbdcfe7a4, 0x3d0d0338, 0xbe157b26); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.8631130564395617 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3feb9e9f45c51298); @@ -243,20 +243,20 @@ void TEST_CASE3(void) { // -0.7695072044924456, -0.6726760621514143, // -0.9995830020822822, 0.2485224245452053, 0.7025040357726613, // -0.6452676560401207, 0.5090044889036880, 0.0801949752856408 - VLOAD_64(v4, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, + VLOAD_64(v16, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, 0xbfb96a9acd667320, 0x3fe7c36063b54b1e, 0x3fd3b68465cb4b28, 0xbfe59c154d684914, 0xbfd52e7fee0af3fc, 0xbfe89fcd92aa9b24, 0xbfe5868ff2f7c1a4, 0xbfeffc957df296c0, 0x3fcfcf9532df44b8, 0x3fe67ae9be5e7376, 0xbfe4a6085afb7c12, 0x3fe049c3c82b791e, 0x3fb487a86c27c560); - asm volatile("vfmin.vf v2, v4, %[A]" ::[A] "f"(dscalar_64)); + asm volatile("vfmin.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); // -0.0203711476424431, 0.4824066080711997, 0.5516514149436702, // -0.0992829085793798, 0.7425996730256406, 0.3080149644930992, // -0.6753031265127754, -0.3309631180416657, -0.7695072044924456, // -0.6726760621514143, -0.9995830020822822, 0.2485224245452053, // 0.7025040357726613, -0.6452676560401207, 0.5090044889036880, // 0.0801949752856408 - VCMP_U64(9, v2, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, + VCMP_U64(9, v8, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, 0xbfb96a9acd667320, 0x3fe7c36063b54b1e, 0x3fd3b68465cb4b28, 0xbfe59c154d684914, 0xbfd52e7fee0af3fc, 0xbfe89fcd92aa9b24, 0xbfe5868ff2f7c1a4, 0xbfeffc957df296c0, 0x3fcfcf9532df44b8, @@ -267,25 +267,25 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.4434 BOX_HALF_IN_FLOAT(fscalar_16, 0x3718); // -0.2537, 
0.5449, 0.2070, -0.5752, -0.3008, 0.0165, // -0.8447, 0.6279, 0.6802, 0.7300, 0.7720, -0.8525, 0.5264, // -0.5249, -0.9839, 0.4875 - VLOAD_16(v4, 0xb40f, 0x385c, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, 0x3906, + VLOAD_16(v16, 0xb40f, 0x385c, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, 0x3906, 0x3971, 0x39d7, 0x3a2d, 0xbad2, 0x3836, 0xb833, 0xbbdf, 0x37cd); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmin.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v8); + asm volatile("vfmin.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.0000, 0.4434, 0.0000, -0.5752, 0.0000, 0.0165, 0.0000, // 0.4434, 0.0000, 0.4434, 0.0000, -0.8525, 0.0000, // -0.5249, 0.0000, 0.4434 - VCMP_U16(10, v2, 0x0, 0x3718, 0x0, 0xb89a, 0x0, 0x2437, 0x0, 0x3718, 0x0, + VCMP_U16(10, v8, 0x0, 0x3718, 0x0, 0xb89a, 0x0, 0x2437, 0x0, 0x3718, 0x0, 0x3718, 0x0, 0xbad2, 0x0, 0xb833, 0x0, 0x3718); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // 0.59499639 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x3f1851af); @@ -293,23 +293,23 @@ void TEST_CASE4(void) { // 0.22003150, -0.67564118, -0.90376341, 0.16465612, // -0.15494362, -0.01763406, 0.97777683, -0.91671157, // 0.81712914, -0.10151604, 0.03442690, -0.14597759 - VLOAD_32(v4, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 0x3e614ff0, + VLOAD_32(v16, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 0x3e614ff0, 0xbf2cf6d2, 0xbf675d0a, 0x3e289b9d, 0xbe1ea98a, 0xbc90754e, 0x3f7a4f95, 0xbf6aad9c, 0x3f512f60, 0xbdcfe7a4, 0x3d0d0338, 0xbe157b26); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmin.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v8); + asm volatile("vfmin.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.00000000, -0.52559608, 0.00000000, -0.67824948, // 0.00000000, -0.67564118, 0.00000000, 0.16465612, // 0.00000000, -0.01763406, 0.00000000, -0.91671157, // 0.00000000, -0.10151604, 0.00000000, -0.14597759 - VCMP_U32(11, v2, 0x0, 0xbf068d77, 0x0, 0xbf2da1c2, 0x0, 0xbf2cf6d2, 0x0, + VCMP_U32(11, v8, 0x0, 
0xbf068d77, 0x0, 0xbf2da1c2, 0x0, 0xbf2cf6d2, 0x0, 0x3e289b9d, 0x0, 0xbc90754e, 0x0, 0xbf6aad9c, 0x0, 0xbdcfe7a4, 0x0, 0xbe157b26); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.8631130564395617 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3feb9e9f45c51298); @@ -320,15 +320,15 @@ void TEST_CASE4(void) { // -0.6726760621514143, -0.9995830020822822, // 0.2485224245452053, 0.7025040357726613, // -0.6452676560401207, 0.5090044889036880, 0.0801949752856408 - VLOAD_64(v4, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, + VLOAD_64(v16, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, 0xbfb96a9acd667320, 0x3fe7c36063b54b1e, 0x3fd3b68465cb4b28, 0xbfe59c154d684914, 0xbfd52e7fee0af3fc, 0xbfe89fcd92aa9b24, 0xbfe5868ff2f7c1a4, 0xbfeffc957df296c0, 0x3fcfcf9532df44b8, 0x3fe67ae9be5e7376, 0xbfe4a6085afb7c12, 0x3fe049c3c82b791e, 0x3fb487a86c27c560); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmin.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCLEAR(v8); + asm volatile("vfmin.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); // 0.0000000000000000, 0.4824066080711997, 0.0000000000000000, // -0.0992829085793798, 0.0000000000000000, // 0.3080149644930992, 0.0000000000000000, @@ -336,7 +336,7 @@ void TEST_CASE4(void) { // -0.6726760621514143, 0.0000000000000000, // 0.2485224245452053, 0.0000000000000000, // -0.6452676560401207, 0.0000000000000000, 0.0801949752856408 - VCMP_U64(12, v2, 0x0, 0x3fdedfbff74290e0, 0x0, 0xbfb96a9acd667320, 0x0, + VCMP_U64(12, v8, 0x0, 0x3fdedfbff74290e0, 0x0, 0xbfb96a9acd667320, 0x0, 0x3fd3b68465cb4b28, 0x0, 0xbfd52e7fee0af3fc, 0x0, 0xbfe5868ff2f7c1a4, 0x0, 0x3fcfcf9532df44b8, 0x0, 0xbfe4a6085afb7c12, 0x0, 0x3fb487a86c27c560); diff --git a/sw/riscvTests/isa/rv64uv/vfmsac.c b/sw/riscvTests/isa/rv64uv/vfmsac.c index 8fe1053d..e342f37b 100644 --- a/sw/riscvTests/isa/rv64uv/vfmsac.c +++ b/sw/riscvTests/isa/rv64uv/vfmsac.c @@ -10,36 +10,36 @@ // Simple random test with similar values void 
TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.9175, 0.0740, -0.0362, 0.2961, 0.6226, -0.8032, 0.7749, // 0.8188, 0.2019, 0.4885, 0.5669, -0.1743, 0.4404, 0.0618, // 0.8252, -0.5947 - VLOAD_16(v4, 0x3b57, 0x2cbc, 0xa8a3, 0x34bd, 0x38fb, 0xba6d, 0x3a33, 0x3a8d, + VLOAD_16(v16, 0x3b57, 0x2cbc, 0xa8a3, 0x34bd, 0x38fb, 0xba6d, 0x3a33, 0x3a8d, 0x3276, 0x37d1, 0x3889, 0xb194, 0x370c, 0x2bea, 0x3a9a, 0xb8c2); // 0.2812, -0.6733, 0.3289, 0.9609, -0.6841, 0.5488, -0.1401, // 0.5591, 0.2759, -0.6973, 0.1418, 0.2389, -0.3308, 0.8867, // 0.8936, 0.3611 - VLOAD_16(v6, 0x3480, 0xb963, 0x3543, 0x3bb0, 0xb979, 0x3864, 0xb07c, 0x3879, + VLOAD_16(v24, 0x3480, 0xb963, 0x3543, 0x3bb0, 0xb979, 0x3864, 0xb07c, 0x3879, 0x346a, 0xb994, 0x308a, 0x33a5, 0xb54b, 0x3b18, 0x3b26, 0x35c7); // 0.4780, -0.1810, 0.2800, 0.7529, 0.6484, -0.9526, // 0.5947, 1.0000, -0.1874, 0.2949, -0.4280, 0.6934, 0.5444, // 0.8823, -0.8911, 0.7603 - VLOAD_16(v2, 0x37a6, 0xb1cb, 0x347b, 0x3a06, 0x3930, 0xbb9f, 0x38c2, 0x3c00, + VLOAD_16(v8, 0x37a6, 0xb1cb, 0x347b, 0x3a06, 0x3930, 0xbb9f, 0x38c2, 0x3c00, 0xb1ff, 0x34b8, 0xb6d9, 0x398c, 0x385b, 0x3b0f, 0xbb21, 0x3a15); - asm volatile("vfmsac.vv v2, v4, v6"); + asm volatile("vfmsac.vv v8, v16, v24"); // -0.2200, 0.1312, -0.2920, -0.4683, -1.0742, 0.5117, -0.7031, // -0.5420, 0.2430, -0.6357, 0.5083, -0.7349, -0.6904, -0.8276, // 1.6289, -0.9751 - VCMP_U16(1, v2, 0xb30a, 0x3033, 0xb4ac, 0xb77e, 0xbc4c, 0x3818, 0xb9a0, + VCMP_U16(1, v8, 0xb30a, 0x3033, 0xb4ac, 0xb77e, 0xbc4c, 0x3818, 0xb9a0, 0xb856, 0x33c7, 0xb916, 0x3811, 0xb9e1, 0xb985, 0xba9f, 0x3e84, 0xbbcd); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.90310860, 0.30282700, 0.54854167, -0.38732994, // 0.92121714, 0.99595129, -0.10263380, 0.83759040, // -0.23468767, 0.03914077, -0.46234205, 0.38326120, // 0.36417511, -0.50103557, 0.36991179, 0.44718841 - VLOAD_32(v4, 0xbf673220, 0x3e9b0c24, 0x3f0c6d3a, 0xbec6501c, 0x3f6bd4e3, + VLOAD_32(v16, 0xbf673220, 0x3e9b0c24, 0x3f0c6d3a, 0xbec6501c, 
0x3f6bd4e3, 0x3f7ef6aa, 0xbdd231ab, 0x3f566c53, 0xbe7051f7, 0x3d205212, 0xbeecb819, 0x3ec43ad0, 0x3eba7529, 0xbf0043de, 0x3ebd6514, 0x3ee4f5e1); @@ -47,7 +47,7 @@ void TEST_CASE1(void) { // -0.30795863, 0.57084304, 0.51653886, -0.97366458, // 0.49300706, 0.62932760, 0.45846274, -0.73850167, // -0.42686453, -0.31419462, -0.47245970, -0.87721694 - VLOAD_32(v6, 0x3f5992e8, 0xbd3a1866, 0xbf3ef767, 0xbe9d1e99, 0xbe9dacc1, + VLOAD_32(v24, 0x3f5992e8, 0xbd3a1866, 0xbf3ef767, 0xbe9d1e99, 0xbe9dacc1, 0x3f1222c5, 0x3f043be4, 0xbf794215, 0x3efc6b6c, 0x3f211b9d, 0x3eeabba1, 0xbf3d0e72, 0xbeda8dfd, 0xbea0de1e, 0xbef1e63d, 0xbf60914a); @@ -55,29 +55,29 @@ void TEST_CASE1(void) { // 0.12284227, -0.04006640, -0.93113720, -0.93526161, // -0.27461481, 0.64110506, 0.61687475, -0.43741968, // 0.70502371, 0.37014356, -0.98105848, 0.77993429 - VLOAD_32(v2, 0xbf44a48d, 0x3f5f2230, 0xbf2df961, 0xbf7c6055, 0x3dfb94bb, + VLOAD_32(v8, 0xbf44a48d, 0x3f5f2230, 0xbf2df961, 0xbf7c6055, 0x3dfb94bb, 0xbd241caa, 0xbf6e5f02, 0xbf6f6d4e, 0xbe8c9a50, 0x3f241f76, 0x3f1deb81, 0xbedff579, 0x3f347c6f, 0x3ebd8375, 0xbf7b26a6, 0x3f47a9c6); - asm volatile("vfmsac.vv v2, v4, v6"); + asm volatile("vfmsac.vv v8, v16, v24"); // 0.00058579, -0.88537389, 0.27039492, 1.10470641, // -0.40653905, 0.60859829, 0.87812287, 0.11972952, // 0.15891212, -0.61647266, -0.82884133, 0.15438065, // -0.86047715, -0.21272089, 0.80629003, -1.17221546 - VCMP_U32(2, v2, 0x3a198f11, 0xbf62a7dd, 0x3e8a7134, 0x3f8d6705, 0xbed025e3, + VCMP_U32(2, v8, 0x3a198f11, 0xbf62a7dd, 0x3e8a7134, 0x3f8d6705, 0xbed025e3, 0x3f1bcd19, 0x3f60cca9, 0x3df534be, 0x3e22b9dd, 0xbf1dd127, 0xbf542ef3, 0x3e1e15f6, 0xbf5c483b, 0xbe59d381, 0x3f4e6907, 0xbf960b29); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 0.0971325394189311, 0.6403859199401045, 0.3478142243141771, // -0.4702414117546168, 0.8862438155310881, 0.6157878617136987, // -0.9954501284062294, -0.2761157935600853, // -0.7189549700888722, -0.2302799669824283, 0.0093666993515229, // 
0.9188774299961215, -0.4297410504980956, 0.2729294776457381, // -0.1419575372981836, -0.8472908703054822 - VLOAD_64(v4, 0x3fb8ddad982e8680, 0x3fe47e0a9cdec59e, 0x3fd6429697a0d4f8, + VLOAD_64(v16, 0x3fb8ddad982e8680, 0x3fe47e0a9cdec59e, 0x3fd6429697a0d4f8, 0xbfde186f6f2d8030, 0x3fec5c1bfd7f9ffe, 0x3fe3b488beeab100, 0xbfefdaba3a49b85e, 0xbfd1abe193cffa54, 0xbfe701adda7a81f6, 0xbfcd79d05f8e86d0, 0x3f832ed91b170d00, 0x3fed6771a3dbb538, @@ -89,7 +89,7 @@ void TEST_CASE1(void) { // 0.6584804564152213, 0.6542532086910551, -0.2215058802905889, // 0.3203723346938081, 0.0696368102348055, 0.8882580549203218, // 0.7725843936650791 - VLOAD_64(v6, 0xbfceef27f9efdac8, 0xbfd1854f968baf44, 0x3fe3786e1cd2fff2, + VLOAD_64(v24, 0xbfceef27f9efdac8, 0xbfd1854f968baf44, 0x3fe3786e1cd2fff2, 0xbfd99a7e695862ec, 0x3fe3271f9ab3593a, 0xbfee97117f34eb4c, 0x3fe008fcb4283a76, 0x3fe27915a4d94fb2, 0xbfdd20c1bc974608, 0x3fe512459b2b7912, 0x3fe4efa46cd43256, 0xbfcc5a4dffdc2170, @@ -101,13 +101,13 @@ void TEST_CASE1(void) { // -0.6132901056934972, 0.1207753636997857, 0.4420874266235846, // 0.8256868703569773, 0.0642541522901756, -0.3012484644971416, // 0.7323810741358745 - VLOAD_64(v2, 0xbfecbecd32eadc10, 0x3fe1dd69cb65674e, 0xbfd98675ea3b69b0, + VLOAD_64(v8, 0xbfecbecd32eadc10, 0x3fe1dd69cb65674e, 0xbfd98675ea3b69b0, 0x3f9b6ebebe00e300, 0x3fea5fb000835cf4, 0x3fd22bc772ca399c, 0xbfde9956d534a0f8, 0xbfbfbf79a29f1810, 0x3fdd5a42d93f2348, 0xbfe3a012925d3f1c, 0x3fbeeb225d40ca30, 0x3fdc4b290fd48cd4, 0x3fea6c06df1d6f14, 0x3fb072f5cab7f020, 0xbfd347a7a3bf1174, 0x3fe76faa6f33ef10); - asm volatile("vfmsac.vv v2, v4, v6"); + asm volatile("vfmsac.vv v8, v16, v24"); // 0.8748168483008159, -0.7335910925744179, 0.6104597169258920, // 0.1613325712615846, -0.2937408044039052, -0.8725789038271781, // -0.0207080522817558, -0.0353809253176660, @@ -115,7 +115,7 @@ void TEST_CASE1(void) { // -0.1146471705942074, -0.6456241806340295, // -0.9633640140188219, -0.0452482140478748, 0.1751535385353780, // -1.3869847774287927 - 
VCMP_U64(3, v2, 0x3febfe7fe72e2334, 0xbfe7799406e7cf1f, 0x3fe388e2d0f71ba3, + VCMP_U64(3, v8, 0x3febfe7fe72e2334, 0xbfe7799406e7cf1f, 0x3fe388e2d0f71ba3, 0x3fc4a68bb2ac8e62, 0xbfd2cca63b1a97a5, 0xbfebec2a97e3c096, 0xbf95347ddd418906, 0xbfa21d72da487f01, 0xbfc0d260a75ceb46, 0x3fdd8bc273f9289e, 0xbfbd59845847f323, 0xbfe4a8f40aaa8efa, @@ -127,37 +127,37 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.9175, 0.0740, -0.0362, 0.2961, 0.6226, -0.8032, 0.7749, // 0.8188, 0.2019, 0.4885, 0.5669, -0.1743, 0.4404, 0.0618, // 0.8252, -0.5947 - VLOAD_16(v4, 0x3b57, 0x2cbc, 0xa8a3, 0x34bd, 0x38fb, 0xba6d, 0x3a33, 0x3a8d, + VLOAD_16(v16, 0x3b57, 0x2cbc, 0xa8a3, 0x34bd, 0x38fb, 0xba6d, 0x3a33, 0x3a8d, 0x3276, 0x37d1, 0x3889, 0xb194, 0x370c, 0x2bea, 0x3a9a, 0xb8c2); // 0.2812, -0.6733, 0.3289, 0.9609, -0.6841, 0.5488, -0.1401, // 0.5591, 0.2759, -0.6973, 0.1418, 0.2389, -0.3308, 0.8867, // 0.8936, 0.3611 - VLOAD_16(v6, 0x3480, 0xb963, 0x3543, 0x3bb0, 0xb979, 0x3864, 0xb07c, 0x3879, + VLOAD_16(v24, 0x3480, 0xb963, 0x3543, 0x3bb0, 0xb979, 0x3864, 0xb07c, 0x3879, 0x346a, 0xb994, 0x308a, 0x33a5, 0xb54b, 0x3b18, 0x3b26, 0x35c7); VLOAD_8(v0, 0xAA, 0xAA); // 0.4780, -0.1810, 0.2800, 0.7529, 0.6484, -0.9526, // 0.5947, 1.0000, -0.1874, 0.2949, -0.4280, 0.6934, 0.5444, // 0.8823, -0.8911, 0.7603 - VLOAD_16(v2, 0x37a6, 0xb1cb, 0x347b, 0x3a06, 0x3930, 0xbb9f, 0x38c2, 0x3c00, + VLOAD_16(v8, 0x37a6, 0xb1cb, 0x347b, 0x3a06, 0x3930, 0xbb9f, 0x38c2, 0x3c00, 0xb1ff, 0x34b8, 0xb6d9, 0x398c, 0x385b, 0x3b0f, 0xbb21, 0x3a15); - asm volatile("vfmsac.vv v2, v4, v6, v0.t"); + asm volatile("vfmsac.vv v8, v16, v24, v0.t"); // 0.4780, 0.1312, 0.2800, -0.4683, 0.6484, 0.5117, 0.5947, // -0.5420, -0.1874, -0.6357, -0.4280, -0.7349, 0.5444, // -0.8276, -0.8911, -0.9751 - VCMP_U16(4, v2, 0x37a6, 0x3033, 0x347b, 0xb77e, 0x3930, 0x3818, 0x38c2, + VCMP_U16(4, v8, 0x37a6, 
0x3033, 0x347b, 0xb77e, 0x3930, 0x3818, 0x38c2, 0xb856, 0xb1ff, 0xb916, 0xb6d9, 0xb9e1, 0x385b, 0xba9f, 0xbb21, 0xbbcd); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.90310860, 0.30282700, 0.54854167, -0.38732994, // 0.92121714, 0.99595129, -0.10263380, 0.83759040, // -0.23468767, 0.03914077, -0.46234205, 0.38326120, // 0.36417511, -0.50103557, 0.36991179, 0.44718841 - VLOAD_32(v4, 0xbf673220, 0x3e9b0c24, 0x3f0c6d3a, 0xbec6501c, 0x3f6bd4e3, + VLOAD_32(v16, 0xbf673220, 0x3e9b0c24, 0x3f0c6d3a, 0xbec6501c, 0x3f6bd4e3, 0x3f7ef6aa, 0xbdd231ab, 0x3f566c53, 0xbe7051f7, 0x3d205212, 0xbeecb819, 0x3ec43ad0, 0x3eba7529, 0xbf0043de, 0x3ebd6514, 0x3ee4f5e1); @@ -165,7 +165,7 @@ void TEST_CASE2(void) { // -0.30795863, 0.57084304, 0.51653886, -0.97366458, // 0.49300706, 0.62932760, 0.45846274, -0.73850167, // -0.42686453, -0.31419462, -0.47245970, -0.87721694 - VLOAD_32(v6, 0x3f5992e8, 0xbd3a1866, 0xbf3ef767, 0xbe9d1e99, 0xbe9dacc1, + VLOAD_32(v24, 0x3f5992e8, 0xbd3a1866, 0xbf3ef767, 0xbe9d1e99, 0xbe9dacc1, 0x3f1222c5, 0x3f043be4, 0xbf794215, 0x3efc6b6c, 0x3f211b9d, 0x3eeabba1, 0xbf3d0e72, 0xbeda8dfd, 0xbea0de1e, 0xbef1e63d, 0xbf60914a); @@ -174,29 +174,29 @@ void TEST_CASE2(void) { // 0.12284227, -0.04006640, -0.93113720, -0.93526161, // -0.27461481, 0.64110506, 0.61687475, -0.43741968, // 0.70502371, 0.37014356, -0.98105848, 0.77993429 - VLOAD_32(v2, 0xbf44a48d, 0x3f5f2230, 0xbf2df961, 0xbf7c6055, 0x3dfb94bb, + VLOAD_32(v8, 0xbf44a48d, 0x3f5f2230, 0xbf2df961, 0xbf7c6055, 0x3dfb94bb, 0xbd241caa, 0xbf6e5f02, 0xbf6f6d4e, 0xbe8c9a50, 0x3f241f76, 0x3f1deb81, 0xbedff579, 0x3f347c6f, 0x3ebd8375, 0xbf7b26a6, 0x3f47a9c6); - asm volatile("vfmsac.vv v2, v4, v6, v0.t"); + asm volatile("vfmsac.vv v8, v16, v24, v0.t"); // -0.76813585, -0.88537389, -0.67958647, 1.10470641, // 0.12284227, 0.60859829, -0.93113720, 0.11972952, // -0.27461481, -0.61647266, 0.61687475, 0.15438065, // 0.70502371, -0.21272089, -0.98105848, -1.17221546 - VCMP_U32(5, v2, 0xbf44a48d, 0xbf62a7dd, 0xbf2df961, 
0x3f8d6705, 0x3dfb94bb, + VCMP_U32(5, v8, 0xbf44a48d, 0xbf62a7dd, 0xbf2df961, 0x3f8d6705, 0x3dfb94bb, 0x3f1bcd19, 0xbf6e5f02, 0x3df534be, 0xbe8c9a50, 0xbf1dd127, 0x3f1deb81, 0x3e1e15f6, 0x3f347c6f, 0xbe59d381, 0xbf7b26a6, 0xbf960b29); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 0.0971325394189311, 0.6403859199401045, 0.3478142243141771, // -0.4702414117546168, 0.8862438155310881, 0.6157878617136987, // -0.9954501284062294, -0.2761157935600853, // -0.7189549700888722, -0.2302799669824283, 0.0093666993515229, // 0.9188774299961215, -0.4297410504980956, 0.2729294776457381, // -0.1419575372981836, -0.8472908703054822 - VLOAD_64(v4, 0x3fb8ddad982e8680, 0x3fe47e0a9cdec59e, 0x3fd6429697a0d4f8, + VLOAD_64(v16, 0x3fb8ddad982e8680, 0x3fe47e0a9cdec59e, 0x3fd6429697a0d4f8, 0xbfde186f6f2d8030, 0x3fec5c1bfd7f9ffe, 0x3fe3b488beeab100, 0xbfefdaba3a49b85e, 0xbfd1abe193cffa54, 0xbfe701adda7a81f6, 0xbfcd79d05f8e86d0, 0x3f832ed91b170d00, 0x3fed6771a3dbb538, @@ -208,7 +208,7 @@ void TEST_CASE2(void) { // 0.6584804564152213, 0.6542532086910551, -0.2215058802905889, // 0.3203723346938081, 0.0696368102348055, 0.8882580549203218, // 0.7725843936650791 - VLOAD_64(v6, 0xbfceef27f9efdac8, 0xbfd1854f968baf44, 0x3fe3786e1cd2fff2, + VLOAD_64(v24, 0xbfceef27f9efdac8, 0xbfd1854f968baf44, 0x3fe3786e1cd2fff2, 0xbfd99a7e695862ec, 0x3fe3271f9ab3593a, 0xbfee97117f34eb4c, 0x3fe008fcb4283a76, 0x3fe27915a4d94fb2, 0xbfdd20c1bc974608, 0x3fe512459b2b7912, 0x3fe4efa46cd43256, 0xbfcc5a4dffdc2170, @@ -221,20 +221,20 @@ void TEST_CASE2(void) { // -0.6132901056934972, 0.1207753636997857, 0.4420874266235846, // 0.8256868703569773, 0.0642541522901756, -0.3012484644971416, // 0.7323810741358745 - VLOAD_64(v2, 0xbfecbecd32eadc10, 0x3fe1dd69cb65674e, 0xbfd98675ea3b69b0, + VLOAD_64(v8, 0xbfecbecd32eadc10, 0x3fe1dd69cb65674e, 0xbfd98675ea3b69b0, 0x3f9b6ebebe00e300, 0x3fea5fb000835cf4, 0x3fd22bc772ca399c, 0xbfde9956d534a0f8, 0xbfbfbf79a29f1810, 0x3fdd5a42d93f2348, 0xbfe3a012925d3f1c, 0x3fbeeb225d40ca30, 
0x3fdc4b290fd48cd4, 0x3fea6c06df1d6f14, 0x3fb072f5cab7f020, 0xbfd347a7a3bf1174, 0x3fe76faa6f33ef10); - asm volatile("vfmsac.vv v2, v4, v6, v0.t"); + asm volatile("vfmsac.vv v8, v16, v24, v0.t"); // -0.8982912058335177, -0.7335910925744179, -0.3988318240568800, // 0.1613325712615846, 0.8241806039831361, -0.8725789038271781, // -0.4781090814672235, -0.0353809253176660, 0.4586341020154134, // 0.4616552479316257, 0.1207753636997857, -0.6456241806340295, // 0.8256868703569773, -0.0452482140478748, -0.3012484644971416, // -1.3869847774287927 - VCMP_U64(6, v2, 0xbfecbecd32eadc10, 0xbfe7799406e7cf1f, 0xbfd98675ea3b69b0, + VCMP_U64(6, v8, 0xbfecbecd32eadc10, 0xbfe7799406e7cf1f, 0xbfd98675ea3b69b0, 0x3fc4a68bb2ac8e62, 0x3fea5fb000835cf4, 0xbfebec2a97e3c096, 0xbfde9956d534a0f8, 0xbfa21d72da487f01, 0x3fdd5a42d93f2348, 0x3fdd8bc273f9289e, 0x3fbeeb225d40ca30, 0xbfe4a8f40aaa8efa, @@ -245,29 +245,29 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.3911 BOX_HALF_IN_FLOAT(fscalar_16, 0x3642); // 0.3203, -0.8521, 0.0744, 0.9370, 0.2056, 0.2866, -0.8379, // -0.2668, -0.0878, -0.5703, 0.1272, -0.6606, -0.6919, 0.9189, // 0.6245, 0.9932 - VLOAD_16(v4, 0x3520, 0xbad1, 0x2cc3, 0x3b7f, 0x3294, 0x3496, 0xbab4, 0xb445, + VLOAD_16(v16, 0x3520, 0xbad1, 0x2cc3, 0x3b7f, 0x3294, 0x3496, 0xbab4, 0xb445, 0xad9f, 0xb890, 0x3012, 0xb949, 0xb989, 0x3b5a, 0x38ff, 0x3bf2); // 0.8706, 0.4900, -0.9497, 0.4727, -0.7168, 0.0167, -0.3606, // -0.1565, -0.5142, 0.8271, -0.4783, 0.6318, 0.0842, // -0.6646, 0.1454, -0.3020 - VLOAD_16(v2, 0x3af7, 0x37d7, 0xbb99, 0x3790, 0xb9bc, 0x2445, 0xb5c5, 0xb102, + VLOAD_16(v8, 0x3af7, 0x37d7, 0xbb99, 0x3790, 0xb9bc, 0x2445, 0xb5c5, 0xb102, 0xb81d, 0x3a9e, 0xb7a7, 0x390e, 0x2d63, 0xb951, 0x30a7, 0xb4d5); - asm volatile("vfmsac.vf v2, %[A], v4" ::[A] "f"(fscalar_16)); + asm volatile("vfmsac.vf v8, %[A], v16" ::[A] "f"(fscalar_16)); // -0.7451, -0.8232, 
0.9790, -0.1062, 0.7974, 0.0955, 0.0330, // 0.0521, 0.4797, -1.0498, 0.5278, -0.8901, -0.3547, 1.0234, // 0.0989, 0.6904 - VCMP_U16(7, v2, 0xb9f6, 0xba96, 0x3bd5, 0xaecc, 0x3a61, 0x2e1b, 0x2836, + VCMP_U16(7, v8, 0xb9f6, 0xba96, 0x3bd5, 0xaecc, 0x3a61, 0x2e1b, 0x2836, 0x2aac, 0x37ad, 0xbc33, 0x3839, 0xbb1f, 0xb5ad, 0x3c19, 0x2e54, 0x3986); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.39704049 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbecb48e4); @@ -275,7 +275,7 @@ void TEST_CASE3(void) { // -0.41457745, -0.69918746, 0.38535324, 0.83301985, // 0.79336989, -0.03326649, -0.85931808, 0.92554229, // -0.77742523, 0.47821125, -0.53653014, -0.32442030 - VLOAD_32(v4, 0x3ee07b86, 0x3e0ca367, 0x3e1fc1b7, 0x3efab4b6, 0xbed4437f, + VLOAD_32(v16, 0x3ee07b86, 0x3e0ca367, 0x3e1fc1b7, 0x3efab4b6, 0xbed4437f, 0xbf32fdf3, 0x3ec54d05, 0x3f5540ca, 0x3f4b1a4a, 0xbd084272, 0xbf5bfc45, 0x3f6cf057, 0xbf470557, 0x3ef4d81b, 0xbf095a0a, 0xbea61a6b); @@ -283,22 +283,22 @@ void TEST_CASE3(void) { // 0.34779423, 0.22721651, 0.47497734, -0.58483958, // -0.24916913, 0.13750601, -0.99799657, 0.66137350, // 0.58565408, 0.68887448, -0.74538875, 0.99311894 - VLOAD_32(v2, 0xbf3b2f7d, 0x3f5f929c, 0xbe272c0c, 0xbe9b02e0, 0x3eb21216, + VLOAD_32(v8, 0xbf3b2f7d, 0x3f5f929c, 0xbe272c0c, 0xbe9b02e0, 0x3eb21216, 0x3e68ab72, 0x3ef3303b, 0xbf15b80c, 0xbe7f2631, 0x3e0cce60, 0xbf7f7cb4, 0x3f294fc6, 0x3f15ed6d, 0x3f305a14, 0xbf3ed1cc, 0x3f7e3d0b); - asm volatile("vfmsac.vf v2, %[A], v4" ::[A] "f"(fscalar_32)); + asm volatile("vfmsac.vf v8, %[A], v16" ::[A] "f"(fscalar_32)); // 0.55711401, -0.92786121, 0.10131072, 0.10834149, // -0.18319020, 0.05038923, -0.62797821, 0.25409698, // -0.06583084, -0.12429786, 1.33918071, -1.02885127, // -0.27698478, -0.87874371, 0.95841295, -0.86431098 - VCMP_U32(8, v2, 0x3f0e9f06, 0xbf6d8850, 0x3dcf7bff, 0x3ddde223, 0xbe3b9636, + VCMP_U32(8, v8, 0x3f0e9f06, 0xbf6d8850, 0x3dcf7bff, 0x3ddde223, 0xbe3b9636, 0x3d4e64ec, 0xbf20c32e, 0x3e821900, 0xbd86d252, 0xbdfe8fe1, 
0x3fab6a45, 0xbf83b166, 0xbe8dd0f3, 0xbf60f559, 0x3f755a8d, 0xbf5d437b); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.0070730785066928 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3f7cf8a90de48000); @@ -308,7 +308,7 @@ void TEST_CASE3(void) { // 0.4892172254017777, 0.9508074316559227, -0.9022151172016701, // -0.7929839752648156, 0.5513143449560454, 0.4823446191982377, // -0.7486658065787619 - VLOAD_64(v4, 0xbfd6abf15f87c3c4, 0x3fe2c1dfc88b26c8, 0x3fe248735bfda932, + VLOAD_64(v16, 0xbfd6abf15f87c3c4, 0x3fe2c1dfc88b26c8, 0x3fe248735bfda932, 0x3fd22fdebc43a768, 0x3fd9be624bf72ff4, 0xbfef5ff39c079aea, 0xbfef0d621514ca4c, 0x3fcd4b43685929d8, 0x3fc71fca543f2eb8, 0x3fdf4f55c3ef6448, 0x3fee6d03b4f830b0, 0xbfecdef23ccad0bc, @@ -321,20 +321,20 @@ void TEST_CASE3(void) { // -0.2107692441818434, 0.1746722346734710, // -0.5298547863982788, 0.2397543330794352, // -0.8347981409736787, -0.6198539479673024 - VLOAD_64(v2, 0xbfe8551415c9d6cc, 0xbfa92b9053839560, 0xbfd7c617af2cedf8, + VLOAD_64(v8, 0xbfe8551415c9d6cc, 0xbfa92b9053839560, 0xbfd7c617af2cedf8, 0xbfe38202ae18c034, 0xbfd20a241ae21e00, 0x3fd693c8d73a46c0, 0x3fbe1efc293b2500, 0xbfca3b9e173fd0c8, 0xbfe62a8ed24449ee, 0x3fe74332cc30c46e, 0xbfcafa7c9161bf78, 0x3fc65ba8e7b88cc0, 0xbfe0f4920666b5a4, 0x3fceb0451dd34270, 0xbfeab6aa9747cb24, 0xbfe3d5d7f25a1d14); - asm volatile("vfmsac.vf v2, %[A], v4" ::[A] "f"(dscalar_64)); + asm volatile("vfmsac.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); // 0.7578799543094064, 0.0533064903276678, 0.3755067691720115, // 0.6116303877234964, 0.2847140709489031, -0.3597049427834890, // -0.1245238936163020, 0.2065631231810536, 0.6939728614971534, // -0.7234926612877562, 0.2174943797906927, // -0.1810536730273635, 0.5242459484866814, // -0.2358548434356951, 0.8382098023325486, 0.6145585759420944 - VCMP_U64(9, v2, 0x3fe8408d7641b126, 0x3fab4afd013e6639, 0x3fd8084d8b414e68, + VCMP_U64(9, v8, 0x3fe8408d7641b126, 0x3fab4afd013e6639, 0x3fd8084d8b414e68, 0x3fe39279e4106415, 
0x3fd238c15ddbf0a4, 0xbfd70567e15dbc9b, 0xbfbfe0cc42a710ce, 0x3fca70a9114fa5b7, 0x3fe63506930e2352, 0xbfe726da14e40fb7, 0x3fcbd6db1821e5e6, 0xbfc72cc44a3c91ef, @@ -345,30 +345,30 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.3911 BOX_HALF_IN_FLOAT(fscalar_16, 0x3642); // 0.3203, -0.8521, 0.0744, 0.9370, 0.2056, 0.2866, // -0.8379, -0.2668, -0.0878, -0.5703, 0.1272, -0.6606, // -0.6919, 0.9189, 0.6245, 0.9932 - VLOAD_16(v4, 0x3520, 0xbad1, 0x2cc3, 0x3b7f, 0x3294, 0x3496, 0xbab4, 0xb445, + VLOAD_16(v16, 0x3520, 0xbad1, 0x2cc3, 0x3b7f, 0x3294, 0x3496, 0xbab4, 0xb445, 0xad9f, 0xb890, 0x3012, 0xb949, 0xb989, 0x3b5a, 0x38ff, 0x3bf2); VLOAD_8(v0, 0xAA, 0xAA); // 0.8706, 0.4900, -0.9497, 0.4727, -0.7168, 0.0167, // -0.3606, -0.1565, -0.5142, 0.8271, -0.4783, 0.6318, // 0.0842, -0.6646, 0.1454, -0.3020 - VLOAD_16(v2, 0x3af7, 0x37d7, 0xbb99, 0x3790, 0xb9bc, 0x2445, 0xb5c5, 0xb102, + VLOAD_16(v8, 0x3af7, 0x37d7, 0xbb99, 0x3790, 0xb9bc, 0x2445, 0xb5c5, 0xb102, 0xb81d, 0x3a9e, 0xb7a7, 0x390e, 0x2d63, 0xb951, 0x30a7, 0xb4d5); - asm volatile("vfmsac.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); + asm volatile("vfmsac.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_16)); // 0.8706, -0.8232, -0.9497, -0.1062, -0.7168, 0.0955, // -0.3606, 0.0521, -0.5142, -1.0498, -0.4783, -0.8901, // 0.0842, 1.0234, 0.1454, 0.6904 - VCMP_U16(10, v2, 0x3af7, 0xba96, 0xbb99, 0xaecc, 0xb9bc, 0x2e1b, 0xb5c5, + VCMP_U16(10, v8, 0x3af7, 0xba96, 0xbb99, 0xaecc, 0xb9bc, 0x2e1b, 0xb5c5, 0x2aac, 0xb81d, 0xbc33, 0xb7a7, 0xbb1f, 0x2d63, 0x3c19, 0x30a7, 0x3986); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.39704049 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbecb48e4); @@ -376,7 +376,7 @@ void TEST_CASE4(void) { // -0.41457745, -0.69918746, 0.38535324, 0.83301985, // 0.79336989, -0.03326649, -0.85931808, 0.92554229, // -0.77742523, 0.47821125, -0.53653014, -0.32442030 - 
VLOAD_32(v4, 0x3ee07b86, 0x3e0ca367, 0x3e1fc1b7, 0x3efab4b6, 0xbed4437f, + VLOAD_32(v16, 0x3ee07b86, 0x3e0ca367, 0x3e1fc1b7, 0x3efab4b6, 0xbed4437f, 0xbf32fdf3, 0x3ec54d05, 0x3f5540ca, 0x3f4b1a4a, 0xbd084272, 0xbf5bfc45, 0x3f6cf057, 0xbf470557, 0x3ef4d81b, 0xbf095a0a, 0xbea61a6b); @@ -385,22 +385,22 @@ void TEST_CASE4(void) { // 0.34779423, 0.22721651, 0.47497734, -0.58483958, // -0.24916913, 0.13750601, -0.99799657, 0.66137350, // 0.58565408, 0.68887448, -0.74538875, 0.99311894 - VLOAD_32(v2, 0xbf3b2f7d, 0x3f5f929c, 0xbe272c0c, 0xbe9b02e0, 0x3eb21216, + VLOAD_32(v8, 0xbf3b2f7d, 0x3f5f929c, 0xbe272c0c, 0xbe9b02e0, 0x3eb21216, 0x3e68ab72, 0x3ef3303b, 0xbf15b80c, 0xbe7f2631, 0x3e0cce60, 0xbf7f7cb4, 0x3f294fc6, 0x3f15ed6d, 0x3f305a14, 0xbf3ed1cc, 0x3f7e3d0b); - asm volatile("vfmsac.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); + asm volatile("vfmsac.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_32)); // -0.73119336, -0.92786121, -0.16325396, 0.10834149, // 0.34779423, 0.05038923, 0.47497734, 0.25409698, // -0.24916913, -0.12429786, -0.99799657, -1.02885127, // 0.58565408, -0.87874371, -0.74538875, -0.86431098 - VCMP_U32(11, v2, 0xbf3b2f7d, 0xbf6d8850, 0xbe272c0c, 0x3ddde223, 0x3eb21216, + VCMP_U32(11, v8, 0xbf3b2f7d, 0xbf6d8850, 0xbe272c0c, 0x3ddde223, 0x3eb21216, 0x3d4e64ec, 0x3ef3303b, 0x3e821900, 0xbe7f2631, 0xbdfe8fe1, 0xbf7f7cb4, 0xbf83b166, 0x3f15ed6d, 0xbf60f559, 0xbf3ed1cc, 0xbf5d437b); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.0070730785066928 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3f7cf8a90de48000); @@ -411,7 +411,7 @@ void TEST_CASE4(void) { // 0.9508074316559227, -0.9022151172016701, // -0.7929839752648156, 0.5513143449560454, // 0.4823446191982377, -0.7486658065787619 - VLOAD_64(v4, 0xbfd6abf15f87c3c4, 0x3fe2c1dfc88b26c8, 0x3fe248735bfda932, + VLOAD_64(v16, 0xbfd6abf15f87c3c4, 0x3fe2c1dfc88b26c8, 0x3fe248735bfda932, 0x3fd22fdebc43a768, 0x3fd9be624bf72ff4, 0xbfef5ff39c079aea, 0xbfef0d621514ca4c, 0x3fcd4b43685929d8, 
0x3fc71fca543f2eb8, 0x3fdf4f55c3ef6448, 0x3fee6d03b4f830b0, 0xbfecdef23ccad0bc, @@ -426,13 +426,13 @@ void TEST_CASE4(void) { // -0.2107692441818434, 0.1746722346734710, // -0.5298547863982788, 0.2397543330794352, // -0.8347981409736787, -0.6198539479673024 - VLOAD_64(v2, 0xbfe8551415c9d6cc, 0xbfa92b9053839560, 0xbfd7c617af2cedf8, + VLOAD_64(v8, 0xbfe8551415c9d6cc, 0xbfa92b9053839560, 0xbfd7c617af2cedf8, 0xbfe38202ae18c034, 0xbfd20a241ae21e00, 0x3fd693c8d73a46c0, 0x3fbe1efc293b2500, 0xbfca3b9e173fd0c8, 0xbfe62a8ed24449ee, 0x3fe74332cc30c46e, 0xbfcafa7c9161bf78, 0x3fc65ba8e7b88cc0, 0xbfe0f4920666b5a4, 0x3fceb0451dd34270, 0xbfeab6aa9747cb24, 0xbfe3d5d7f25a1d14); - asm volatile("vfmsac.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_64)); + asm volatile("vfmsac.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); // -0.7603855538897846, 0.0533064903276678, // -0.3714656077097227, 0.6116303877234964, // -0.2818689596683441, -0.3597049427834890, 0.1176602936422064, @@ -440,7 +440,7 @@ void TEST_CASE4(void) { // -0.2107692441818434, -0.1810536730273635, // -0.5298547863982788, -0.2358548434356951, // -0.8347981409736787, 0.6145585759420944 - VCMP_U64(12, v2, 0xbfe8551415c9d6cc, 0x3fab4afd013e6639, 0xbfd7c617af2cedf8, + VCMP_U64(12, v8, 0xbfe8551415c9d6cc, 0x3fab4afd013e6639, 0xbfd7c617af2cedf8, 0x3fe39279e4106415, 0xbfd20a241ae21e00, 0xbfd70567e15dbc9b, 0x3fbe1efc293b2500, 0x3fca70a9114fa5b7, 0xbfe62a8ed24449ee, 0xbfe726da14e40fb7, 0xbfcafa7c9161bf78, 0xbfc72cc44a3c91ef, diff --git a/sw/riscvTests/isa/rv64uv/vfmsub.c b/sw/riscvTests/isa/rv64uv/vfmsub.c index 56cf0814..012e726e 100644 --- a/sw/riscvTests/isa/rv64uv/vfmsub.c +++ b/sw/riscvTests/isa/rv64uv/vfmsub.c @@ -10,36 +10,36 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.6821, 0.7749, 0.0299, 0.0299, -0.5410, -0.1865, 0.1885, // 0.0228, -0.5410, 0.1902, -0.9160, -0.3511, -0.9287, -0.9961, // -0.6509, -0.3940 - VLOAD_16(v4, 0x3975, 0x3a33, 0x27a5, 0x27a5, 
0xb854, 0xb1f8, 0x3208, 0x25d8, + VLOAD_16(v16, 0x3975, 0x3a33, 0x27a5, 0x27a5, 0xb854, 0xb1f8, 0x3208, 0x25d8, 0xb854, 0x3216, 0xbb54, 0xb59e, 0xbb6e, 0xbbf8, 0xb935, 0xb64e); // -0.9048, 0.1698, 0.2411, 0.2411, -0.6147, 0.9580, 0.5117, // 0.8330, -0.6147, 0.5591, 0.7031, -0.1556, 0.2397, 0.8154, // 0.1646, 0.2693 - VLOAD_16(v6, 0xbb3d, 0x316f, 0x33b7, 0x33b7, 0xb8eb, 0x3baa, 0x3818, 0x3aaa, + VLOAD_16(v24, 0xbb3d, 0x316f, 0x33b7, 0x33b7, 0xb8eb, 0x3baa, 0x3818, 0x3aaa, 0xb8eb, 0x3879, 0x39a0, 0xb0fb, 0x33ac, 0x3a86, 0x3144, 0x344f); // -0.2292, 0.7578, -0.7427, -0.7427, 0.1119, 0.2939, -0.2983, // -0.3608, 0.1119, -0.5562, -0.0629, -0.1968, -0.8638, -0.2700, // -0.7690, 0.4216 - VLOAD_16(v2, 0xb356, 0x3a10, 0xb9f1, 0xb9f1, 0x2f29, 0x34b4, 0xb4c6, 0xb5c6, + VLOAD_16(v8, 0xb356, 0x3a10, 0xb9f1, 0xb9f1, 0x2f29, 0x34b4, 0xb4c6, 0xb5c6, 0x2f29, 0xb873, 0xac06, 0xb24c, 0xbae9, 0xb452, 0xba27, 0x36bf); - asm volatile("vfmsub.vv v2, v4, v6"); + asm volatile("vfmsub.vv v8, v16, v24"); // 0.7485, 0.4175, -0.2632, -0.2632, 0.5542, -1.0127, -0.5679, // -0.8413, 0.5542, -0.6650, -0.6455, 0.2247, 0.5625, // -0.5464, 0.3359, -0.4355 - VCMP_U16(1, v2, 0x39fd, 0x36ae, 0xb436, 0xb436, 0x386f, 0xbc0d, 0xb88b, + VCMP_U16(1, v8, 0x39fd, 0x36ae, 0xb436, 0xb436, 0x386f, 0xbc0d, 0xb88b, 0xbabb, 0x386f, 0xb952, 0xb92a, 0x3331, 0x3880, 0xb85f, 0x3560, 0xb6f8); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.74553698, -0.16736358, -0.11869104, -0.85860848, // -0.66138542, -0.68386567, -0.45389724, -0.12761629, // -0.95652348, 0.71083277, 0.24187960, 0.01609672, // -0.58867335, -0.55222940, -0.67417240, -0.06725668 - VLOAD_32(v4, 0xbf3edb83, 0xbe2b615c, 0xbdf3144a, 0xbf5bcdc4, 0xbf29508e, + VLOAD_32(v16, 0xbf3edb83, 0xbe2b615c, 0xbdf3144a, 0xbf5bcdc4, 0xbf29508e, 0xbf2f11d2, 0xbee86538, 0xbe02add8, 0xbf74deb9, 0x3f35f923, 0x3e77af49, 0x3c83dd45, 0xbf16b34c, 0xbf0d5ee8, 0xbf2c9690, 0xbd89bddf); @@ -47,7 +47,7 @@ void TEST_CASE1(void) { // 0.49659184, -0.09389434, 0.05757815, -0.13087828, // 
-0.73042232, -0.79662275, -0.96801740, 0.03017101, // 0.70759267, -0.35606241, 0.18037270, -0.35372722 - VLOAD_32(v6, 0xbe333510, 0xbf50bc3a, 0xbea39d6f, 0x3e6916cc, 0x3efe4149, + VLOAD_32(v24, 0xbe333510, 0xbf50bc3a, 0xbea39d6f, 0x3e6916cc, 0x3efe4149, 0xbdc04bad, 0x3d6bd711, 0xbe0604f5, 0xbf3afcf5, 0xbf4bef78, 0xbf77cffd, 0x3cf72932, 0x3f3524cb, 0xbeb64dd0, 0x3e38b39f, 0xbeb51bbc); @@ -55,29 +55,29 @@ void TEST_CASE1(void) { // 0.90907055, 0.07037155, 0.07339484, 0.17415307, // -0.61978233, -0.04939311, 0.56138068, -0.51601994, // -0.80625385, -0.31227911, 0.91474551, 0.78424871 - VLOAD_32(v2, 0x3f6dc3b9, 0x3e3e2fab, 0xbed7cb1e, 0xbf4afb22, 0x3f68b8d9, + VLOAD_32(v8, 0x3f6dc3b9, 0x3e3e2fab, 0xbed7cb1e, 0xbf4afb22, 0x3f68b8d9, 0x3d901ef6, 0x3d965009, 0x3e32552f, 0xbf1eaa0e, 0xbd4a506e, 0x3f0fb6a5, 0xbf0419e2, 0xbf4e66a7, 0xbe9fe30c, 0x3f6a2cc3, 0x3f48c486); - asm volatile("vfmsub.vv v2, v4, v6"); + asm volatile("vfmsub.vv v8, v16, v24"); // -0.51742357, 0.78428787, 0.36958539, 0.45315993, // -1.09783781, 0.04576965, -0.09089187, // 0.10865352, 1.32325864, 0.76151252, 1.10380387, // -0.03847724, -0.23297250, 0.52851212, -0.79706889, 0.30098125 - VCMP_U32(2, v2, 0xbf0475df, 0x3f48c717, 0x3ebd3a4c, 0x3ee80493, 0xbf8c85f3, + VCMP_U32(2, v8, 0xbf0475df, 0x3f48c717, 0x3ebd3a4c, 0x3ee80493, 0xbf8c85f3, 0x3d3b78f5, 0xbdba2584, 0x3dde85bc, 0x3fa9608a, 0x3f42f27c, 0x3f8d4973, 0xbd1d9a4f, 0xbe6e9058, 0x3f074c92, 0xbf4c0cb5, 0x3e9a1a37); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 0.3304351537536074, -0.7528197595818080, -0.7530937950641439, // -0.7994160811423281, 0.0797802827518117, // -0.6361377214985149, 0.1748070414096887, 0.8251843575618585, // 0.0699629848559165, -0.8195631240215655, -0.4843919596862658, // -0.9206444585342115, 0.9791118581337512, 0.5143481050333210, // 0.5856279779979670, -0.5536419150604011 - VLOAD_64(v4, 0x3fd525d97cb482ac, 0xbfe8171976e5f762, 0xbfe819582893df6e, + VLOAD_64(v16, 0x3fd525d97cb482ac, 0xbfe8171976e5f762, 
0xbfe819582893df6e, 0xbfe994d1088ce396, 0x3fb46c7b0948dfc0, 0xbfe45b3d7eb2d188, 0x3fc66013befb8968, 0x3fea67e9069cc438, 0x3fb1e9181be2ff10, 0xbfea39dc71d5c454, 0xbfdf00472253102c, 0xbfed75eb5e14bcf6, @@ -90,7 +90,7 @@ void TEST_CASE1(void) { // -0.2492371358416354, 0.4131695659117063, // -0.9085600854772706, -0.3952216110937696, 0.0817663443229741, // 0.1439804529607418 - VLOAD_64(v6, 0x3fda997577954be0, 0xbfe8faec3ae9f10e, 0xbfec3ef992a7ed86, + VLOAD_64(v24, 0x3fda997577954be0, 0xbfe8faec3ae9f10e, 0xbfec3ef992a7ed86, 0x3fdec970c7c16d8c, 0x3fede18e86a8f206, 0xbfe135863f697cd2, 0xbfc25c6af06e7710, 0x3fbeb257d63cc310, 0xbfd0a5eaf1337874, 0xbfc734de337f3100, 0xbfcfe700a1b1bb78, 0x3fda715ec352c558, @@ -102,20 +102,20 @@ void TEST_CASE1(void) { // 0.1628442001087833, 0.2337303194688813, 0.1926350000139823, // -0.6653994610877216, -0.6745212179353777, 0.8748797125997727, // -0.7324641634418565 - VLOAD_64(v2, 0xbfcbfa50c7635df8, 0xbfeeade562a749c2, 0x3fbf27194abf66e0, + VLOAD_64(v8, 0xbfcbfa50c7635df8, 0xbfeeade562a749c2, 0x3fbf27194abf66e0, 0x3fe87ddfd38d1514, 0xbfe2f86de1af9792, 0x3fed319f612fa6f6, 0x3fc4157886016dd8, 0x3fd06d5afcf59780, 0xbfed29db86ef2934, 0x3fc4d81428e7be98, 0x3fcdeae00719eac8, 0x3fc8a84380900070, 0xbfe54af3cf84bab0, 0xbfe595ad856fb278, 0x3febff03bd3198ce, 0xbfe77058af6f3156); - asm volatile("vfmsub.vv v2, v4, v6"); + asm volatile("vfmsub.vv v8, v16, v24"); // -0.4878417526056305, 1.5023792602532113, 0.7910431172705017, // -1.0928904581998689, -0.9810795946017860, -0.0425694375227023, // 0.1708735055334605, 0.0918950033157782, 0.1963653801662987, // 0.0478398011349183, 0.1360200483560070, -0.5905179111943168, // 0.2570595827304748, 0.0482829008439404, 0.4305876927582732, // 0.2615424092003222 - VCMP_U64(3, v2, 0xbfdf38cc9d4420dd, 0x3ff809bed5cf9e94, 0x3fe95039a7cce2e7, + VCMP_U64(3, v8, 0xbfdf38cc9d4420dd, 0x3ff809bed5cf9e94, 0x3fe95039a7cce2e7, 0xbff17c7ab4814324, 0xbfef650108b2cdb1, 0xbfa5cba94bf030ac, 0x3fc5df2edb027178, 0x3fb7866e51e83656, 
0x3fc9228032f0c004, 0x3fa87e755aa4ab1c, 0x3fc1691adda50ab1, 0xbfe2e585d18904b2, @@ -127,37 +127,37 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.6821, 0.7749, 0.0299, 0.0299, -0.5410, -0.1865, 0.1885, // 0.0228, 0.7217, 0.1902, -0.9160, -0.3511, -0.9287, -0.9961, // -0.6509, -0.3940 - VLOAD_16(v4, 0x3975, 0x3a33, 0x27a5, 0x27a5, 0xb854, 0xb1f8, 0x3208, 0x25d8, + VLOAD_16(v16, 0x3975, 0x3a33, 0x27a5, 0x27a5, 0xb854, 0xb1f8, 0x3208, 0x25d8, 0x39c6, 0x3216, 0xbb54, 0xb59e, 0xbb6e, 0xbbf8, 0xb935, 0xb64e); // -0.9048, 0.1698, 0.2411, 0.2411, -0.6147, 0.9580, 0.5117, // 0.8330, -0.8584, 0.5591, 0.7031, -0.1556, 0.2397, 0.8154, // 0.1646, 0.2693 - VLOAD_16(v6, 0xbb3d, 0x316f, 0x33b7, 0x33b7, 0xb8eb, 0x3baa, 0x3818, 0x3aaa, + VLOAD_16(v24, 0xbb3d, 0x316f, 0x33b7, 0x33b7, 0xb8eb, 0x3baa, 0x3818, 0x3aaa, 0xbade, 0x3879, 0x39a0, 0xb0fb, 0x33ac, 0x3a86, 0x3144, 0x344f); VLOAD_8(v0, 0xAA, 0xAA); // -0.2292, 0.7578, -0.7427, -0.7427, 0.1119, 0.2939, -0.2983, // -0.3608, 0.3169, -0.5562, -0.0629, -0.1968, -0.8638, -0.2700, // -0.7690, 0.4216 - VLOAD_16(v2, 0xb356, 0x3a10, 0xb9f1, 0xb9f1, 0x2f29, 0x34b4, 0xb4c6, 0xb5c6, + VLOAD_16(v8, 0xb356, 0x3a10, 0xb9f1, 0xb9f1, 0x2f29, 0x34b4, 0xb4c6, 0xb5c6, 0x3512, 0xb873, 0xac06, 0xb24c, 0xbae9, 0xb452, 0xba27, 0x36bf); - asm volatile("vfmsub.vv v2, v4, v6, v0.t"); + asm volatile("vfmsub.vv v8, v16, v24, v0.t"); // -0.2292, 0.4175, -0.7427, -0.2632, 0.1119, -1.0127, -0.2983, // -0.8413, 0.3169, -0.6650, -0.0629, 0.2247, -0.8638, -0.5464, // -0.7690, -0.4355 - VCMP_U16(4, v2, 0xb356, 0x36ae, 0xb9f1, 0xb436, 0x2f29, 0xbc0d, 0xb4c6, + VCMP_U16(4, v8, 0xb356, 0x36ae, 0xb9f1, 0xb436, 0x2f29, 0xbc0d, 0xb4c6, 0xbabb, 0x3512, 0xb952, 0xac06, 0x3331, 0xbae9, 0xb85f, 0xba27, 0xb6f8); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.74553698, -0.16736358, -0.11869104, -0.85860848, // -0.66138542, 
-0.68386567, -0.45389724, -0.12761629, // -0.95652348, 0.71083277, 0.24187960, 0.01609672, // -0.58867335, -0.55222940, -0.67417240, -0.06725668 - VLOAD_32(v4, 0xbf3edb83, 0xbe2b615c, 0xbdf3144a, 0xbf5bcdc4, 0xbf29508e, + VLOAD_32(v16, 0xbf3edb83, 0xbe2b615c, 0xbdf3144a, 0xbf5bcdc4, 0xbf29508e, 0xbf2f11d2, 0xbee86538, 0xbe02add8, 0xbf74deb9, 0x3f35f923, 0x3e77af49, 0x3c83dd45, 0xbf16b34c, 0xbf0d5ee8, 0xbf2c9690, 0xbd89bddf); @@ -165,7 +165,7 @@ void TEST_CASE2(void) { // 0.49659184, -0.09389434, 0.05757815, -0.13087828, // -0.73042232, -0.79662275, -0.96801740, 0.03017101, // 0.70759267, -0.35606241, 0.18037270, -0.35372722 - VLOAD_32(v6, 0xbe333510, 0xbf50bc3a, 0xbea39d6f, 0x3e6916cc, 0x3efe4149, + VLOAD_32(v24, 0xbe333510, 0xbf50bc3a, 0xbea39d6f, 0x3e6916cc, 0x3efe4149, 0xbdc04bad, 0x3d6bd711, 0xbe0604f5, 0xbf3afcf5, 0xbf4bef78, 0xbf77cffd, 0x3cf72932, 0x3f3524cb, 0xbeb64dd0, 0x3e38b39f, 0xbeb51bbc); @@ -174,29 +174,29 @@ void TEST_CASE2(void) { // 0.90907055, 0.07037155, 0.07339484, 0.17415307, // -0.61978233, -0.04939311, 0.56138068, -0.51601994, // -0.80625385, -0.31227911, 0.91474551, 0.78424871 - VLOAD_32(v2, 0x3f6dc3b9, 0x3e3e2fab, 0xbed7cb1e, 0xbf4afb22, 0x3f68b8d9, + VLOAD_32(v8, 0x3f6dc3b9, 0x3e3e2fab, 0xbed7cb1e, 0xbf4afb22, 0x3f68b8d9, 0x3d901ef6, 0x3d965009, 0x3e32552f, 0xbf1eaa0e, 0xbd4a506e, 0x3f0fb6a5, 0xbf0419e2, 0xbf4e66a7, 0xbe9fe30c, 0x3f6a2cc3, 0x3f48c486); - asm volatile("vfmsub.vv v2, v4, v6, v0.t"); + asm volatile("vfmsub.vv v8, v16, v24, v0.t"); // 0.92876774, 0.78428787, -0.42147154, 0.45315993, // 0.90907055, 0.04576965, 0.07339484, 0.10865352, // -0.61978233, 0.76151252, 0.56138068, -0.03847724, // -0.80625385, 0.52851212, 0.91474551, 0.30098125 - VCMP_U32(5, v2, 0x3f6dc3b9, 0x3f48c717, 0xbed7cb1e, 0x3ee80493, 0x3f68b8d9, + VCMP_U32(5, v8, 0x3f6dc3b9, 0x3f48c717, 0xbed7cb1e, 0x3ee80493, 0x3f68b8d9, 0x3d3b78f5, 0x3d965009, 0x3dde85bc, 0xbf1eaa0e, 0x3f42f27c, 0x3f0fb6a5, 0xbd1d9a4f, 0xbf4e66a7, 0x3f074c92, 0x3f6a2cc3, 0x3e9a1a37); #if 
ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 0.3304351537536074, -0.7528197595818080, -0.7530937950641439, // -0.7994160811423281, 0.0797802827518117, // -0.6361377214985149, 0.1748070414096887, 0.8251843575618585, // 0.0699629848559165, -0.8195631240215655, -0.4843919596862658, // -0.9206444585342115, 0.9791118581337512, 0.5143481050333210, // 0.5856279779979670, -0.5536419150604011 - VLOAD_64(v4, 0x3fd525d97cb482ac, 0xbfe8171976e5f762, 0xbfe819582893df6e, + VLOAD_64(v16, 0x3fd525d97cb482ac, 0xbfe8171976e5f762, 0xbfe819582893df6e, 0xbfe994d1088ce396, 0x3fb46c7b0948dfc0, 0xbfe45b3d7eb2d188, 0x3fc66013befb8968, 0x3fea67e9069cc438, 0x3fb1e9181be2ff10, 0xbfea39dc71d5c454, 0xbfdf00472253102c, 0xbfed75eb5e14bcf6, @@ -209,7 +209,7 @@ void TEST_CASE2(void) { // -0.2492371358416354, 0.4131695659117063, // -0.9085600854772706, -0.3952216110937696, 0.0817663443229741, // 0.1439804529607418 - VLOAD_64(v6, 0x3fda997577954be0, 0xbfe8faec3ae9f10e, 0xbfec3ef992a7ed86, + VLOAD_64(v24, 0x3fda997577954be0, 0xbfe8faec3ae9f10e, 0xbfec3ef992a7ed86, 0x3fdec970c7c16d8c, 0x3fede18e86a8f206, 0xbfe135863f697cd2, 0xbfc25c6af06e7710, 0x3fbeb257d63cc310, 0xbfd0a5eaf1337874, 0xbfc734de337f3100, 0xbfcfe700a1b1bb78, 0x3fda715ec352c558, @@ -222,20 +222,20 @@ void TEST_CASE2(void) { // 0.1628442001087833, 0.2337303194688813, 0.1926350000139823, // -0.6653994610877216, -0.6745212179353777, 0.8748797125997727, // -0.7324641634418565 - VLOAD_64(v2, 0xbfcbfa50c7635df8, 0xbfeeade562a749c2, 0x3fbf27194abf66e0, + VLOAD_64(v8, 0xbfcbfa50c7635df8, 0xbfeeade562a749c2, 0x3fbf27194abf66e0, 0x3fe87ddfd38d1514, 0xbfe2f86de1af9792, 0x3fed319f612fa6f6, 0x3fc4157886016dd8, 0x3fd06d5afcf59780, 0xbfed29db86ef2934, 0x3fc4d81428e7be98, 0x3fcdeae00719eac8, 0x3fc8a84380900070, 0xbfe54af3cf84bab0, 0xbfe595ad856fb278, 0x3febff03bd3198ce, 0xbfe77058af6f3156); - asm volatile("vfmsub.vv v2, v4, v6, v0.t"); + asm volatile("vfmsub.vv v8, v16, v24, v0.t"); // -0.2185765241217579, 1.5023792602532113, 
0.1216903502931035, // -1.0928904581998689, -0.5928258331230032, -0.0425694375227023, // 0.1569052366565831, 0.0918950033157782, -0.9113595614847214, // 0.0478398011349183, 0.2337303194688813, -0.5905179111943168, // -0.6653994610877216, 0.0482829008439404, 0.8748797125997727, // 0.2615424092003222 - VCMP_U64(6, v2, 0xbfcbfa50c7635df8, 0x3ff809bed5cf9e94, 0x3fbf27194abf66e0, + VCMP_U64(6, v8, 0xbfcbfa50c7635df8, 0x3ff809bed5cf9e94, 0x3fbf27194abf66e0, 0xbff17c7ab4814324, 0xbfe2f86de1af9792, 0xbfa5cba94bf030ac, 0x3fc4157886016dd8, 0x3fb7866e51e83656, 0xbfed29db86ef2934, 0x3fa87e755aa4ab1c, 0x3fcdeae00719eac8, 0xbfe2e585d18904b2, @@ -246,29 +246,29 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.1489 BOX_HALF_IN_FLOAT(fscalar_16, 0x30c4); // 0.8530, -0.3298, -0.1814, -0.2385, 0.9946, 0.6553, 0.8711, // -0.6377, 0.4224, -0.1814, 0.7026, 0.2852, 0.0553, 0.7349, // -0.8105, 0.0033 - VLOAD_16(v4, 0x3ad3, 0xb547, 0xb1ce, 0xb3a2, 0x3bf5, 0x393e, 0x3af8, 0xb91a, + VLOAD_16(v16, 0x3ad3, 0xb547, 0xb1ce, 0xb3a2, 0x3bf5, 0x393e, 0x3af8, 0xb91a, 0x36c2, 0xb1ce, 0x399f, 0x3490, 0x2b15, 0x39e1, 0xba7c, 0x1abd); // -0.2338, -0.2512, 0.0069, 0.0613, -0.1733, 0.8560, -0.2766, // -0.0028, -0.1803, 0.0069, 0.7856, -0.0243, -0.1974, 0.6416, // 0.7109, 0.0817 - VLOAD_16(v2, 0xb37b, 0xb405, 0x1f06, 0x2bd8, 0xb18c, 0x3ad9, 0xb46d, 0x99d2, + VLOAD_16(v8, 0xb37b, 0xb405, 0x1f06, 0x2bd8, 0xb18c, 0x3ad9, 0xb46d, 0x99d2, 0xb1c5, 0x1f06, 0x3a49, 0xa639, 0xb251, 0x3922, 0x39b0, 0x2d3a); - asm volatile("vfmsub.vf v2, %[A], v4" ::[A] "f"(fscalar_16)); + asm volatile("vfmsub.vf v8, %[A], v16" ::[A] "f"(fscalar_16)); // -0.8877, 0.2925, 0.1824, 0.2477, -1.0205, -0.5278, -0.9121, // 0.6372, -0.4492, 0.1824, -0.5854, -0.2888, -0.0847, -0.6392, // 0.9165, 0.0089 - VCMP_U16(7, v2, 0xbb1a, 0x34ae, 0x31d6, 0x33ed, 0xbc15, 0xb839, 0xbb4c, + VCMP_U16(7, v8, 0xbb1a, 0x34ae, 0x31d6, 
0x33ed, 0xbc15, 0xb839, 0xbb4c, 0x3919, 0xb730, 0x31d6, 0xb8af, 0xb49f, 0xad6c, 0xb91d, 0x3b55, 0x208b); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.12857932 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbe03aa4c); @@ -276,7 +276,7 @@ void TEST_CASE3(void) { // -0.28880060, 0.46233574, -0.51105869, -0.11776974, // -0.39969075, 0.51141965, 0.88750082, -0.22310242, // 0.60111052, 0.58466393, -0.14306845, -0.01826003 - VLOAD_32(v4, 0x3e9efadd, 0xbe0dc2e3, 0x3e6faaea, 0xbe9c641e, 0xbe93ddac, + VLOAD_32(v16, 0x3e9efadd, 0xbe0dc2e3, 0x3e6faaea, 0xbe9c641e, 0xbe93ddac, 0x3eecb745, 0xbf02d4be, 0xbdf13143, 0xbecca444, 0x3f02ec66, 0x3f633341, 0xbe6474f6, 0x3f19e261, 0x3f15ac89, 0xbe128089, 0xbc95960e); @@ -284,22 +284,22 @@ void TEST_CASE3(void) { // -0.50454420, 0.30827177, -0.25503114, 0.07736996, // 0.20596179, -0.42633566, 0.89622146, 0.03779412, 0.50878429, // 0.67896879, -0.17667305, 0.06984760 - VLOAD_32(v2, 0xbf049506, 0x3f45f5fd, 0xbf6263ee, 0x3ed13af8, 0xbf0129cf, + VLOAD_32(v8, 0xbf049506, 0x3f45f5fd, 0xbf6263ee, 0x3ed13af8, 0xbf0129cf, 0x3e9dd5cc, 0xbe829371, 0x3d9e7424, 0x3e52e7a6, 0xbeda48ab, 0x3f656ec5, 0x3d1ace01, 0x3f023fb0, 0x3f2dd0e6, 0xbe34e9c8, 0x3d8f0c42); - asm volatile("vfmsub.vf v2, %[A], v4" ::[A] "f"(fscalar_32)); + asm volatile("vfmsub.vf v8, %[A], v16" ::[A] "f"(fscalar_32)); // -0.24391660, 0.03901032, -0.12034293, 0.25290701, // 0.35367453, -0.50197309, 0.54385042, 0.10782156, 0.37320831, // -0.45660171, -1.00273633, 0.21824288, -0.66652966, // -0.67196524, 0.16578496, 0.00927907 - VCMP_U32(8, v2, 0xbe79c546, 0x3d1fc94a, 0xbdf6765b, 0x3e817d07, 0x3eb514d5, + VCMP_U32(8, v8, 0xbe79c546, 0x3d1fc94a, 0xbdf6765b, 0x3e817d07, 0x3eb514d5, 0xbf00814f, 0x3f0b39c8, 0x3ddcd18d, 0x3ebf1529, 0xbee9c7b3, 0xbf8059aa, 0x3e5f7b10, 0xbf2aa1b0, 0xbf2c05eb, 0x3e29c388, 0x3c18073f); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // -0.6953502965951812 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe6404f43e47c8c); @@ -309,7 +309,7 @@ 
void TEST_CASE3(void) { // -0.4464070291333477, 0.5599998966835931, // -0.3406088963725078, 0.4908382567748615, // -0.5194254665571632, 0.0909287222245825, 0.5893410930389467 - VLOAD_64(v4, 0xbfec64e92b21453c, 0x3fd90b76663c74f4, 0x3fec9ccf06e3d51a, + VLOAD_64(v16, 0xbfec64e92b21453c, 0x3fd90b76663c74f4, 0x3fec9ccf06e3d51a, 0x3fd0d574d8567864, 0xbfe9e8cae6c6325c, 0x3fe4a36b411b6206, 0x3fead4815153e1da, 0xbfefdc02add2c126, 0xbfec990204389c42, 0xbfdc91eec9b5438c, 0x3fe1eb84e7409f04, 0xbfd5cc8941a96178, @@ -321,20 +321,20 @@ void TEST_CASE3(void) { // 0.3780444269462682, -0.6731508364205383, 0.1263808806166760, // 0.8571806635726140, 0.5149747658358419, 0.3530123248386567, // -0.3756405874818076, -0.4529815298587780 - VLOAD_64(v2, 0xbfdc153dde8f3078, 0xbfcf0c99c409ad98, 0xbfec824d0777279c, + VLOAD_64(v8, 0xbfdc153dde8f3078, 0xbfcf0c99c409ad98, 0xbfec824d0777279c, 0xbfe97a21412fca1a, 0x3fa21bf19e277c80, 0x3fd32781ab407ee0, 0xbfdacb94deb0b06c, 0xbfe159aae0fd4b9a, 0x3fd831e1408ad588, 0xbfe58a739f7670b4, 0x3fc02d3faa8b4d88, 0x3feb6e062499dac6, 0x3fe07aac5c30f764, 0x3fd697c1019115dc, 0xbfd80a7ed19236bc, 0xbfdcfda63e1bdf38); - asm volatile("vfmsub.vf v2, %[A], v4" ::[A] "f"(dscalar_64)); + asm volatile("vfmsub.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); // 1.1924354834767115, -0.2226520744998102, -0.2746500252428773, // 0.2905806577161405, 0.7850729097860438, -0.8530574119257405, // -0.5473143461852141, 1.3726227142641205, 0.6308044149460488, // 0.9144826628916634, -0.6478788795043589, -0.2554319322783636, // -0.8489261129378481, 0.2739582417788488, 0.1702730716940806, // -0.2743602518995064 - VCMP_U64(9, v2, 0x3ff314373ac1f573, 0xbfcc7fdcf92e7eaa, 0xbfd193ddb310e0ff, + VCMP_U64(9, v8, 0x3ff314373ac1f573, 0xbfcc7fdcf92e7eaa, 0xbfd193ddb310e0ff, 0x3fd298df9d6f6c70, 0x3fe91f5139103634, 0xbfeb4c3f0eba9b49, 0xbfe18399602fe862, 0x3ff5f6433c382dac, 0x3fe42f8cbd8bb3c3, 0x3fed4371253c1e34, 0xbfe4bb6c7ce7b0ea, 0xbfd058ff2cdf5691, @@ -345,30 +345,30 @@ void TEST_CASE3(void) { // Simple 
random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.1489 BOX_HALF_IN_FLOAT(fscalar_16, 0x30c4); // 0.8530, -0.3298, -0.1814, -0.2385, 0.9946, 0.6553, 0.8711, // -0.6377, 0.4224, -0.1814, 0.7026, 0.2852, 0.0553, // 0.7349, -0.8105, 0.0033 - VLOAD_16(v4, 0x3ad3, 0xb547, 0xb1ce, 0xb3a2, 0x3bf5, 0x393e, 0x3af8, 0xb91a, + VLOAD_16(v16, 0x3ad3, 0xb547, 0xb1ce, 0xb3a2, 0x3bf5, 0x393e, 0x3af8, 0xb91a, 0x36c2, 0xb1ce, 0x399f, 0x3490, 0x2b15, 0x39e1, 0xba7c, 0x1abd); VLOAD_8(v0, 0xAA, 0xAA); // -0.2338, -0.2512, 0.0069, 0.0613, -0.1733, 0.8560, // -0.2766, -0.0028, -0.1803, 0.0069, 0.7856, -0.0243, // -0.1974, 0.6416, 0.7109, 0.0817 - VLOAD_16(v2, 0xb37b, 0xb405, 0x1f06, 0x2bd8, 0xb18c, 0x3ad9, 0xb46d, 0x99d2, + VLOAD_16(v8, 0xb37b, 0xb405, 0x1f06, 0x2bd8, 0xb18c, 0x3ad9, 0xb46d, 0x99d2, 0xb1c5, 0x1f06, 0x3a49, 0xa639, 0xb251, 0x3922, 0x39b0, 0x2d3a); - asm volatile("vfmsub.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); + asm volatile("vfmsub.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_16)); // -0.2338, 0.2925, 0.0069, 0.2477, -0.1733, -0.5278, // -0.2766, 0.6372, -0.1803, 0.1824, 0.7856, -0.2888, // -0.1974, -0.6392, 0.7109, 0.0089 - VCMP_U16(10, v2, 0xb37b, 0x34ae, 0x1f06, 0x33ed, 0xb18c, 0xb839, 0xb46d, + VCMP_U16(10, v8, 0xb37b, 0x34ae, 0x1f06, 0x33ed, 0xb18c, 0xb839, 0xb46d, 0x3919, 0xb1c5, 0x31d6, 0x3a49, 0xb49f, 0xb251, 0xb91d, 0x39b0, 0x208b); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.12857932 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbe03aa4c); @@ -376,7 +376,7 @@ void TEST_CASE4(void) { // -0.28880060, 0.46233574, -0.51105869, -0.11776974, // -0.39969075, 0.51141965, 0.88750082, -0.22310242, // 0.60111052, 0.58466393, -0.14306845, -0.01826003 - VLOAD_32(v4, 0x3e9efadd, 0xbe0dc2e3, 0x3e6faaea, 0xbe9c641e, 0xbe93ddac, + VLOAD_32(v16, 0x3e9efadd, 0xbe0dc2e3, 0x3e6faaea, 0xbe9c641e, 0xbe93ddac, 0x3eecb745, 0xbf02d4be, 0xbdf13143, 0xbecca444, 0x3f02ec66, 
0x3f633341, 0xbe6474f6, 0x3f19e261, 0x3f15ac89, 0xbe128089, 0xbc95960e); @@ -385,22 +385,22 @@ void TEST_CASE4(void) { // -0.50454420, 0.30827177, -0.25503114, 0.07736996, // 0.20596179, -0.42633566, 0.89622146, 0.03779412, // 0.50878429, 0.67896879, -0.17667305, 0.06984760 - VLOAD_32(v2, 0xbf049506, 0x3f45f5fd, 0xbf6263ee, 0x3ed13af8, 0xbf0129cf, + VLOAD_32(v8, 0xbf049506, 0x3f45f5fd, 0xbf6263ee, 0x3ed13af8, 0xbf0129cf, 0x3e9dd5cc, 0xbe829371, 0x3d9e7424, 0x3e52e7a6, 0xbeda48ab, 0x3f656ec5, 0x3d1ace01, 0x3f023fb0, 0x3f2dd0e6, 0xbe34e9c8, 0x3d8f0c42); - asm volatile("vfmsub.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); + asm volatile("vfmsub.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_32)); // -0.51789892, 0.03901032, -0.88433731, 0.25290701, // -0.50454420, -0.50197309, -0.25503114, 0.10782156, // 0.20596179, -0.45660171, 0.89622146, 0.21824288, // 0.50878429, -0.67196524, -0.17667305, 0.00927907 - VCMP_U32(11, v2, 0xbf049506, 0x3d1fc94a, 0xbf6263ee, 0x3e817d07, 0xbf0129cf, + VCMP_U32(11, v8, 0xbf049506, 0x3d1fc94a, 0xbf6263ee, 0x3e817d07, 0xbf0129cf, 0xbf00814f, 0xbe829371, 0x3ddcd18d, 0x3e52e7a6, 0xbee9c7b3, 0x3f656ec5, 0x3e5f7b10, 0x3f023fb0, 0xbf2c05eb, 0xbe34e9c8, 0x3c18073f); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // -0.6953502965951812 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe6404f43e47c8c); @@ -411,7 +411,7 @@ void TEST_CASE4(void) { // -0.8936777193492917, -0.4464070291333477, // 0.5599998966835931, -0.3406088963725078, 0.4908382567748615, // -0.5194254665571632, 0.0909287222245825, 0.5893410930389467 - VLOAD_64(v4, 0xbfec64e92b21453c, 0x3fd90b76663c74f4, 0x3fec9ccf06e3d51a, + VLOAD_64(v16, 0xbfec64e92b21453c, 0x3fd90b76663c74f4, 0x3fec9ccf06e3d51a, 0x3fd0d574d8567864, 0xbfe9e8cae6c6325c, 0x3fe4a36b411b6206, 0x3fead4815153e1da, 0xbfefdc02add2c126, 0xbfec990204389c42, 0xbfdc91eec9b5438c, 0x3fe1eb84e7409f04, 0xbfd5cc8941a96178, @@ -425,13 +425,13 @@ void TEST_CASE4(void) { // 0.3780444269462682, -0.6731508364205383, 
0.1263808806166760, // 0.8571806635726140, 0.5149747658358419, 0.3530123248386567, // -0.3756405874818076, -0.4529815298587780 - VLOAD_64(v2, 0xbfdc153dde8f3078, 0xbfcf0c99c409ad98, 0xbfec824d0777279c, + VLOAD_64(v8, 0xbfdc153dde8f3078, 0xbfcf0c99c409ad98, 0xbfec824d0777279c, 0xbfe97a21412fca1a, 0x3fa21bf19e277c80, 0x3fd32781ab407ee0, 0xbfdacb94deb0b06c, 0xbfe159aae0fd4b9a, 0x3fd831e1408ad588, 0xbfe58a739f7670b4, 0x3fc02d3faa8b4d88, 0x3feb6e062499dac6, 0x3fe07aac5c30f764, 0x3fd697c1019115dc, 0xbfd80a7ed19236bc, 0xbfdcfda63e1bdf38); - asm volatile("vfmsub.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_64)); + asm volatile("vfmsub.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); // -0.4387964890891065, -0.2226520744998102, // -0.8909058709916624, 0.2905806577161405, 0.0353694444236163, // -0.8530574119257405, @@ -439,7 +439,7 @@ void TEST_CASE4(void) { // 0.9144826628916634, 0.1263808806166760, -0.2554319322783636, // 0.5149747658358419, 0.2739582417788488, -0.3756405874818076, // -0.2743602518995064 - VCMP_U64(12, v2, 0xbfdc153dde8f3078, 0xbfcc7fdcf92e7eaa, 0xbfec824d0777279c, + VCMP_U64(12, v8, 0xbfdc153dde8f3078, 0xbfcc7fdcf92e7eaa, 0xbfec824d0777279c, 0x3fd298df9d6f6c70, 0x3fa21bf19e277c80, 0xbfeb4c3f0eba9b49, 0xbfdacb94deb0b06c, 0x3ff5f6433c382dac, 0x3fd831e1408ad588, 0x3fed4371253c1e34, 0x3fc02d3faa8b4d88, 0xbfd058ff2cdf5691, diff --git a/sw/riscvTests/isa/rv64uv/vfmul.c b/sw/riscvTests/isa/rv64uv/vfmul.c index 62335603..d24c1989 100644 --- a/sw/riscvTests/isa/rv64uv/vfmul.c +++ b/sw/riscvTests/isa/rv64uv/vfmul.c @@ -10,31 +10,31 @@ // Simple random test with similar values + 1 subnormal void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.5522, 0.0462, -0.4255, 0.4131, 0.4658, 0.3931, -0.4868, // 0.5503, 0.3516, -0.3025, -0.2155, 0.9307, 0.9775, 0.8394, // 0.7446, 0.3909 - VLOAD_16(v4, 0xb86b, 0x29e9, 0xb6cf, 0x369c, 0x3774, 0x364a, 0xb7ca, 0x3867, + VLOAD_16(v16, 0xb86b, 0x29e9, 0xb6cf, 0x369c, 0x3774, 0x364a, 0xb7ca, 0x3867, 0x35a0, 0xb4d7, 
0xb2e5, 0x3b72, 0x3bd2, 0x3ab7, 0x39f5, 0x3641); // 0.8247, 0.4902, 0.5796, -0.9561, -0.7676, 0.1672, -0.1094, // -0.9395, 0.4885, -0.2739, 0.8691, -0.3394, -0.8032, // -0.4922, 0.4456, 0.2050 - VLOAD_16(v6, 0x3a99, 0x37d8, 0x38a3, 0xbba6, 0xba24, 0x315a, 0xaf01, 0xbb84, + VLOAD_16(v24, 0x3a99, 0x37d8, 0x38a3, 0xbba6, 0xba24, 0x315a, 0xaf01, 0xbb84, 0x37d1, 0xb462, 0x3af4, 0xb56e, 0xba6d, 0xb7e0, 0x3721, 0x328f); - asm volatile("vfmul.vv v2, v4, v6"); + asm volatile("vfmul.vv v8, v16, v24"); // -0.4553, 0.0226, -0.2466, -0.3950, -0.3577, 0.0657, 0.0533, // -0.5171, 0.1718, 0.0829, -0.1873, -0.3159, -0.7852, -0.4131, // 0.3318, 0.0801 - VCMP_U16(1, v2, 0xb749, 0x25cb, 0xb3e4, 0xb652, 0xb5b9, 0x2c35, 0x2ad2, + VCMP_U16(1, v8, 0xb749, 0x25cb, 0xb3e4, 0xb652, 0xb5b9, 0x2c35, 0x2ad2, 0xb823, 0x317f, 0x2d4e, 0xb1fe, 0xb50e, 0xba48, 0xb69c, 0x354f, 0x2d21); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.48805356, 0.30350628, -0.10483003, 0.61108905, // -0.09161828, 0.83353645, -0.55006021, -0.78635991, // 0.49253011, -0.03583150, -0.77662903, 0.57397723, // -0.54674339, 0.86299890, 0.65402901, -0.16832402 - VLOAD_32(v4, 0x3ef9e228, 0x3e9b652d, 0xbdd6b121, 0x3f1c7055, 0xbdbba25d, + VLOAD_32(v16, 0x3ef9e228, 0x3e9b652d, 0xbdd6b121, 0x3f1c7055, 0xbdbba25d, 0x3f5562a5, 0xbf0cd0bf, 0xbf494ee2, 0x3efc2ce8, 0xbd12c40e, 0xbf46d129, 0x3f12f02c, 0xbf0bf760, 0x3f5ced7f, 0x3f276e72, 0xbe2c5d22); @@ -42,29 +42,29 @@ void TEST_CASE1(void) { // -0.99424285, 0.31885657, 0.18092929, -0.68290263, // 0.45391774, -0.45151946, -0.08929581, 0.80524033, // 0.81978256, -0.28325567, -0.53026456, -0.21847765 - VLOAD_32(v6, 0x3f5f15cf, 0xbea7b67f, 0x3f445e7a, 0xbf0b50f0, 0xbf7e86b3, + VLOAD_32(v24, 0x3f5f15cf, 0xbea7b67f, 0x3f445e7a, 0xbf0b50f0, 0xbf7e86b3, 0x3ea3412b, 0x3e394587, 0xbf2ed2b5, 0x3ee867e8, 0xbee72d8f, 0xbdb6e0b9, 0x3f4e243b, 0x3f51dd45, 0xbe9106e3, 0xbf07bf6b, 0xbe5fb89b); - asm volatile("vfmul.vv v2, v4, v6"); + asm volatile("vfmul.vv v8, v16, v24"); // 0.42530280, -0.09941780, 
-0.08041162, -0.33255696, // 0.09109081, 0.26577857, -0.09952200, 0.53700727, // 0.22356816, 0.01617862, 0.06934972, 0.46218961, // -0.44821069, -0.24444933, -0.34680840, 0.03677504 - VCMP_U32(2, v2, 0x3ed9c14a, 0xbdcb9b8f, 0xbda4aed9, 0xbeaa44e8, 0x3dba8dd2, + VCMP_U32(2, v8, 0x3ed9c14a, 0xbdcb9b8f, 0xbda4aed9, 0xbeaa44e8, 0x3dba8dd2, 0x3e881421, 0xbdcbd231, 0x3f09794f, 0x3e64ef0d, 0x3c848907, 0x3d8e073a, 0x3eeca41e, 0xbee57bdf, 0xbe7a50ed, 0xbeb190df, 0x3d16a16c); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.7493892241714462, 0.7026559207451004, 0.6475697152132245, // 0.0771197585157644, -0.2238692303359540, 0.8998213782649329, // -0.9446193329247832, 0.8596730101791072, -0.0254417293392082, // 0.1965035124326171, -0.4709662077579637, -0.2875069600640039, // -0.4671574223295827, 0.3105385724706418, 0.1703390668980564, // 0.1487690137320270 - VLOAD_64(v4, 0xbfe7faff1c39514c, 0x3fe67c2844fe1c76, 0x3fe4b8e41f971110, + VLOAD_64(v16, 0xbfe7faff1c39514c, 0x3fe67c2844fe1c76, 0x3fe4b8e41f971110, 0x3fb3be1ed8b35c30, 0xbfcca7bf376fd290, 0x3feccb5633fc770c, 0xbfee3a5252c299d8, 0x3feb8270f8ff23f8, 0xbf9a0d658ddcc1c0, 0x3fc92706efb93e80, 0xbfde244f72f5dcb4, 0xbfd2668397b639c0, @@ -76,20 +76,20 @@ void TEST_CASE1(void) { // 0.6297663200296084, 0.3637508978200528, 0.6003193921430929, // -0.9089688764960682, 0.1595578103621622, 0.2113473996516566, // -0.4586515678904381 - VLOAD_64(v6, 0x3fe62f4b848d2362, 0xbfd1334ac4aee374, 0x3fd02f3bdcc85930, + VLOAD_64(v24, 0x3fe62f4b848d2362, 0xbfd1334ac4aee374, 0x3fd02f3bdcc85930, 0x3fe751b7e126b540, 0xbfe22f283c572a1e, 0x3f82dddde857f980, 0xbfe06c8ede5db9be, 0xbf923ab26578ce40, 0x3fb4ea36e2cf6110, 0x3fe4270bb294c832, 0x3fd747b1d881c6e4, 0x3fe335d1038d1808, 0xbfed1645e5b43d3e, 0x3fc46c63eca9d670, 0x3fcb0d6e7ccc9be0, 0xbfdd5a8c1b164ebc); - asm volatile("vfmul.vv v2, v4, v6"); + asm volatile("vfmul.vv v8, v16, v24"); // -0.5195315511948315, -0.1888427270075288, 0.1637593235041994, // 0.0561991250128884, 0.1272151431765869, 
0.0082893703931556, // 0.4848274962501199, -0.0153039296644220, -0.0020785594718451, // 0.1237512938975817, -0.1713143809148648, -0.1725960035025313, // 0.4246315573217200, 0.0495488546564072, 0.0360007188479938, // -0.0682331414017083 - VCMP_U64(3, v2, 0xbfe0a000a1b3e706, 0xbfc82bff9c4ada77, 0x3fc4f610c56ecca8, + VCMP_U64(3, v8, 0xbfe0a000a1b3e706, 0xbfc82bff9c4ada77, 0x3fc4f610c56ecca8, 0x3facc621b7fd0401, 0x3fc04895f7bfec49, 0x3f80fa0475f1bbe1, 0x3fdf0769e826220a, 0xbf8f57aaab459580, 0xbf61070e1e8a29ae, 0x3fbfae2a3020b759, 0xbfc5eda12fae9203, 0xbfc617a0373b59a7, @@ -101,32 +101,32 @@ void TEST_CASE1(void) { // Simple random test with similar values + 1 subnormal (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.5522, 0.0462, -0.4255, 0.4131, 0.4658, 0.3931, -0.4868, // 0.5503, 0.3516, -0.3025, -0.2155, 0.9307, 0.9775, 0.8394, // 0.7446, 0.3909 - VLOAD_16(v4, 0xb86b, 0x29e9, 0xb6cf, 0x369c, 0x3774, 0x364a, 0xb7ca, 0x3867, + VLOAD_16(v16, 0xb86b, 0x29e9, 0xb6cf, 0x369c, 0x3774, 0x364a, 0xb7ca, 0x3867, 0x35a0, 0xb4d7, 0xb2e5, 0x3b72, 0x3bd2, 0x3ab7, 0x39f5, 0x3641); // 0.8247, 0.4902, 0.5796, -0.9561, -0.7676, 0.1672, -0.1094, // -0.9395, 0.4885, -0.2739, 0.8691, -0.3394, -0.8032, // -0.4922, 0.4456, 0.2050 - VLOAD_16(v6, 0x3a99, 0x37d8, 0x38a3, 0xbba6, 0xba24, 0x315a, 0xaf01, 0xbb84, + VLOAD_16(v24, 0x3a99, 0x37d8, 0x38a3, 0xbba6, 0xba24, 0x315a, 0xaf01, 0xbb84, 0x37d1, 0xb462, 0x3af4, 0xb56e, 0xba6d, 0xb7e0, 0x3721, 0x328f); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmul.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfmul.vv v8, v16, v24, v0.t"); // 0.0000, 0.0226, 0.0000, -0.3950, 0.0000, 0.0657, 0.0000, // -0.5171, 0.0000, 0.0829, 0.0000, -0.3159, 0.0000, // -0.4131, 0.0000, 0.0801 - VCMP_U16(4, v2, 0x0, 0x25cb, 0x0, 0xb652, 0x0, 0x2c35, 0x0, 0xb823, 0x0, + VCMP_U16(4, v8, 0x0, 0x25cb, 0x0, 0xb652, 0x0, 0x2c35, 0x0, 0xb823, 0x0, 0x2d4e, 0x0, 0xb50e, 0x0, 0xb69c, 0x0, 
0x2d21); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.48805356, 0.30350628, -0.10483003, 0.61108905, // -0.09161828, 0.83353645, -0.55006021, -0.78635991, // 0.49253011, -0.03583150, -0.77662903, 0.57397723, // -0.54674339, 0.86299890, 0.65402901, -0.16832402 - VLOAD_32(v4, 0x3ef9e228, 0x3e9b652d, 0xbdd6b121, 0x3f1c7055, 0xbdbba25d, + VLOAD_32(v16, 0x3ef9e228, 0x3e9b652d, 0xbdd6b121, 0x3f1c7055, 0xbdbba25d, 0x3f5562a5, 0xbf0cd0bf, 0xbf494ee2, 0x3efc2ce8, 0xbd12c40e, 0xbf46d129, 0x3f12f02c, 0xbf0bf760, 0x3f5ced7f, 0x3f276e72, 0xbe2c5d22); @@ -134,30 +134,30 @@ void TEST_CASE2(void) { // -0.99424285, 0.31885657, 0.18092929, -0.68290263, // 0.45391774, -0.45151946, -0.08929581, 0.80524033, // 0.81978256, -0.28325567, -0.53026456, -0.21847765 - VLOAD_32(v6, 0x3f5f15cf, 0xbea7b67f, 0x3f445e7a, 0xbf0b50f0, 0xbf7e86b3, + VLOAD_32(v24, 0x3f5f15cf, 0xbea7b67f, 0x3f445e7a, 0xbf0b50f0, 0xbf7e86b3, 0x3ea3412b, 0x3e394587, 0xbf2ed2b5, 0x3ee867e8, 0xbee72d8f, 0xbdb6e0b9, 0x3f4e243b, 0x3f51dd45, 0xbe9106e3, 0xbf07bf6b, 0xbe5fb89b); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmul.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfmul.vv v8, v16, v24, v0.t"); // 0.00000000, -0.09941780, 0.00000000, -0.33255696, // 0.00000000, 0.26577857, 0.00000000, 0.53700727, // 0.00000000, 0.01617862, 0.00000000, 0.46218961, // 0.00000000, -0.24444933, 0.00000000, 0.03677504 - VCMP_U32(5, v2, 0x0, 0xbdcb9b8f, 0x0, 0xbeaa44e8, 0x0, 0x3e881421, 0x0, + VCMP_U32(5, v8, 0x0, 0xbdcb9b8f, 0x0, 0xbeaa44e8, 0x0, 0x3e881421, 0x0, 0x3f09794f, 0x0, 0x3c848907, 0x0, 0x3eeca41e, 0x0, 0xbe7a50ed, 0x0, 0x3d16a16c); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.7493892241714462, 0.7026559207451004, 0.6475697152132245, // 0.0771197585157644, -0.2238692303359540, 0.8998213782649329, // -0.9446193329247832, 0.8596730101791072, -0.0254417293392082, // 0.1965035124326171, -0.4709662077579637, -0.2875069600640039, // -0.4671574223295827, 0.3105385724706418, 0.1703390668980564, // 
0.1487690137320270 - VLOAD_64(v4, 0xbfe7faff1c39514c, 0x3fe67c2844fe1c76, 0x3fe4b8e41f971110, + VLOAD_64(v16, 0xbfe7faff1c39514c, 0x3fe67c2844fe1c76, 0x3fe4b8e41f971110, 0x3fb3be1ed8b35c30, 0xbfcca7bf376fd290, 0x3feccb5633fc770c, 0xbfee3a5252c299d8, 0x3feb8270f8ff23f8, 0xbf9a0d658ddcc1c0, 0x3fc92706efb93e80, 0xbfde244f72f5dcb4, 0xbfd2668397b639c0, @@ -169,22 +169,22 @@ void TEST_CASE2(void) { // 0.6297663200296084, 0.3637508978200528, 0.6003193921430929, // -0.9089688764960682, 0.1595578103621622, 0.2113473996516566, // -0.4586515678904381 - VLOAD_64(v6, 0x3fe62f4b848d2362, 0xbfd1334ac4aee374, 0x3fd02f3bdcc85930, + VLOAD_64(v24, 0x3fe62f4b848d2362, 0xbfd1334ac4aee374, 0x3fd02f3bdcc85930, 0x3fe751b7e126b540, 0xbfe22f283c572a1e, 0x3f82dddde857f980, 0xbfe06c8ede5db9be, 0xbf923ab26578ce40, 0x3fb4ea36e2cf6110, 0x3fe4270bb294c832, 0x3fd747b1d881c6e4, 0x3fe335d1038d1808, 0xbfed1645e5b43d3e, 0x3fc46c63eca9d670, 0x3fcb0d6e7ccc9be0, 0xbfdd5a8c1b164ebc); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmul.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfmul.vv v8, v16, v24, v0.t"); // 0.0000000000000000, -0.1888427270075288, 0.0000000000000000, // 0.0561991250128884, 0.0000000000000000, 0.0082893703931556, // 0.0000000000000000, -0.0153039296644220, 0.0000000000000000, // 0.1237512938975817, 0.0000000000000000, -0.1725960035025313, // 0.0000000000000000, 0.0495488546564072, 0.0000000000000000, // -0.0682331414017083 - VCMP_U64(6, v2, 0x0, 0xbfc82bff9c4ada77, 0x0, 0x3facc621b7fd0401, 0x0, + VCMP_U64(6, v8, 0x0, 0xbfc82bff9c4ada77, 0x0, 0x3facc621b7fd0401, 0x0, 0x3f80fa0475f1bbe1, 0x0, 0xbf8f57aaab459580, 0x0, 0x3fbfae2a3020b759, 0x0, 0xbfc617a0373b59a7, 0x0, 0x3fa95e77ac9b67ce, 0x0, 0xbfb177ba26d2dcbe); @@ -193,47 +193,47 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.8423, 0.9531, 0.3889, -0.3704, -0.9731, -0.4636, -0.4797, // -0.5903, 0.2959, 0.4685, 
-0.3660, 0.3167, -0.9766, 0.0052, // -0.6489, -0.0474 - VLOAD_16(v4, 0xbabd, 0x3ba0, 0x3639, 0xb5ed, 0xbbc9, 0xb76b, 0xb7ad, 0xb8b9, + VLOAD_16(v16, 0xbabd, 0x3ba0, 0x3639, 0xb5ed, 0xbbc9, 0xb76b, 0xb7ad, 0xb8b9, 0x34bc, 0x377f, 0xb5db, 0x3511, 0xbbd0, 0x1d48, 0xb931, 0xaa11); float fscalar_16; // 0.2971 BOX_HALF_IN_FLOAT(fscalar_16, 0x34c1); - asm volatile("vfmul.vf v2, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfmul.vf v8, v16, %[A]" ::[A] "f"(fscalar_16)); // -0.2502, 0.2832, 0.1155, -0.1100, -0.2891, -0.1377, -0.1426, // -0.1754, 0.0879, 0.1392, -0.1088, 0.0941, -0.2900, 0.0015, // -0.1927, -0.0141 - VCMP_U16(7, v2, 0xb401, 0x3488, 0x2f65, 0xaf0b, 0xb4a0, 0xb068, 0xb090, + VCMP_U16(7, v8, 0xb401, 0x3488, 0x2f65, 0xaf0b, 0xb4a0, 0xb068, 0xb090, 0xb19d, 0x2da0, 0x3074, 0xaef6, 0x2e05, 0xb4a4, 0x1647, 0xb22b, 0xa336); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.11454447, -0.46133029, 0.06972761, 0.20429718, // -0.97134608, -0.95719630, -0.11250938, 0.48455358, // 0.59656250, 0.46462929, 0.13447689, -0.32035729, 0.75118428, // 0.90634471, 0.73552424, -0.53555632 - VLOAD_32(v4, 0xbdea964b, 0xbeec337c, 0x3d8ecd5a, 0x3e513348, 0xbf78aa23, + VLOAD_32(v16, 0xbdea964b, 0xbeec337c, 0x3d8ecd5a, 0x3e513348, 0xbf78aa23, 0xbf750ad1, 0xbde66b52, 0x3ef81768, 0x3f18b852, 0x3eede3e4, 0x3e09b44f, 0xbea405df, 0x3f404d9d, 0x3f680635, 0x3f3c4b51, 0xbf091a38); float fscalar_32; // 0.94017404 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x3f70af3f); - asm volatile("vfmul.vf v2, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfmul.vf v8, v16, %[A]" ::[A] "f"(fscalar_32)); // -0.10769174, -0.43373078, 0.06555609, 0.19207491, // -0.91323435, -0.89993113, -0.10577840, 0.45556471, // 0.56087255, 0.43683240, 0.12643167, -0.30119160, 0.70624399, // 0.85212177, 0.69152081, -0.50351614 - VCMP_U32(8, v2, 0xbddc8d7d, 0xbede11f6, 0x3d864246, 0x3e44af49, 0xbf69c9ba, + VCMP_U32(8, v8, 0xbddc8d7d, 0xbede11f6, 0x3d864246, 0x3e44af49, 0xbf69c9ba, 0xbf6661e3, 0xbdd8a259, 0x3ee93fc7, 0x3f0f9558, 
0x3edfa87f, 0x3e01774e, 0xbe9a35c9, 0x3f34cc68, 0x3f5a24a7, 0x3f310782, 0xbf00e66f); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.3344965024132001, -0.2497404698970234, 0.3402338726452623, // -0.5885400342262450, -0.7135559920290824, 0.1114442794173345, // -0.9541638058007114, 0.1021679621951177, @@ -241,7 +241,7 @@ void TEST_CASE3(void) { // -0.2701320849999789, 0.3582375365191053, // -0.6137661452178358, 0.6195430637830983, 0.2731869234335833, // -0.4075196944877124 - VLOAD_64(v4, 0xbfd56864049f6dd8, 0xbfcff77ee7590278, 0x3fd5c6644b002e60, + VLOAD_64(v16, 0xbfd56864049f6dd8, 0xbfcff77ee7590278, 0x3fd5c6644b002e60, 0xbfe2d551e8ec6e20, 0xbfe6d573603426e0, 0x3fbc879cbf6c7a10, 0xbfee8882889e1c44, 0x3fba27adf853b5f0, 0xbfc177db63eceed0, 0xbfedec94daa41aac, 0xbfd149d815ab3680, 0x3fd6ed5d21e3257c, @@ -250,14 +250,14 @@ void TEST_CASE3(void) { double dscalar_64; // -0.7970907277742201 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe981c469f7860e); - asm volatile("vfmul.vf v2, v4, %[A]" ::[A] "f"(dscalar_64)); + asm volatile("vfmul.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); // 0.2666240605464688, 0.1990658129048941, -0.2711972651602534, // 0.4691198042056620, 0.5687688649941168, -0.0888312017870367, // 0.7605551223815086, -0.0814371353413154, 0.1087791670362886, // 0.7453831118261137, 0.2153197802278006, -0.2855478187000574, // 0.4892273033748624, -0.4938320315983399, -0.2177547636180751, // 0.3248301698615385 - VCMP_U64(9, v2, 0x3fd1105e5d17ec76, 0x3fc97afd1216ce6e, 0xbfd15b4bc6282ffc, + VCMP_U64(9, v8, 0x3fd1105e5d17ec76, 0x3fc97afd1216ce6e, 0xbfd15b4bc6282ffc, 0x3fde060f123e080e, 0x3fe2335ac3443fa9, 0xbfb6bda4428a29bb, 0x3fe85677b22de228, 0xbfb4d91068f88b49, 0x3fbbd8f394e82fe7, 0x3fe7da2daf091575, 0x3fcb8f993b2151e0, 0xbfd2466a5bb0b251, @@ -268,30 +268,30 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.8423, 0.9531, 0.3889, -0.3704, -0.9731, 
-0.4636, // -0.4797, -0.5903, 0.2959, 0.4685, -0.3660, 0.3167, // -0.9766, 0.0052, -0.6489, -0.0474 - VLOAD_16(v4, 0xbabd, 0x3ba0, 0x3639, 0xb5ed, 0xbbc9, 0xb76b, 0xb7ad, 0xb8b9, + VLOAD_16(v16, 0xbabd, 0x3ba0, 0x3639, 0xb5ed, 0xbbc9, 0xb76b, 0xb7ad, 0xb8b9, 0x34bc, 0x377f, 0xb5db, 0x3511, 0xbbd0, 0x1d48, 0xb931, 0xaa11); float fscalar_16; // 0.2971 BOX_HALF_IN_FLOAT(fscalar_16, 0x34c1); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmul.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v8); + asm volatile("vfmul.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.0000, 0.2832, 0.0000, -0.1100, 0.0000, -0.1377, 0.0000, // -0.1754, 0.0000, 0.1392, 0.0000, 0.0941, 0.0000, // 0.0015, 0.0000, -0.0141 - VCMP_U16(10, v2, 0x0, 0x3488, 0x0, 0xaf0b, 0x0, 0xb068, 0x0, 0xb19d, 0x0, + VCMP_U16(10, v8, 0x0, 0x3488, 0x0, 0xaf0b, 0x0, 0xb068, 0x0, 0xb19d, 0x0, 0x3074, 0x0, 0x2e05, 0x0, 0x1647, 0x0, 0xa336); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.11454447, -0.46133029, 0.06972761, 0.20429718, // -0.97134608, -0.95719630, -0.11250938, 0.48455358, // 0.59656250, 0.46462929, 0.13447689, -0.32035729, // 0.75118428, 0.90634471, 0.73552424, -0.53555632 - VLOAD_32(v4, 0xbdea964b, 0xbeec337c, 0x3d8ecd5a, 0x3e513348, 0xbf78aa23, + VLOAD_32(v16, 0xbdea964b, 0xbeec337c, 0x3d8ecd5a, 0x3e513348, 0xbf78aa23, 0xbf750ad1, 0xbde66b52, 0x3ef81768, 0x3f18b852, 0x3eede3e4, 0x3e09b44f, 0xbea405df, 0x3f404d9d, 0x3f680635, 0x3f3c4b51, 0xbf091a38); @@ -299,18 +299,18 @@ void TEST_CASE4(void) { // 0.94017404 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x3f70af3f); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmul.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v8); + asm volatile("vfmul.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.00000000, -0.43373078, 0.00000000, 0.19207491, // 0.00000000, -0.89993113, 0.00000000, 0.45556471, // 0.00000000, 0.43683240, 0.00000000, -0.30119160, // 0.00000000, 0.85212177, 0.00000000, -0.50351614 - VCMP_U32(11, v2, 0x0, 0xbede11f6, 
0x0, 0x3e44af49, 0x0, 0xbf6661e3, 0x0, + VCMP_U32(11, v8, 0x0, 0xbede11f6, 0x0, 0x3e44af49, 0x0, 0xbf6661e3, 0x0, 0x3ee93fc7, 0x0, 0x3edfa87f, 0x0, 0xbe9a35c9, 0x0, 0x3f5a24a7, 0x0, 0xbf00e66f); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.3344965024132001, -0.2497404698970234, // 0.3402338726452623, -0.5885400342262450, // -0.7135559920290824, 0.1114442794173345, @@ -319,7 +319,7 @@ void TEST_CASE4(void) { // -0.2701320849999789, 0.3582375365191053, // -0.6137661452178358, 0.6195430637830983, // 0.2731869234335833, -0.4075196944877124 - VLOAD_64(v4, 0xbfd56864049f6dd8, 0xbfcff77ee7590278, 0x3fd5c6644b002e60, + VLOAD_64(v16, 0xbfd56864049f6dd8, 0xbfcff77ee7590278, 0x3fd5c6644b002e60, 0xbfe2d551e8ec6e20, 0xbfe6d573603426e0, 0x3fbc879cbf6c7a10, 0xbfee8882889e1c44, 0x3fba27adf853b5f0, 0xbfc177db63eceed0, 0xbfedec94daa41aac, 0xbfd149d815ab3680, 0x3fd6ed5d21e3257c, @@ -329,8 +329,8 @@ void TEST_CASE4(void) { // -0.7970907277742201 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe981c469f7860e); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfmul.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCLEAR(v8); + asm volatile("vfmul.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); // 0.0000000000000000, 0.1990658129048941, 0.0000000000000000, // 0.4691198042056620, 0.0000000000000000, // -0.0888312017870367, 0.0000000000000000, @@ -338,7 +338,7 @@ void TEST_CASE4(void) { // 0.7453831118261137, 0.0000000000000000, // -0.2855478187000574, 0.0000000000000000, // -0.4938320315983399, 0.0000000000000000, 0.3248301698615385 - VCMP_U64(12, v2, 0x0, 0x3fc97afd1216ce6e, 0x0, 0x3fde060f123e080e, 0x0, + VCMP_U64(12, v8, 0x0, 0x3fc97afd1216ce6e, 0x0, 0x3fde060f123e080e, 0x0, 0xbfb6bda4428a29bb, 0x0, 0xbfb4d91068f88b49, 0x0, 0x3fe7da2daf091575, 0x0, 0xbfd2466a5bb0b251, 0x0, 0xbfdf9af1aa5ba7aa, 0x0, 0x3fd4ca047b13cdbf); diff --git a/sw/riscvTests/isa/rv64uv/vfmv.c b/sw/riscvTests/isa/rv64uv/vfmv.c index 3f12dec3..03ae30ef 100644 --- a/sw/riscvTests/isa/rv64uv/vfmv.c +++ 
b/sw/riscvTests/isa/rv64uv/vfmv.c @@ -10,48 +10,48 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // -0.9380 BOX_HALF_IN_FLOAT(fscalar_16, 0xbb81); - VCLEAR(v2); - asm volatile("vfmv.v.f v2, %[A]" ::[A] "f"(fscalar_16)); + VCLEAR(v8); + asm volatile("vfmv.v.f v8, %[A]" ::[A] "f"(fscalar_16)); // -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, // -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, // -0.9380, -0.9380 - VCMP_U16(1, v2, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, + VCMP_U16(1, v8, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.96056187 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbf75e762); - VCLEAR(v2); - asm volatile("vfmv.v.f v2, %[A]" ::[A] "f"(fscalar_32)); + VCLEAR(v8); + asm volatile("vfmv.v.f v8, %[A]" ::[A] "f"(fscalar_32)); // -0.96056187, -0.96056187, -0.96056187, -0.96056187, // -0.96056187, -0.96056187, -0.96056187, -0.96056187, // -0.96056187, -0.96056187, -0.96056187, -0.96056187, // -0.96056187, -0.96056187, -0.96056187, -0.96056187 - VCMP_U32(2, v2, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, + VCMP_U32(2, v8, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.9108707261227378 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); - VCLEAR(v2); - asm volatile("vfmv.v.f v2, %[A]" ::[A] "f"(dscalar_64)); + VCLEAR(v8); + asm volatile("vfmv.v.f v8, %[A]" ::[A] "f"(dscalar_64)); // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, // 
0.9108707261227378, 0.9108707261227378, 0.9108707261227378, // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, // 0.9108707261227378 - VCMP_U64(3, v2, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + VCMP_U64(3, v8, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, diff --git a/sw/riscvTests/isa/rv64uv/vfncvt.c b/sw/riscvTests/isa/rv64uv/vfncvt.c index 40cd48b5..1e7f7874 100644 --- a/sw/riscvTests/isa/rv64uv/vfncvt.c +++ b/sw/riscvTests/isa/rv64uv/vfncvt.c @@ -19,11 +19,11 @@ void TEST_CASE1(void) { // -6826.076, -6976.746, 2675.899, 9587.624, -3671.810, // 3611.960, -9086.531, -5333.617, -3284.205, 5676.141, // -8293.472 - VLOAD_32(v4, 0x460f36ad, 0x45ab810c, 0x479c8d06, 0xc59cf316, 0x45dfd8be, + VLOAD_32(v16, 0x460f36ad, 0x45ab810c, 0x479c8d06, 0xc59cf316, 0x45dfd8be, 0xc5d5509c, 0xc5da05f8, 0x45273e62, 0x4615ce7f, 0xc5657cf5, 0x4561bf5b, 0xc60dfa20, 0xc5a6acf0, 0xc54d4347, 0x45b16120, 0xc60195e3); - asm volatile("vfncvt.xu.f.w v8, v4"); + asm volatile("vfncvt.xu.f.w v8, v16"); // 9166, 5488, 65535, 0, 7163, 0, // 0, 2676, 9588, 0, 3612, 0, 0, // 0, 5676, 0 @@ -37,13 +37,13 @@ void TEST_CASE1(void) { // -5056868.769, 4282070.604, 458667.918, 8393053.957, // -4485003.775, -5016427.098, -9086965.507, -6796529.257, // -7756776.890, -1173384.460, 4850684.145, 8658279.578 - VLOAD_64(v4, 0xc14ac53f4813ac38, 0x41399b7a6ae9e42f, 0xc128660783332e44, + VLOAD_64(v16, 0xc14ac53f4813ac38, 0x41399b7a6ae9e42f, 0xc128660783332e44, 0x413a0e9c6ddfa609, 0xc1534a59313a407b, 0x415055b5a6a655de, 0x411bfeafabb28b3f, 0x4160022bbe9fc5e9, 0xc1511be2f1a1ac8b, 0xc15322dac64b7c31, 0xc16154feb0372db3, 0xc159ed3c506ab6eb, 0xc15d96fa38fb0400, 0xc131e78875bc4ace, 0x415280ff09493a97, 0x416083acf280b61e); - asm volatile("vfncvt.xu.f.w v8, v4"); + asm volatile("vfncvt.xu.f.w v8, v16"); 
// 0, 1678202, 0, 1707676, 0, // 4282071, 458668, 8393054, 0, 0, 0, // 0, 0, 0, 4850684, @@ -63,13 +63,13 @@ void TEST_CASE2(void) { // 7163.093, -6826.076, -6976.746, 2675.899, 9587.624, // -3671.810, 3611.960, -9086.531, -5333.617, -3284.205, // 5676.141, -8293.472 - VLOAD_32(v4, 0x460f36ad, 0x45ab810c, 0xc4ce09ad, 0xc59cf316, 0x45dfd8be, + VLOAD_32(v16, 0x460f36ad, 0x45ab810c, 0xc4ce09ad, 0xc59cf316, 0x45dfd8be, 0xc5d5509c, 0xc5da05f8, 0x45273e62, 0x4615ce7f, 0xc5657cf5, 0x4561bf5b, 0xc60dfa20, 0xc5a6acf0, 0xc54d4347, 0x45b16120, 0xc60195e3); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.xu.f.w v8, v4, v0.t"); + asm volatile("vfncvt.xu.f.w v8, v16, v0.t"); // 0, 5488, 0, 0, 0, 0, // 0, 2676, 0, 0, 0, 0, // 0, 0, 0, 0 @@ -85,7 +85,7 @@ void TEST_CASE2(void) { // -5016427.098, -9086965.507, -6796529.257, // -7756776.890, -1173384.460, 4850684.145, // 8658279.578 - VLOAD_64(v4, 0xc14ac53f4813ac38, 0x41399b7a6ae9e42f, 0xc128660783332e44, + VLOAD_64(v16, 0xc14ac53f4813ac38, 0x41399b7a6ae9e42f, 0xc128660783332e44, 0x413a0e9c6ddfa609, 0xc1534a59313a407b, 0x415055b5a6a655de, 0x411bfeafabb28b3f, 0x4160022bbe9fc5e9, 0xc1511be2f1a1ac8b, 0xc15322dac64b7c31, 0xc16154feb0372db3, 0xc159ed3c506ab6eb, @@ -93,7 +93,7 @@ void TEST_CASE2(void) { 0x416083acf280b61e); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.xu.f.w v8, v4, v0.t"); + asm volatile("vfncvt.xu.f.w v8, v16, v0.t"); // 0, 1678202, 0, 1707676, 0, // 4282071, 0, 8393054, 0, 0, 0, // 0, 0, 0, 0, 8658280 @@ -115,11 +115,11 @@ void TEST_CASE3(void) { // 9676.333, 6963.966, 3589.870, -4334.772, -3261.309, // -2340.480, 6085.075, 4043.322, 2827.902, 4389.497, // -5196.684 - VLOAD_32(v4, 0xc5f40072, 0x461195a2, 0x44d2bc86, 0x4578798a, 0xc5bad0dd, + VLOAD_32(v16, 0xc5f40072, 0x461195a2, 0x44d2bc86, 0x4578798a, 0xc5bad0dd, 0x46173155, 0x45d99fbb, 0x45605ded, 0xc587762e, 0xc54bd4f0, 0xc51247af, 0x45be2899, 0x457cb528, 0x4530be6f, 0x45892bfa, 0xc5a26578); - asm volatile("vfncvt.x.f.w v8, v4"); + asm 
volatile("vfncvt.x.f.w v8, v16"); // -7808, 9317, 1686, 3976, -5978, // 9676, 6964, 3590, -4335, -3261, // -2340, 6085, 4043, 2828, 4389, @@ -134,13 +134,13 @@ void TEST_CASE3(void) { // -9624608.750, -6974543.165, 5868078.422, -5387798.170, // 3847378.080, 1368753.124, 4380497.931, -8044304.268, // 1687738.849, 3753399.509, -3684410.483, -7416477.444 - VLOAD_64(v4, 0x415477e8714aea69, 0xc15cda91b6eefd56, 0x415ebb605a479cd5, + VLOAD_64(v16, 0x415477e8714aea69, 0xc15cda91b6eefd56, 0x415ebb605a479cd5, 0xc142d4e5a0a1f367, 0xc1625b841802ee1d, 0xc15a9b13ca8c7bb6, 0x4156628b9afacdc9, 0xc1548d858ae6df86, 0x414d5a690a2dbb5e, 0x4134e2b11fa8e994, 0x4150b5d47b9c3df2, 0xc15eafc4112995f5, 0x4139c0bad971859a, 0x414ca2dbc1288a12, 0xc14c1c1d3dcd1b39, 0xc15c4aa75c6c5635); - asm volatile("vfncvt.x.f.w v8, v4"); + asm volatile("vfncvt.x.f.w v8, v16"); // 5365666, -7563847, 8056193, -2468299, // -9624609, -6974543, 5868078, -5387798, // 3847378, 1368753, 4380498, -8044304, @@ -160,13 +160,13 @@ void TEST_CASE4(void) { // 9676.333, 6963.966, 3589.870, -4334.772, -3261.309, // -2340.480, 6085.075, 4043.322, 2827.902, 4389.497, // -5196.684 - VLOAD_32(v4, 0xc5f40072, 0x461195a2, 0x44d2bc86, 0x4578798a, 0xc5bad0dd, + VLOAD_32(v16, 0xc5f40072, 0x461195a2, 0x44d2bc86, 0x4578798a, 0xc5bad0dd, 0x46173155, 0x45d99fbb, 0x45605ded, 0xc587762e, 0xc54bd4f0, 0xc51247af, 0x45be2899, 0x457cb528, 0x4530be6f, 0x45892bfa, 0xc5a26578); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.x.f.w v8, v4, v0.t"); + asm volatile("vfncvt.x.f.w v8, v16, v0.t"); // 0, 9317, 0, 3976, 0, // 9676, 0, 3590, 0, -3261, // 0, 6085, 0, 2828, 0, @@ -181,7 +181,7 @@ void TEST_CASE4(void) { // -9624608.750, -6974543.165, 5868078.422, -5387798.170, // 3847378.080, 1368753.124, 4380497.931, -8044304.268, // 1687738.849, 3753399.509, -3684410.483, -7416477.444 - VLOAD_64(v4, 0x415477e8714aea69, 0xc15cda91b6eefd56, 0x415ebb605a479cd5, + VLOAD_64(v16, 0x415477e8714aea69, 0xc15cda91b6eefd56, 0x415ebb605a479cd5, 
0xc142d4e5a0a1f367, 0xc1625b841802ee1d, 0xc15a9b13ca8c7bb6, 0x4156628b9afacdc9, 0xc1548d858ae6df86, 0x414d5a690a2dbb5e, 0x4134e2b11fa8e994, 0x4150b5d47b9c3df2, 0xc15eafc4112995f5, @@ -189,7 +189,7 @@ void TEST_CASE4(void) { 0xc15c4aa75c6c5635); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.x.f.w v8, v4, v0.t"); + asm volatile("vfncvt.x.f.w v8, v16, v0.t"); // 0, -7563847, 0, -2468299, 0, // -6974543, 0, -5387798, 0, // 1368753, 0, -8044304, 0, @@ -212,11 +212,11 @@ void TEST_CASE5(void) { // 6670.171, -4079.234, -1773.082, 254.350, 53.058, // -9041.926, -8137.022, 1522.146, 198.516, -920.430, // 2857.583 - VLOAD_32(v4, 0xc6185902, 0xc5885de3, 0xc51299d6, 0x45daa07e, 0xc5de5808, + VLOAD_32(v16, 0xc6185902, 0xc5885de3, 0xc51299d6, 0x45daa07e, 0xc5de5808, 0x45d0715e, 0xc57ef3bf, 0xc4dda29c, 0x437e5998, 0x42543afb, 0xc60d47b4, 0xc5fe482e, 0x44be44af, 0x43468433, 0xc4661b8b, 0x45329953); - asm volatile("vfncvt.rtz.xu.f.w v8, v4"); + asm volatile("vfncvt.rtz.xu.f.w v8, v16"); // 0, 0, 0, 6996, 0, // 6670, 0, 0, 254, 53, // 0, 0, 1522, 198, 0, @@ -231,13 +231,13 @@ void TEST_CASE5(void) { // 5750957.328, -7243911.338, -8202847.045, 5348152.868, // 9957770.965, 8018962.598, -8478197.842, -9780786.953, // 184470.081, 250336.923, -6517203.475, -7691903.192 - VLOAD_64(v4, 0xc16007d978438b7f, 0x414bad2ac51eade4, 0xc150aa777763eeda, + VLOAD_64(v16, 0xc16007d978438b7f, 0x414bad2ac51eade4, 0xc150aa777763eeda, 0xc146008fcfc2093b, 0x4155f02b54fbd105, 0xc15ba221d5a4f5c3, 0xc15f4a97c2e2daa6, 0x415466ce378e9269, 0x4162fe315edeecec, 0x415e9704a64d845e, 0xc1602bbebaf40bb8, 0xc162a7c65e8002a9, 0x410684b0a4ee482d, 0x410e8f07623ffd06, 0xc158dc74de617fbc, 0xc15d579fcc41ba16); - asm volatile("vfncvt.rtz.xu.f.w v8, v4"); + asm volatile("vfncvt.rtz.xu.f.w v8, v16"); // 0, 3627605, 0, 0, 5750957, 0, // 0, 5348152, 9957770, 8018962, 0, 0, // 184470, 250336, 0, 0 @@ -256,13 +256,13 @@ void TEST_CASE6(void) { // 6670.171, -4079.234, -1773.082, 254.350, 53.058, // -9041.926, 
-8137.022, 1522.146, 198.516, -920.430, // 2857.583 - VLOAD_32(v4, 0xc6185902, 0xc5885de3, 0xc51299d6, 0x45daa07e, 0xc5de5808, + VLOAD_32(v16, 0xc6185902, 0xc5885de3, 0xc51299d6, 0x45daa07e, 0xc5de5808, 0x45d0715e, 0xc57ef3bf, 0xc4dda29c, 0x437e5998, 0x42543afb, 0xc60d47b4, 0xc5fe482e, 0x44be44af, 0x43468433, 0xc4661b8b, 0x45329953); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.rtz.xu.f.w v8, v4, v0.t"); + asm volatile("vfncvt.rtz.xu.f.w v8, v16, v0.t"); // 0, 0, 0, 6996, 0, // 6670, 0, 0, 0, 53, // 0, 0, 0, 198, 0, @@ -277,7 +277,7 @@ void TEST_CASE6(void) { // 5750957.328, -7243911.338, -8202847.045, 5348152.868, // 9957770.965, 8018962.598, -8478197.842, -9780786.953, // 184470.081, 250336.923, -6517203.475, -7691903.192 - VLOAD_64(v4, 0xc16007d978438b7f, 0x414bad2ac51eade4, 0xc150aa777763eeda, + VLOAD_64(v16, 0xc16007d978438b7f, 0x414bad2ac51eade4, 0xc150aa777763eeda, 0xc146008fcfc2093b, 0x4155f02b54fbd105, 0xc15ba221d5a4f5c3, 0xc15f4a97c2e2daa6, 0x415466ce378e9269, 0x4162fe315edeecec, 0x415e9704a64d845e, 0xc1602bbebaf40bb8, 0xc162a7c65e8002a9, @@ -285,7 +285,7 @@ void TEST_CASE6(void) { 0xc15d579fcc41ba16); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.rtz.xu.f.w v8, v4, v0.t"); + asm volatile("vfncvt.rtz.xu.f.w v8, v16, v0.t"); // 0, 3627605, 0, 0, 0, 0, 0, // 5348152, 0, 8018962, 0, 0, 0, // 250336, 0, 0 @@ -307,11 +307,11 @@ void TEST_CASE7(void) { // -5717.140, -3327.545, 1286.004, 1797.767, 3842.966, // -2148.369, -7283.256, 8783.331, -7958.880, -6728.271, // 4727.792 - VLOAD_32(v4, 0x461221ac, 0xc5b2bbac, 0x45904e86, 0xc53c4026, 0xc560d104, + VLOAD_32(v16, 0x461221ac, 0xc5b2bbac, 0x45904e86, 0xc53c4026, 0xc560d104, 0xc5b2a91e, 0xc54ff8b9, 0x44a0c01e, 0x44e0b88c, 0x45702f76, 0xc50645e9, 0xc5e39a0c, 0x46093d53, 0xc5f8b70a, 0xc5d2422c, 0x4593be56); - asm volatile("vfncvt.rtz.x.f.w v8, v4"); + asm volatile("vfncvt.rtz.x.f.w v8, v16"); // 9352, -5719, 4617, -3012, -3597, // -5717, -3327, 1286, 1797, 3842, // -2148, -7283, 8783, 
-7958, -6728, @@ -326,13 +326,13 @@ void TEST_CASE7(void) { // 9551708.952, -336377.787, -2352111.643, 4412162.570, // 7087155.475, 338850.875, 2765611.498, 2723631.912, // -3252079.308, 1096915.326, 5492109.280, -7265880.245 - VLOAD_64(v4, 0x4137db9a42b839bd, 0xc13e596598118127, 0x4158c7d738d1eec8, + VLOAD_64(v16, 0x4137db9a42b839bd, 0xc13e596598118127, 0x4158c7d738d1eec8, 0x4153484eb13573ed, 0x416237eb9e79d2a8, 0xc11487e725f1ce50, 0xc141f1f7d2451c3d, 0x4150d4c0a47be906, 0x415b090cde6b0575, 0x4114ae8b8081532d, 0x41451995bfc3bc74, 0x4144c797f4b307dd, 0xc148cfb7a76dea0f, 0x4130bcd353667e5d, 0x4154f36351f3a3c5, 0xc15bb7960fb007a5); - asm volatile("vfncvt.rtz.x.f.w v8, v4"); + asm volatile("vfncvt.rtz.x.f.w v8, v16"); // 1563546, -1988965, 6496092, 5054778, // 9551708, -336377, -2352111, 4412162, // 7087155, 338850, 2765611, 2723631, @@ -352,13 +352,13 @@ void TEST_CASE8(void) { // -5717.140, -3327.545, 1286.004, 1797.767, 3842.966, // -2148.369, -7283.256, 8783.331, -7958.880, -6728.271, // 4727.792 - VLOAD_32(v4, 0x461221ac, 0xc5b2bbac, 0x45904e86, 0xc53c4026, 0xc560d104, + VLOAD_32(v16, 0x461221ac, 0xc5b2bbac, 0x45904e86, 0xc53c4026, 0xc560d104, 0xc5b2a91e, 0xc54ff8b9, 0x44a0c01e, 0x44e0b88c, 0x45702f76, 0xc50645e9, 0xc5e39a0c, 0x46093d53, 0xc5f8b70a, 0xc5d2422c, 0x4593be56); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.rtz.x.f.w v8, v4, v0.t"); + asm volatile("vfncvt.rtz.x.f.w v8, v16, v0.t"); // 0, -5719, 0, -3012, 0, // -5717, 0, 1286, 0, 3842, 0, // -7283, 0, -7958, 0, 4727 @@ -372,7 +372,7 @@ void TEST_CASE8(void) { // 9551708.952, -336377.787, -2352111.643, 4412162.570, // 7087155.475, 338850.875, 2765611.498, 2723631.912, // -3252079.308, 1096915.326, 5492109.280, -7265880.245 - VLOAD_64(v4, 0x4137db9a42b839bd, 0xc13e596598118127, 0x4158c7d738d1eec8, + VLOAD_64(v16, 0x4137db9a42b839bd, 0xc13e596598118127, 0x4158c7d738d1eec8, 0x4153484eb13573ed, 0x416237eb9e79d2a8, 0xc11487e725f1ce50, 0xc141f1f7d2451c3d, 0x4150d4c0a47be906, 0x415b090cde6b0575, 
0x4114ae8b8081532d, 0x41451995bfc3bc74, 0x4144c797f4b307dd, @@ -380,7 +380,7 @@ void TEST_CASE8(void) { 0xc15bb7960fb007a5); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.rtz.x.f.w v8, v4, v0.t"); + asm volatile("vfncvt.rtz.x.f.w v8, v16, v0.t"); // 0, -1988965, 0, 5054778, 0, // -336377, 0, 4412162, 0, // 338850, 0, 2723631, 0, @@ -403,11 +403,11 @@ void TEST_CASE9(void) { // 1637, 3984, 4294964217, 9553, 4294962615, // 4294962166, 9867, 4294958580, 4294966752, 5172, // 7478 - VLOAD_32(v4, 0xfffff3d2, 0x000016dd, 0xffffedce, 0xffffeba2, 0x000011e9, + VLOAD_32(v16, 0xfffff3d2, 0x000016dd, 0xffffedce, 0xffffeba2, 0x000011e9, 0x00000665, 0x00000f90, 0xfffff3f9, 0x00002551, 0xffffedb7, 0xffffebf6, 0x0000268b, 0xffffddf4, 0xfffffde0, 0x00001434, 0x00001d36); - asm volatile("vfncvt.f.xu.w v8, v4"); + asm volatile("vfncvt.f.xu.w v8, v16"); // inf, 5852.000, inf, inf, 4584.000, 1637.000, // 3984.000, inf, 9552.000, inf, inf, 9864.000, // inf, inf, 5172.000, 7480.000 @@ -423,13 +423,13 @@ void TEST_CASE9(void) { // 18446744073705977615, 18446744073704792883, // 18446744073704699584, 8685460, 18446744073709143843, // 18446744073703142874, 3905530, 18446744073704152149 - VLOAD_64(v4, 0xffffffffffb80822, 0xfffffffffff960b9, 0xffffffffffc84e30, + VLOAD_64(v16, 0xffffffffffb80822, 0xfffffffffff960b9, 0xffffffffffc84e30, 0x00000000001fd673, 0xffffffffff69013c, 0x00000000000e3ba3, 0x000000000003e709, 0x000000000030090f, 0xffffffffffc9770f, 0xffffffffffb76333, 0xffffffffffb5f6c0, 0x0000000000848794, 0xfffffffffff9c723, 0xffffffffff9e35da, 0x00000000003b97fa, 0xffffffffffad9c55); - asm volatile("vfncvt.f.xu.w v8, v4"); + asm volatile("vfncvt.f.xu.w v8, v16"); // 18446744073709551616.000, 18446744073709551616.000, // 18446744073709551616.000, 2086515.000, // 18446744073709551616.000, 932771.000, 255753.000, @@ -453,13 +453,13 @@ void TEST_CASE10(void) { // 1637, 3984, 4294964217, 9553, 4294962615, // 4294962166, 9867, 4294958580, 4294966752, 5172, // 7478 - 
VLOAD_32(v4, 0xfffff3d2, 0x000016dd, 0xffffedce, 0xffffeba2, 0x000011e9, + VLOAD_32(v16, 0xfffff3d2, 0x000016dd, 0xffffedce, 0xffffeba2, 0x000011e9, 0x00000665, 0x00000f90, 0xfffff3f9, 0x00002551, 0xffffedb7, 0xffffebf6, 0x0000268b, 0xffffddf4, 0xfffffde0, 0x00001434, 0x00001d36); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.f.xu.w v8, v4, v0.t"); + asm volatile("vfncvt.f.xu.w v8, v16, v0.t"); // 0.000, 5852.000, 0.000, inf, 0.000, 1637.000, // 0.000, inf, 0.000, inf, 0.000, 9864.000, 0.000, // inf, 0.000, 7480.000 @@ -474,7 +474,7 @@ void TEST_CASE10(void) { // 18446744073705977615, 18446744073704792883, // 18446744073704699584, 8685460, 18446744073709143843, // 18446744073703142874, 3905530, 18446744073704152149 - VLOAD_64(v4, 0xffffffffffb80822, 0xfffffffffff960b9, 0xffffffffffc84e30, + VLOAD_64(v16, 0xffffffffffb80822, 0xfffffffffff960b9, 0xffffffffffc84e30, 0x00000000001fd673, 0xffffffffff69013c, 0x00000000000e3ba3, 0x000000000003e709, 0x000000000030090f, 0xffffffffffc9770f, 0xffffffffffb76333, 0xffffffffffb5f6c0, 0x0000000000848794, @@ -482,7 +482,7 @@ void TEST_CASE10(void) { 0xffffffffffad9c55); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.f.xu.w v8, v4, v0.t"); + asm volatile("vfncvt.f.xu.w v8, v16, v0.t"); // 0.000, 18446744073709551616.000, 0.000, 2086515.000, // 0.000, 932771.000, 0.000, 3148047.000, 0.000, // 18446744073709551616.000, 0.000, 8685460.000, 0.000, @@ -504,11 +504,11 @@ void TEST_CASE11(void) { // 3851, 5592, -3692, -2747, -748, // -2621, -9352, 4018, 3174, -6975, // -4466 - VLOAD_32(v4, 0xffffe779, 0x00000e85, 0x0000233e, 0xffffdd23, 0xffffea66, + VLOAD_32(v16, 0xffffe779, 0x00000e85, 0x0000233e, 0xffffdd23, 0xffffea66, 0x00000f0b, 0x000015d8, 0xfffff194, 0xfffff545, 0xfffffd14, 0xfffff5c3, 0xffffdb78, 0x00000fb2, 0x00000c66, 0xffffe4c1, 0xffffee8e); - asm volatile("vfncvt.f.x.w v8, v4"); + asm volatile("vfncvt.f.x.w v8, v16"); // -6280.000, 3716.000, 9024.000, -8928.000, -5528.000, // 3852.000, 5592.000, 
-3692.000, -2748.000, -748.000, // -2620.000, -9352.000, 4018.000, 3174.000, -6976.000, @@ -525,13 +525,13 @@ void TEST_CASE11(void) { // 4475248, -2937762, 3310433, // 9151745, -2201488, -1506850, // 1593161 - VLOAD_64(v4, 0x00000000000b8d6b, 0xffffffffff7117cd, 0x00000000002e090d, + VLOAD_64(v16, 0x00000000000b8d6b, 0xffffffffff7117cd, 0x00000000002e090d, 0xffffffffff72714f, 0xffffffffff8094ea, 0xfffffffffff61394, 0xffffffffffe4e9c8, 0x00000000004717ac, 0x000000000002445d, 0x0000000000444970, 0xffffffffffd32c5e, 0x0000000000328361, 0x00000000008ba501, 0xffffffffffde6870, 0xffffffffffe901de, 0x0000000000184f49); - asm volatile("vfncvt.f.x.w v8, v4"); + asm volatile("vfncvt.f.x.w v8, v16"); // 757099.000, -9365555.000, 3016973.000, -9277105.000, // -8350486.000, -650348.000, -1775160.000, 4659116.000, // 148573.000, 4475248.000, -2937762.000, 3310433.000, @@ -551,13 +551,13 @@ void TEST_CASE12(void) { // 3851, 5592, -3692, -2747, -748, // -2621, -9352, 4018, 3174, -6975, // -4466 - VLOAD_32(v4, 0xffffe779, 0x00000e85, 0x0000233e, 0xffffdd23, 0xffffea66, + VLOAD_32(v16, 0xffffe779, 0x00000e85, 0x0000233e, 0xffffdd23, 0xffffea66, 0x00000f0b, 0x000015d8, 0xfffff194, 0xfffff545, 0xfffffd14, 0xfffff5c3, 0xffffdb78, 0x00000fb2, 0x00000c66, 0xffffe4c1, 0xffffee8e); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.f.x.w v8, v4, v0.t"); + asm volatile("vfncvt.f.x.w v8, v16, v0.t"); // 0.000, 3716.000, 0.000, -8928.000, 0.000, 3852.000, // 0.000, -3692.000, 0.000, -748.000, 0.000, -9352.000, // 0.000, 3174.000, 0.000, -4464.000 @@ -570,7 +570,7 @@ void TEST_CASE12(void) { // -8350486, -650348, -1775160, 4659116, // 148573, 4475248, -2937762, 3310433, // 9151745, -2201488, -1506850, 1593161 - VLOAD_64(v4, 0x00000000000b8d6b, 0xffffffffff7117cd, 0x00000000002e090d, + VLOAD_64(v16, 0x00000000000b8d6b, 0xffffffffff7117cd, 0x00000000002e090d, 0xffffffffff72714f, 0xffffffffff8094ea, 0xfffffffffff61394, 0xffffffffffe4e9c8, 0x00000000004717ac, 0x000000000002445d, 
0x0000000000444970, 0xffffffffffd32c5e, 0x0000000000328361, @@ -578,7 +578,7 @@ void TEST_CASE12(void) { 0x0000000000184f49); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.f.x.w v8, v4, v0.t"); + asm volatile("vfncvt.f.x.w v8, v16, v0.t"); // 0.000, -9365555.000, 0.000, -9277105.000, 0.000, // -650348.000, 0.000, 4659116.000, 0.000, 4475248.000, // 0.000, 3310433.000, 0.000, -2201488.000, 0.000, @@ -599,11 +599,11 @@ void TEST_CASE13(void) { // 908.994, -6788.630, -5789.335, 8054.104, 3947.551, 9596.856, // 2474.506, 3094.286, 7684.992, -6850.149, -54.922, 7737.443, // 4171.873, 5266.611, 9163.839, 5679.187 - VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + VLOAD_32(v16, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4, 0x460f2f5b, 0x45b1797f); - asm volatile("vfncvt.f.f.w v8, v4"); + asm volatile("vfncvt.f.f.w v8, v16"); // 909.000, -6788.000, -5788.000, 8056.000, 3948.000, 9600.000, // 2474.000, 3094.000, 7684.000, -6852.000, -54.938, 7736.000, // 4172.000, 5268.000, 9160.000, 5680.000 @@ -617,13 +617,13 @@ void TEST_CASE13(void) { // 7538298.328, -8731739.480, 537176.622, -3884944.157, // 7612336.042, -2270131.404, -4976406.726, -5260237.163, // -4947737.810, 3583352.355, 7648790.331, -9360989.228 - VLOAD_64(v4, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + VLOAD_64(v16, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, 0xc152dfc673d9ba53, 0x414b56bc2d765fad, 0x415d2d85952e8398, 0xc161dacba74d791e); - asm volatile("vfncvt.f.f.w v8, v4"); + asm volatile("vfncvt.f.f.w v8, v16"); // 153431.766, -7796011.000, -6652812.000, 1049714.750, // 7538298.500, -8731739.000, 537176.625, -3884944.250, // 7612336.000, 
-2270131.500, -4976406.500, -5260237.000, @@ -642,13 +642,13 @@ void TEST_CASE14(void) { // 908.994, -6788.630, -5789.335, 8054.104, 3947.551, 9596.856, // 2474.506, 3094.286, 7684.992, -6850.149, -54.922, 7737.443, // 4171.873, 5266.611, 9163.839, 5679.187 - VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + VLOAD_32(v16, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4, 0x460f2f5b, 0x45b1797f); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.f.f.w v8, v4, v0.t"); + asm volatile("vfncvt.f.f.w v8, v16, v0.t"); // 0.000, -6788.000, 0.000, 8056.000, 0.000, 9600.000, 0.000, // 3094.000, 0.000, -6852.000, 0.000, 7736.000, 0.000, 5268.000, // 0.000, 5680.000 @@ -661,7 +661,7 @@ void TEST_CASE14(void) { // 7538298.328, -8731739.480, 537176.622, -3884944.157, // 7612336.042, -2270131.404, -4976406.726, -5260237.163, // -4947737.810, 3583352.355, 7648790.331, -9360989.228 - VLOAD_64(v4, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + VLOAD_64(v16, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, @@ -669,7 +669,7 @@ void TEST_CASE14(void) { 0xc161dacba74d791e); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.f.f.w v8, v4, v0.t"); + asm volatile("vfncvt.f.f.w v8, v16, v0.t"); // 0.000, -7796011.000, 0.000, 1049714.750, 0.000, -8731739.000, // 0.000, -3884944.250, 0.000, -2270131.500, 0.000, -5260237.000, // 0.000, 3583352.250, 0.000, -9360989.000 @@ -690,11 +690,11 @@ void TEST_CASE15(void) { // 9596.856, 2474.506, 3094.286, 7684.992, -6850.149, // -54.922, 7737.443, 4171.873, 5266.611, 9163.839, // 5679.187 - VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + VLOAD_32(v16, 0x44633fa3, 
0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4, 0x460f2f5b, 0x45b1797f); - asm volatile("vfncvt.rod.f.f.w v8, v4"); + asm volatile("vfncvt.rod.f.f.w v8, v16"); // 909.000, -6788.000, -5788.000, 8056.000, 3948.000, // 9600.000, 2474.000, 3094.000, 7684.000, -6852.000, // -54.938, 7736.000, 4172.000, 5268.000, 9160.000, 5680.000 @@ -709,13 +709,13 @@ void TEST_CASE15(void) { // -3884944.157, 7612336.042, -2270131.404, // -4976406.726, -5260237.163, -4947737.810, // 3583352.355, 7648790.331, -9360989.228 - VLOAD_64(v4, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + VLOAD_64(v16, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, 0xc152dfc673d9ba53, 0x414b56bc2d765fad, 0x415d2d85952e8398, 0xc161dacba74d791e); - asm volatile("vfncvt.rod.f.f.w v8, v4"); + asm volatile("vfncvt.rod.f.f.w v8, v16"); // 153431.766, -7796010.500, -6652812.500, // 1049714.875, 7538298.500, -8731739.000, // 537176.5625, -3884944.250, 7612336.500, @@ -736,13 +736,13 @@ void TEST_CASE16(void) { // 908.994, -6788.630, -5789.335, 8054.104, 3947.551, 9596.856, // 2474.506, 3094.286, 7684.992, -6850.149, -54.922, 7737.443, // 4171.873, 5266.611, 9163.839, 5679.187 - VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + VLOAD_32(v16, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4, 0x460f2f5b, 0x45b1797f); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.rod.f.f.w v8, v4, v0.t"); + asm volatile("vfncvt.rod.f.f.w v8, v16, v0.t"); // 0.000, -6788.000, 0.000, 8056.000, 0.000, 9600.000, 0.000, // 3094.000, 0.000, -6852.000, 0.000, 7736.000, 0.000, 
5268.000, // 0.000, 5680.000 @@ -755,7 +755,7 @@ void TEST_CASE16(void) { // 7538298.328, -8731739.480, 537176.622, -3884944.157, // 7612336.042, -2270131.404, -4976406.726, -5260237.163, // -4947737.810, 3583352.355, 7648790.331, -9360989.228 - VLOAD_64(v4, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + VLOAD_64(v16, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, @@ -763,7 +763,7 @@ void TEST_CASE16(void) { 0xc161dacba74d791e); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vfncvt.rod.f.f.w v8, v4, v0.t"); + asm volatile("vfncvt.rod.f.f.w v8, v16, v0.t"); // 0.000, -7796011.000, 0.000, 1049714.750, 0.000, -8731739.000, // 0.000, -3884944.250, 0.000, -2270131.500, 0.000, -5260237.000, // 0.000, 3583352.250, 0.000, -9360989.000 diff --git a/sw/riscvTests/isa/rv64uv/vfnmacc.c b/sw/riscvTests/isa/rv64uv/vfnmacc.c index b8a3d3f8..9d0e0b94 100644 --- a/sw/riscvTests/isa/rv64uv/vfnmacc.c +++ b/sw/riscvTests/isa/rv64uv/vfnmacc.c @@ -10,36 +10,36 @@ // Simple random test with similar values + 1 subnormal void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.6377, -0.2332, 0.9458, -0.2612, -0.6772, 0.4543, 0.1002, // 0.7764, 0.7979, -0.8599, 0.7837, -0.2461, 0.4221, 0.2251, // 0.7739, 0.1461 - VLOAD_16(v4, 0xb91a, 0xb376, 0x3b91, 0xb42e, 0xb96b, 0x3745, 0x2e69, 0x3a36, + VLOAD_16(v16, 0xb91a, 0xb376, 0x3b91, 0xb42e, 0xb96b, 0x3745, 0x2e69, 0x3a36, 0x3a62, 0xbae1, 0x3a45, 0xb3e0, 0x36c1, 0x3334, 0x3a31, 0x30ad); // 0.9551, -0.6787, 0.5605, -0.7305, -0.7197, -0.1581, 0.7271, // 0.6113, 0.2971, -0.8062, 0.9668, -0.5278, 0.3972, -0.1084, // -0.3015, 0.9556 - VLOAD_16(v6, 0x3ba4, 0xb96e, 0x387c, 0xb9d8, 0xb9c2, 0xb10f, 0x39d1, 0x38e4, + VLOAD_16(v24, 0x3ba4, 0xb96e, 0x387c, 0xb9d8, 0xb9c2, 0xb10f, 0x39d1, 0x38e4, 0x34c1, 0xba73, 0x3bbc, 0xb839, 
0x365b, 0xaef0, 0xb4d3, 0x3ba5); // 0.7402, 0.0935, 0.1455, -0.2771, 0.3347, 0.7964, 0.6543, // -0.7534, 0.2476, 0.0338, 0.9980, 0.3284, 0.2239, // -0.4551, 0.6694, -0.8550 - VLOAD_16(v2, 0x39ec, 0x2dfc, 0x30a8, 0xb46f, 0x355b, 0x3a5f, 0x393c, 0xba07, + VLOAD_16(v8, 0x39ec, 0x2dfc, 0x30a8, 0xb46f, 0x355b, 0x3a5f, 0x393c, 0xba07, 0x33ec, 0x2853, 0x3bfc, 0x3541, 0x332a, 0xb748, 0x395b, 0xbad7); - asm volatile("vfnmacc.vv v2, v4, v6"); + asm volatile("vfnmacc.vv v8, v16, v24"); // -0.1313, -0.2517, -0.6758, 0.0863, -0.8223, -0.7246, -0.7271, // 0.2788, -0.4846, -0.7271, -1.7559, -0.4583, -0.3916, 0.4795, // -0.4360, 0.7153 - VCMP_U16(1, v2, 0xb033, 0xb407, 0xb968, 0x2d86, 0xba94, 0xb9cc, 0xb9d1, + VCMP_U16(1, v8, 0xb033, 0xb407, 0xb968, 0x2d86, 0xba94, 0xb9cc, 0xb9d1, 0x3476, 0xb7c1, 0xb9d1, 0xbf06, 0xb755, 0xb644, 0x37ac, 0xb6fa, 0x39b9); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.17374928, -0.36242354, -0.18093164, 0.94970566, // -0.45790458, -0.17780401, -0.51985794, -0.04832974, // 0.13252106, 0.77533042, 0.42536697, -0.72199643, // -0.25088808, 0.28798762, 0.66300607, -0.63549894 - VLOAD_32(v4, 0xbe31eb55, 0xbeb98f94, 0xbe394625, 0x3f731fe9, 0xbeea7278, + VLOAD_32(v16, 0xbe31eb55, 0xbeb98f94, 0xbe394625, 0x3f731fe9, 0xbeea7278, 0xbe361241, 0xbf051569, 0xbd45f569, 0x3e07b39a, 0x3f467c0e, 0x3ed9c9b3, 0xbf38d4c2, 0xbe807467, 0x3e93731d, 0x3f29bac4, 0xbf22b00f); @@ -47,7 +47,7 @@ void TEST_CASE1(void) { // 0.19637996, -0.88467985, 0.73412597, -0.98048240, 0.25438991, // -0.02058743, -0.00876777, 0.21936898, -0.71130067, // -0.29675287, -0.96093589, 0.24695934 - VLOAD_32(v6, 0xbf1cc7ee, 0x3f36e29b, 0xbe2014df, 0x3ddf9905, 0x3e4917d4, + VLOAD_32(v24, 0xbf1cc7ee, 0x3f36e29b, 0xbe2014df, 0x3ddf9905, 0x3e4917d4, 0xbf627a61, 0x3f3befae, 0xbf7b00e5, 0x3e823f65, 0xbca8a6f9, 0xbc0fa6af, 0x3e60a243, 0xbf3617cd, 0xbe97effe, 0xbf75ffe5, 0x3e7ce2e9); @@ -55,29 +55,29 @@ void TEST_CASE1(void) { // 0.45613721, -0.90825689, 0.21235447, 0.35766414, // 0.08650716, -0.98431164, 
0.21029140, -0.92919809, // 0.46440944, 0.70648551, -0.80876821, -0.19595607 - VLOAD_32(v2, 0x3f46a83c, 0x3cd04eb8, 0xbf22dcea, 0x3defb680, 0x3ee98ad1, + VLOAD_32(v8, 0x3f46a83c, 0x3cd04eb8, 0xbf22dcea, 0x3defb680, 0x3ee98ad1, 0xbf688386, 0x3e597373, 0x3eb71fc1, 0x3db12aaa, 0xbf7bfbd9, 0x3e5756a1, 0xbf6ddfed, 0x3eedc713, 0x3f34dc3c, 0xbf4f0b6f, 0xbe48a8b5); - asm volatile("vfnmacc.vv v2, v4, v6"); + asm volatile("vfnmacc.vv v8, v16, v24"); // -0.88241309, 0.23348548, 0.60789841, -0.22073483, // -0.36621392, 0.75095725, 0.16928674, -0.40505061, // -0.12021918, 1.00027370, -0.20656188, 1.08758175, // -0.64286631, -0.62102437, 1.44587445, 0.35289848 - VCMP_U32(2, v2, 0xbf61e5d3, 0x3e6f16d2, 0x3f1b9f3b, 0xbe62084f, 0xbebb8064, + VCMP_U32(2, v8, 0xbf61e5d3, 0x3e6f16d2, 0x3f1b9f3b, 0xbe62084f, 0xbebb8064, 0x3f403ebc, 0x3e2d5982, 0xbecf62cb, 0xbdf63579, 0x3f8008f8, 0xbe5384f5, 0x3f8b35e1, 0xbf2492e3, 0xbf1efb74, 0x3fb9126b, 0x3eb4af1c); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.3252450595073633, 0.4758165631309326, -0.1595578232245429, // -0.5062008461482019, -0.8497827573746595, -0.1941654045426651, // 0.5653121187716577, -0.9852357785633095, -0.4238236947700038, // 0.5852522737985073, 0.4009389814391957, -0.8725649196362917, // -0.5946782335830663, 0.4175703122760628, -0.6355596052793091, // -0.3469340725892474 - VLOAD_64(v4, 0xbfd4d0d0a77142c0, 0x3fde73c75062b7e8, 0xbfc46c6408490198, + VLOAD_64(v16, 0xbfd4d0d0a77142c0, 0x3fde73c75062b7e8, 0xbfc46c6408490198, 0xbfe032cc1ded3ff0, 0xbfeb316b9bf41faa, 0xbfc8da6977433ee0, 0x3fe2170970c503fe, 0xbfef870d2ef8e992, 0xbfdb1fed6b13a6c0, 0x3fe2ba62f9fbf9aa, 0x3fd9a8fbf93e43f0, 0xbfebec0d442f3114, @@ -90,7 +90,7 @@ void TEST_CASE1(void) { // -0.5209348837668262, 0.1676058792979986, // -0.3611782231841894, 0.5839305722445856, // -0.5690013462620132, -0.7273345685963009 - VLOAD_64(v6, 0x3fece11b83abb9b8, 0x3fc666d29fd34b08, 0x3fe0197c6cafd8c4, + VLOAD_64(v24, 0x3fece11b83abb9b8, 0x3fc666d29fd34b08, 
0x3fe0197c6cafd8c4, 0xbfcd7b88c1b4daf0, 0xbfd8d61841f43c54, 0xbfee36420fbd9482, 0xbfef195a7bef10b4, 0x3fe57beccc59d47e, 0xbfb7593394338500, 0xbfe11c8c0e185e4a, 0xbfe0ab7fa223f876, 0x3fc5741c0519e298, @@ -102,20 +102,20 @@ void TEST_CASE1(void) { // -0.7128277097157256, -0.8385947434294139, 0.8834902787005550, // 0.5936682304042178, 0.1532178226844403, -0.5096194622607613, // -0.8578075287458693 - VLOAD_64(v2, 0xbfb3b16484d96110, 0xbfeb07fadbff7462, 0xbfc87ec5fcb06230, + VLOAD_64(v8, 0xbfb3b16484d96110, 0xbfeb07fadbff7462, 0xbfc87ec5fcb06230, 0x3fe885d78705c2d4, 0x3fca59411dac8758, 0xbf99c23b11679a80, 0x3fe0e17f24429b70, 0xbfe8542d9e4907ce, 0x3fe4cb329a1542de, 0xbfe6cf7c0e9d2c04, 0xbfead5c4a4b40f1a, 0x3fec458d67ab4a36, 0x3fe2ff5484485472, 0x3fc39ca440cc0820, 0xbfe04ecd797a151a, 0xbfeb7328c6473c1e); - asm volatile("vfnmacc.vv v2, v4, v6"); + asm volatile("vfnmacc.vv v8, v16, v24"); // 0.3704523636601942, 0.7614500740339213, 0.2716441267930978, // -0.8829326116147059, -0.5356217329860959, // -0.1581610880357251, 0.0218692556270895, 1.4217408543890222, // -0.6884591815896014, 1.0257824393236514, 1.0474578451230310, // -0.7372432681003268, -0.8084530581760619, // -0.3970498940841519, 0.1479851912270807, 0.6054703847278113 - VCMP_U64(3, v2, 0x3fd7b57dd4a95f28, 0x3fe85dcc8bb06629, 0x3fd1629e0c2e846c, + VCMP_U64(3, v8, 0x3fd7b57dd4a95f28, 0x3fe85dcc8bb06629, 0x3fd1629e0c2e846c, 0xbfec40fbe46ea001, 0xbfe123d0304677d2, 0xbfc43e9f5e4e7ddd, 0x3f9664e4e6d32991, 0x3ff6bf73568fcea3, 0xbfe607db8cb1dd4b, 0x3ff0699ad8db4c8b, 0x3ff0c263284bdf71, 0xbfe7977f31b5fc6d, @@ -127,37 +127,37 @@ void TEST_CASE1(void) { // Simple random test with similar values + 1 subnormal (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.6377, -0.2332, 0.9458, -0.2612, -0.6772, 0.4543, 0.1002, // 0.7764, 0.7979, -0.8599, 0.7837, -0.2461, 0.4221, 0.2251, // 0.7739, 0.1461 - VLOAD_16(v4, 0xb91a, 0xb376, 0x3b91, 0xb42e, 0xb96b, 0x3745, 
0x2e69, 0x3a36, + VLOAD_16(v16, 0xb91a, 0xb376, 0x3b91, 0xb42e, 0xb96b, 0x3745, 0x2e69, 0x3a36, 0x3a62, 0xbae1, 0x3a45, 0xb3e0, 0x36c1, 0x3334, 0x3a31, 0x30ad); // 0.9551, -0.6787, 0.5605, -0.7305, -0.7197, -0.1581, 0.7271, // 0.6113, 0.2971, -0.8062, 0.9668, -0.5278, 0.3972, -0.1084, // -0.3015, 0.9556 - VLOAD_16(v6, 0x3ba4, 0xb96e, 0x387c, 0xb9d8, 0xb9c2, 0xb10f, 0x39d1, 0x38e4, + VLOAD_16(v24, 0x3ba4, 0xb96e, 0x387c, 0xb9d8, 0xb9c2, 0xb10f, 0x39d1, 0x38e4, 0x34c1, 0xba73, 0x3bbc, 0xb839, 0x365b, 0xaef0, 0xb4d3, 0x3ba5); VLOAD_8(v0, 0xAA, 0xAA); // 0.7402, 0.0935, 0.1455, -0.2771, 0.3347, 0.7964, 0.6543, // -0.7534, 0.2476, 0.0338, 0.9980, 0.3284, 0.2239, // -0.4551, 0.6694, -0.8550 - VLOAD_16(v2, 0x39ec, 0x2dfc, 0x30a8, 0xb46f, 0x355b, 0x3a5f, 0x393c, 0xba07, + VLOAD_16(v8, 0x39ec, 0x2dfc, 0x30a8, 0xb46f, 0x355b, 0x3a5f, 0x393c, 0xba07, 0x33ec, 0x2853, 0x3bfc, 0x3541, 0x332a, 0xb748, 0x395b, 0xbad7); - asm volatile("vfnmacc.vv v2, v4, v6, v0.t"); + asm volatile("vfnmacc.vv v8, v16, v24, v0.t"); // 0.0000, -0.2517, 0.0000, 0.0863, 0.0000, -0.7246, 0.0000, // 0.2788, 0.0000, -0.7271, 0.0000, -0.4583, 0.0000, 0.4795, // 0.0000, 0.7153 - VCMP_U16(4, v2, 0x39ec, 0xb407, 0x30a8, 0x2d86, 0x355b, 0xb9cc, 0x393c, + VCMP_U16(4, v8, 0x39ec, 0xb407, 0x30a8, 0x2d86, 0x355b, 0xb9cc, 0x393c, 0x3476, 0x33ec, 0xb9d1, 0x3bfc, 0xb755, 0x332a, 0x37ac, 0x395b, 0x39b9); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.17374928, -0.36242354, -0.18093164, 0.94970566, // -0.45790458, -0.17780401, -0.51985794, -0.04832974, // 0.13252106, 0.77533042, 0.42536697, -0.72199643, // -0.25088808, 0.28798762, 0.66300607, -0.63549894 - VLOAD_32(v4, 0xbe31eb55, 0xbeb98f94, 0xbe394625, 0x3f731fe9, 0xbeea7278, + VLOAD_32(v16, 0xbe31eb55, 0xbeb98f94, 0xbe394625, 0x3f731fe9, 0xbeea7278, 0xbe361241, 0xbf051569, 0xbd45f569, 0x3e07b39a, 0x3f467c0e, 0x3ed9c9b3, 0xbf38d4c2, 0xbe807467, 0x3e93731d, 0x3f29bac4, 0xbf22b00f); @@ -165,7 +165,7 @@ void TEST_CASE2(void) { // 0.19637996, -0.88467985, 0.73412597, 
-0.98048240, 0.25438991, // -0.02058743, -0.00876777, 0.21936898, -0.71130067, // -0.29675287, -0.96093589, 0.24695934 - VLOAD_32(v6, 0xbf1cc7ee, 0x3f36e29b, 0xbe2014df, 0x3ddf9905, 0x3e4917d4, + VLOAD_32(v24, 0xbf1cc7ee, 0x3f36e29b, 0xbe2014df, 0x3ddf9905, 0x3e4917d4, 0xbf627a61, 0x3f3befae, 0xbf7b00e5, 0x3e823f65, 0xbca8a6f9, 0xbc0fa6af, 0x3e60a243, 0xbf3617cd, 0xbe97effe, 0xbf75ffe5, 0x3e7ce2e9); @@ -174,29 +174,29 @@ void TEST_CASE2(void) { // 0.45613721, -0.90825689, 0.21235447, 0.35766414, // 0.08650716, -0.98431164, 0.21029140, -0.92919809, // 0.46440944, 0.70648551, -0.80876821, -0.19595607 - VLOAD_32(v2, 0x3f46a83c, 0x3cd04eb8, 0xbf22dcea, 0x3defb680, 0x3ee98ad1, + VLOAD_32(v8, 0x3f46a83c, 0x3cd04eb8, 0xbf22dcea, 0x3defb680, 0x3ee98ad1, 0xbf688386, 0x3e597373, 0x3eb71fc1, 0x3db12aaa, 0xbf7bfbd9, 0x3e5756a1, 0xbf6ddfed, 0x3eedc713, 0x3f34dc3c, 0xbf4f0b6f, 0xbe48a8b5); - asm volatile("vfnmacc.vv v2, v4, v6, v0.t"); + asm volatile("vfnmacc.vv v8, v16, v24, v0.t"); // 0.00000000, 0.23348548, 0.00000000, -0.22073483, // 0.00000000, 0.75095725, 0.00000000, -0.40505061, // 0.00000000, 1.00027370, 0.00000000, 1.08758175, // 0.00000000, -0.62102437, 0.00000000, 0.35289848 - VCMP_U32(5, v2, 0x3f46a83c, 0x3e6f16d2, 0xbf22dcea, 0xbe62084f, 0x3ee98ad1, + VCMP_U32(5, v8, 0x3f46a83c, 0x3e6f16d2, 0xbf22dcea, 0xbe62084f, 0x3ee98ad1, 0x3f403ebc, 0x3e597373, 0xbecf62cb, 0x3db12aaa, 0x3f8008f8, 0x3e5756a1, 0x3f8b35e1, 0x3eedc713, 0xbf1efb74, 0xbf4f0b6f, 0x3eb4af1c); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.3252450595073633, 0.4758165631309326, -0.1595578232245429, // -0.5062008461482019, -0.8497827573746595, -0.1941654045426651, // 0.5653121187716577, -0.9852357785633095, -0.4238236947700038, // 0.5852522737985073, 0.4009389814391957, -0.8725649196362917, // -0.5946782335830663, 0.4175703122760628, -0.6355596052793091, // -0.3469340725892474 - VLOAD_64(v4, 0xbfd4d0d0a77142c0, 0x3fde73c75062b7e8, 0xbfc46c6408490198, + VLOAD_64(v16, 0xbfd4d0d0a77142c0, 
0x3fde73c75062b7e8, 0xbfc46c6408490198, 0xbfe032cc1ded3ff0, 0xbfeb316b9bf41faa, 0xbfc8da6977433ee0, 0x3fe2170970c503fe, 0xbfef870d2ef8e992, 0xbfdb1fed6b13a6c0, 0x3fe2ba62f9fbf9aa, 0x3fd9a8fbf93e43f0, 0xbfebec0d442f3114, @@ -209,7 +209,7 @@ void TEST_CASE2(void) { // -0.5209348837668262, 0.1676058792979986, // -0.3611782231841894, 0.5839305722445856, // -0.5690013462620132, -0.7273345685963009 - VLOAD_64(v6, 0x3fece11b83abb9b8, 0x3fc666d29fd34b08, 0x3fe0197c6cafd8c4, + VLOAD_64(v24, 0x3fece11b83abb9b8, 0x3fc666d29fd34b08, 0x3fe0197c6cafd8c4, 0xbfcd7b88c1b4daf0, 0xbfd8d61841f43c54, 0xbfee36420fbd9482, 0xbfef195a7bef10b4, 0x3fe57beccc59d47e, 0xbfb7593394338500, 0xbfe11c8c0e185e4a, 0xbfe0ab7fa223f876, 0x3fc5741c0519e298, @@ -222,20 +222,20 @@ void TEST_CASE2(void) { // -0.7128277097157256, -0.8385947434294139, 0.8834902787005550, // 0.5936682304042178, 0.1532178226844403, -0.5096194622607613, // -0.8578075287458693 - VLOAD_64(v2, 0xbfb3b16484d96110, 0xbfeb07fadbff7462, 0xbfc87ec5fcb06230, + VLOAD_64(v8, 0xbfb3b16484d96110, 0xbfeb07fadbff7462, 0xbfc87ec5fcb06230, 0x3fe885d78705c2d4, 0x3fca59411dac8758, 0xbf99c23b11679a80, 0x3fe0e17f24429b70, 0xbfe8542d9e4907ce, 0x3fe4cb329a1542de, 0xbfe6cf7c0e9d2c04, 0xbfead5c4a4b40f1a, 0x3fec458d67ab4a36, 0x3fe2ff5484485472, 0x3fc39ca440cc0820, 0xbfe04ecd797a151a, 0xbfeb7328c6473c1e); - asm volatile("vfnmacc.vv v2, v4, v6, v0.t"); + asm volatile("vfnmacc.vv v8, v16, v24, v0.t"); // 0.0000000000000000, 0.7614500740339213, 0.0000000000000000, // -0.8829326116147059, 0.0000000000000000, // -0.1581610880357251, 0.0000000000000000, 1.4217408543890222, // 0.0000000000000000, 1.0257824393236514, 0.0000000000000000, // -0.7372432681003268, 0.0000000000000000, // -0.3970498940841519, 0.0000000000000000, 0.6054703847278113 - VCMP_U64(6, v2, 0xbfb3b16484d96110, 0x3fe85dcc8bb06629, 0xbfc87ec5fcb06230, + VCMP_U64(6, v8, 0xbfb3b16484d96110, 0x3fe85dcc8bb06629, 0xbfc87ec5fcb06230, 0xbfec40fbe46ea001, 0x3fca59411dac8758, 0xbfc43e9f5e4e7ddd, 
0x3fe0e17f24429b70, 0x3ff6bf73568fcea3, 0x3fe4cb329a1542de, 0x3ff0699ad8db4c8b, 0xbfead5c4a4b40f1a, 0xbfe7977f31b5fc6d, @@ -246,29 +246,29 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.1300 BOX_HALF_IN_FLOAT(fscalar_16, 0x3029); // -0.2844, 0.4070, -0.1837, -0.2321, -0.5283, -0.6104, -0.7183, // -0.1191, 0.7998, 0.1169, 0.1169, -0.9214, -0.4360, -0.6250, // -0.5386, 0.6543 - VLOAD_16(v4, 0xb48d, 0x3683, 0xb1e1, 0xb36d, 0xb83a, 0xb8e2, 0xb9bf, 0xaf9f, + VLOAD_16(v16, 0xb48d, 0x3683, 0xb1e1, 0xb36d, 0xb83a, 0xb8e2, 0xb9bf, 0xaf9f, 0x3a66, 0x2f7c, 0x2f7c, 0xbb5f, 0xb6fa, 0xb900, 0xb84f, 0x393c); // 0.9268, -0.3337, -0.3225, -0.8306, -0.1857, -0.6831, 0.0557, // 0.5586, 0.2352, 0.6294, 0.6294, -0.8877, -0.2426, 0.5488, // 0.4001, 0.1772 - VLOAD_16(v2, 0x3b6a, 0xb557, 0xb529, 0xbaa5, 0xb1f1, 0xb977, 0x2b21, 0x3878, + VLOAD_16(v8, 0x3b6a, 0xb557, 0xb529, 0xbaa5, 0xb1f1, 0xb977, 0x2b21, 0x3878, 0x3387, 0x3909, 0x3909, 0xbb1a, 0xb3c3, 0x3864, 0x3667, 0x31ac); - asm volatile("vfnmacc.vf v2, %[A], v4" ::[A] "f"(fscalar_16)); + asm volatile("vfnmacc.vf v8, %[A], v16" ::[A] "f"(fscalar_16)); // -0.8896, 0.2808, 0.3464, 0.8608, 0.2544, 0.7627, 0.0377, // -0.5430, -0.3394, -0.6445, -0.6445, 1.0078, 0.2993, -0.4675, // -0.3301, -0.2622 - VCMP_U16(7, v2, 0xbb1e, 0x347e, 0x358b, 0x3ae3, 0x3412, 0x3a1a, 0x28d3, + VCMP_U16(7, v8, 0xbb1e, 0x347e, 0x358b, 0x3ae3, 0x3412, 0x3a1a, 0x28d3, 0xb858, 0xb56d, 0xb928, 0xb928, 0x3c08, 0x34ca, 0xb77b, 0xb548, 0xb432); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.26917368 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbe89d122); @@ -276,7 +276,7 @@ void TEST_CASE3(void) { // -0.49064314, -0.74352056, -0.17169137, 0.26071417, // 0.71857828, 0.07920383, -0.43244356, -0.58339220, 0.80679923, // 0.23900302, 0.73513943, -0.80685192 - VLOAD_32(v4, 0xbe8e0f00, 0xbf5cf35c, 0xbe2b7bf9, 0xbf2fce80, 0xbefb3594, + 
VLOAD_32(v16, 0xbe8e0f00, 0xbf5cf35c, 0xbe2b7bf9, 0xbf2fce80, 0xbefb3594, 0xbf3e575d, 0xbe2fcfdd, 0x3e857c54, 0x3f37f4bf, 0x3da2359e, 0xbedd693e, 0xbf155931, 0x3f4e8a65, 0x3e74bd35, 0x3f3c3219, 0xbf4e8dd9); @@ -284,22 +284,22 @@ void TEST_CASE3(void) { // 0.63772237, -0.87242430, -0.44194883, -0.41286576, // -0.57735479, 0.61664599, 0.94073379, -0.89744234, // -0.70681161, 0.23247144, 0.06774496, -0.38581881 - VLOAD_32(v2, 0x3e0a5676, 0xbe955998, 0x3f4e10c4, 0xbf22e93c, 0x3f2341c6, + VLOAD_32(v8, 0x3e0a5676, 0xbe955998, 0x3f4e10c4, 0xbf22e93c, 0x3f2341c6, 0xbf5f5733, 0xbee2471e, 0xbed36324, 0xbf13cd86, 0x3f1ddc83, 0x3f70d3ee, 0xbf65bec8, 0xbf34f19b, 0x3e6e0cfe, 0x3d8abdde, 0xbec58a0b); - asm volatile("vfnmacc.vf v2, %[A], v4" ::[A] "f"(fscalar_32)); + asm volatile("vfnmacc.vf v8, %[A], v16" ::[A] "f"(fscalar_32)); // -0.20977989, 0.05937849, -0.85002053, 0.45151776, // -0.76979059, 0.67228812, 0.39573404, 0.48304313, // 0.77077717, -0.59532642, -1.05713618, 0.74040854, 0.92398071, // -0.16813812, 0.13013524, 0.16863550 - VCMP_U32(8, v2, 0xbe56d08a, 0x3d7336de, 0xbf599af2, 0x3ee72d57, 0xbf4510ff, + VCMP_U32(8, v8, 0xbe56d08a, 0x3d7336de, 0xbf599af2, 0x3ee72d57, 0xbf4510ff, 0x3f2c1b13, 0x3eca9da7, 0x3ef7516f, 0x3f4551a7, 0xbf186750, 0xbf87503d, 0x3f3d8b6a, 0x3f6c8a00, 0xbe2c2c66, 0x3e05422c, 0x3e2caec9); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.1021836258281641 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fba28b4c31e60e0); @@ -309,7 +309,7 @@ void TEST_CASE3(void) { // 0.9355143976513562, -0.4219868517017851, 0.8950700270161456, // 0.6727820676214205, -0.8833440526985297, 0.0357808590148252, // -0.3802125831332157, 0.9831607630398518 - VLOAD_64(v4, 0x3fd4f4406b993a2c, 0x3fed9d97ae0b1cd6, 0x3feefcd01012c05e, + VLOAD_64(v16, 0x3fd4f4406b993a2c, 0x3fed9d97ae0b1cd6, 0x3feefcd01012c05e, 0xbfe5b09210bc082e, 0x3feef1596c459614, 0x3fe9af1e3ee3adfa, 0xbfe7a4ec2374d0e2, 0x3fe7c8a2209c110c, 0x3fedefbbe3db30dc, 0xbfdb01d523d9acc0, 
0x3feca469e5b540fa, 0x3fe5876e42389dca, @@ -322,20 +322,20 @@ void TEST_CASE3(void) { // -0.3443302881655670, 0.3680926220616383, // -0.2344410843781140, 0.3553553454507421, // 0.0951222110617760, -0.8329780449088213 - VLOAD_64(v2, 0x3fc56654e2cbd888, 0x3fe40bb445915f4a, 0x3fc4514877d696a0, + VLOAD_64(v8, 0x3fc56654e2cbd888, 0x3fe40bb445915f4a, 0x3fc4514877d696a0, 0xbfd56df357d00344, 0x3fde371a20d41408, 0x3fd26b70e63cabf0, 0x3fe044a59c60fcd4, 0x3fee3905d92cc95e, 0xbfd1f50bba2f6e40, 0xbfe719aff62247a4, 0xbfd60981e7ac601c, 0x3fd78ed45b69d4fc, 0xbfce022a5b1f1348, 0x3fd6be2458cadcb0, 0x3fb859ede1a22f80, 0xbfeaa7c192a56bc8); - asm volatile("vfnmacc.vf v2, %[A], v4" ::[A] "f"(dscalar_64)); + asm volatile("vfnmacc.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); // -0.2006411422994659, -0.7209983883869466, -0.2576811937222847, // 0.4040963223414386, -0.5709208038927434, -0.3698229881701340, // -0.4328780954683192, -1.0204078946581041, 0.1849871561177042, // 0.7650058093340112, 0.2528687874349445, -0.4368399331233641, // 0.3247043825365946, -0.3590115633601233, -0.0562707107317317, // 0.7325151133694248 - VCMP_U64(9, v2, 0xbfc9ae9be43442c9, 0xbfe7126b3652e68a, 0xbfd07dd942f53687, + VCMP_U64(9, v8, 0xbfc9ae9be43442c9, 0xbfe7126b3652e68a, 0xbfd07dd942f53687, 0x3fd9dcb6d238fb8a, 0xbfe244fbb4aa695e, 0xbfd7ab2e09dffb6d, 0xbfdbb44653cc3c92, 0xbff053973a823036, 0x3fc7ada8bcda50a5, 0x3fe87aed768addeb, 0x3fd02f00910d95b4, 0xbfdbf52f7a9681dc, @@ -346,30 +346,30 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.1300 BOX_HALF_IN_FLOAT(fscalar_16, 0x3029); // -0.2844, 0.4070, -0.1837, -0.2321, -0.5283, -0.6104, // -0.7183, -0.1191, 0.7998, 0.1169, 0.2551, -0.9214, // -0.4360, -0.6250, -0.5386, 0.6543 - VLOAD_16(v4, 0xb48d, 0x3683, 0xb1e1, 0xb36d, 0xb83a, 0xb8e2, 0xb9bf, 0xaf9f, + VLOAD_16(v16, 0xb48d, 0x3683, 0xb1e1, 0xb36d, 0xb83a, 0xb8e2, 0xb9bf, 0xaf9f, 
0x3a66, 0x2f7c, 0x3415, 0xbb5f, 0xb6fa, 0xb900, 0xb84f, 0x393c); VLOAD_8(v0, 0xAA, 0xAA); // 0.9268, -0.3337, -0.3225, -0.8306, -0.1857, -0.6831, 0.0557, // 0.5586, 0.2352, 0.6294, -0.0325, -0.8877, -0.2426, 0.5488, // 0.4001, 0.1772 - VLOAD_16(v2, 0x3b6a, 0xb557, 0xb529, 0xbaa5, 0xb1f1, 0xb977, 0x2b21, 0x3878, + VLOAD_16(v8, 0x3b6a, 0xb557, 0xb529, 0xbaa5, 0xb1f1, 0xb977, 0x2b21, 0x3878, 0x3387, 0x3909, 0xa828, 0xbb1a, 0xb3c3, 0x3864, 0x3667, 0x31ac); - asm volatile("vfnmacc.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); + asm volatile("vfnmacc.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_16)); // 0.0000, 0.2808, 0.0000, 0.8608, 0.0000, 0.7627, 0.0000, // -0.5430, 0.0000, -0.6445, 0.0000, 1.0078, 0.0000, // -0.4675, 0.0000, -0.2622 - VCMP_U16(10, v2, 0x3b6a, 0x347e, 0xb529, 0x3ae3, 0xb1f1, 0x3a1a, 0x2b21, + VCMP_U16(10, v8, 0x3b6a, 0x347e, 0xb529, 0x3ae3, 0xb1f1, 0x3a1a, 0x2b21, 0xb858, 0x3387, 0xb928, 0xa828, 0x3c08, 0xb3c3, 0xb77b, 0x3667, 0xb432); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.26917368 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbe89d122); @@ -377,7 +377,7 @@ void TEST_CASE4(void) { // -0.49064314, -0.74352056, -0.17169137, 0.26071417, // 0.71857828, 0.07920383, -0.43244356, -0.58339220, // 0.80679923, 0.23900302, 0.73513943, -0.80685192 - VLOAD_32(v4, 0xbe8e0f00, 0xbf5cf35c, 0xbe2b7bf9, 0xbf2fce80, 0xbefb3594, + VLOAD_32(v16, 0xbe8e0f00, 0xbf5cf35c, 0xbe2b7bf9, 0xbf2fce80, 0xbefb3594, 0xbf3e575d, 0xbe2fcfdd, 0x3e857c54, 0x3f37f4bf, 0x3da2359e, 0xbedd693e, 0xbf155931, 0x3f4e8a65, 0x3e74bd35, 0x3f3c3219, 0xbf4e8dd9); @@ -386,22 +386,22 @@ void TEST_CASE4(void) { // 0.63772237, -0.87242430, -0.44194883, -0.41286576, // -0.57735479, 0.61664599, 0.94073379, -0.89744234, // -0.70681161, 0.23247144, 0.06774496, -0.38581881 - VLOAD_32(v2, 0x3e0a5676, 0xbe955998, 0x3f4e10c4, 0xbf22e93c, 0x3f2341c6, + VLOAD_32(v8, 0x3e0a5676, 0xbe955998, 0x3f4e10c4, 0xbf22e93c, 0x3f2341c6, 0xbf5f5733, 0xbee2471e, 0xbed36324, 0xbf13cd86, 0x3f1ddc83, 0x3f70d3ee, 
0xbf65bec8, 0xbf34f19b, 0x3e6e0cfe, 0x3d8abdde, 0xbec58a0b); - asm volatile("vfnmacc.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); + asm volatile("vfnmacc.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_32)); // 0.00000000, 0.05937849, 0.00000000, 0.45151776, // 0.00000000, 0.67228812, 0.00000000, 0.48304313, // 0.00000000, -0.59532642, 0.00000000, 0.74040854, // 0.00000000, -0.16813812, 0.00000000, 0.16863550 - VCMP_U32(11, v2, 0x3e0a5676, 0x3d7336de, 0x3f4e10c4, 0x3ee72d57, 0x3f2341c6, + VCMP_U32(11, v8, 0x3e0a5676, 0x3d7336de, 0x3f4e10c4, 0x3ee72d57, 0x3f2341c6, 0x3f2c1b13, 0xbee2471e, 0x3ef7516f, 0xbf13cd86, 0xbf186750, 0x3f70d3ee, 0x3f3d8b6a, 0xbf34f19b, 0xbe2c2c66, 0x3d8abdde, 0x3e2caec9); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.1021836258281641 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fba28b4c31e60e0); @@ -413,7 +413,7 @@ void TEST_CASE4(void) { // 0.8950700270161456, 0.6727820676214205, // -0.8833440526985297, 0.0357808590148252, // -0.3802125831332157, 0.9831607630398518 - VLOAD_64(v4, 0x3fd4f4406b993a2c, 0x3fed9d97ae0b1cd6, 0x3feefcd01012c05e, + VLOAD_64(v16, 0x3fd4f4406b993a2c, 0x3fed9d97ae0b1cd6, 0x3feefcd01012c05e, 0xbfe5b09210bc082e, 0x3feef1596c459614, 0x3fe9af1e3ee3adfa, 0xbfe7a4ec2374d0e2, 0x3fe7c8a2209c110c, 0x3fedefbbe3db30dc, 0xbfdb01d523d9acc0, 0x3feca469e5b540fa, 0x3fe5876e42389dca, @@ -428,13 +428,13 @@ void TEST_CASE4(void) { // -0.3443302881655670, 0.3680926220616383, // -0.2344410843781140, 0.3553553454507421, // 0.0951222110617760, -0.8329780449088213 - VLOAD_64(v2, 0x3fc56654e2cbd888, 0x3fe40bb445915f4a, 0x3fc4514877d696a0, + VLOAD_64(v8, 0x3fc56654e2cbd888, 0x3fe40bb445915f4a, 0x3fc4514877d696a0, 0xbfd56df357d00344, 0x3fde371a20d41408, 0x3fd26b70e63cabf0, 0x3fe044a59c60fcd4, 0x3fee3905d92cc95e, 0xbfd1f50bba2f6e40, 0xbfe719aff62247a4, 0xbfd60981e7ac601c, 0x3fd78ed45b69d4fc, 0xbfce022a5b1f1348, 0x3fd6be2458cadcb0, 0x3fb859ede1a22f80, 0xbfeaa7c192a56bc8); - asm volatile("vfnmacc.vf v2, %[A], v4, v0.t" ::[A] 
"f"(dscalar_64)); + asm volatile("vfnmacc.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); // -0.2006411422994659, -0.7209983883869466, // -0.2576811937222847, 0.4040963223414386, // -0.5709208038927434, -0.3698229881701340, @@ -442,7 +442,7 @@ void TEST_CASE4(void) { // 0.1849871561177042, 0.7650058093340112, 0.2528687874349445, // -0.4368399331233641, 0.3247043825365946, // -0.3590115633601233, -0.0562707107317317, 0.7325151133694248 - VCMP_U64(12, v2, 0x3fc56654e2cbd888, 0xbfe7126b3652e68a, 0x3fc4514877d696a0, + VCMP_U64(12, v8, 0x3fc56654e2cbd888, 0xbfe7126b3652e68a, 0x3fc4514877d696a0, 0x3fd9dcb6d238fb8a, 0x3fde371a20d41408, 0xbfd7ab2e09dffb6d, 0x3fe044a59c60fcd4, 0xbff053973a823036, 0xbfd1f50bba2f6e40, 0x3fe87aed768addeb, 0xbfd60981e7ac601c, 0xbfdbf52f7a9681dc, diff --git a/sw/riscvTests/isa/rv64uv/vfnmadd.c b/sw/riscvTests/isa/rv64uv/vfnmadd.c index 8203324b..9bc23a86 100644 --- a/sw/riscvTests/isa/rv64uv/vfnmadd.c +++ b/sw/riscvTests/isa/rv64uv/vfnmadd.c @@ -10,36 +10,36 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.7461, 0.0514, -0.3071, -0.4934, -0.5220, -0.7983, 0.4907, // -0.9028, 0.1752, 0.0676, 0.1040, 0.4526, 0.3525, -0.2686, // 0.3540, -0.0847 - VLOAD_16(v4, 0xb9f8, 0x2a94, 0xb4ea, 0xb7e5, 0xb82d, 0xba63, 0x37da, 0xbb39, + VLOAD_16(v16, 0xb9f8, 0x2a94, 0xb4ea, 0xb7e5, 0xb82d, 0xba63, 0x37da, 0xbb39, 0x319b, 0x2c54, 0x2ea8, 0x373e, 0x35a4, 0xb44c, 0x35aa, 0xad6c); // 0.1573, -0.7700, 0.0804, -0.9438, 0.0790, 0.7998, -0.2854, // 0.1963, -0.0687, -0.2123, 0.3625, -0.0002, 0.7168, -0.4033, // 0.2812, -0.3159 - VLOAD_16(v6, 0x3109, 0xba29, 0x2d25, 0xbb8d, 0x2d0e, 0x3a66, 0xb491, 0x3248, + VLOAD_16(v24, 0x3109, 0xba29, 0x2d25, 0xbb8d, 0x2d0e, 0x3a66, 0xb491, 0x3248, 0xac65, 0xb2cb, 0x35cd, 0x897c, 0x39bc, 0xb674, 0x3480, 0xb50e); // 0.0337, 0.2034, -0.1886, 0.8242, 0.3225, 0.0331, 0.0698, // 0.6777, -0.2539, -0.5825, -0.4319, -0.6323, 0.0674, -0.2903, // -0.8145, 0.1893 - VLOAD_16(v2, 
0x284f, 0x3282, 0xb209, 0x3a98, 0x3529, 0x283b, 0x2c77, 0x396c, + VLOAD_16(v8, 0x284f, 0x3282, 0xb209, 0x3a98, 0x3529, 0x283b, 0x2c77, 0x396c, 0xb410, 0xb8a9, 0xb6e9, 0xb90f, 0x2c50, 0xb4a5, 0xba84, 0x320f); - asm volatile("vfnmadd.vv v2, v4, v6"); + asm volatile("vfnmadd.vv v8, v16, v24"); // -0.1322, 0.7598, -0.1383, 1.3506, 0.0894, -0.7734, 0.2512, // 0.4155, 0.1132, 0.2517, -0.3176, 0.2864, -0.7407, 0.3254, // 0.0071, 0.3320 - VCMP_U16(1, v2, 0xb03b, 0x3a14, 0xb06d, 0x3d67, 0x2db8, 0xba30, 0x3405, + VCMP_U16(1, v8, 0xb03b, 0x3a14, 0xb06d, 0x3d67, 0x2db8, 0xba30, 0x3405, 0x36a6, 0x2f3e, 0x3407, 0xb515, 0x3495, 0xb9ed, 0x3535, 0x1f3d, 0x3550); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.36820358, 0.10496315, -0.32905263, -0.92334682, // 0.43153936, 0.92736709, -0.59600371, 0.75117606, 0.84123290, // 0.33028743, -0.43412161, 0.95273590, 0.06816643, // -0.88978988, 0.18573478, 0.61926919 - VLOAD_32(v4, 0xbebc852e, 0x3dd6f6ec, 0xbea87996, 0xbf6c6075, 0x3edcf2ba, + VLOAD_32(v16, 0xbebc852e, 0x3dd6f6ec, 0xbea87996, 0xbf6c6075, 0x3edcf2ba, 0x3f6d67ee, 0xbf1893b3, 0x3f404d13, 0x3f575b0a, 0x3ea91b6f, 0xbede4530, 0x3f73e680, 0x3d8b9ad8, 0xbf63c945, 0x3e3e3142, 0x3f1e886d); @@ -47,7 +47,7 @@ void TEST_CASE1(void) { // -0.67489260, 0.23259214, -0.14038530, 0.09741956, // -0.23567833, 0.75417399, -0.90357685, -0.87489468, // 0.54726779, -0.06705534, -0.15476358, -0.96940458 - VLOAD_32(v6, 0x3f30dac8, 0xbea068b4, 0xbf0c4ffb, 0x3e801918, 0xbf2cc5c3, + VLOAD_32(v24, 0x3f30dac8, 0xbea068b4, 0xbf0c4ffb, 0x3e801918, 0xbf2cc5c3, 0x3e6e2ca2, 0xbe0fc12a, 0x3dc783e8, 0xbe7155a9, 0x3f41118c, 0xbf6750d0, 0xbf5ff919, 0x3f0c19be, 0xbd895450, 0xbe1e7a58, 0xbf782ae6); @@ -55,29 +55,29 @@ void TEST_CASE1(void) { // 0.63562357, -0.45508829, -0.22051410, 0.42499006, // -0.59229839, -0.50074077, -0.80474108, -0.20762257, // 0.15367362, 0.98349953, -0.15871963, -0.07445616 - VLOAD_32(v2, 0x3d187c0a, 0x3f4ed687, 0x3f0a0c76, 0xbf020eee, 0x3f22b83a, + VLOAD_32(v8, 0x3d187c0a, 0x3f4ed687, 
0x3f0a0c76, 0xbf020eee, 0x3f22b83a, 0xbee90155, 0xbe61ce73, 0x3ed9984c, 0xbf17a0de, 0xbf00308c, 0xbf4e0383, 0xbe549b03, 0x3e1d5c9e, 0x3f7bc6a0, 0xbe228766, 0xbd987c79); - asm volatile("vfnmadd.vv v2, v4, v6"); + asm volatile("vfnmadd.vv v8, v16, v24"); // -0.67713100, 0.22849269, 0.72553790, -0.71928883, // 0.40059602, 0.18944177, 0.00895807, -0.41666192, 0.73393923, // -0.58878565, 0.55422139, 1.07270420, -0.55774319, // 0.94216329, 0.18424334, 1.01551294 - VCMP_U32(2, v2, 0xbf2d5875, 0x3e69f9fd, 0x3f39bcda, 0xbf382350, 0x3ecd1aec, + VCMP_U32(2, v8, 0xbf2d5875, 0x3e69f9fd, 0x3f39bcda, 0xbf382350, 0x3ecd1aec, 0x3e41fd06, 0x3c12c4e5, 0xbed554b6, 0x3f3be371, 0xbf16baa7, 0x3f0de173, 0x3f894e5f, 0xbf0ec842, 0x3f71319d, 0x3e3caa49, 0x3f81fc54); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.1517393950396491, -0.0976116299317518, 0.4195080955516000, // -0.8346165642452430, 0.0078216057137750, -0.5126918345148062, // -0.9302856586058497, -0.8971839537614414, 0.1317157676127678, // -0.3423297874984121, 0.7678405723111816, -0.6465198020108864, // 0.4795090517472360, -0.9006147069685106, 0.9841759200408695, // 0.8437352562659637 - VLOAD_64(v4, 0xbfc36c324d9ae520, 0xbfb8fd1366442100, 0x3fdad9387bb34990, + VLOAD_64(v16, 0xbfc36c324d9ae520, 0xbfb8fd1366442100, 0x3fdad9387bb34990, 0xbfeab52dcc044330, 0x3f8004c625f16600, 0xbfe067f8b4c55ad2, 0xbfedc4e66df4cc5a, 0xbfecb5bb1f7cd800, 0x3fc0dc0ff121d700, 0xbfd5e8bb327025d8, 0x3fe892266453ca54, 0xbfe4b04a4bbb4d06, @@ -89,7 +89,7 @@ void TEST_CASE1(void) { // -0.3398132406457823, -0.1436002174993440, // -0.7049093483038609, 0.0726450331160087, 0.3054536350672581, // -0.9906780567812383, 0.2659677084286980, -0.6111168392293305 - VLOAD_64(v6, 0x3fd90f742ba04f2c, 0x3fa7f9df8ab696e0, 0x3fddb3217157f678, + VLOAD_64(v24, 0x3fd90f742ba04f2c, 0x3fa7f9df8ab696e0, 0x3fddb3217157f678, 0x3fed70907d95274a, 0xbfe573086459defe, 0x3fe40064742efe82, 0xbfd7c8cd8353cefc, 0x3fcc0d064ea14910, 0xbfd5bf8008d49208, 0xbfc2617deeedd880, 
0xbfe68e9e0cb3831e, 0x3fb298dd69733960, @@ -101,20 +101,20 @@ void TEST_CASE1(void) { // -0.8703598457154336, -0.2254866845234647, 0.7002825787534324, // 0.0892712008047818, 0.9241326299982451, 0.6615225744181676, // 0.7351775340550828, -0.4044996673659886 - VLOAD_64(v2, 0x3fec9518a458e4ea, 0x3fe0c25010978504, 0xbfd7c958b04a2e10, + VLOAD_64(v8, 0x3fec9518a458e4ea, 0x3fe0c25010978504, 0xbfd7c958b04a2e10, 0x3fec2e7ba402502e, 0x3fdb6d803f3895dc, 0xbfed6516b24524fa, 0xbfd8ffe68378eb00, 0xbfebc848a4fdff0a, 0xbfebd9fce4232e3a, 0xbfccdcbf67db1aa0, 0x3fe668b702b68b90, 0x3fb6da7a37ee6240, 0x3fed927e97e0492a, 0x3fe52b3163d622de, 0x3fe786930930a7fe, 0xbfd9e35292a51b70); - asm volatile("vfnmadd.vv v2, v4, v6"); + asm volatile("vfnmadd.vv v8, v16, v24"); // -0.2560345624688988, 0.0042928888070631, -0.3081421208222118, // -0.1849655060915788, 0.6669400726566582, -1.0960014534443465, // 0.0082438467988533, -0.9980814245844917, 0.4544533558235209, // 0.0664094087027049, 0.1672039722542752, -0.0149294340464271, // -0.7485835961663959, 1.5864550162939111, -0.9895117344007368, // 0.9524074697338700 - VCMP_U64(3, v2, 0xbfd062deca1cc612, 0x3f71956b9081d880, 0xbfd3b899badce50e, + VCMP_U64(3, v8, 0xbfd062deca1cc612, 0x3f71956b9081d880, 0xbfd3b899badce50e, 0xbfc7acf31fc694ed, 0x3fe55792b50e7883, 0xbff18938d1ee9749, 0x3f80e22663278b8b, 0xbfeff04874aabc45, 0x3fdd15c38734723f, 0x3fb10034fe865a4b, 0x3fc566f0944bf4a6, 0xbf8e9352b7d14aa9, @@ -126,37 +126,37 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.7461, 0.0514, -0.3071, -0.4934, -0.5220, -0.7983, 0.4907, // -0.9028, 0.1752, 0.0676, 0.1040, 0.4526, 0.3525, -0.2686, // 0.3540, -0.0847 - VLOAD_16(v4, 0xb9f8, 0x2a94, 0xb4ea, 0xb7e5, 0xb82d, 0xba63, 0x37da, 0xbb39, + VLOAD_16(v16, 0xb9f8, 0x2a94, 0xb4ea, 0xb7e5, 0xb82d, 0xba63, 0x37da, 0xbb39, 0x319b, 0x2c54, 0x2ea8, 0x373e, 0x35a4, 0xb44c, 0x35aa, 
0xad6c); // 0.1573, -0.7700, 0.0804, -0.9438, 0.0790, 0.7998, -0.2854, // 0.1963, -0.0687, -0.2123, 0.3625, -0.0002, 0.7168, -0.4033, // 0.2812, -0.3159 - VLOAD_16(v6, 0x3109, 0xba29, 0x2d25, 0xbb8d, 0x2d0e, 0x3a66, 0xb491, 0x3248, + VLOAD_16(v24, 0x3109, 0xba29, 0x2d25, 0xbb8d, 0x2d0e, 0x3a66, 0xb491, 0x3248, 0xac65, 0xb2cb, 0x35cd, 0x897c, 0x39bc, 0xb674, 0x3480, 0xb50e); VLOAD_8(v0, 0xAA, 0xAA); // 0.0337, 0.2034, -0.1886, 0.8242, 0.3225, 0.0331, 0.0698, // 0.6777, -0.2539, -0.5825, -0.4319, -0.6323, 0.0674, -0.2903, // -0.8145, 0.1893 - VLOAD_16(v2, 0x284f, 0x3282, 0xb209, 0x3a98, 0x3529, 0x283b, 0x2c77, 0x396c, + VLOAD_16(v8, 0x284f, 0x3282, 0xb209, 0x3a98, 0x3529, 0x283b, 0x2c77, 0x396c, 0xb410, 0xb8a9, 0xb6e9, 0xb90f, 0x2c50, 0xb4a5, 0xba84, 0x320f); - asm volatile("vfnmadd.vv v2, v4, v6, v0.t"); + asm volatile("vfnmadd.vv v8, v16, v24, v0.t"); // 0.0337, 0.7598, -0.1886, 1.3506, 0.3225, -0.7734, 0.0698, // 0.4155, -0.2539, 0.2517, -0.4319, 0.2864, 0.0674, 0.3254, // -0.8145, 0.3320 - VCMP_U16(4, v2, 0x284f, 0x3a14, 0xb209, 0x3d67, 0x3529, 0xba30, 0x2c77, + VCMP_U16(4, v8, 0x284f, 0x3a14, 0xb209, 0x3d67, 0x3529, 0xba30, 0x2c77, 0x36a6, 0xb410, 0x3407, 0xb6e9, 0x3495, 0x2c50, 0x3535, 0xba84, 0x3550); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.36820358, 0.10496315, -0.32905263, -0.92334682, // 0.43153936, 0.92736709, -0.59600371, 0.75117606, 0.84123290, // 0.33028743, -0.43412161, 0.95273590, 0.06816643, // -0.88978988, 0.18573478, 0.61926919 - VLOAD_32(v4, 0xbebc852e, 0x3dd6f6ec, 0xbea87996, 0xbf6c6075, 0x3edcf2ba, + VLOAD_32(v16, 0xbebc852e, 0x3dd6f6ec, 0xbea87996, 0xbf6c6075, 0x3edcf2ba, 0x3f6d67ee, 0xbf1893b3, 0x3f404d13, 0x3f575b0a, 0x3ea91b6f, 0xbede4530, 0x3f73e680, 0x3d8b9ad8, 0xbf63c945, 0x3e3e3142, 0x3f1e886d); @@ -164,7 +164,7 @@ void TEST_CASE2(void) { // -0.67489260, 0.23259214, -0.14038530, 0.09741956, // -0.23567833, 0.75417399, -0.90357685, -0.87489468, // 0.54726779, -0.06705534, -0.15476358, -0.96940458 - VLOAD_32(v6, 0x3f30dac8, 
0xbea068b4, 0xbf0c4ffb, 0x3e801918, 0xbf2cc5c3, + VLOAD_32(v24, 0x3f30dac8, 0xbea068b4, 0xbf0c4ffb, 0x3e801918, 0xbf2cc5c3, 0x3e6e2ca2, 0xbe0fc12a, 0x3dc783e8, 0xbe7155a9, 0x3f41118c, 0xbf6750d0, 0xbf5ff919, 0x3f0c19be, 0xbd895450, 0xbe1e7a58, 0xbf782ae6); @@ -173,29 +173,29 @@ void TEST_CASE2(void) { // 0.63562357, -0.45508829, -0.22051410, 0.42499006, // -0.59229839, -0.50074077, -0.80474108, -0.20762257, // 0.15367362, 0.98349953, -0.15871963, -0.07445616 - VLOAD_32(v2, 0x3d187c0a, 0x3f4ed687, 0x3f0a0c76, 0xbf020eee, 0x3f22b83a, + VLOAD_32(v8, 0x3d187c0a, 0x3f4ed687, 0x3f0a0c76, 0xbf020eee, 0x3f22b83a, 0xbee90155, 0xbe61ce73, 0x3ed9984c, 0xbf17a0de, 0xbf00308c, 0xbf4e0383, 0xbe549b03, 0x3e1d5c9e, 0x3f7bc6a0, 0xbe228766, 0xbd987c79); - asm volatile("vfnmadd.vv v2, v4, v6, v0.t"); + asm volatile("vfnmadd.vv v8, v16, v24, v0.t"); // 0.03722767, 0.22849269, 0.53925264, -0.71928883, // 0.63562357, 0.18944177, -0.22051410, -0.41666192, // -0.59229839, -0.58878565, -0.80474108, 1.07270420, // 0.15367362, 0.94216329, -0.15871963, 1.01551294 - VCMP_U32(5, v2, 0x3d187c0a, 0x3e69f9fd, 0x3f0a0c76, 0xbf382350, 0x3f22b83a, + VCMP_U32(5, v8, 0x3d187c0a, 0x3e69f9fd, 0x3f0a0c76, 0xbf382350, 0x3f22b83a, 0x3e41fd06, 0xbe61ce73, 0xbed554b6, 0xbf17a0de, 0xbf16baa7, 0xbf4e0383, 0x3f894e5f, 0x3e1d5c9e, 0x3f71319d, 0xbe228766, 0x3f81fc54); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.1517393950396491, -0.0976116299317518, 0.4195080955516000, // -0.8346165642452430, 0.0078216057137750, -0.5126918345148062, // -0.9302856586058497, -0.8971839537614414, 0.1317157676127678, // -0.3423297874984121, 0.7678405723111816, -0.6465198020108864, // 0.4795090517472360, -0.9006147069685106, 0.9841759200408695, // 0.8437352562659637 - VLOAD_64(v4, 0xbfc36c324d9ae520, 0xbfb8fd1366442100, 0x3fdad9387bb34990, + VLOAD_64(v16, 0xbfc36c324d9ae520, 0xbfb8fd1366442100, 0x3fdad9387bb34990, 0xbfeab52dcc044330, 0x3f8004c625f16600, 0xbfe067f8b4c55ad2, 0xbfedc4e66df4cc5a, 0xbfecb5bb1f7cd800, 
0x3fc0dc0ff121d700, 0xbfd5e8bb327025d8, 0x3fe892266453ca54, 0xbfe4b04a4bbb4d06, @@ -207,7 +207,7 @@ void TEST_CASE2(void) { // -0.3398132406457823, -0.1436002174993440, // -0.7049093483038609, 0.0726450331160087, 0.3054536350672581, // -0.9906780567812383, 0.2659677084286980, -0.6111168392293305 - VLOAD_64(v6, 0x3fd90f742ba04f2c, 0x3fa7f9df8ab696e0, 0x3fddb3217157f678, + VLOAD_64(v24, 0x3fd90f742ba04f2c, 0x3fa7f9df8ab696e0, 0x3fddb3217157f678, 0x3fed70907d95274a, 0xbfe573086459defe, 0x3fe40064742efe82, 0xbfd7c8cd8353cefc, 0x3fcc0d064ea14910, 0xbfd5bf8008d49208, 0xbfc2617deeedd880, 0xbfe68e9e0cb3831e, 0x3fb298dd69733960, @@ -220,13 +220,13 @@ void TEST_CASE2(void) { // -0.8703598457154336, -0.2254866845234647, 0.7002825787534324, // 0.0892712008047818, 0.9241326299982451, 0.6615225744181676, // 0.7351775340550828, -0.4044996673659886 - VLOAD_64(v2, 0x3fec9518a458e4ea, 0x3fe0c25010978504, 0xbfd7c958b04a2e10, + VLOAD_64(v8, 0x3fec9518a458e4ea, 0x3fe0c25010978504, 0xbfd7c958b04a2e10, 0x3fec2e7ba402502e, 0x3fdb6d803f3895dc, 0xbfed6516b24524fa, 0xbfd8ffe68378eb00, 0xbfebc848a4fdff0a, 0xbfebd9fce4232e3a, 0xbfccdcbf67db1aa0, 0x3fe668b702b68b90, 0x3fb6da7a37ee6240, 0x3fed927e97e0492a, 0x3fe52b3163d622de, 0x3fe786930930a7fe, 0xbfd9e35292a51b70); - asm volatile("vfnmadd.vv v2, v4, v6, v0.t"); + asm volatile("vfnmadd.vv v8, v16, v24, v0.t"); // 0.8932002267748917, 0.0042928888070631, -0.3716642114238491, // -0.1849655060915788, 0.4285584084885536, // -1.0960014534443465, -0.3906189235600976, @@ -234,7 +234,7 @@ void TEST_CASE2(void) { // 0.7002825787534324, -0.0149294340464271, // 0.9241326299982451, 1.5864550162939111, 0.7351775340550828, // 0.9524074697338700 - VCMP_U64(6, v2, 0x3fec9518a458e4ea, 0x3f71956b9081d880, 0xbfd7c958b04a2e10, + VCMP_U64(6, v8, 0x3fec9518a458e4ea, 0x3f71956b9081d880, 0xbfd7c958b04a2e10, 0xbfc7acf31fc694ed, 0x3fdb6d803f3895dc, 0xbff18938d1ee9749, 0xbfd8ffe68378eb00, 0xbfeff04874aabc45, 0xbfebd9fce4232e3a, 0x3fb10034fe865a4b, 0x3fe668b702b68b90, 
0xbf8e9352b7d14aa9, @@ -245,29 +245,29 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.2646 BOX_HALF_IN_FLOAT(fscalar_16, 0x343c); // 0.4216, -0.2148, 0.0047, 0.6802, -0.8965, -0.2986, -0.1786, // -0.1904, 0.2805, 0.5322, -0.5298, 0.3208, 0.0567, // -0.9897, -0.5400, -0.4187 - VLOAD_16(v4, 0x36bf, 0xb2e0, 0x1cc3, 0x3971, 0xbb2c, 0xb4c7, 0xb1b7, 0xb218, + VLOAD_16(v16, 0x36bf, 0xb2e0, 0x1cc3, 0x3971, 0xbb2c, 0xb4c7, 0xb1b7, 0xb218, 0x347d, 0x3842, 0xb83d, 0x3522, 0x2b41, 0xbbeb, 0xb852, 0xb6b3); // -0.7886, -0.5435, -0.8345, 0.7793, 0.5796, -0.8374, -0.8623, // -0.3313, -0.2690, -0.9214, 0.2126, -0.6772, -0.6514, -0.5703, // -0.2585, -0.3320 - VLOAD_16(v2, 0xba4f, 0xb859, 0xbaad, 0x3a3c, 0x38a3, 0xbab3, 0xbae6, 0xb54d, + VLOAD_16(v8, 0xba4f, 0xb859, 0xbaad, 0x3a3c, 0x38a3, 0xbab3, 0xbae6, 0xb54d, 0xb44e, 0xbb5f, 0x32ce, 0xb96b, 0xb936, 0xb890, 0xb423, 0xb550); - asm volatile("vfnmadd.vf v2, %[A], v4" ::[A] "f"(fscalar_16)); + asm volatile("vfnmadd.vf v8, %[A], v16" ::[A] "f"(fscalar_16)); // -0.2129, 0.3586, 0.2162, -0.8867, 0.7432, 0.5200, 0.4067, // 0.2781, -0.2092, -0.2883, 0.4736, -0.1416, 0.1157, 1.1406, // 0.6084, 0.5068 - VCMP_U16(7, v2, 0xb2d0, 0x35bd, 0x32eb, 0xbb17, 0x39f2, 0x3829, 0x3682, + VCMP_U16(7, v8, 0xb2d0, 0x35bd, 0x32eb, 0xbb17, 0x39f2, 0x3829, 0x3682, 0x3473, 0xb2b3, 0xb49d, 0x3793, 0xb088, 0x2f68, 0x3c90, 0x38de, 0x380d); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.13809182 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbe0d67f1); @@ -275,7 +275,7 @@ void TEST_CASE3(void) { // 0.55768263, 0.47349435, 0.77556002, 0.16363664, 0.80314618, // -0.48171839, -0.60694915, 0.16937894, 0.86316317, // 0.00897404, -0.96310323, -0.27890080 - VLOAD_32(v4, 0xbe2dd934, 0xbe9db2a5, 0xbebd7e18, 0xbf6c438a, 0x3f0ec44a, + VLOAD_32(v16, 0xbe2dd934, 0xbe9db2a5, 0xbebd7e18, 0xbf6c438a, 0x3f0ec44a, 0x3ef26dda, 0x3f468b1a, 0x3e27905d, 
0x3f4d9afd, 0xbef6a3cb, 0xbf1b6105, 0x3e2d71ac, 0x3f5cf843, 0x3c1307df, 0xbf768def, 0xbe8ecc16); @@ -283,22 +283,22 @@ void TEST_CASE3(void) { // 0.72045243, -0.36672497, 0.69402671, 0.44954479, // -0.77024877, -0.83221292, 0.37576449, -0.77536738, // -0.55040795, -0.71568310, -0.75874990, 0.91956782 - VLOAD_32(v2, 0xbea9a47f, 0xbefabdb8, 0x3f74e1bf, 0xbf6461c8, 0x3f386f92, + VLOAD_32(v8, 0xbea9a47f, 0xbefabdb8, 0x3f74e1bf, 0xbf6461c8, 0x3f386f92, 0xbebbc360, 0x3f31abbc, 0x3ee62abc, 0xbf452f06, 0xbf550be8, 0x3ec06434, 0xbf467e7a, 0xbf0ce789, 0xbf373702, 0xbf423d6f, 0x3f6b68cc); - asm volatile("vfnmadd.vf v2, %[A], v4" ::[A] "f"(fscalar_32)); + asm volatile("vfnmadd.vf v8, %[A], v16" ::[A] "f"(fscalar_32)); // 0.12401948, 0.24037606, 0.50219709, 0.79971153, // -0.45819405, -0.52413607, -0.67972064, -0.10155818, // -0.90951121, 0.36679661, 0.65883917, -0.27645081, // -0.93917000, -0.10780402, 0.85832608, 0.40588558 - VCMP_U32(8, v2, 0x3dfdfded, 0x3e762524, 0x3f008ffd, 0x3f4cb9e5, 0xbeea9869, + VCMP_U32(8, v8, 0x3dfdfded, 0x3e762524, 0x3f008ffd, 0x3f4cb9e5, 0xbeea9869, 0xbf062dc8, 0xbf2e022c, 0xbdcffdbd, 0xbf68d5ba, 0x3ebbccc3, 0x3f28a9af, 0xbe8d8af6, 0xbf706d72, 0xbddcc85b, 0x3f5bbb42, 0x3ecfd03d); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.8978909040536565 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fecbb85b489299a); @@ -310,7 +310,7 @@ void TEST_CASE3(void) { // -0.9796673648941667, 0.2148282430178909, // -0.1529278220414154, -0.7708574130314993, // -0.4104905538508556 - VLOAD_64(v4, 0x3fda5d975d575ea8, 0xbfbe7cae0e441b80, 0x3fefb0cd7ce7c8e8, + VLOAD_64(v16, 0x3fda5d975d575ea8, 0xbfbe7cae0e441b80, 0x3fefb0cd7ce7c8e8, 0x3fe0fecd81607572, 0x3fc597eae3ba06f8, 0xbfec1f6077386c08, 0x3fc81248312ba2f8, 0x3fe85ac99da9270a, 0xbfb658d7d8ca9eb0, 0xbfe5f0214100b7de, 0xbf929ad05e338a40, 0xbfef596f5fa5b9ea, @@ -323,20 +323,20 @@ void TEST_CASE3(void) { // 0.3334000822950238, -0.6004917796663505, 0.4588428764280068, // 0.8937156106780619, 
0.8421999503441004, 0.3083609158934253, // -0.2219824502919918, 0.5118870280625194 - VLOAD_64(v2, 0x3fe028f93e467e2c, 0x3fea6cbfe4289cd0, 0xbfd0d6a877a053e0, + VLOAD_64(v8, 0x3fe028f93e467e2c, 0x3fea6cbfe4289cd0, 0xbfd0d6a877a053e0, 0x3fe9bbe080247574, 0xbfa083b2550ab080, 0xbfd9907d14a5c710, 0xbfe3b1ccad88e3e8, 0xbfed786917e1dd9e, 0x3fd5566d4c7c36a8, 0xbfe3373a8965e1cc, 0x3fdd5dae8310b1e8, 0x3fec99517af92ea6, 0x3feaf34d4f6d76aa, 0x3fd3bc2f6c481e9c, 0xbfcc69ebc2252060, 0x3fe06160e798ce12); - asm volatile("vfnmadd.vf v2, %[A], v4" ::[A] "f"(dscalar_64)); + asm volatile("vfnmadd.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); // -0.8653987555659035, -0.6223659818850454, -0.7540957457254903, // -1.2531735291223209, -0.1397373948516451, 1.2374872082900796, // 0.3645526649939371, 0.0658288599840662, // -0.2120637623916150, 1.2247388386247897, -0.3938221518308497, // 0.1772082472555760, -0.9710319178262998, -0.1239466395049457, // 0.9701734360082217, -0.0491281525495393 - VCMP_U64(9, v2, 0xbfebb158bb24f2ec, 0xbfe3ea6c104adab7, 0xbfe8218d66be32e5, + VCMP_U64(9, v8, 0xbfebb158bb24f2ec, 0xbfe3ea6c104adab7, 0xbfe8218d66be32e5, 0xbff40cffafbcb13e, 0xbfc1e2ea3a754147, 0x3ff3ccbf630d300f, 0x3fd754d4b3746402, 0x3fb0da2900c3b814, 0xbfcb24e7c611c0f4, 0x3ff39887c0a08d49, 0xbfd93461d3a37236, 0x3fc6aec28545a7b7, @@ -347,30 +347,30 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.2646 BOX_HALF_IN_FLOAT(fscalar_16, 0x343c); // 0.4216, -0.2148, 0.0047, 0.6802, -0.8965, -0.2986, // -0.1786, -0.1904, 0.2805, 0.5322, -0.5298, 0.3208, // 0.0567, -0.9897, -0.5400, -0.4187 - VLOAD_16(v4, 0x36bf, 0xb2e0, 0x1cc3, 0x3971, 0xbb2c, 0xb4c7, 0xb1b7, 0xb218, + VLOAD_16(v16, 0x36bf, 0xb2e0, 0x1cc3, 0x3971, 0xbb2c, 0xb4c7, 0xb1b7, 0xb218, 0x347d, 0x3842, 0xb83d, 0x3522, 0x2b41, 0xbbeb, 0xb852, 0xb6b3); VLOAD_8(v0, 0xAA, 0xAA); // -0.7886, -0.5435, -0.8345, 0.7793, 0.5796, 
-0.8374, // -0.8623, -0.3313, -0.2690, -0.9214, 0.2126, -0.6772, // -0.6514, -0.5703, -0.2585, -0.3320 - VLOAD_16(v2, 0xba4f, 0xb859, 0xbaad, 0x3a3c, 0x38a3, 0xbab3, 0xbae6, 0xb54d, + VLOAD_16(v8, 0xba4f, 0xb859, 0xbaad, 0x3a3c, 0x38a3, 0xbab3, 0xbae6, 0xb54d, 0xb44e, 0xbb5f, 0x32ce, 0xb96b, 0xb936, 0xb890, 0xb423, 0xb550); - asm volatile("vfnmadd.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); + asm volatile("vfnmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_16)); // -0.7886, 0.3586, -0.8345, -0.8867, 0.5796, 0.5200, // -0.8623, 0.2781, -0.2690, -0.2883, 0.2126, -0.1416, // -0.6514, 1.1406, -0.2585, 0.5068 - VCMP_U16(10, v2, 0xba4f, 0x35bd, 0xbaad, 0xbb17, 0x38a3, 0x3829, 0xbae6, + VCMP_U16(10, v8, 0xba4f, 0x35bd, 0xbaad, 0xbb17, 0x38a3, 0x3829, 0xbae6, 0x3473, 0xb44e, 0xb49d, 0x32ce, 0xb088, 0xb936, 0x3c90, 0xb423, 0x380d); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.13809182 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbe0d67f1); @@ -378,7 +378,7 @@ void TEST_CASE4(void) { // 0.55768263, 0.47349435, 0.77556002, 0.16363664, // 0.80314618, -0.48171839, -0.60694915, 0.16937894, // 0.86316317, 0.00897404, -0.96310323, -0.27890080 - VLOAD_32(v4, 0xbe2dd934, 0xbe9db2a5, 0xbebd7e18, 0xbf6c438a, 0x3f0ec44a, + VLOAD_32(v16, 0xbe2dd934, 0xbe9db2a5, 0xbebd7e18, 0xbf6c438a, 0x3f0ec44a, 0x3ef26dda, 0x3f468b1a, 0x3e27905d, 0x3f4d9afd, 0xbef6a3cb, 0xbf1b6105, 0x3e2d71ac, 0x3f5cf843, 0x3c1307df, 0xbf768def, 0xbe8ecc16); @@ -387,22 +387,22 @@ void TEST_CASE4(void) { // 0.72045243, -0.36672497, 0.69402671, 0.44954479, // -0.77024877, -0.83221292, 0.37576449, -0.77536738, // -0.55040795, -0.71568310, -0.75874990, 0.91956782 - VLOAD_32(v2, 0xbea9a47f, 0xbefabdb8, 0x3f74e1bf, 0xbf6461c8, 0x3f386f92, + VLOAD_32(v8, 0xbea9a47f, 0xbefabdb8, 0x3f74e1bf, 0xbf6461c8, 0x3f386f92, 0xbebbc360, 0x3f31abbc, 0x3ee62abc, 0xbf452f06, 0xbf550be8, 0x3ec06434, 0xbf467e7a, 0xbf0ce789, 0xbf373702, 0xbf423d6f, 0x3f6b68cc); - asm volatile("vfnmadd.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); 
+ asm volatile("vfnmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_32)); // -0.33133313, 0.24037606, 0.95656961, 0.79971153, // 0.72045243, -0.52413607, 0.69402671, -0.10155818, // -0.77024877, 0.36679661, 0.37576449, -0.27645081, // -0.55040795, -0.10780402, -0.75874990, 0.40588558 - VCMP_U32(11, v2, 0xbea9a47f, 0x3e762524, 0x3f74e1bf, 0x3f4cb9e5, 0x3f386f92, + VCMP_U32(11, v8, 0xbea9a47f, 0x3e762524, 0x3f74e1bf, 0x3f4cb9e5, 0x3f386f92, 0xbf062dc8, 0x3f31abbc, 0xbdcffdbd, 0xbf452f06, 0x3ebbccc3, 0x3ec06434, 0xbe8d8af6, 0xbf0ce789, 0xbddcc85b, 0xbf423d6f, 0x3ecfd03d); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.8978909040536565 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fecbb85b489299a); @@ -414,7 +414,7 @@ void TEST_CASE4(void) { // -0.0181686933036735, -0.9796673648941667, // 0.2148282430178909, -0.1529278220414154, // -0.7708574130314993, -0.4104905538508556 - VLOAD_64(v4, 0x3fda5d975d575ea8, 0xbfbe7cae0e441b80, 0x3fefb0cd7ce7c8e8, + VLOAD_64(v16, 0x3fda5d975d575ea8, 0xbfbe7cae0e441b80, 0x3fefb0cd7ce7c8e8, 0x3fe0fecd81607572, 0x3fc597eae3ba06f8, 0xbfec1f6077386c08, 0x3fc81248312ba2f8, 0x3fe85ac99da9270a, 0xbfb658d7d8ca9eb0, 0xbfe5f0214100b7de, 0xbf929ad05e338a40, 0xbfef596f5fa5b9ea, @@ -429,13 +429,13 @@ void TEST_CASE4(void) { // 0.4588428764280068, 0.8937156106780619, // 0.8421999503441004, 0.3083609158934253, // -0.2219824502919918, 0.5118870280625194 - VLOAD_64(v2, 0x3fe028f93e467e2c, 0x3fea6cbfe4289cd0, 0xbfd0d6a877a053e0, + VLOAD_64(v8, 0x3fe028f93e467e2c, 0x3fea6cbfe4289cd0, 0xbfd0d6a877a053e0, 0x3fe9bbe080247574, 0xbfa083b2550ab080, 0xbfd9907d14a5c710, 0xbfe3b1ccad88e3e8, 0xbfed786917e1dd9e, 0x3fd5566d4c7c36a8, 0xbfe3373a8965e1cc, 0x3fdd5dae8310b1e8, 0x3fec99517af92ea6, 0x3feaf34d4f6d76aa, 0x3fd3bc2f6c481e9c, 0xbfcc69ebc2252060, 0x3fe06160e798ce12); - asm volatile("vfnmadd.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_64)); + asm volatile("vfnmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); // 0.5050016609492949, 
-0.6223659818850454, // -0.2631016891694440, -1.2531735291223209, // -0.0322547653971421, 1.2374872082900796, @@ -444,7 +444,7 @@ void TEST_CASE4(void) { // 0.1772082472555760, 0.8421999503441004, // -0.1239466395049457, -0.2219824502919918, // -0.0491281525495393 - VCMP_U64(12, v2, 0x3fe028f93e467e2c, 0xbfe3ea6c104adab7, 0xbfd0d6a877a053e0, + VCMP_U64(12, v8, 0x3fe028f93e467e2c, 0xbfe3ea6c104adab7, 0xbfd0d6a877a053e0, 0xbff40cffafbcb13e, 0xbfa083b2550ab080, 0x3ff3ccbf630d300f, 0xbfe3b1ccad88e3e8, 0x3fb0da2900c3b814, 0x3fd5566d4c7c36a8, 0x3ff39887c0a08d49, 0x3fdd5dae8310b1e8, 0x3fc6aec28545a7b7, diff --git a/sw/riscvTests/isa/rv64uv/vfnmsac.c b/sw/riscvTests/isa/rv64uv/vfnmsac.c index d3c0957f..2b46ec17 100644 --- a/sw/riscvTests/isa/rv64uv/vfnmsac.c +++ b/sw/riscvTests/isa/rv64uv/vfnmsac.c @@ -10,36 +10,36 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.3474, -0.9888, 0.2810, 0.4199, 0.1704, -0.3772, 0.2998, // 0.7871, -0.2527, -0.8618, 0.2646, 0.5488, -0.3184, -0.3508, // -0.3589, -0.3914 - VLOAD_16(v4, 0x358f, 0xbbe9, 0x347f, 0x36b8, 0x3174, 0xb609, 0x34cc, 0x3a4c, + VLOAD_16(v16, 0x358f, 0xbbe9, 0x347f, 0x36b8, 0x3174, 0xb609, 0x34cc, 0x3a4c, 0xb40b, 0xbae5, 0x343c, 0x3864, 0xb518, 0xb59d, 0xb5be, 0xb643); // 0.0417, 0.3862, -0.9619, -0.5659, 0.1731, 0.4827, 0.7334, // -0.7271, -0.9814, 0.8003, -0.4836, 0.5234, -0.8540, // -0.2036, -0.8823, 0.2603 - VLOAD_16(v6, 0x2958, 0x362e, 0xbbb2, 0xb887, 0x318a, 0x37b9, 0x39de, 0xb9d1, + VLOAD_16(v24, 0x2958, 0x362e, 0xbbb2, 0xb887, 0x318a, 0x37b9, 0x39de, 0xb9d1, 0xbbda, 0x3a67, 0xb7bd, 0x3830, 0xbad5, 0xb284, 0xbb0f, 0x342a); // -0.2739, 0.2146, 0.5264, -0.8853, 0.8877, -0.6748, -0.7563, // -0.9634, 0.7451, -0.5166, 0.0698, 0.4790, -0.0681, -0.9746, // 0.2129, 0.9072 - VLOAD_16(v2, 0xb462, 0x32de, 0x3836, 0xbb15, 0x3b1a, 0xb966, 0xba0d, 0xbbb5, + VLOAD_16(v8, 0xb462, 0x32de, 0x3836, 0xbb15, 0x3b1a, 0xb966, 0xba0d, 0xbbb5, 0x39f6, 0xb822, 0x2c77, 0x37aa, 
0xac5b, 0xbbcc, 0x32d0, 0x3b42); - asm volatile("vfnmsac.vv v2, v4, v6"); + asm volatile("vfnmsac.vv v8, v16, v24"); // -0.2883, 0.5967, 0.7969, -0.6475, 0.8584, -0.4927, -0.9761, // -0.3911, 0.4971, 0.1733, 0.1978, 0.1917, -0.3401, -1.0459, // -0.1038, 1.0088 - VCMP_U16(1, v2, 0xb49d, 0x38c6, 0x3a60, 0xb92e, 0x3ade, 0xb7e2, 0xbbcf, + VCMP_U16(1, v8, 0xb49d, 0x38c6, 0x3a60, 0xb92e, 0x3ade, 0xb7e2, 0xbbcf, 0xb642, 0x37f4, 0x318a, 0x3254, 0x3223, 0xb570, 0xbc2f, 0xaea4, 0x3c09); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.11577118, -0.10074481, 0.13861528, 0.44782066, // 0.42196107, -0.67597556, 0.34948668, -0.87903690, // -0.34136006, -0.19722189, 0.76997000, -0.68663412, // 0.45603558, 0.60629857, -0.86984915, -0.08019307 - VLOAD_32(v4, 0x3ded1971, 0xbdce534c, 0x3e0df12a, 0x3ee548c0, 0x3ed80b48, + VLOAD_32(v16, 0x3ded1971, 0xbdce534c, 0x3e0df12a, 0x3ee548c0, 0x3ed80b48, 0xbf2d0cbc, 0x3eb2efeb, 0xbf610890, 0xbeaec6bf, 0xbe49f489, 0x3f451cc1, 0xbf2fc741, 0x3ee97d7f, 0x3f1b3662, 0xbf5eae6f, 0xbda43c43); @@ -47,7 +47,7 @@ void TEST_CASE1(void) { // 0.47494572, -0.34277225, -0.54462087, -0.90492284, 0.60100728, // -0.02819708, -0.46859986, 0.87238866, 0.46812481, // 0.49922746, 0.97036403, 0.04279163 - VLOAD_32(v6, 0xbec7879f, 0x3ecf286c, 0x3f30d065, 0xbf7b28a8, 0x3ef32c16, + VLOAD_32(v24, 0xbec7879f, 0x3ecf286c, 0x3f30d065, 0xbf7b28a8, 0x3ef32c16, 0xbeaf7fd8, 0xbf0b6c46, 0xbf67a906, 0x3f19db9d, 0xbce6fd92, 0xbeefec52, 0x3f5f54dd, 0x3eefae0e, 0x3eff9abe, 0x3f7869c7, 0x3d2f4647); @@ -55,29 +55,29 @@ void TEST_CASE1(void) { // 0.15318950, 0.15531392, -0.20705318, -0.82493448, // 0.12047531, 0.57526720, 0.23939800, -0.19725421, // 0.15403098, 0.03931713, -0.45930895, -0.15395784 - VLOAD_32(v2, 0x3f4c4ca0, 0xbf3339f2, 0xbf02ae86, 0x3ec57575, 0x3e1cddb5, + VLOAD_32(v8, 0x3f4c4ca0, 0xbf3339f2, 0xbf02ae86, 0x3ec57575, 0x3e1cddb5, 0x3e1f0a9d, 0xbe5405c0, 0xbf532ee8, 0x3df6bbc3, 0x3f1344b6, 0x3e7524c0, 0xbe49fd02, 0x3e1dba4c, 0x3d210b00, 0xbeeb2a8b, 0xbe1da720); - asm 
volatile("vfnmsac.vv v2, v4, v6"); + asm volatile("vfnmsac.vv v8, v16, v24"); // 0.84316099, -0.65934104, -0.60621428, 0.82501376, // -0.04721911, -0.07639174, -0.01671545, -1.62039506, // 0.32563519, 0.56970614, 0.60020584, 0.40175763, // -0.05945060, -0.26336378, 0.38476136, -0.15052626 - VCMP_U32(2, v2, 0x3f57d966, 0xbf28ca93, 0xbf1b30dc, 0x3f53341a, 0xbd4168d3, + VCMP_U32(2, v8, 0x3f57d966, 0xbf28ca93, 0xbf1b30dc, 0x3f53341a, 0xbd4168d3, 0xbd9c7345, 0xbc88eed4, 0xbfcf691b, 0x3ea6b9a8, 0x3f11d843, 0x3f19a717, 0x3ecdb32c, 0xbd738277, 0xbe86d79e, 0x3ec4ff71, 0xbe1a238e); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.1779684802061718, 0.1122733699429854, -0.0166033088608786, // -0.0418350503858864, 0.0809510021720363, -0.9993917101510512, // -0.2139048161619248, 0.7196716914796224, 0.6489783595942558, // 0.5950689618839839, -0.7376256302221853, -0.5442228345597713, // -0.8234113806545975, -0.6424001059348645, -0.3817524674245201, // -0.8801262923106541 - VLOAD_64(v4, 0xbfc6c7abd11a2788, 0x3fbcbdf2941de8b0, 0xbf91007532405e80, + VLOAD_64(v16, 0xbfc6c7abd11a2788, 0x3fbcbdf2941de8b0, 0xbf91007532405e80, 0xbfa56b675a77c100, 0x3fb4b93472e84630, 0xbfeffb0452dfc0ba, 0xbfcb613ba6efa978, 0x3fe7078ced586224, 0x3fe4c46e43c89c1c, 0x3fe30ace10450114, 0xbfe79aa110cfdc92, 0xbfe16a46018575da, @@ -89,7 +89,7 @@ void TEST_CASE1(void) { // -0.9215678788036654, 0.4412210589054084, 0.3537359089001260, // -0.4889461402031243, 0.2341577339668230, 0.0593866008892341, // -0.4825773777931026, 0.8989772522533539 - VLOAD_64(v6, 0x3fe5ca90cb4aba98, 0xbfd67b45dfa41e18, 0xbfd7f97c12a6b704, + VLOAD_64(v24, 0x3fe5ca90cb4aba98, 0xbfd67b45dfa41e18, 0xbfd7f97c12a6b704, 0xbfe951a6c578c3ac, 0x3fea89299b6d84b0, 0x3fda42f7d4d35178, 0xbfb5c6f9cd987320, 0xbfe2abfdad8a63b6, 0xbfed7d7beb902fcc, 0x3fdc3cf7409388b4, 0x3fd6a39bf009666c, 0xbfdf4ae4c06b61e4, @@ -101,20 +101,20 @@ void TEST_CASE1(void) { // -0.7798663937316326, -0.1025181838739857, 0.5296250728149803, // 0.8832422045338422, 
0.8373555508937671, -0.8622529212135799, // 0.4241832213372883, 0.7769982087360683 - VLOAD_64(v2, 0x3fcc54d154555708, 0xbfe6477196411436, 0xbfb6f0878eee8940, + VLOAD_64(v8, 0x3fcc54d154555708, 0xbfe6477196411436, 0xbfb6f0878eee8940, 0xbfc113b7f0547630, 0x3fc973b5c856db48, 0xbfd5b4747c9c185c, 0xbfe3a398b77f3552, 0x3fe5a16ce1f8870a, 0xbfe8f4aa5e0a7552, 0xbfba3ea1b6fbece0, 0x3fe0f2b047dc3902, 0x3fec438527dd6ef6, 0x3feacb9dde46cf34, 0xbfeb9793702fc4f0, 0x3fdb25d161c9f510, 0x3fe8dd2b58f24dc8); - asm volatile("vfnmsac.vv v2, v4, v6"); + asm volatile("vfnmsac.vv v8, v16, v24"); // 0.3425309161602823, -0.6567824407689892, -0.0958273275519495, // -0.1665148733821233, 0.1317156015009752, 0.0709487030987733, // -0.6319166041826899, 1.0958800635211576, // -0.1817887834908719, -0.3650751413581792, 0.7905497455496533, // 0.6171465501654385, 1.0301636939103409, -0.8241029625112244, // 0.2399581166415165, 1.5682117246334322 - VCMP_U64(3, v2, 0x3fd5ec06cab1bfc9, 0xbfe5045c9bf61361, 0xbfb88823c5e61162, + VCMP_U64(3, v8, 0x3fd5ec06cab1bfc9, 0xbfe5045c9bf61361, 0xbfb88823c5e61162, 0xbfc5505bffbca57e, 0x3fc0dc0e8c68ea87, 0x3fb229b1b780ba72, 0xbfe438a92b9872ef, 0x3ff188b9889296a1, 0xbfc744dad7efbbd3, 0xbfd75d64202dd23c, 0x3fe94c2efadfe675, 0x3fe3bfaa1f3997d0, @@ -126,37 +126,37 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.3474, -0.9888, 0.2810, 0.4199, 0.1704, -0.3772, 0.2998, // 0.7871, -0.2527, -0.8618, 0.2646, 0.5488, -0.3184, -0.3508, // -0.3589, -0.3914 - VLOAD_16(v4, 0x358f, 0xbbe9, 0x347f, 0x36b8, 0x3174, 0xb609, 0x34cc, 0x3a4c, + VLOAD_16(v16, 0x358f, 0xbbe9, 0x347f, 0x36b8, 0x3174, 0xb609, 0x34cc, 0x3a4c, 0xb40b, 0xbae5, 0x343c, 0x3864, 0xb518, 0xb59d, 0xb5be, 0xb643); // 0.0417, 0.3862, -0.9619, -0.5659, 0.1731, 0.4827, 0.7334, // -0.7271, -0.9814, 0.8003, -0.4836, 0.5234, -0.8540, // -0.2036, -0.8823, 0.2603 - VLOAD_16(v6, 0x2958, 
0x362e, 0xbbb2, 0xb887, 0x318a, 0x37b9, 0x39de, 0xb9d1, + VLOAD_16(v24, 0x2958, 0x362e, 0xbbb2, 0xb887, 0x318a, 0x37b9, 0x39de, 0xb9d1, 0xbbda, 0x3a67, 0xb7bd, 0x3830, 0xbad5, 0xb284, 0xbb0f, 0x342a); VLOAD_8(v0, 0xAA, 0xAA); // -0.2739, 0.2146, 0.5264, -0.8853, 0.8877, -0.6748, -0.7563, // -0.9634, 0.7451, -0.5166, 0.0698, 0.4790, -0.0681, -0.9746, // 0.2129, 0.9072 - VLOAD_16(v2, 0xb462, 0x32de, 0x3836, 0xbb15, 0x3b1a, 0xb966, 0xba0d, 0xbbb5, + VLOAD_16(v8, 0xb462, 0x32de, 0x3836, 0xbb15, 0x3b1a, 0xb966, 0xba0d, 0xbbb5, 0x39f6, 0xb822, 0x2c77, 0x37aa, 0xac5b, 0xbbcc, 0x32d0, 0x3b42); - asm volatile("vfnmsac.vv v2, v4, v6, v0.t"); + asm volatile("vfnmsac.vv v8, v16, v24, v0.t"); // -0.2739, 0.5967, 0.5264, -0.6475, 0.8877, -0.4927, -0.7563, // -0.3911, 0.7451, 0.1733, 0.0698, 0.1917, -0.0681, -1.0459, // 0.2129, 1.0088 - VCMP_U16(4, v2, 0xb462, 0x38c6, 0x3836, 0xb92e, 0x3b1a, 0xb7e2, 0xba0d, + VCMP_U16(4, v8, 0xb462, 0x38c6, 0x3836, 0xb92e, 0x3b1a, 0xb7e2, 0xba0d, 0xb642, 0x39f6, 0x318a, 0x2c77, 0x3223, 0xac5b, 0xbc2f, 0x32d0, 0x3c09); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.11577118, -0.10074481, 0.13861528, 0.44782066, // 0.42196107, -0.67597556, 0.34948668, -0.87903690, // -0.34136006, -0.19722189, 0.76997000, -0.68663412, // 0.45603558, 0.60629857, -0.86984915, -0.08019307 - VLOAD_32(v4, 0x3ded1971, 0xbdce534c, 0x3e0df12a, 0x3ee548c0, 0x3ed80b48, + VLOAD_32(v16, 0x3ded1971, 0xbdce534c, 0x3e0df12a, 0x3ee548c0, 0x3ed80b48, 0xbf2d0cbc, 0x3eb2efeb, 0xbf610890, 0xbeaec6bf, 0xbe49f489, 0x3f451cc1, 0xbf2fc741, 0x3ee97d7f, 0x3f1b3662, 0xbf5eae6f, 0xbda43c43); @@ -164,7 +164,7 @@ void TEST_CASE2(void) { // 0.47494572, -0.34277225, -0.54462087, -0.90492284, 0.60100728, // -0.02819708, -0.46859986, 0.87238866, 0.46812481, // 0.49922746, 0.97036403, 0.04279163 - VLOAD_32(v6, 0xbec7879f, 0x3ecf286c, 0x3f30d065, 0xbf7b28a8, 0x3ef32c16, + VLOAD_32(v24, 0xbec7879f, 0x3ecf286c, 0x3f30d065, 0xbf7b28a8, 0x3ef32c16, 0xbeaf7fd8, 0xbf0b6c46, 0xbf67a906, 0x3f19db9d, 
0xbce6fd92, 0xbeefec52, 0x3f5f54dd, 0x3eefae0e, 0x3eff9abe, 0x3f7869c7, 0x3d2f4647); @@ -173,29 +173,29 @@ void TEST_CASE2(void) { // 0.15318950, 0.15531392, -0.20705318, -0.82493448, // 0.12047531, 0.57526720, 0.23939800, -0.19725421, // 0.15403098, 0.03931713, -0.45930895, -0.15395784 - VLOAD_32(v2, 0x3f4c4ca0, 0xbf3339f2, 0xbf02ae86, 0x3ec57575, 0x3e1cddb5, + VLOAD_32(v8, 0x3f4c4ca0, 0xbf3339f2, 0xbf02ae86, 0x3ec57575, 0x3e1cddb5, 0x3e1f0a9d, 0xbe5405c0, 0xbf532ee8, 0x3df6bbc3, 0x3f1344b6, 0x3e7524c0, 0xbe49fd02, 0x3e1dba4c, 0x3d210b00, 0xbeeb2a8b, 0xbe1da720); - asm volatile("vfnmsac.vv v2, v4, v6, v0.t"); + asm volatile("vfnmsac.vv v8, v16, v24, v0.t"); // 0.79804420, -0.65934104, -0.51047552, 0.82501376, // 0.15318950, -0.07639174, -0.20705318, -1.62039506, // 0.12047531, 0.56970614, 0.23939800, 0.40175763, // 0.15403098, -0.26336378, -0.45930895, -0.15052626 - VCMP_U32(5, v2, 0x3f4c4ca0, 0xbf28ca93, 0xbf02ae86, 0x3f53341a, 0x3e1cddb5, + VCMP_U32(5, v8, 0x3f4c4ca0, 0xbf28ca93, 0xbf02ae86, 0x3f53341a, 0x3e1cddb5, 0xbd9c7345, 0xbe5405c0, 0xbfcf691b, 0x3df6bbc3, 0x3f11d843, 0x3e7524c0, 0x3ecdb32c, 0x3e1dba4c, 0xbe86d79e, 0xbeeb2a8b, 0xbe1a238e); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.1779684802061718, 0.1122733699429854, -0.0166033088608786, // -0.0418350503858864, 0.0809510021720363, -0.9993917101510512, // -0.2139048161619248, 0.7196716914796224, 0.6489783595942558, // 0.5950689618839839, -0.7376256302221853, -0.5442228345597713, // -0.8234113806545975, -0.6424001059348645, -0.3817524674245201, // -0.8801262923106541 - VLOAD_64(v4, 0xbfc6c7abd11a2788, 0x3fbcbdf2941de8b0, 0xbf91007532405e80, + VLOAD_64(v16, 0xbfc6c7abd11a2788, 0x3fbcbdf2941de8b0, 0xbf91007532405e80, 0xbfa56b675a77c100, 0x3fb4b93472e84630, 0xbfeffb0452dfc0ba, 0xbfcb613ba6efa978, 0x3fe7078ced586224, 0x3fe4c46e43c89c1c, 0x3fe30ace10450114, 0xbfe79aa110cfdc92, 0xbfe16a46018575da, @@ -207,7 +207,7 @@ void TEST_CASE2(void) { // -0.9215678788036654, 0.4412210589054084, 
0.3537359089001260, // -0.4889461402031243, 0.2341577339668230, 0.0593866008892341, // -0.4825773777931026, 0.8989772522533539 - VLOAD_64(v6, 0x3fe5ca90cb4aba98, 0xbfd67b45dfa41e18, 0xbfd7f97c12a6b704, + VLOAD_64(v24, 0x3fe5ca90cb4aba98, 0xbfd67b45dfa41e18, 0xbfd7f97c12a6b704, 0xbfe951a6c578c3ac, 0x3fea89299b6d84b0, 0x3fda42f7d4d35178, 0xbfb5c6f9cd987320, 0xbfe2abfdad8a63b6, 0xbfed7d7beb902fcc, 0x3fdc3cf7409388b4, 0x3fd6a39bf009666c, 0xbfdf4ae4c06b61e4, @@ -220,20 +220,20 @@ void TEST_CASE2(void) { // -0.7798663937316326, -0.1025181838739857, 0.5296250728149803, // 0.8832422045338422, 0.8373555508937671, -0.8622529212135799, // 0.4241832213372883, 0.7769982087360683 - VLOAD_64(v2, 0x3fcc54d154555708, 0xbfe6477196411436, 0xbfb6f0878eee8940, + VLOAD_64(v8, 0x3fcc54d154555708, 0xbfe6477196411436, 0xbfb6f0878eee8940, 0xbfc113b7f0547630, 0x3fc973b5c856db48, 0xbfd5b4747c9c185c, 0xbfe3a398b77f3552, 0x3fe5a16ce1f8870a, 0xbfe8f4aa5e0a7552, 0xbfba3ea1b6fbece0, 0x3fe0f2b047dc3902, 0x3fec438527dd6ef6, 0x3feacb9dde46cf34, 0xbfeb9793702fc4f0, 0x3fdb25d161c9f510, 0x3fe8dd2b58f24dc8); - asm volatile("vfnmsac.vv v2, v4, v6, v0.t"); + asm volatile("vfnmsac.vv v8, v16, v24, v0.t"); // 0.2213384305747967, -0.6567824407689892, -0.0896076892809434, // -0.1665148733821233, 0.1988436916560323, 0.0709487030987733, // -0.6137202819751713, 1.0958800635211576, // -0.7798663937316326, -0.3650751413581792, 0.5296250728149803, // 0.6171465501654385, 0.8373555508937671, -0.8241029625112244, // 0.4241832213372883, 1.5682117246334322 - VCMP_U64(6, v2, 0x3fcc54d154555708, 0xbfe5045c9bf61361, 0xbfb6f0878eee8940, + VCMP_U64(6, v8, 0x3fcc54d154555708, 0xbfe5045c9bf61361, 0xbfb6f0878eee8940, 0xbfc5505bffbca57e, 0x3fc973b5c856db48, 0x3fb229b1b780ba72, 0xbfe3a398b77f3552, 0x3ff188b9889296a1, 0xbfe8f4aa5e0a7552, 0xbfd75d64202dd23c, 0x3fe0f2b047dc3902, 0x3fe3bfaa1f3997d0, @@ -244,29 +244,29 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - 
VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // -0.4771 BOX_HALF_IN_FLOAT(fscalar_16, 0xb7a2); // -0.6172, 0.8584, -0.1088, -0.6719, 0.3579, 0.5889, 0.1724, // -0.5239, -0.5732, -0.6167, 0.8271, -0.7334, 0.3489, -0.7607, // -0.7788, -0.5264 - VLOAD_16(v4, 0xb8f0, 0x3ade, 0xaef7, 0xb960, 0x35ba, 0x38b6, 0x3184, 0xb831, + VLOAD_16(v16, 0xb8f0, 0x3ade, 0xaef7, 0xb960, 0x35ba, 0x38b6, 0x3184, 0xb831, 0xb896, 0xb8ef, 0x3a9e, 0xb9de, 0x3595, 0xba16, 0xba3b, 0xb836); // 0.0186, 0.9351, 0.6201, 0.9463, 0.2512, 0.5786, 0.9424, // -0.5132, -0.7646, 0.0194, -0.2507, -0.2905, 0.3452, // -0.7803, -0.7666, -0.1387 - VLOAD_16(v2, 0x24c1, 0x3b7b, 0x38f6, 0x3b92, 0x3405, 0x38a1, 0x3b8a, 0xb81b, + VLOAD_16(v8, 0x24c1, 0x3b7b, 0x38f6, 0x3b92, 0x3405, 0x38a1, 0x3b8a, 0xb81b, 0xba1e, 0x24f6, 0xb403, 0xb4a6, 0x3586, 0xba3e, 0xba22, 0xb070); - asm volatile("vfnmsac.vf v2, %[A], v4" ::[A] "f"(fscalar_16)); + asm volatile("vfnmsac.vf v8, %[A], v16" ::[A] "f"(fscalar_16)); // -0.2759, 1.3447, 0.5684, 0.6260, 0.4219, 0.8594, 1.0244, // -0.7632, -1.0381, -0.2749, 0.1438, -0.6406, 0.5117, -1.1426, // -1.1387, -0.3899 - VCMP_U16(7, v2, 0xb46a, 0x3d61, 0x388c, 0x3902, 0x36c0, 0x3ae0, 0x3c19, + VCMP_U16(7, v8, 0xb46a, 0x3d61, 0x388c, 0x3902, 0x36c0, 0x3ae0, 0x3c19, 0xba1b, 0xbc27, 0xb466, 0x309a, 0xb920, 0x3818, 0xbc93, 0xbc8d, 0xb63d); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.73549986 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbf3c49b8); @@ -274,7 +274,7 @@ void TEST_CASE3(void) { // -0.77184784, -0.41120139, -0.57577437, -0.15976480, // -0.05041125, 0.42673740, 0.88473374, -0.49891368, // -0.84324479, -0.26009968, -0.01877740, -0.13754985 - VLOAD_32(v4, 0x3f3e1643, 0x3f3af0b7, 0x3f404d94, 0x3d24ad6a, 0xbf4597d2, + VLOAD_32(v16, 0x3f3e1643, 0x3f3af0b7, 0x3f404d94, 0x3d24ad6a, 0xbf4597d2, 0xbed288fd, 0xbf1365f3, 0xbe239962, 0xbd4e7c07, 0x3eda7d53, 0x3f627de9, 0xbeff719d, 0xbf57dee4, 0xbe852bc9, 0xbc99d30f, 0xbe0cd9de); @@ -282,22 +282,22 @@ void TEST_CASE3(void) { // 
0.28473541, -0.93230879, -0.77235961, -0.92498165, // -0.55227244, 0.97729182, 0.28253901, 0.45306230, // -0.50359881, 0.40307203, -0.65891176, -0.59297264 - VLOAD_32(v2, 0x3f653838, 0x3f2f98cf, 0x3f2ccdaf, 0x3daa8aad, 0x3e91c8d7, + VLOAD_32(v8, 0x3f653838, 0x3f2f98cf, 0x3f2ccdaf, 0x3daa8aad, 0x3e91c8d7, 0xbf6eabca, 0xbf45b95c, 0xbf6ccb99, 0xbf0d61ba, 0x3f7a2fcc, 0x3e90a8f4, 0x3ee7f7c8, 0xbf00ebda, 0x3ece5f75, 0xbf28ae71, 0xbf17cd0e); - asm volatile("vfnmsac.vf v2, %[A], v4" ::[A] "f"(fscalar_32)); + asm volatile("vfnmsac.vf v8, %[A], v16" ::[A] "f"(fscalar_32)); // 1.44151771, 1.22301352, 1.22750902, 0.11284268, // -0.28295860, -1.23474741, -1.19584155, -1.04248869, // -0.58934993, 1.29115713, 0.93326056, 0.08611137, // -1.12380528, 0.21176875, -0.67272252, -0.69414055 - VCMP_U32(8, v2, 0x3fb883a7, 0x3f9c8bb5, 0x3f9d1f04, 0x3de71a10, 0xbe90dff2, + VCMP_U32(8, v8, 0x3fb883a7, 0x3f9c8bb5, 0x3f9d1f04, 0x3de71a10, 0xbe90dff2, 0xbf9e0c34, 0xbf991156, 0xbf857045, 0xbf16dfa3, 0x3fa544a3, 0x3f6eea2a, 0x3db05b27, 0xbf8fd8da, 0x3e58d9e8, 0xbf2c378b, 0xbf31b332); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.5178244899339752 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe09204aa2ab2a0); @@ -308,7 +308,7 @@ void TEST_CASE3(void) { // 0.1494192541908572, 0.0196333207724690, // -0.2272946521816408, 0.9289723385226867, // -0.5162193242581365, -0.3917544955022987 - VLOAD_64(v4, 0x3febab01fbe195e4, 0x3f744c9c38b4b600, 0x3fedb10ca98026d0, + VLOAD_64(v16, 0x3febab01fbe195e4, 0x3f744c9c38b4b600, 0x3fedb10ca98026d0, 0xbfec39b31560f050, 0x3fed9c01a86a6166, 0xbfb06600c7c3cc10, 0xbfee59041b01e946, 0x3fe35149679e0d42, 0x3fe43a741f8b89d2, 0x3fea6681d6d6ae5e, 0x3fc3202b8d123b90, 0x3f941ac1da84be40, @@ -321,20 +321,20 @@ void TEST_CASE3(void) { // 0.1236692515687874, 0.9290168852760794, // -0.0433411597165929, -0.8422695068160440, // 0.6519328829008422, -0.3347506024828231 - VLOAD_64(v2, 0x3fd8e24f9d6331d4, 0xbfea5288d3d6317c, 0x3feaf9e847d9618e, + VLOAD_64(v8, 
0x3fd8e24f9d6331d4, 0xbfea5288d3d6317c, 0x3feaf9e847d9618e, 0xbfe436b76feb621e, 0xbfe56ddde1ff608c, 0x3fe39dc3f345962e, 0xbfcd62b6d5e76d58, 0x3fc4206b1afd2970, 0xbfe82286f1e7af48, 0x3fe3ad775d9fe964, 0x3fbfa8c9bf023cb0, 0x3fedba819e762954, 0xbfa630cfff1b61a0, 0xbfeaf3df2e462cb6, 0x3fe4dca25967e02c, 0xbfd56c8dca7eb8ac); - asm volatile("vfnmsac.vf v2, %[A], v4" ::[A] "f"(dscalar_64)); + asm volatile("vfnmsac.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); // -0.0589110568256553, -0.8251412828355696, 0.3625363508340025, // -0.1749355920677641, -1.1488012349688719, 0.6461784703586890, // 0.2615122334407671, -0.1553571761707763, -1.0815499543490239, // 0.1877131026437891, 0.0462963024810918, 0.9188502709613655, // 0.0743575776140855, -1.3233141341743264, 0.9192438911788732, // -0.1318905306700034 - VCMP_U64(9, v2, 0xbfae29970ce0c2e6, 0xbfea678eb10b76d9, 0x3fd733cbaa9c5dc5, + VCMP_U64(9, v8, 0xbfae29970ce0c2e6, 0xbfea678eb10b76d9, 0x3fd733cbaa9c5dc5, 0xbfc6644a1b6b315b, 0xbff2617d675cbb41, 0x3fe4ad7e78b23c6a, 0x3fd0bc9dce8872e3, 0xbfc3e2be736d1b43, 0xbff14e07532eb5a9, 0x3fc806fba27160c3, 0x3fa7b4262229fd93, 0x3fed6738aef664ca, @@ -345,30 +345,30 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // -0.4771 BOX_HALF_IN_FLOAT(fscalar_16, 0xb7a2); // -0.6172, 0.8584, -0.1088, -0.6719, 0.3579, 0.5889, 0.1724, // -0.5239, -0.5732, -0.6167, 0.8271, -0.7334, 0.3489, // -0.7607, -0.7788, -0.5264 - VLOAD_16(v4, 0xb8f0, 0x3ade, 0xaef7, 0xb960, 0x35ba, 0x38b6, 0x3184, 0xb831, + VLOAD_16(v16, 0xb8f0, 0x3ade, 0xaef7, 0xb960, 0x35ba, 0x38b6, 0x3184, 0xb831, 0xb896, 0xb8ef, 0x3a9e, 0xb9de, 0x3595, 0xba16, 0xba3b, 0xb836); VLOAD_8(v0, 0xAA, 0xAA); // 0.0186, 0.9351, 0.6201, 0.9463, 0.2512, 0.5786, 0.9424, // -0.5132, -0.7646, 0.0194, -0.2507, -0.2905, 0.3452, // -0.7803, -0.7666, -0.1387 - VLOAD_16(v2, 0x24c1, 0x3b7b, 0x38f6, 0x3b92, 0x3405, 0x38a1, 0x3b8a, 0xb81b, + 
VLOAD_16(v8, 0x24c1, 0x3b7b, 0x38f6, 0x3b92, 0x3405, 0x38a1, 0x3b8a, 0xb81b, 0xba1e, 0x24f6, 0xb403, 0xb4a6, 0x3586, 0xba3e, 0xba22, 0xb070); - asm volatile("vfnmsac.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); + asm volatile("vfnmsac.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_16)); // 0.0186, 1.3447, 0.6201, 0.6260, 0.2512, 0.8594, 0.9424, // -0.7632, -0.7646, -0.2749, -0.2507, -0.6406, 0.3452, // -1.1426, -0.7666, -0.3899 - VCMP_U16(10, v2, 0x24c1, 0x3d61, 0x38f6, 0x3902, 0x3405, 0x3ae0, 0x3b8a, + VCMP_U16(10, v8, 0x24c1, 0x3d61, 0x38f6, 0x3902, 0x3405, 0x3ae0, 0x3b8a, 0xba1b, 0xba1e, 0xb466, 0xb403, 0xb920, 0x3586, 0xbc93, 0xba22, 0xb63d); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.73549986 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbf3c49b8); @@ -376,7 +376,7 @@ void TEST_CASE4(void) { // -0.77184784, -0.41120139, -0.57577437, -0.15976480, // -0.05041125, 0.42673740, 0.88473374, -0.49891368, // -0.84324479, -0.26009968, -0.01877740, -0.13754985 - VLOAD_32(v4, 0x3f3e1643, 0x3f3af0b7, 0x3f404d94, 0x3d24ad6a, 0xbf4597d2, + VLOAD_32(v16, 0x3f3e1643, 0x3f3af0b7, 0x3f404d94, 0x3d24ad6a, 0xbf4597d2, 0xbed288fd, 0xbf1365f3, 0xbe239962, 0xbd4e7c07, 0x3eda7d53, 0x3f627de9, 0xbeff719d, 0xbf57dee4, 0xbe852bc9, 0xbc99d30f, 0xbe0cd9de); @@ -385,22 +385,22 @@ void TEST_CASE4(void) { // 0.28473541, -0.93230879, -0.77235961, -0.92498165, // -0.55227244, 0.97729182, 0.28253901, 0.45306230, // -0.50359881, 0.40307203, -0.65891176, -0.59297264 - VLOAD_32(v2, 0x3f653838, 0x3f2f98cf, 0x3f2ccdaf, 0x3daa8aad, 0x3e91c8d7, + VLOAD_32(v8, 0x3f653838, 0x3f2f98cf, 0x3f2ccdaf, 0x3daa8aad, 0x3e91c8d7, 0xbf6eabca, 0xbf45b95c, 0xbf6ccb99, 0xbf0d61ba, 0x3f7a2fcc, 0x3e90a8f4, 0x3ee7f7c8, 0xbf00ebda, 0x3ece5f75, 0xbf28ae71, 0xbf17cd0e); - asm volatile("vfnmsac.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); + asm volatile("vfnmsac.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_32)); // 0.89538908, 1.22301352, 0.67501348, 0.11284268, // 0.28473541, -1.23474741, -0.77235961, -1.04248869, // 
-0.55227244, 1.29115713, 0.28253901, 0.08611137, // -0.50359881, 0.21176875, -0.65891176, -0.69414055 - VCMP_U32(11, v2, 0x3f653838, 0x3f9c8bb5, 0x3f2ccdaf, 0x3de71a10, 0x3e91c8d7, + VCMP_U32(11, v8, 0x3f653838, 0x3f9c8bb5, 0x3f2ccdaf, 0x3de71a10, 0x3e91c8d7, 0xbf9e0c34, 0xbf45b95c, 0xbf857045, 0xbf0d61ba, 0x3fa544a3, 0x3e90a8f4, 0x3db05b27, 0xbf00ebda, 0x3e58d9e8, 0xbf28ae71, 0xbf31b332); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.5178244899339752 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe09204aa2ab2a0); @@ -412,7 +412,7 @@ void TEST_CASE4(void) { // 0.1494192541908572, 0.0196333207724690, // -0.2272946521816408, 0.9289723385226867, // -0.5162193242581365, -0.3917544955022987 - VLOAD_64(v4, 0x3febab01fbe195e4, 0x3f744c9c38b4b600, 0x3fedb10ca98026d0, + VLOAD_64(v16, 0x3febab01fbe195e4, 0x3f744c9c38b4b600, 0x3fedb10ca98026d0, 0xbfec39b31560f050, 0x3fed9c01a86a6166, 0xbfb06600c7c3cc10, 0xbfee59041b01e946, 0x3fe35149679e0d42, 0x3fe43a741f8b89d2, 0x3fea6681d6d6ae5e, 0x3fc3202b8d123b90, 0x3f941ac1da84be40, @@ -427,13 +427,13 @@ void TEST_CASE4(void) { // 0.1236692515687874, 0.9290168852760794, // -0.0433411597165929, -0.8422695068160440, // 0.6519328829008422, -0.3347506024828231 - VLOAD_64(v2, 0x3fd8e24f9d6331d4, 0xbfea5288d3d6317c, 0x3feaf9e847d9618e, + VLOAD_64(v8, 0x3fd8e24f9d6331d4, 0xbfea5288d3d6317c, 0x3feaf9e847d9618e, 0xbfe436b76feb621e, 0xbfe56ddde1ff608c, 0x3fe39dc3f345962e, 0xbfcd62b6d5e76d58, 0x3fc4206b1afd2970, 0xbfe82286f1e7af48, 0x3fe3ad775d9fe964, 0x3fbfa8c9bf023cb0, 0x3fedba819e762954, 0xbfa630cfff1b61a0, 0xbfeaf3df2e462cb6, 0x3fe4dca25967e02c, 0xbfd56c8dca7eb8ac); - asm volatile("vfnmsac.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_64)); + asm volatile("vfnmsac.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); // 0.3888129269587612, -0.8251412828355696, 0.8430062678626642, // -0.1749355920677641, -0.6696614660277347, // 0.6461784703586890, -0.2295750183537659, @@ -441,7 +441,7 @@ void TEST_CASE4(void) { // 
0.1877131026437891, 0.1236692515687874, 0.9188502709613655, // -0.0433411597165929, -1.3233141341743264, // 0.6519328829008422, -0.1318905306700034 - VCMP_U64(12, v2, 0x3fd8e24f9d6331d4, 0xbfea678eb10b76d9, 0x3feaf9e847d9618e, + VCMP_U64(12, v8, 0x3fd8e24f9d6331d4, 0xbfea678eb10b76d9, 0x3feaf9e847d9618e, 0xbfc6644a1b6b315b, 0xbfe56ddde1ff608c, 0x3fe4ad7e78b23c6a, 0xbfcd62b6d5e76d58, 0xbfc3e2be736d1b43, 0xbfe82286f1e7af48, 0x3fc806fba27160c3, 0x3fbfa8c9bf023cb0, 0x3fed6738aef664ca, diff --git a/sw/riscvTests/isa/rv64uv/vfnmsub.c b/sw/riscvTests/isa/rv64uv/vfnmsub.c index 1535a169..783dd545 100644 --- a/sw/riscvTests/isa/rv64uv/vfnmsub.c +++ b/sw/riscvTests/isa/rv64uv/vfnmsub.c @@ -10,36 +10,36 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.0091, -0.3794, -0.0005, -0.0464, 0.4834, 0.2932, -0.3042, // -0.3096, -0.9844, -0.1815, -0.8760, 0.0853, -0.3723, -0.8877, // 0.1584, 0.1943 - VLOAD_16(v4, 0xa0ac, 0xb612, 0x8f83, 0xa9f0, 0x37bc, 0x34b1, 0xb4de, 0xb4f4, + VLOAD_16(v16, 0xa0ac, 0xb612, 0x8f83, 0xa9f0, 0x37bc, 0x34b1, 0xb4de, 0xb4f4, 0xbbe0, 0xb1cf, 0xbb02, 0x2d75, 0xb5f5, 0xbb1a, 0x3112, 0x3238); // -0.3301, 0.7769, 0.6572, -0.8193, 0.4529, 0.2349, 0.5264, // -0.2456, 0.0873, 0.5381, 0.4670, 0.8564, -0.1790, 0.6641, // 0.0182, 0.0447 - VLOAD_16(v6, 0xb548, 0x3a37, 0x3942, 0xba8e, 0x373f, 0x3384, 0x3836, 0xb3dc, + VLOAD_16(v24, 0xb548, 0x3a37, 0x3942, 0xba8e, 0x373f, 0x3384, 0x3836, 0xb3dc, 0x2d97, 0x384e, 0x3779, 0x3ada, 0xb1ba, 0x3950, 0x24a7, 0x29b9); // 0.5835, 0.4404, -0.3459, 0.0516, -0.4866, -0.2191, 0.0685, // -0.5430, -0.1429, -0.7539, -0.6416, -0.6758, -0.1147, 0.3438, // 0.3440, 0.1991 - VLOAD_16(v2, 0x38ab, 0x370c, 0xb589, 0x2a9b, 0xb7c9, 0xb303, 0x2c62, 0xb858, + VLOAD_16(v8, 0x38ab, 0x370c, 0xb589, 0x2a9b, 0xb7c9, 0xb303, 0x2c62, 0xb858, 0xb093, 0xba08, 0xb922, 0xb968, 0xaf57, 0x3580, 0x3581, 0x325f); - asm volatile("vfnmsub.vv v2, v4, v6"); + asm volatile("vfnmsub.vv v8, v16, v24"); 
// -0.3247, 0.9438, 0.6572, -0.8169, 0.6880, 0.2991, 0.5474, // -0.4136, -0.0534, 0.4014, -0.0950, 0.9141, -0.2217, 0.9692, // -0.0363, 0.0060 - VCMP_U16(1, v2, 0xb532, 0x3b8d, 0x3942, 0xba89, 0x3981, 0x34c9, 0x3861, + VCMP_U16(1, v8, 0xb532, 0x3b8d, 0x3942, 0xba89, 0x3981, 0x34c9, 0x3861, 0xb69e, 0xaad5, 0x366b, 0xae14, 0x3b50, 0xb318, 0x3bc1, 0xa8a7, 0x1e29); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.76259303, -0.43966120, -0.19390504, -0.57240725, // -0.57148474, -0.93710214, 0.24273214, 0.44242114, // -0.93160200, -0.56412256, -0.75430351, -0.02741535, // -0.60542876, -0.93627954, 0.02798123, 0.23119579 - VLOAD_32(v4, 0x3f43394c, 0xbee11b46, 0xbe468f0b, 0xbf128948, 0xbf124cd3, + VLOAD_32(v16, 0x3f43394c, 0xbee11b46, 0xbe468f0b, 0xbf128948, 0xbf124cd3, 0xbf6fe5ed, 0x3e788ec6, 0x3ee28506, 0xbf6e7d78, 0xbf106a56, 0xbf411a09, 0xbce09629, 0xbf1afd61, 0xbf6fb004, 0x3ce538e6, 0x3e6cbe97); @@ -47,7 +47,7 @@ void TEST_CASE1(void) { // -0.16710435, 0.84761631, 0.37147006, 0.25389814, // -0.44707820, 0.38169226, -0.82191414, -0.81056035, // 0.29047397, -0.46743703, -0.91869444, -0.08079135 - VLOAD_32(v6, 0x3ef9888c, 0x3e49e355, 0xbef1ce59, 0x3e0cd222, 0xbe2b1d67, + VLOAD_32(v24, 0x3ef9888c, 0x3e49e355, 0xbef1ce59, 0x3e0cd222, 0xbe2b1d67, 0x3f58fd62, 0x3ebe3153, 0x3e81fef0, 0xbee4e76f, 0x3ec36d2b, 0xbf5268f7, 0xbf4f80e2, 0x3e94b901, 0xbeef53e8, 0xbf6b2f8f, 0xbda575f0); @@ -55,29 +55,29 @@ void TEST_CASE1(void) { // 0.46947387, 0.29113689, -0.11920074, 0.63394654, // -0.82611400, -0.84088647, -0.13328743, 0.29885510, // 0.91797447, -0.15480036, 0.76857966, 0.16230854 - VLOAD_32(v2, 0xbef91d92, 0xbf5fca01, 0x3e36d496, 0x3f2c969e, 0x3ef05ee1, + VLOAD_32(v8, 0xbef91d92, 0xbf5fca01, 0x3e36d496, 0x3f2c969e, 0x3ef05ee1, 0x3e950fe5, 0xbdf41f84, 0x3f224a52, 0xbf537c35, 0xbf574456, 0xbe087c80, 0x3e990389, 0x3f6b0060, 0xbe1e83fc, 0x3f44c1a3, 0x3e263436); - asm volatile("vfnmsub.vv v2, v4, v6"); + asm volatile("vfnmsub.vv v8, v16, v24"); // 0.85841238, -0.18718503, -0.43765658, 
0.52342200, // 0.10119282, 1.12044132, 0.40040392, -0.02657321, // -1.21668768, -0.09267077, -0.92245328, -0.80236715, // 0.84624207, -0.61237341, -0.94020027, -0.11831641 - VCMP_U32(2, v2, 0x3f5bc0ea, 0xbe3fad70, 0xbee01486, 0x3f05fefc, 0x3dcf3e2c, + VCMP_U32(2, v8, 0x3f5bc0ea, 0xbe3fad70, 0xbee01486, 0x3f05fefc, 0x3dcf3e2c, 0x3f8f6a9f, 0x3ecd01be, 0xbcd9b00d, 0xbf9bbc6c, 0xbdbdca2c, 0xbf6c25e6, 0xbf4d67ef, 0x3f58a353, 0xbf1cc481, 0xbf70b0f7, 0xbdf24fdf); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.1307639483617093, 0.9224167823566942, 0.8635785104096312, // -0.1786758246437388, 0.0810514505300033, 0.4196384170211611, // 0.9100790646565715, -0.5457616411379209, -0.5513001815564993, // -0.4320693373833464, 0.2818536966914695, 0.5493933224246561, // 0.0505621823765807, 0.7247332126666939, -0.8702311369694951, // -0.0660417836134264 - VLOAD_64(v4, 0xbfc0bcdf80daccc8, 0x3fed847033301d18, 0x3feba26f66779bbe, + VLOAD_64(v16, 0xbfc0bcdf80daccc8, 0x3fed847033301d18, 0x3feba26f66779bbe, 0xbfc6ded973b720d0, 0x3fb4bfc9b151d990, 0x3fdadb5b175011f8, 0x3fed1f5e216f2d02, 0xbfe176e11e032836, 0xbfe1a44047420c82, 0xbfdba706266a9d80, 0x3fd209e41662faec, 0x3fe194a14e0e8cc0, @@ -89,7 +89,7 @@ void TEST_CASE1(void) { // -0.3169052211432897, -0.0970247500649024, 0.8584276150948376, // -0.2642287948226270, 0.2403355182026823, // -0.0814065713760876, -0.7437013715700231, -0.6226210619792329 - VLOAD_64(v6, 0x3fe5480ebb4f6ca8, 0xbfead900bb1380ea, 0x3fd4de97daca5430, + VLOAD_64(v24, 0x3fe5480ebb4f6ca8, 0xbfead900bb1380ea, 0x3fd4de97daca5430, 0x3fdc4cac5e87d53c, 0xbfc8999720661708, 0xbfc7063400e0c4f8, 0xbfe035c92894a640, 0x3fea6586b2596362, 0xbfd4482cd62f7e30, 0xbfb8d69d306e6ba0, 0x3feb783d309a196c, 0xbfd0e91fe41b2de8, @@ -101,20 +101,20 @@ void TEST_CASE1(void) { // -0.2810978842877421, -0.3240264495739638, 0.5540367578795606, // 0.7398533272929233, 0.4690189457399407, -0.2427822500985419, // 0.2399358773396087 - VLOAD_64(v2, 0xbfcb2c2560fa8d98, 0x3fed09b08a1d236c, 
0xbfda8ff9193bbdb8, + VLOAD_64(v8, 0xbfcb2c2560fa8d98, 0x3fed09b08a1d236c, 0xbfda8ff9193bbdb8, 0xbfdcdfbf03022cf4, 0xbfed34ca8b7904d0, 0xbfea5c74d801844e, 0xbfb049b07fd1a3e0, 0x3fe6cc6a4ff0157a, 0xbfda692318304834, 0xbfd1fd81faff66b8, 0xbfd4bcd96efd6300, 0x3fe1baab4b7bfc96, 0x3fe7ace0e291ea9e, 0x3fde04680a4a1390, 0xbfcf137d201c7be0, 0x3fceb6380527c498); - asm volatile("vfnmsub.vv v2, v4, v6"); + asm volatile("vfnmsub.vv v8, v16, v24"); // 0.6372868977272925, -1.6760208622190165, 0.6845034040552052, // 0.3615690295631244, -0.1182120133317999, 0.1658155255420807, // -0.4486624012573315, 1.2137228688596235, // -0.5444086454629855, -0.2184785266689677, 0.9497556677330713, // -0.5686128899794636, 0.2029269193361774, // -0.4213201787237407, -0.9549780451092895, -0.6067752686868729 - VCMP_U64(3, v2, 0x3fe464a77dfd0e7c, 0xbffad0fb406a4f74, 0x3fe5e773aecd5e74, + VCMP_U64(3, v8, 0x3fe464a77dfd0e7c, 0xbffad0fb406a4f74, 0x3fe5e773aecd5e74, 0x3fd723f26d4e15bc, 0xbfbe43247b412024, 0x3fc5397171afa72c, 0xbfdcb6e281161599, 0x3ff36b68abc28cd2, 0xbfe16bcbadfd8ab4, 0xbfcbf71ab775f310, 0x3fee6465ff835579, 0xbfe23213a8d1778a, @@ -126,37 +126,37 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.0091, -0.3794, -0.0005, -0.0464, 0.4834, 0.2932, -0.3042, // -0.3096, -0.9844, -0.1815, -0.8760, 0.0853, -0.3723, -0.8877, // 0.1584, 0.1943 - VLOAD_16(v4, 0xa0ac, 0xb612, 0x8f83, 0xa9f0, 0x37bc, 0x34b1, 0xb4de, 0xb4f4, + VLOAD_16(v16, 0xa0ac, 0xb612, 0x8f83, 0xa9f0, 0x37bc, 0x34b1, 0xb4de, 0xb4f4, 0xbbe0, 0xb1cf, 0xbb02, 0x2d75, 0xb5f5, 0xbb1a, 0x3112, 0x3238); // -0.3301, 0.7769, 0.6572, -0.8193, 0.4529, 0.2349, 0.5264, // -0.2456, 0.0873, 0.5381, 0.4670, 0.8564, -0.1790, 0.6641, // 0.0182, 0.0447 - VLOAD_16(v6, 0xb548, 0x3a37, 0x3942, 0xba8e, 0x373f, 0x3384, 0x3836, 0xb3dc, + VLOAD_16(v24, 0xb548, 0x3a37, 0x3942, 0xba8e, 0x373f, 0x3384, 0x3836, 0xb3dc, 
0x2d97, 0x384e, 0x3779, 0x3ada, 0xb1ba, 0x3950, 0x24a7, 0x29b9); VLOAD_8(v0, 0xAA, 0xAA); // 0.5835, 0.4404, -0.3459, 0.0516, -0.4866, -0.2191, 0.0685, // -0.5430, -0.1429, -0.7539, -0.6416, -0.6758, -0.1147, 0.3438, // 0.3440, 0.1991 - VLOAD_16(v2, 0x38ab, 0x370c, 0xb589, 0x2a9b, 0xb7c9, 0xb303, 0x2c62, 0xb858, + VLOAD_16(v8, 0x38ab, 0x370c, 0xb589, 0x2a9b, 0xb7c9, 0xb303, 0x2c62, 0xb858, 0xb093, 0xba08, 0xb922, 0xb968, 0xaf57, 0x3580, 0x3581, 0x325f); - asm volatile("vfnmsub.vv v2, v4, v6, v0.t"); + asm volatile("vfnmsub.vv v8, v16, v24, v0.t"); // 0.5835, 0.9438, -0.3459, -0.8169, -0.4866, 0.2991, 0.0685, // -0.4136, -0.1429, 0.4014, -0.6416, 0.9141, -0.1147, 0.9692, // 0.3440, 0.0060 - VCMP_U16(4, v2, 0x38ab, 0x3b8d, 0xb589, 0xba89, 0xb7c9, 0x34c9, 0x2c62, + VCMP_U16(4, v8, 0x38ab, 0x3b8d, 0xb589, 0xba89, 0xb7c9, 0x34c9, 0x2c62, 0xb69e, 0xb093, 0x366b, 0xb922, 0x3b50, 0xaf57, 0x3bc1, 0x3581, 0x1e29); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.76259303, -0.43966120, -0.19390504, -0.57240725, // -0.57148474, -0.93710214, 0.24273214, 0.44242114, // -0.93160200, -0.56412256, -0.75430351, -0.02741535, // -0.60542876, -0.93627954, 0.02798123, 0.23119579 - VLOAD_32(v4, 0x3f43394c, 0xbee11b46, 0xbe468f0b, 0xbf128948, 0xbf124cd3, + VLOAD_32(v16, 0x3f43394c, 0xbee11b46, 0xbe468f0b, 0xbf128948, 0xbf124cd3, 0xbf6fe5ed, 0x3e788ec6, 0x3ee28506, 0xbf6e7d78, 0xbf106a56, 0xbf411a09, 0xbce09629, 0xbf1afd61, 0xbf6fb004, 0x3ce538e6, 0x3e6cbe97); @@ -164,7 +164,7 @@ void TEST_CASE2(void) { // -0.16710435, 0.84761631, 0.37147006, 0.25389814, // -0.44707820, 0.38169226, -0.82191414, -0.81056035, // 0.29047397, -0.46743703, -0.91869444, -0.08079135 - VLOAD_32(v6, 0x3ef9888c, 0x3e49e355, 0xbef1ce59, 0x3e0cd222, 0xbe2b1d67, + VLOAD_32(v24, 0x3ef9888c, 0x3e49e355, 0xbef1ce59, 0x3e0cd222, 0xbe2b1d67, 0x3f58fd62, 0x3ebe3153, 0x3e81fef0, 0xbee4e76f, 0x3ec36d2b, 0xbf5268f7, 0xbf4f80e2, 0x3e94b901, 0xbeef53e8, 0xbf6b2f8f, 0xbda575f0); @@ -173,29 +173,29 @@ void TEST_CASE2(void) { // 
0.46947387, 0.29113689, -0.11920074, 0.63394654, // -0.82611400, -0.84088647, -0.13328743, 0.29885510, // 0.91797447, -0.15480036, 0.76857966, 0.16230854 - VLOAD_32(v2, 0xbef91d92, 0xbf5fca01, 0x3e36d496, 0x3f2c969e, 0x3ef05ee1, + VLOAD_32(v8, 0xbef91d92, 0xbf5fca01, 0x3e36d496, 0x3f2c969e, 0x3ef05ee1, 0x3e950fe5, 0xbdf41f84, 0x3f224a52, 0xbf537c35, 0xbf574456, 0xbe087c80, 0x3e990389, 0x3f6b0060, 0xbe1e83fc, 0x3f44c1a3, 0x3e263436); - asm volatile("vfnmsub.vv v2, v4, v6, v0.t"); + asm volatile("vfnmsub.vv v8, v16, v24, v0.t"); // -0.48655373, -0.18718503, 0.17854533, 0.52342200, // 0.46947387, 1.12044132, -0.11920074, -0.02657321, // -0.82611400, -0.09267077, -0.13328743, -0.80236715, // 0.91797447, -0.61237341, 0.76857966, -0.11831641 - VCMP_U32(5, v2, 0xbef91d92, 0xbe3fad70, 0x3e36d496, 0x3f05fefc, 0x3ef05ee1, + VCMP_U32(5, v8, 0xbef91d92, 0xbe3fad70, 0x3e36d496, 0x3f05fefc, 0x3ef05ee1, 0x3f8f6a9f, 0xbdf41f84, 0xbcd9b00d, 0xbf537c35, 0xbdbdca2c, 0xbe087c80, 0xbf4d67ef, 0x3f6b0060, 0xbf1cc481, 0x3f44c1a3, 0xbdf24fdf); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.1307639483617093, 0.9224167823566942, 0.8635785104096312, // -0.1786758246437388, 0.0810514505300033, 0.4196384170211611, // 0.9100790646565715, -0.5457616411379209, -0.5513001815564993, // -0.4320693373833464, 0.2818536966914695, 0.5493933224246561, // 0.0505621823765807, 0.7247332126666939, -0.8702311369694951, // -0.0660417836134264 - VLOAD_64(v4, 0xbfc0bcdf80daccc8, 0x3fed847033301d18, 0x3feba26f66779bbe, + VLOAD_64(v16, 0xbfc0bcdf80daccc8, 0x3fed847033301d18, 0x3feba26f66779bbe, 0xbfc6ded973b720d0, 0x3fb4bfc9b151d990, 0x3fdadb5b175011f8, 0x3fed1f5e216f2d02, 0xbfe176e11e032836, 0xbfe1a44047420c82, 0xbfdba706266a9d80, 0x3fd209e41662faec, 0x3fe194a14e0e8cc0, @@ -207,7 +207,7 @@ void TEST_CASE2(void) { // -0.3169052211432897, -0.0970247500649024, 0.8584276150948376, // -0.2642287948226270, 0.2403355182026823, // -0.0814065713760876, -0.7437013715700231, -0.6226210619792329 - VLOAD_64(v6, 
0x3fe5480ebb4f6ca8, 0xbfead900bb1380ea, 0x3fd4de97daca5430, + VLOAD_64(v24, 0x3fe5480ebb4f6ca8, 0xbfead900bb1380ea, 0x3fd4de97daca5430, 0x3fdc4cac5e87d53c, 0xbfc8999720661708, 0xbfc7063400e0c4f8, 0xbfe035c92894a640, 0x3fea6586b2596362, 0xbfd4482cd62f7e30, 0xbfb8d69d306e6ba0, 0x3feb783d309a196c, 0xbfd0e91fe41b2de8, @@ -220,20 +220,20 @@ void TEST_CASE2(void) { // -0.2810978842877421, -0.3240264495739638, 0.5540367578795606, // 0.7398533272929233, 0.4690189457399407, -0.2427822500985419, // 0.2399358773396087 - VLOAD_64(v2, 0xbfcb2c2560fa8d98, 0x3fed09b08a1d236c, 0xbfda8ff9193bbdb8, + VLOAD_64(v8, 0xbfcb2c2560fa8d98, 0x3fed09b08a1d236c, 0xbfda8ff9193bbdb8, 0xbfdcdfbf03022cf4, 0xbfed34ca8b7904d0, 0xbfea5c74d801844e, 0xbfb049b07fd1a3e0, 0x3fe6cc6a4ff0157a, 0xbfda692318304834, 0xbfd1fd81faff66b8, 0xbfd4bcd96efd6300, 0x3fe1baab4b7bfc96, 0x3fe7ace0e291ea9e, 0x3fde04680a4a1390, 0xbfcf137d201c7be0, 0x3fceb6380527c498); - asm volatile("vfnmsub.vv v2, v4, v6, v0.t"); + asm volatile("vfnmsub.vv v8, v16, v24, v0.t"); // -0.2122847293404504, -1.6760208622190165, -0.4150374170703475, // 0.3615690295631244, -0.9126942371441604, 0.1658155255420807, // -0.0636244117792013, 1.2137228688596235, -0.4126670585839094, // -0.2184785266689677, -0.3240264495739638, -0.5686128899794636, // 0.7398533272929233, -0.4213201787237407, -0.2427822500985419, // -0.6067752686868729 - VCMP_U64(6, v2, 0xbfcb2c2560fa8d98, 0xbffad0fb406a4f74, 0xbfda8ff9193bbdb8, + VCMP_U64(6, v8, 0xbfcb2c2560fa8d98, 0xbffad0fb406a4f74, 0xbfda8ff9193bbdb8, 0x3fd723f26d4e15bc, 0xbfed34ca8b7904d0, 0x3fc5397171afa72c, 0xbfb049b07fd1a3e0, 0x3ff36b68abc28cd2, 0xbfda692318304834, 0xbfcbf71ab775f310, 0xbfd4bcd96efd6300, 0xbfe23213a8d1778a, @@ -244,29 +244,29 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // -0.1346 BOX_HALF_IN_FLOAT(fscalar_16, 0xb04f); // -0.1886, 0.9912, -0.0325, 0.5850, 0.2578, -0.2350, 
-0.8701, // 0.9209, 0.5859, -0.4795, 0.8682, 0.9233, -0.8896, -0.5981, // -0.3223, 0.6924 - VLOAD_16(v4, 0xb209, 0x3bee, 0xa82a, 0x38ae, 0x3420, 0xb385, 0xbaf6, 0x3b5e, + VLOAD_16(v16, 0xb209, 0x3bee, 0xa82a, 0x38ae, 0x3420, 0xb385, 0xbaf6, 0x3b5e, 0x38b0, 0xb7ac, 0x3af2, 0x3b63, 0xbb1e, 0xb8c9, 0xb528, 0x398a); // -0.0126, 0.9678, 0.8945, -0.9600, -0.9272, 0.4412, 0.5527, // 0.1136, 0.7207, -0.5181, -0.2810, -0.4048, 0.1648, 0.7612, // -0.8853, 0.1761 - VLOAD_16(v2, 0xa27a, 0x3bbe, 0x3b28, 0xbbae, 0xbb6b, 0x370f, 0x386c, 0x2f45, + VLOAD_16(v8, 0xa27a, 0x3bbe, 0x3b28, 0xbbae, 0xbb6b, 0x370f, 0x386c, 0x2f45, 0x39c4, 0xb825, 0xb47f, 0xb67a, 0x3146, 0x3a17, 0xbb15, 0x31a3); - asm volatile("vfnmsub.vf v2, %[A], v4" ::[A] "f"(fscalar_16)); + asm volatile("vfnmsub.vf v8, %[A], v16" ::[A] "f"(fscalar_16)); // -0.1903, 1.1211, 0.0879, 0.4556, 0.1329, -0.1755, -0.7959, // 0.9360, 0.6831, -0.5493, 0.8301, 0.8687, -0.8677, -0.4956, // -0.4414, 0.7163 - VCMP_U16(7, v2, 0xb217, 0x3c7c, 0x2da0, 0x374b, 0x3041, 0xb19e, 0xba5e, + VCMP_U16(7, v8, 0xb217, 0x3c7c, 0x2da0, 0x374b, 0x3041, 0xb19e, 0xba5e, 0x3b7d, 0x3977, 0xb865, 0x3aa5, 0x3af3, 0xbaf1, 0xb7ee, 0xb710, 0x39bb); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.16110219 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbe24f7f9); @@ -274,7 +274,7 @@ void TEST_CASE3(void) { // -0.89379781, 0.26157290, 0.56702632, -0.11594663, // 0.09605245, 0.45930776, -0.76518077, -0.26341528, 0.74385208, // 0.89362013, -0.21185355, 0.23924881 - VLOAD_32(v4, 0xbea17872, 0xbe33d878, 0xbf4a6b46, 0x3e6ae602, 0xbf64cfef, + VLOAD_32(v16, 0xbea17872, 0xbe33d878, 0xbf4a6b46, 0x3e6ae602, 0xbf64cfef, 0x3e85ece2, 0x3f1128a3, 0xbded756d, 0x3dc4b726, 0x3eeb2a63, 0xbf43e2e3, 0xbe86de5e, 0x3f3e6d17, 0x3f64c44a, 0xbe58f023, 0x3e74fda4); @@ -282,22 +282,22 @@ void TEST_CASE3(void) { // -0.19719712, 0.85649359, 0.36901370, -0.78377151, // 0.22567192, -0.75179213, -0.65690833, 0.11298654, // -0.64884853, -0.48376039, -0.11539485, -0.42667609 - VLOAD_32(v2, 
0x3ea31a81, 0x3ef90b71, 0x3f1298df, 0x3e9aad57, 0xbe49ee0b, + VLOAD_32(v8, 0x3ea31a81, 0x3ef90b71, 0x3f1298df, 0x3e9aad57, 0xbe49ee0b, 0x3f5b432a, 0x3ebcef5d, 0xbf48a540, 0x3e67168a, 0xbf407573, 0xbf282b25, 0x3de7657d, 0xbf261af0, 0xbef7af71, 0xbdec5422, 0xbeda754a); - asm volatile("vfnmsub.vf v2, %[A], v4" ::[A] "f"(fscalar_32)); + asm volatile("vfnmsub.vf v8, %[A], v16" ::[A] "f"(fscalar_32)); // -0.26405108, -0.09726786, -0.69844496, 0.27806261, // -0.92556667, 0.39955589, 0.62647521, -0.24221393, // 0.13240869, 0.33819240, -0.87101012, -0.24521290, 0.63932115, // 0.81568527, -0.23044391, 0.17051035 - VCMP_U32(8, v2, 0xbe8731b4, 0xbdc7345f, 0xbf32cd4a, 0x3e8e5e39, 0xbf6cf1f1, + VCMP_U32(8, v8, 0xbe8731b4, 0xbdc7345f, 0xbf32cd4a, 0x3e8e5e39, 0xbf6cf1f1, 0x3ecc9297, 0x3f2060ae, 0xbe7806ee, 0x3e079625, 0x3ead278e, 0xbf5efa85, 0xbe7b1917, 0x3f23aa8d, 0x3f50d0c0, 0xbe6bf97d, 0x3e2e9a44); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.5849101968457469 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe2b79596d194ba); @@ -307,7 +307,7 @@ void TEST_CASE3(void) { // -0.5708244737077264, -0.7636024500128011, 0.2236424444447431, // 0.8245435877598175, 0.8527344486412596, -0.3097355632002228, // 0.0764086736442742, 0.2567358761671383, 0.1904958118727702 - VLOAD_64(v4, 0xbfe8585129fe14da, 0xbf93b8ad045d9c40, 0x3fe5cf14ca86c05c, + VLOAD_64(v16, 0xbfe8585129fe14da, 0xbf93b8ad045d9c40, 0x3fe5cf14ca86c05c, 0xbfe0864a527b2a32, 0xbfee980ddf5818b2, 0xbfebb047874a12f4, 0x3fdd5e190b029804, 0xbfe24431afca9858, 0xbfe86f6e67be6a22, 0x3fcca050cc719f20, 0x3fea62a93bf1c1ec, 0x3feb4999c122c714, @@ -320,20 +320,20 @@ void TEST_CASE3(void) { // -0.1509314378482451, -0.9803534868251271, // 0.9211862470421908, 0.7804942879773937, 0.5029472314120484, // 0.1158347026033590, 0.7422982722940397, 0.0792254120441500 - VLOAD_64(v2, 0x3fbe461fd6899df0, 0xbfec054ae66457d4, 0xbfd7981d4d67fd54, + VLOAD_64(v8, 0x3fbe461fd6899df0, 0xbfec054ae66457d4, 0xbfd7981d4d67fd54, 
0x3fd1f045d94dd3b0, 0xbfe0894aae97abc2, 0x3fd8c9027de8d2a8, 0xbfb0676270cf1540, 0xbfdbda75aaa8928c, 0xbfc351b8aabf8c50, 0xbfef5f0e468ddf22, 0x3fed7a5b94924728, 0x3fe8f9cf28327c9e, 0x3fe01824cad0e968, 0x3fbda757d36c5e40, 0x3fe7c0e84e6c2aa0, 0x3fb4481dd9bde340); - asm volatile("vfnmsub.vf v2, %[A], v4" ::[A] "f"(dscalar_64)); + asm volatile("vfnmsub.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); // -0.8299509146929066, 0.4929151432599784, 0.8971610699324351, // -0.6803373936193172, -0.6538035512838903, -1.0917858708828851, // 0.4963478718767876, -0.3162664568083610, -0.6753211129907725, // 0.7970611954020426, 0.2857323586707750, 0.3962153810234212, // -0.6039145273284674, 0.0086557749429749, -0.1774419523986262, // 0.1441560605188410 - VCMP_U64(9, v2, 0xbfea8ef5387c85b1, 0x3fdf8bebf5004e06, 0x3fecb58b21d3556c, + VCMP_U64(9, v8, 0xbfea8ef5387c85b1, 0x3fdf8bebf5004e06, 0x3fecb58b21d3556c, 0xbfe5c552ecfae837, 0xbfe4ebf56cd8bc27, 0xbff177f4761ad476, 0x3fdfc429dd49999d, 0xbfd43db5aa3413c6, 0xbfe59c3b05d2ff7f, 0x3fe981867ae532b2, 0x3fd249705ff9984b, 0x3fd95b97c1eabccb, @@ -344,30 +344,30 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // -0.1346 BOX_HALF_IN_FLOAT(fscalar_16, 0xb04f); // -0.1886, 0.9912, -0.0325, 0.5850, 0.2578, -0.2350, // -0.8701, 0.9209, 0.5859, -0.4795, 0.8682, 0.9233, // -0.8896, -0.5981, -0.3223, 0.6924 - VLOAD_16(v4, 0xb209, 0x3bee, 0xa82a, 0x38ae, 0x3420, 0xb385, 0xbaf6, 0x3b5e, + VLOAD_16(v16, 0xb209, 0x3bee, 0xa82a, 0x38ae, 0x3420, 0xb385, 0xbaf6, 0x3b5e, 0x38b0, 0xb7ac, 0x3af2, 0x3b63, 0xbb1e, 0xb8c9, 0xb528, 0x398a); VLOAD_8(v0, 0xAA, 0xAA); // -0.0126, 0.9678, 0.8945, -0.9600, -0.9272, 0.4412, 0.5527, // 0.1136, 0.7207, -0.5181, -0.2810, -0.4048, 0.1648, 0.7612, // -0.8853, 0.1761 - VLOAD_16(v2, 0xa27a, 0x3bbe, 0x3b28, 0xbbae, 0xbb6b, 0x370f, 0x386c, 0x2f45, + VLOAD_16(v8, 0xa27a, 0x3bbe, 0x3b28, 0xbbae, 0xbb6b, 0x370f, 
0x386c, 0x2f45, 0x39c4, 0xb825, 0xb47f, 0xb67a, 0x3146, 0x3a17, 0xbb15, 0x31a3); - asm volatile("vfnmsub.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); + asm volatile("vfnmsub.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_16)); // -0.0126, 1.1211, 0.8945, 0.4556, -0.9272, -0.1755, 0.5527, // 0.9360, 0.7207, -0.5493, -0.2810, 0.8687, 0.1648, -0.4956, // -0.8853, 0.7163 - VCMP_U16(10, v2, 0xa27a, 0x3c7c, 0x3b28, 0x374b, 0xbb6b, 0xb19e, 0x386c, + VCMP_U16(10, v8, 0xa27a, 0x3c7c, 0x3b28, 0x374b, 0xbb6b, 0xb19e, 0x386c, 0x3b7d, 0x39c4, 0xb865, 0xb47f, 0x3af3, 0x3146, 0xb7ee, 0xbb15, 0x39bb); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.16110219 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbe24f7f9); @@ -375,7 +375,7 @@ void TEST_CASE4(void) { // -0.89379781, 0.26157290, 0.56702632, -0.11594663, // 0.09605245, 0.45930776, -0.76518077, -0.26341528, // 0.74385208, 0.89362013, -0.21185355, 0.23924881 - VLOAD_32(v4, 0xbea17872, 0xbe33d878, 0xbf4a6b46, 0x3e6ae602, 0xbf64cfef, + VLOAD_32(v16, 0xbea17872, 0xbe33d878, 0xbf4a6b46, 0x3e6ae602, 0xbf64cfef, 0x3e85ece2, 0x3f1128a3, 0xbded756d, 0x3dc4b726, 0x3eeb2a63, 0xbf43e2e3, 0xbe86de5e, 0x3f3e6d17, 0x3f64c44a, 0xbe58f023, 0x3e74fda4); @@ -384,22 +384,22 @@ void TEST_CASE4(void) { // -0.19719712, 0.85649359, 0.36901370, -0.78377151, // 0.22567192, -0.75179213, -0.65690833, 0.11298654, // -0.64884853, -0.48376039, -0.11539485, -0.42667609 - VLOAD_32(v2, 0x3ea31a81, 0x3ef90b71, 0x3f1298df, 0x3e9aad57, 0xbe49ee0b, + VLOAD_32(v8, 0x3ea31a81, 0x3ef90b71, 0x3f1298df, 0x3e9aad57, 0xbe49ee0b, 0x3f5b432a, 0x3ebcef5d, 0xbf48a540, 0x3e67168a, 0xbf407573, 0xbf282b25, 0x3de7657d, 0xbf261af0, 0xbef7af71, 0xbdec5422, 0xbeda754a); - asm volatile("vfnmsub.vf v2, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); + asm volatile("vfnmsub.vf v8, %[A], v16, v0.t" ::[A] "f"(fscalar_32)); // 0.31856158, -0.09726786, 0.57264513, 0.27806261, // -0.19719712, 0.39955589, 0.36901370, -0.24221393, // 0.22567192, 0.33819240, -0.65690833, -0.24521290, // -0.64884853, 
0.81568527, -0.11539485, 0.17051035 - VCMP_U32(11, v2, 0x3ea31a81, 0xbdc7345f, 0x3f1298df, 0x3e8e5e39, 0xbe49ee0b, + VCMP_U32(11, v8, 0x3ea31a81, 0xbdc7345f, 0x3f1298df, 0x3e8e5e39, 0xbe49ee0b, 0x3ecc9297, 0x3ebcef5d, 0xbe7806ee, 0x3e67168a, 0x3ead278e, 0xbf282b25, 0xbe7b1917, 0xbf261af0, 0x3f50d0c0, 0xbdec5422, 0x3e2e9a44); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.5849101968457469 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe2b79596d194ba); @@ -411,7 +411,7 @@ void TEST_CASE4(void) { // 0.8245435877598175, 0.8527344486412596, // -0.3097355632002228, 0.0764086736442742, // 0.2567358761671383, 0.1904958118727702 - VLOAD_64(v4, 0xbfe8585129fe14da, 0xbf93b8ad045d9c40, 0x3fe5cf14ca86c05c, + VLOAD_64(v16, 0xbfe8585129fe14da, 0xbf93b8ad045d9c40, 0x3fe5cf14ca86c05c, 0xbfe0864a527b2a32, 0xbfee980ddf5818b2, 0xbfebb047874a12f4, 0x3fdd5e190b029804, 0xbfe24431afca9858, 0xbfe86f6e67be6a22, 0x3fcca050cc719f20, 0x3fea62a93bf1c1ec, 0x3feb4999c122c714, @@ -426,13 +426,13 @@ void TEST_CASE4(void) { // 0.9211862470421908, 0.7804942879773937, // 0.5029472314120484, 0.1158347026033590, // 0.7422982722940397, 0.0792254120441500 - VLOAD_64(v2, 0x3fbe461fd6899df0, 0xbfec054ae66457d4, 0xbfd7981d4d67fd54, + VLOAD_64(v8, 0x3fbe461fd6899df0, 0xbfec054ae66457d4, 0xbfd7981d4d67fd54, 0x3fd1f045d94dd3b0, 0xbfe0894aae97abc2, 0x3fd8c9027de8d2a8, 0xbfb0676270cf1540, 0xbfdbda75aaa8928c, 0xbfc351b8aabf8c50, 0xbfef5f0e468ddf22, 0x3fed7a5b94924728, 0x3fe8f9cf28327c9e, 0x3fe01824cad0e968, 0x3fbda757d36c5e40, 0x3fe7c0e84e6c2aa0, 0x3fb4481dd9bde340); - asm volatile("vfnmsub.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_64)); + asm volatile("vfnmsub.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); // 0.1182575129292827, 0.4929151432599784, // -0.3686593299789440, -0.6803373936193172, // -0.5167592439660142, -1.0917858708828851, @@ -440,7 +440,7 @@ void TEST_CASE4(void) { // -0.1509314378482451, 0.7970611954020426, // 0.9211862470421908, 0.3962153810234212, 0.5029472314120484, // 
0.0086557749429749, 0.7422982722940397, 0.1441560605188410 - VCMP_U64(12, v2, 0x3fbe461fd6899df0, 0x3fdf8bebf5004e06, 0xbfd7981d4d67fd54, + VCMP_U64(12, v8, 0x3fbe461fd6899df0, 0x3fdf8bebf5004e06, 0xbfd7981d4d67fd54, 0xbfe5c552ecfae837, 0xbfe0894aae97abc2, 0xbff177f4761ad476, 0xbfb0676270cf1540, 0xbfd43db5aa3413c6, 0xbfc351b8aabf8c50, 0x3fe981867ae532b2, 0x3fed7a5b94924728, 0x3fd95b97c1eabccb, diff --git a/sw/riscvTests/isa/rv64uv/vfrsub.c b/sw/riscvTests/isa/rv64uv/vfrsub.c index 40e08433..be9ac9ff 100644 --- a/sw/riscvTests/isa/rv64uv/vfrsub.c +++ b/sw/riscvTests/isa/rv64uv/vfrsub.c @@ -10,54 +10,54 @@ // Simple random test with similar values (vector-scalar) void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.0273, -0.8511, 0.7173, 0.9551, -0.7842, -0.6509, -0.5771, // 0.6060, -0.5361, 0.6099, 0.2859, 0.6318, -0.9521, 0.3818, // 0.2783, -0.7905 - VLOAD_16(v4, 0xa700, 0xbacf, 0x39bd, 0x3ba4, 0xba46, 0xb935, 0xb89e, 0x38d9, + VLOAD_16(v16, 0xa700, 0xbacf, 0x39bd, 0x3ba4, 0xba46, 0xb935, 0xb89e, 0x38d9, 0xb84a, 0x38e1, 0x3493, 0x390e, 0xbb9e, 0x361c, 0x3474, 0xba53); float fscalar_16; // 0.3062 BOX_HALF_IN_FLOAT(fscalar_16, 0x34e6); - asm volatile("vfrsub.vf v2, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfrsub.vf v8, v16, %[A]" ::[A] "f"(fscalar_16)); // 0.3335, 1.1572, -0.4111, -0.6489, 1.0898, 0.9570, 0.8833, // -0.2998, 0.8423, -0.3037, 0.0203, -0.3257, 1.2578, // -0.0757, 0.0278, 1.0967 - VCMP_U16(1, v2, 0x3556, 0x3ca1, 0xb694, 0xb931, 0x3c5c, 0x3ba8, 0x3b11, + VCMP_U16(1, v8, 0x3556, 0x3ca1, 0xb694, 0xb931, 0x3c5c, 0x3ba8, 0x3b11, 0xb4cc, 0x3abd, 0xb4dc, 0x2530, 0xb536, 0x3d08, 0xacd8, 0x2720, 0x3c63); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.61218858, 0.50298065, 0.82400811, -0.50508654, // -0.08447543, -0.66344708, -0.94741052, 0.85856712, // -0.16725175, -0.36700448, -0.86911696, 0.82600677, // -0.95377433, 0.06016647, 0.67027277, 0.08167093 - VLOAD_32(v4, 0x3f1cb864, 0x3f00c357, 0x3f52f232, 0xbf014d5a, 0xbdad0174, + 
VLOAD_32(v16, 0x3f1cb864, 0x3f00c357, 0x3f52f232, 0xbf014d5a, 0xbdad0174, 0xbf29d7ab, 0xbf72897f, 0x3f5bcb0e, 0xbe2b440b, 0xbebbe803, 0xbf5e7e73, 0x3f53752e, 0xbf742a8e, 0x3d76711d, 0x3f2b96ff, 0x3da74316); float fscalar_32; // -0.78482366 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbf48ea34); - asm volatile("vfrsub.vf v2, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfrsub.vf v8, v16, %[A]" ::[A] "f"(fscalar_32)); // -1.39701223, -1.28780437, -1.60883176, -0.27973711, // -0.70034826, -0.12137657, 0.16258687, -1.64339077, // -0.61757189, -0.41781917, 0.08429331, -1.61083043, // 0.16895068, -0.84499013, -1.45509648, -0.86649460 - VCMP_U32(2, v2, 0xbfb2d14c, 0xbfa4d6c6, 0xbfcdee33, 0xbe8f39b4, 0xbf334a06, + VCMP_U32(2, v8, 0xbfb2d14c, 0xbfa4d6c6, 0xbfcdee33, 0xbe8f39b4, 0xbf334a06, 0xbdf89448, 0x3e267d2c, 0xbfd25aa1, 0xbf1e1931, 0xbed5ec65, 0x3daca1f8, 0xbfce2fb1, 0x3e2d0168, 0xbf585146, 0xbfba409a, 0xbf5dd297); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.0920900511004143, 0.2386858516984947, 0.7068975504949517, // 0.5997172971219242, 0.7714780386644180, -0.1053493184316212, // 0.8711121216121871, -0.7388672665065719, 0.0889924652556937, // 0.3266446452514173, -0.5909707717470494, -0.2733520923877579, // 0.2365505631181986, 0.9616545156279142, -0.9315790291358075, // -0.8056559777055108 - VLOAD_64(v4, 0xbfb79336adc36440, 0x3fce8d420b880e70, 0x3fe69ee79c9ff24a, + VLOAD_64(v16, 0xbfb79336adc36440, 0x3fce8d420b880e70, 0x3fe69ee79c9ff24a, 0x3fe330e2543f7e66, 0x3fe8aff2b634ab34, 0xbfbaf82c4551d810, 0x3febe026872f2710, 0xbfe7a4ccf737616c, 0x3fb6c835cfdd1640, 0x3fd4e7bef1312ccc, 0xbfe2e93b89317464, 0xbfd17e99c6464f50, @@ -66,14 +66,14 @@ void TEST_CASE1(void) { double dscalar_64; // -0.4500891854782252 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfdcce42defa6264); - asm volatile("vfrsub.vf v2, v4, %[A]" ::[A] "f"(dscalar_64)); + asm volatile("vfrsub.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); // -0.3579991343778108, -0.6887750371767198, -1.1569867359731769, // 
-1.0498064826001494, -1.2215672241426432, -0.3447398670466040, // -1.3212013070904123, 0.2887780810283467, -0.5390816507339189, // -0.7767338307296425, 0.1408815862688242, -0.1767370930904673, // -0.6866397485964237, -1.4117437011061393, 0.4814898436575823, // 0.3555667922272856 - VCMP_U64(3, v2, 0xbfd6e97533898954, 0xbfe60a71f25f34ce, 0xbff28304860e91be, + VCMP_U64(3, v8, 0xbfd6e97533898954, 0xbfe60a71f25f34ce, 0xbff28304860e91be, 0xbff0cc01e1de57cc, 0xbff38b8a12d8ee33, 0xbfd61037cda5ec60, 0xbff523a3fb562c21, 0x3fd27b570f746074, 0xbfe140282978d3fa, 0xbfe8db00e815c798, 0x3fc2086866d10cc8, 0xbfc69f5231682628, @@ -84,30 +84,30 @@ void TEST_CASE1(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.0273, -0.8511, 0.7173, 0.9551, -0.7842, -0.6509, // -0.5771, 0.6060, -0.5361, 0.6099, 0.2859, 0.6318, // -0.9521, 0.3818, 0.2783, -0.7905 - VLOAD_16(v4, 0xa700, 0xbacf, 0x39bd, 0x3ba4, 0xba46, 0xb935, 0xb89e, 0x38d9, + VLOAD_16(v16, 0xa700, 0xbacf, 0x39bd, 0x3ba4, 0xba46, 0xb935, 0xb89e, 0x38d9, 0xb84a, 0x38e1, 0x3493, 0x390e, 0xbb9e, 0x361c, 0x3474, 0xba53); float fscalar_16; // 0.3062 BOX_HALF_IN_FLOAT(fscalar_16, 0x34e6); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfrsub.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v8); + asm volatile("vfrsub.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.0000, 1.1572, 0.0000, -0.6489, 0.0000, 0.9570, 0.0000, // -0.2998, 0.0000, -0.3037, 0.0000, -0.3257, 0.0000, // -0.0757, 0.0000, 1.0967 - VCMP_U16(4, v2, 0x0, 0x3ca1, 0x0, 0xb931, 0x0, 0x3ba8, 0x0, 0xb4cc, 0x0, + VCMP_U16(4, v8, 0x0, 0x3ca1, 0x0, 0xb931, 0x0, 0x3ba8, 0x0, 0xb4cc, 0x0, 0xb4dc, 0x0, 0xb536, 0x0, 0xacd8, 0x0, 0x3c63); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.61218858, 0.50298065, 0.82400811, -0.50508654, // -0.08447543, -0.66344708, -0.94741052, 0.85856712, // -0.16725175, -0.36700448, -0.86911696, 0.82600677, // -0.95377433, 0.06016647, 
0.67027277, 0.08167093 - VLOAD_32(v4, 0x3f1cb864, 0x3f00c357, 0x3f52f232, 0xbf014d5a, 0xbdad0174, + VLOAD_32(v16, 0x3f1cb864, 0x3f00c357, 0x3f52f232, 0xbf014d5a, 0xbdad0174, 0xbf29d7ab, 0xbf72897f, 0x3f5bcb0e, 0xbe2b440b, 0xbebbe803, 0xbf5e7e73, 0x3f53752e, 0xbf742a8e, 0x3d76711d, 0x3f2b96ff, 0x3da74316); @@ -115,18 +115,18 @@ void TEST_CASE2(void) { // -0.78482366 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbf48ea34); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfrsub.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v8); + asm volatile("vfrsub.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.00000000, -1.28780437, 0.00000000, -0.27973711, // 0.00000000, -0.12137657, 0.00000000, -1.64339077, // 0.00000000, -0.41781917, 0.00000000, -1.61083043, // 0.00000000, -0.84499013, 0.00000000, -0.86649460 - VCMP_U32(5, v2, 0x0, 0xbfa4d6c6, 0x0, 0xbe8f39b4, 0x0, 0xbdf89448, 0x0, + VCMP_U32(5, v8, 0x0, 0xbfa4d6c6, 0x0, 0xbe8f39b4, 0x0, 0xbdf89448, 0x0, 0xbfd25aa1, 0x0, 0xbed5ec65, 0x0, 0xbfce2fb1, 0x0, 0xbf585146, 0x0, 0xbf5dd297); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.0920900511004143, 0.2386858516984947, // 0.7068975504949517, 0.5997172971219242, 0.7714780386644180, // -0.1053493184316212, 0.8711121216121871, @@ -134,7 +134,7 @@ void TEST_CASE2(void) { // 0.3266446452514173, -0.5909707717470494, // -0.2733520923877579, 0.2365505631181986, // 0.9616545156279142, -0.9315790291358075, -0.8056559777055108 - VLOAD_64(v4, 0xbfb79336adc36440, 0x3fce8d420b880e70, 0x3fe69ee79c9ff24a, + VLOAD_64(v16, 0xbfb79336adc36440, 0x3fce8d420b880e70, 0x3fe69ee79c9ff24a, 0x3fe330e2543f7e66, 0x3fe8aff2b634ab34, 0xbfbaf82c4551d810, 0x3febe026872f2710, 0xbfe7a4ccf737616c, 0x3fb6c835cfdd1640, 0x3fd4e7bef1312ccc, 0xbfe2e93b89317464, 0xbfd17e99c6464f50, @@ -144,8 +144,8 @@ void TEST_CASE2(void) { // -0.4500891854782252 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfdcce42defa6264); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfrsub.vf v2, v4, %[A], v0.t" ::[A] 
"f"(dscalar_64)); + VCLEAR(v8); + asm volatile("vfrsub.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); // 0.0000000000000000, -0.6887750371767198, 0.0000000000000000, // -1.0498064826001494, 0.0000000000000000, // -0.3447398670466040, 0.0000000000000000, @@ -153,7 +153,7 @@ void TEST_CASE2(void) { // -0.7767338307296425, 0.0000000000000000, // -0.1767370930904673, 0.0000000000000000, // -1.4117437011061393, 0.0000000000000000, 0.3555667922272856 - VCMP_U64(6, v2, 0x0, 0xbfe60a71f25f34ce, 0x0, 0xbff0cc01e1de57cc, 0x0, + VCMP_U64(6, v8, 0x0, 0xbfe60a71f25f34ce, 0x0, 0xbff0cc01e1de57cc, 0x0, 0xbfd61037cda5ec60, 0x0, 0x3fd27b570f746074, 0x0, 0xbfe8db00e815c798, 0x0, 0xbfc69f5231682628, 0x0, 0xbff6968090295b9e, 0x0, 0x3fd6c19b380a3a78); diff --git a/sw/riscvTests/isa/rv64uv/vfsgnj.c b/sw/riscvTests/isa/rv64uv/vfsgnj.c index f84a2364..c9c02bec 100644 --- a/sw/riscvTests/isa/rv64uv/vfsgnj.c +++ b/sw/riscvTests/isa/rv64uv/vfsgnj.c @@ -10,31 +10,31 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.3784, 0.9043, -0.4600, -0.6748, 0.4448, 0.8804, 0.1497, // 0.7285, 0.9927, 0.9922, 0.8965, 0.8672, -0.1860, 0.9336, // -0.2959, 0.9668 - VLOAD_16(v4, 0x360e, 0x3b3c, 0xb75c, 0xb966, 0x371e, 0x3b0b, 0x30ca, 0x39d4, + VLOAD_16(v16, 0x360e, 0x3b3c, 0xb75c, 0xb966, 0x371e, 0x3b0b, 0x30ca, 0x39d4, 0x3bf1, 0x3bf0, 0x3b2c, 0x3af0, 0xb1f4, 0x3b78, 0xb4bc, 0x3bbc); // -0.7988, -0.5054, -0.9380, -0.7383, -0.7168, 0.2181, -0.1597, // 0.1833, 0.0045, -0.2152, 0.1919, -0.6914, 0.1748, -0.8604, // 0.6084, 0.1591 - VLOAD_16(v6, 0xba64, 0xb80b, 0xbb81, 0xb9e8, 0xb9bc, 0x32fb, 0xb11c, 0x31de, + VLOAD_16(v24, 0xba64, 0xb80b, 0xbb81, 0xb9e8, 0xb9bc, 0x32fb, 0xb11c, 0x31de, 0x1c8f, 0xb2e3, 0x3224, 0xb988, 0x3198, 0xbae2, 0x38de, 0x3117); - asm volatile("vfsgnj.vv v2, v4, v6"); + asm volatile("vfsgnj.vv v8, v16, v24"); // -0.3784, -0.9043, -0.4600, -0.6748, -0.4448, 0.8804, -0.1497, // 0.7285, 0.9927, -0.9922, 0.8965, -0.8672, 0.1860, 
-0.9336, // 0.2959, 0.9668 - VCMP_U16(1, v2, 0xb60e, 0xbb3c, 0xb75c, 0xb966, 0xb71e, 0x3b0b, 0xb0ca, + VCMP_U16(1, v8, 0xb60e, 0xbb3c, 0xb75c, 0xb966, 0xb71e, 0x3b0b, 0xb0ca, 0x39d4, 0x3bf1, 0xbbf0, 0x3b2c, 0xbaf0, 0x31f4, 0xbb78, 0x34bc, 0x3bbc); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.30226409, 0.06318295, -0.82590002, -0.17829193, // 0.45379546, 0.85831785, -0.43186289, -0.32250872, // 0.35404092, -0.55081791, 0.09124859, -0.13254598, // 0.95786512, 0.95395225, 0.19890578, 0.76956910 - VLOAD_32(v4, 0x3e9ac25c, 0x3d816610, 0xbf536e2f, 0xbe369229, 0x3ee857e1, + VLOAD_32(v16, 0x3e9ac25c, 0x3d816610, 0xbf536e2f, 0xbe369229, 0x3ee857e1, 0x3f5bbab8, 0xbedd1d22, 0xbea51fdd, 0x3eb544da, 0xbf0d0267, 0x3dbae08b, 0xbe07ba22, 0x3f7536a6, 0x3f743637, 0x3e4badf5, 0x3f45027b); @@ -42,29 +42,29 @@ void TEST_CASE1(void) { // -0.49171701, 0.32139263, -0.09995110, -0.34368968, // 0.33917251, 0.07372360, 0.70147520, 0.82915747, // -0.14581841, -0.19974701, -0.58837658, 0.95794803 - VLOAD_32(v6, 0x3d865981, 0x3ea2d8ad, 0x3e1626ca, 0xbf599c8c, 0xbefbc255, + VLOAD_32(v24, 0x3d865981, 0x3ea2d8ad, 0x3e1626ca, 0xbf599c8c, 0xbefbc255, 0x3ea48d93, 0xbdccb329, 0xbeaff818, 0x3eada805, 0x3d96fc66, 0x3f3393e1, 0x3f5443aa, 0xbe15516c, 0xbe4c8a7b, 0xbf169fd9, 0x3f753c15); - asm volatile("vfsgnj.vv v2, v4, v6"); + asm volatile("vfsgnj.vv v8, v16, v24"); // 0.30226409, 0.06318295, 0.82590002, -0.17829193, // -0.45379546, 0.85831785, -0.43186289, -0.32250872, // 0.35404092, 0.55081791, 0.09124859, 0.13254598, // -0.95786512, -0.95395225, -0.19890578, 0.76956910 - VCMP_U32(2, v2, 0x3e9ac25c, 0x3d816610, 0x3f536e2f, 0xbe369229, 0xbee857e1, + VCMP_U32(2, v8, 0x3e9ac25c, 0x3d816610, 0x3f536e2f, 0xbe369229, 0xbee857e1, 0x3f5bbab8, 0xbedd1d22, 0xbea51fdd, 0x3eb544da, 0x3f0d0267, 0x3dbae08b, 0x3e07ba22, 0xbf7536a6, 0xbf743637, 0xbe4badf5, 0x3f45027b); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.1900636538602862, -0.9484843154859770, 0.5869658512198073, // 0.4707187701595239, 
0.1954104859873083, 0.0486819373954939, // -0.1899986048192088, -0.1837438621239862, 0.2694105234528963, // -0.7960262036276018, 0.6381040017115214, 0.2199215324293253, // 0.4219965521278597, -0.6541697303087526, 0.7254411745966671, // 0.2439726910863504 - VLOAD_64(v4, 0xbfc854017cbe7d20, 0xbfee59fbc778ffbc, 0x3fe2c86c9bdb73b4, + VLOAD_64(v16, 0xbfc854017cbe7d20, 0xbfee59fbc778ffbc, 0x3fe2c86c9bdb73b4, 0x3fde20419edcb428, 0x3fc90335f74e33c8, 0x3fa8ecd6c20a0480, 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0x3fd13e05a2db6b68, 0xbfe9790bf1eadde4, 0x3fe46b59155986dc, 0x3fcc266386bc2e10, @@ -77,13 +77,13 @@ void TEST_CASE1(void) { // -0.0650991402083496, -0.1766522935757786, // -0.4663829943595241, -0.1565231028144627, // -0.0629224333525875, -0.9086692399439535, -0.1206057821437510 - VLOAD_64(v6, 0x3fb24441ce2eff50, 0x3fd66612c8fd8664, 0xbfe187df69e0bb9c, + VLOAD_64(v24, 0x3fb24441ce2eff50, 0x3fd66612c8fd8664, 0xbfe187df69e0bb9c, 0xbfe76fd3c4a3b1e8, 0x3fe211bf78be2e36, 0xbfe7c6f7c1644c86, 0xbfe229ce53357d20, 0xbfe79fcc34ac1d30, 0xbfe962e04d917824, 0xbfb0aa5656314cf0, 0xbfc69c8ad7d5ef20, 0xbfddd9380f0bd244, 0xbfc408f2f3d40a40, 0xbfb01baf416f2160, 0xbfed13d1838e183a, 0xbfbee005420412c0); - asm volatile("vfsgnj.vv v2, v4, v6"); + asm volatile("vfsgnj.vv v8, v16, v24"); // 0.1900636538602862, 0.9484843154859770, -0.5869658512198073, // -0.4707187701595239, 0.1954104859873083, // -0.0486819373954939, -0.1899986048192088, @@ -91,7 +91,7 @@ void TEST_CASE1(void) { // -0.7960262036276018, -0.6381040017115214, // -0.2199215324293253, -0.4219965521278597, // -0.6541697303087526, -0.7254411745966671, -0.2439726910863504 - VCMP_U64(3, v2, 0x3fc854017cbe7d20, 0x3fee59fbc778ffbc, 0xbfe2c86c9bdb73b4, + VCMP_U64(3, v8, 0x3fc854017cbe7d20, 0x3fee59fbc778ffbc, 0xbfe2c86c9bdb73b4, 0xbfde20419edcb428, 0x3fc90335f74e33c8, 0xbfa8ecd6c20a0480, 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0xbfd13e05a2db6b68, 0xbfe9790bf1eadde4, 0xbfe46b59155986dc, 0xbfcc266386bc2e10, @@ -103,32 +103,32 @@ void 
TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.3784, 0.9043, -0.4600, -0.6748, 0.4448, 0.8804, 0.1497, // 0.7285, 0.9927, 0.9922, 0.8965, 0.8672, -0.1860, 0.9336, // -0.2959, 0.9668 - VLOAD_16(v4, 0x360e, 0x3b3c, 0xb75c, 0xb966, 0x371e, 0x3b0b, 0x30ca, 0x39d4, + VLOAD_16(v16, 0x360e, 0x3b3c, 0xb75c, 0xb966, 0x371e, 0x3b0b, 0x30ca, 0x39d4, 0x3bf1, 0x3bf0, 0x3b2c, 0x3af0, 0xb1f4, 0x3b78, 0xb4bc, 0x3bbc); // -0.7988, -0.5054, -0.9380, -0.7383, -0.7168, 0.2181, -0.1597, // 0.1833, 0.0045, -0.2152, 0.1919, -0.6914, 0.1748, -0.8604, // 0.6084, 0.1591 - VLOAD_16(v6, 0xba64, 0xb80b, 0xbb81, 0xb9e8, 0xb9bc, 0x32fb, 0xb11c, 0x31de, + VLOAD_16(v24, 0xba64, 0xb80b, 0xbb81, 0xb9e8, 0xb9bc, 0x32fb, 0xb11c, 0x31de, 0x1c8f, 0xb2e3, 0x3224, 0xb988, 0x3198, 0xbae2, 0x38de, 0x3117); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnj.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfsgnj.vv v8, v16, v24, v0.t"); // 0.0000, -0.9043, 0.0000, -0.6748, 0.0000, 0.8804, 0.0000, // 0.7285, 0.0000, -0.9922, 0.0000, -0.8672, 0.0000, -0.9336, // 0.0000, 0.9668 - VCMP_U16(4, v2, 0x0, 0xbb3c, 0x0, 0xb966, 0x0, 0x3b0b, 0x0, 0x39d4, 0x0, + VCMP_U16(4, v8, 0x0, 0xbb3c, 0x0, 0xb966, 0x0, 0x3b0b, 0x0, 0x39d4, 0x0, 0xbbf0, 0x0, 0xbaf0, 0x0, 0xbb78, 0x0, 0x3bbc); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.30226409, 0.06318295, -0.82590002, -0.17829193, // 0.45379546, 0.85831785, -0.43186289, -0.32250872, // 0.35404092, -0.55081791, 0.09124859, -0.13254598, // 0.95786512, 0.95395225, 0.19890578, 0.76956910 - VLOAD_32(v4, 0x3e9ac25c, 0x3d816610, 0xbf536e2f, 0xbe369229, 0x3ee857e1, + VLOAD_32(v16, 0x3e9ac25c, 0x3d816610, 0xbf536e2f, 0xbe369229, 0x3ee857e1, 0x3f5bbab8, 0xbedd1d22, 0xbea51fdd, 0x3eb544da, 0xbf0d0267, 0x3dbae08b, 0xbe07ba22, 0x3f7536a6, 0x3f743637, 0x3e4badf5, 0x3f45027b); @@ -136,30 +136,30 @@ void TEST_CASE2(void) { // -0.49171701, 
0.32139263, -0.09995110, -0.34368968, // 0.33917251, 0.07372360, 0.70147520, 0.82915747, // -0.14581841, -0.19974701, -0.58837658, 0.95794803 - VLOAD_32(v6, 0x3d865981, 0x3ea2d8ad, 0x3e1626ca, 0xbf599c8c, 0xbefbc255, + VLOAD_32(v24, 0x3d865981, 0x3ea2d8ad, 0x3e1626ca, 0xbf599c8c, 0xbefbc255, 0x3ea48d93, 0xbdccb329, 0xbeaff818, 0x3eada805, 0x3d96fc66, 0x3f3393e1, 0x3f5443aa, 0xbe15516c, 0xbe4c8a7b, 0xbf169fd9, 0x3f753c15); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnj.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfsgnj.vv v8, v16, v24, v0.t"); // 0.00000000, 0.06318295, 0.00000000, -0.17829193, // 0.00000000, 0.85831785, 0.00000000, -0.32250872, // 0.00000000, 0.55081791, 0.00000000, 0.13254598, // 0.00000000, -0.95395225, 0.00000000, 0.76956910 - VCMP_U32(5, v2, 0x0, 0x3d816610, 0x0, 0xbe369229, 0x0, 0x3f5bbab8, 0x0, + VCMP_U32(5, v8, 0x0, 0x3d816610, 0x0, 0xbe369229, 0x0, 0x3f5bbab8, 0x0, 0xbea51fdd, 0x0, 0x3f0d0267, 0x0, 0x3e07ba22, 0x0, 0xbf743637, 0x0, 0x3f45027b); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.1900636538602862, -0.9484843154859770, 0.5869658512198073, // 0.4707187701595239, 0.1954104859873083, 0.0486819373954939, // -0.1899986048192088, -0.1837438621239862, 0.2694105234528963, // -0.7960262036276018, 0.6381040017115214, 0.2199215324293253, // 0.4219965521278597, -0.6541697303087526, 0.7254411745966671, // 0.2439726910863504 - VLOAD_64(v4, 0xbfc854017cbe7d20, 0xbfee59fbc778ffbc, 0x3fe2c86c9bdb73b4, + VLOAD_64(v16, 0xbfc854017cbe7d20, 0xbfee59fbc778ffbc, 0x3fe2c86c9bdb73b4, 0x3fde20419edcb428, 0x3fc90335f74e33c8, 0x3fa8ecd6c20a0480, 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0x3fd13e05a2db6b68, 0xbfe9790bf1eadde4, 0x3fe46b59155986dc, 0x3fcc266386bc2e10, @@ -172,15 +172,15 @@ void TEST_CASE2(void) { // -0.0650991402083496, -0.1766522935757786, // -0.4663829943595241, -0.1565231028144627, // -0.0629224333525875, -0.9086692399439535, -0.1206057821437510 - VLOAD_64(v6, 0x3fb24441ce2eff50, 0x3fd66612c8fd8664, 
0xbfe187df69e0bb9c, + VLOAD_64(v24, 0x3fb24441ce2eff50, 0x3fd66612c8fd8664, 0xbfe187df69e0bb9c, 0xbfe76fd3c4a3b1e8, 0x3fe211bf78be2e36, 0xbfe7c6f7c1644c86, 0xbfe229ce53357d20, 0xbfe79fcc34ac1d30, 0xbfe962e04d917824, 0xbfb0aa5656314cf0, 0xbfc69c8ad7d5ef20, 0xbfddd9380f0bd244, 0xbfc408f2f3d40a40, 0xbfb01baf416f2160, 0xbfed13d1838e183a, 0xbfbee005420412c0); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnj.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfsgnj.vv v8, v16, v24, v0.t"); // 0.0000000000000000, 0.9484843154859770, 0.0000000000000000, // -0.4707187701595239, 0.0000000000000000, // -0.0486819373954939, 0.0000000000000000, @@ -188,7 +188,7 @@ void TEST_CASE2(void) { // -0.7960262036276018, 0.0000000000000000, // -0.2199215324293253, 0.0000000000000000, // -0.6541697303087526, 0.0000000000000000, -0.2439726910863504 - VCMP_U64(6, v2, 0x0, 0x3fee59fbc778ffbc, 0x0, 0xbfde20419edcb428, 0x0, + VCMP_U64(6, v8, 0x0, 0x3fee59fbc778ffbc, 0x0, 0xbfde20419edcb428, 0x0, 0xbfa8ecd6c20a0480, 0x0, 0xbfc784eb3b54e580, 0x0, 0xbfe9790bf1eadde4, 0x0, 0xbfcc266386bc2e10, 0x0, 0xbfe4eef55bb6b208, 0x0, 0xbfcf3a7f44aa9f48); @@ -197,24 +197,24 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.9023 BOX_HALF_IN_FLOAT(fscalar_16, 0x3b38); // 0.5586, 0.0221, 0.7397, 0.9844, -0.1426, 0.6958, 0.0319, // 0.3943, -0.5425, 0.9814, 0.7852, -0.7271, -0.1810, -0.7485, // -0.3499, -0.2178 - VLOAD_16(v4, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0xb090, 0x3991, 0x2816, 0x364f, + VLOAD_16(v16, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0xb090, 0x3991, 0x2816, 0x364f, 0xb857, 0x3bda, 0x3a48, 0xb9d1, 0xb1cb, 0xb9fd, 0xb599, 0xb2f8); - asm volatile("vfsgnj.vf v2, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfsgnj.vf v8, v16, %[A]" ::[A] "f"(fscalar_16)); // 0.5586, 0.0221, 0.7397, 0.9844, 0.1426, 0.6958, 0.0319, // 0.3943, 0.5425, 0.9814, 0.7852, 0.7271, 0.1810, 0.7485, // 
0.3499, 0.2178 - VCMP_U16(7, v2, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0x3090, 0x3991, 0x2816, + VCMP_U16(7, v8, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0x3090, 0x3991, 0x2816, 0x364f, 0x3857, 0x3bda, 0x3a48, 0x39d1, 0x31cb, 0x39fd, 0x3599, 0x32f8); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // 0.64529878 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x3f25324d); @@ -222,22 +222,22 @@ void TEST_CASE3(void) { // -0.02381280, -0.27677080, -0.58998328, 0.15329099, // 0.52908343, -0.63265759, 0.48432603, 0.70191479, // -0.55785930, 0.34719029, -0.06872076, -0.69960916 - VLOAD_32(v4, 0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0xbe8e15a7, 0xbcc31310, + VLOAD_32(v16, 0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0xbe8e15a7, 0xbcc31310, 0xbe8db4e7, 0xbf170925, 0x3e1cf850, 0x3f077203, 0xbf21f5d9, 0x3ef7f995, 0x3f33b0b0, 0xbf0ecfde, 0x3eb1c2ed, 0xbd8cbd78, 0xbf331996); - asm volatile("vfsgnj.vf v2, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfsgnj.vf v8, v16, %[A]" ::[A] "f"(fscalar_32)); // 0.27794743, 0.64720273, 0.88201439, 0.27750894, // 0.02381280, 0.27677080, 0.58998328, 0.15329099, // 0.52908343, 0.63265759, 0.48432603, 0.70191479, // 0.55785930, 0.34719029, 0.06872076, 0.69960916 - VCMP_U32(8, v2, 0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0x3e8e15a7, 0x3cc31310, + VCMP_U32(8, v8, 0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0x3e8e15a7, 0x3cc31310, 0x3e8db4e7, 0x3f170925, 0x3e1cf850, 0x3f077203, 0x3f21f5d9, 0x3ef7f995, 0x3f33b0b0, 0x3f0ecfde, 0x3eb1c2ed, 0x3d8cbd78, 0x3f331996); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.4863995754678485 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fdf212baf5b0d68); @@ -248,20 +248,20 @@ void TEST_CASE3(void) { // -0.3345487709580650, 0.1693366988903542, // 0.4095982059989967, 0.7157757577569959, // -0.5339346851091937, 0.4946553559543683 - VLOAD_64(v4, 0x3fdd4bce893c3600, 0xbfb9577828444dc0, 0xbfe14ce6b790591e, + VLOAD_64(v16, 0x3fdd4bce893c3600, 0xbfb9577828444dc0, 0xbfe14ce6b790591e, 0xbfe611ba2bf06f2a, 0x3fecf85fb3ebc33c, 
0xbfe860673bd8363e, 0x3fddc16ed6b90158, 0x3fe6227560ee74e0, 0x3fcc3b744f738cd0, 0x3fbf2984b325f230, 0xbfd5693f3f8ba3fc, 0x3fc5acd32fdf92e8, 0x3fda36db64d10584, 0x3fe6e7a28fdabfd2, 0xbfe115fe3157cf38, 0x3fdfa86ef0276044); - asm volatile("vfsgnj.vf v2, v4, %[A]" ::[A] "f"(dscalar_64)); + asm volatile("vfsgnj.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); // 0.4577518787562838, 0.0989909265811582, 0.5406373582107198, // 0.6896639688670565, 0.9053190721589099, 0.7617679756965072, // 0.4649312111760273, 0.6917063611214438, 0.2205644023843889, // 0.1217272698758698, 0.3345487709580650, 0.1693366988903542, // 0.4095982059989967, 0.7157757577569959, 0.5339346851091937, // 0.4946553559543683 - VCMP_U64(9, v2, 0x3fdd4bce893c3600, 0x3fb9577828444dc0, 0x3fe14ce6b790591e, + VCMP_U64(9, v8, 0x3fdd4bce893c3600, 0x3fb9577828444dc0, 0x3fe14ce6b790591e, 0x3fe611ba2bf06f2a, 0x3fecf85fb3ebc33c, 0x3fe860673bd8363e, 0x3fddc16ed6b90158, 0x3fe6227560ee74e0, 0x3fcc3b744f738cd0, 0x3fbf2984b325f230, 0x3fd5693f3f8ba3fc, 0x3fc5acd32fdf92e8, @@ -272,25 +272,25 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // 0.9023 BOX_HALF_IN_FLOAT(fscalar_16, 0x3b38); // 0.5586, 0.0221, 0.7397, 0.9844, -0.1426, 0.6958, 0.0319, // 0.3943, -0.5425, 0.9814, 0.7852, -0.7271, -0.1810, // -0.7485, -0.3499, -0.2178 - VLOAD_16(v4, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0xb090, 0x3991, 0x2816, 0x364f, + VLOAD_16(v16, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0xb090, 0x3991, 0x2816, 0x364f, 0xb857, 0x3bda, 0x3a48, 0xb9d1, 0xb1cb, 0xb9fd, 0xb599, 0xb2f8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnj.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v8); + asm volatile("vfsgnj.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.0000, 0.0221, 0.0000, 0.9844, 0.0000, 0.6958, 0.0000, // 0.3943, 0.0000, 0.9814, 0.0000, 0.7271, 0.0000, 0.7485, // 0.0000, 0.2178 - VCMP_U16(10, v2, 0x0, 0x25a7, 
0x0, 0x3be0, 0x0, 0x3991, 0x0, 0x364f, 0x0, + VCMP_U16(10, v8, 0x0, 0x25a7, 0x0, 0x3be0, 0x0, 0x3991, 0x0, 0x364f, 0x0, 0x3bda, 0x0, 0x39d1, 0x0, 0x39fd, 0x0, 0x32f8); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // 0.64529878 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x3f25324d); @@ -298,23 +298,23 @@ void TEST_CASE4(void) { // -0.02381280, -0.27677080, -0.58998328, 0.15329099, // 0.52908343, -0.63265759, 0.48432603, 0.70191479, // -0.55785930, 0.34719029, -0.06872076, -0.69960916 - VLOAD_32(v4, 0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0xbe8e15a7, 0xbcc31310, + VLOAD_32(v16, 0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0xbe8e15a7, 0xbcc31310, 0xbe8db4e7, 0xbf170925, 0x3e1cf850, 0x3f077203, 0xbf21f5d9, 0x3ef7f995, 0x3f33b0b0, 0xbf0ecfde, 0x3eb1c2ed, 0xbd8cbd78, 0xbf331996); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnj.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v8); + asm volatile("vfsgnj.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.00000000, 0.64720273, 0.00000000, 0.27750894, // 0.00000000, 0.27677080, 0.00000000, 0.15329099, // 0.00000000, 0.63265759, 0.00000000, 0.70191479, // 0.00000000, 0.34719029, 0.00000000, 0.69960916 - VCMP_U32(11, v2, 0x0, 0x3f25af14, 0x0, 0x3e8e15a7, 0x0, 0x3e8db4e7, 0x0, + VCMP_U32(11, v8, 0x0, 0x3f25af14, 0x0, 0x3e8e15a7, 0x0, 0x3e8db4e7, 0x0, 0x3e1cf850, 0x0, 0x3f21f5d9, 0x0, 0x3f33b0b0, 0x0, 0x3eb1c2ed, 0x0, 0x3f331996); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.4863995754678485 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fdf212baf5b0d68); @@ -326,22 +326,22 @@ void TEST_CASE4(void) { // -0.3345487709580650, 0.1693366988903542, // 0.4095982059989967, 0.7157757577569959, // -0.5339346851091937, 0.4946553559543683 - VLOAD_64(v4, 0x3fdd4bce893c3600, 0xbfb9577828444dc0, 0xbfe14ce6b790591e, + VLOAD_64(v16, 0x3fdd4bce893c3600, 0xbfb9577828444dc0, 0xbfe14ce6b790591e, 0xbfe611ba2bf06f2a, 0x3fecf85fb3ebc33c, 0xbfe860673bd8363e, 0x3fddc16ed6b90158, 0x3fe6227560ee74e0, 
0x3fcc3b744f738cd0, 0x3fbf2984b325f230, 0xbfd5693f3f8ba3fc, 0x3fc5acd32fdf92e8, 0x3fda36db64d10584, 0x3fe6e7a28fdabfd2, 0xbfe115fe3157cf38, 0x3fdfa86ef0276044); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnj.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCLEAR(v8); + asm volatile("vfsgnj.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); // 0.0000000000000000, 0.0989909265811582, 0.0000000000000000, // 0.6896639688670565, 0.0000000000000000, 0.7617679756965072, // 0.0000000000000000, 0.6917063611214438, 0.0000000000000000, // 0.1217272698758698, 0.0000000000000000, 0.1693366988903542, // 0.0000000000000000, 0.7157757577569959, 0.0000000000000000, // 0.4946553559543683 - VCMP_U64(12, v2, 0x0, 0x3fb9577828444dc0, 0x0, 0x3fe611ba2bf06f2a, 0x0, + VCMP_U64(12, v8, 0x0, 0x3fb9577828444dc0, 0x0, 0x3fe611ba2bf06f2a, 0x0, 0x3fe860673bd8363e, 0x0, 0x3fe6227560ee74e0, 0x0, 0x3fbf2984b325f230, 0x0, 0x3fc5acd32fdf92e8, 0x0, 0x3fe6e7a28fdabfd2, 0x0, 0x3fdfa86ef0276044); @@ -352,48 +352,48 @@ void TEST_CASE4(void) { // raise any exceptions void TEST_CASE5(void) { CLEAR_FFLAGS; - VSET(16, e16, m2); + VSET(16, e16, m8); CHECK_FFLAGS(0); - VLOAD_16(v4, 0x0000, 0x3b3c, 0xb75c, 0x7fff, 0x371e, 0x3b0b, 0x30ca, 0x39d4, + VLOAD_16(v16, 0x0000, 0x3b3c, 0xb75c, 0x7fff, 0x371e, 0x3b0b, 0x30ca, 0x39d4, 0x3bf1, 0x3bf0, 0x0000, 0x3af0, 0xb1f4, 0x3b78, 0xb4bc, 0x3bbc); - VLOAD_16(v6, 0x8000, 0xffff, 0xffff, 0xb9e8, 0xb9bc, 0x7fff, 0xb11c, 0x31de, + VLOAD_16(v24, 0x8000, 0xffff, 0xffff, 0xb9e8, 0xb9bc, 0x7fff, 0xb11c, 0x31de, 0x1c8f, 0xb2e3, 0x7fff, 0xb988, 0x3198, 0xbae2, 0x38de, 0x3117); - asm volatile("vfsgnj.vv v2, v4, v6"); - VCMP_U16(13, v2, 0x8000, 0xbb3c, 0xb75c, 0xffff, 0xb71e, 0x3b0b, 0xb0ca, + asm volatile("vfsgnj.vv v8, v16, v24"); + VCMP_U16(13, v8, 0x8000, 0xbb3c, 0xb75c, 0xffff, 0xb71e, 0x3b0b, 0xb0ca, 0x39d4, 0x3bf1, 0xbbf0, 0x0000, 0xbaf0, 0x31f4, 0xbb78, 0x34bc, 0x3bbc); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000000, 0x3d816610, 0xbf536e2f, 0xbe369229, 
0x3ee857e1, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000000, 0x3d816610, 0xbf536e2f, 0xbe369229, 0x3ee857e1, 0x7fffffff, 0x80000000, 0xbea51fdd, 0x3eb544da, 0xbf0d0267, 0x3dbae08b, 0xbe07ba22, 0x3f7536a6, 0x3f743637, 0x3e4badf5, 0x3f45027b); - VLOAD_32(v6, 0x80000000, 0x7fffffff, 0x3e1626ca, 0xffffffff, 0xbefbc255, + VLOAD_32(v24, 0x80000000, 0x7fffffff, 0x3e1626ca, 0xffffffff, 0xbefbc255, 0x7fffffff, 0xffffffff, 0xbeaff818, 0x3eada805, 0x3d96fc66, 0x3f3393e1, 0x3f5443aa, 0xbe15516c, 0xbe4c8a7b, 0xbf169fd9, 0x3f753c15); - asm volatile("vfsgnj.vv v2, v4, v6"); - VCMP_U32(14, v2, 0x80000000, 0x3d816610, 0x3f536e2f, 0xbe369229, 0xbee857e1, + asm volatile("vfsgnj.vv v8, v16, v24"); + VCMP_U32(14, v8, 0x80000000, 0x3d816610, 0x3f536e2f, 0xbe369229, 0xbee857e1, 0x7fffffff, 0x80000000, 0xbea51fdd, 0x3eb544da, 0x3f0d0267, 0x3dbae08b, 0x3e07ba22, 0xbf7536a6, 0xbf743637, 0xbe4badf5, 0x3f45027b); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000000, 0xbfee59fbc778ffbc, 0x7fffffffffffffff, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000000, 0xbfee59fbc778ffbc, 0x7fffffffffffffff, 0x3fde20419edcb428, 0x8000000000000000, 0x3fa8ecd6c20a0480, 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0x3fd13e05a2db6b68, 0xbfe9790bf1eadde4, 0x3fe46b59155986dc, 0x3fcc266386bc2e10, 0x3fdb01fdd39a7d9c, 0xbfe4eef55bb6b208, 0x3fe736d06902107a, 0x3fcf3a7f44aa9f48); - VLOAD_64(v6, 0x8000000000000000, 0x7fffffffffffffff, 0xbfe187df69e0bb9c, + VLOAD_64(v24, 0x8000000000000000, 0x7fffffffffffffff, 0xbfe187df69e0bb9c, 0xbfe76fd3c4a3b1e8, 0x0000000000000001, 0xbfe7c6f7c1644c86, 0xbfe229ce53357d20, 0xbfe79fcc34ac1d30, 0xbfe962e04d917824, 0xbfb0aa5656314cf0, 0xbfc69c8ad7d5ef20, 0xbfddd9380f0bd244, 0xbfc408f2f3d40a40, 0xbfb01baf416f2160, 0xbfed13d1838e183a, 0xbfbee005420412c0); - asm volatile("vfsgnj.vv v2, v4, v6"); - VCMP_U64(15, v2, 0x8000000000000000, 0x3fee59fbc778ffbc, 0xffffffffffffffff, + asm volatile("vfsgnj.vv v8, v16, v24"); + VCMP_U64(15, v8, 0x8000000000000000, 0x3fee59fbc778ffbc, 
0xffffffffffffffff, 0xbfde20419edcb428, 0x0000000000000000, 0xbfa8ecd6c20a0480, 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0xbfd13e05a2db6b68, 0xbfe9790bf1eadde4, 0xbfe46b59155986dc, 0xbfcc266386bc2e10, diff --git a/sw/riscvTests/isa/rv64uv/vfsgnjn.c b/sw/riscvTests/isa/rv64uv/vfsgnjn.c index 83f5877b..8d69a20f 100644 --- a/sw/riscvTests/isa/rv64uv/vfsgnjn.c +++ b/sw/riscvTests/isa/rv64uv/vfsgnjn.c @@ -10,31 +10,31 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.5278, -0.6548, 0.2776, 0.8730, 0.2180, 0.6172, -0.8408, // 0.9922, 0.4250, 0.7393, 0.2549, 0.4998, 0.4609, -0.6348, // 0.1127, -0.1804 - VLOAD_16(v4, 0x3839, 0xb93d, 0x3471, 0x3afc, 0x32fa, 0x38f0, 0xbaba, 0x3bf0, + VLOAD_16(v16, 0x3839, 0xb93d, 0x3471, 0x3afc, 0x32fa, 0x38f0, 0xbaba, 0x3bf0, 0x36cd, 0x39ea, 0x3414, 0x37ff, 0x3760, 0xb914, 0x2f36, 0xb1c6); // -0.6348, -0.4368, -0.1896, 0.9419, -0.6108, -0.3594, -0.5166, // -0.1266, -0.9233, 0.2368, 0.1243, 0.3745, 0.0945, -0.3088, // 0.0190, -0.6289 - VLOAD_16(v6, 0xb914, 0xb6fd, 0xb211, 0x3b89, 0xb8e3, 0xb5c0, 0xb822, 0xb00d, + VLOAD_16(v24, 0xb914, 0xb6fd, 0xb211, 0x3b89, 0xb8e3, 0xb5c0, 0xb822, 0xb00d, 0xbb63, 0x3394, 0x2ff5, 0x35fe, 0x2e0c, 0xb4f1, 0x24da, 0xb908); - asm volatile("vfsgnjn.vv v2, v4, v6"); + asm volatile("vfsgnjn.vv v8, v16, v24"); // 0.5278, 0.6548, 0.2776, -0.8730, 0.2180, 0.6172, 0.8408, // 0.9922, 0.4250, -0.7393, -0.2549, -0.4998, -0.4609, 0.6348, // -0.1127, 0.1804 - VCMP_U16(1, v2, 0x3839, 0x393d, 0x3471, 0xbafc, 0x32fa, 0x38f0, 0x3aba, + VCMP_U16(1, v8, 0x3839, 0x393d, 0x3471, 0xbafc, 0x32fa, 0x38f0, 0x3aba, 0x3bf0, 0x36cd, 0xb9ea, 0xb414, 0xb7ff, 0xb760, 0x3914, 0xaf36, 0x31c6); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.64186704, 0.87601262, -0.93132722, 0.53574133, // 0.17954259, -0.80486834, -0.95272040, -0.45182621, 0.20335940, // 0.96179944, 0.80393785, 0.06180594, 0.86447370, // -0.24008171, -0.42264909, -0.01868468 - VLOAD_32(v4, 0xbf245166, 0x3f60425d, 
0xbf6e6b76, 0x3f092658, 0x3e37da03, + VLOAD_32(v16, 0xbf245166, 0x3f60425d, 0xbf6e6b76, 0x3f092658, 0x3e37da03, 0xbf4e0bda, 0xbf73e57c, 0xbee755c4, 0x3e503d72, 0x3f76387d, 0x3f4dcedf, 0x3d7d283b, 0x3f5d4e26, 0xbe75d7fb, 0xbed86576, 0xbc99109c); @@ -42,29 +42,29 @@ void TEST_CASE1(void) { // 0.44768164, 0.66998041, 0.39474848, -0.39895460, // -0.06065369, 0.53388232, -0.60164928, -0.09839682, // -0.38704434, 0.47123700, 0.40912241, -0.54495376 - VLOAD_32(v6, 0x3ea8e123, 0xbf48664e, 0x3f3afa0d, 0x3f7d982e, 0x3ee53687, + VLOAD_32(v24, 0x3ea8e123, 0xbf48664e, 0x3f3afa0d, 0x3f7d982e, 0x3ee53687, 0x3f2b83d6, 0x3eca1c79, 0xbecc43c7, 0xbd787002, 0x3f08ac83, 0xbf1a05b0, 0xbdc98446, 0xbec62aad, 0x3ef145fa, 0x3ed1787e, 0xbf0b8217); - asm volatile("vfsgnjn.vv v2, v4, v6"); + asm volatile("vfsgnjn.vv v8, v16, v24"); // -0.64186704, 0.87601262, -0.93132722, -0.53574133, // -0.17954259, -0.80486834, -0.95272040, 0.45182621, // 0.20335940, -0.96179944, 0.80393785, 0.06180594, 0.86447370, // -0.24008171, -0.42264909, 0.01868468 - VCMP_U32(2, v2, 0xbf245166, 0x3f60425d, 0xbf6e6b76, 0xbf092658, 0xbe37da03, + VCMP_U32(2, v8, 0xbf245166, 0x3f60425d, 0xbf6e6b76, 0xbf092658, 0xbe37da03, 0xbf4e0bda, 0xbf73e57c, 0x3ee755c4, 0x3e503d72, 0xbf76387d, 0x3f4dcedf, 0x3d7d283b, 0x3f5d4e26, 0xbe75d7fb, 0xbed86576, 0x3c99109c); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 0.3054868811191440, -0.2848737407493320, 0.8796894022735833, // -0.2053728688878902, -0.3336030943630310, 0.2807217618714037, // 0.4723331455917303, -0.8582398814993568, 0.8015611350975347, // 0.0545934239457773, 0.8461592442963186, 0.5731810427237676, // 0.9004228762726765, -0.5815114412549589, -0.4481603571708770, // -0.3109452697316515 - VLOAD_64(v4, 0x3fd38d18d8f0e180, 0xbfd23b5f12007bec, 0x3fec266a63ace3f8, + VLOAD_64(v16, 0x3fd38d18d8f0e180, 0xbfd23b5f12007bec, 0x3fec266a63ace3f8, 0xbfca49a87dadf9c0, 0xbfd559c0cb088d3c, 0x3fd1f75868a0d7ec, 0x3fde3ab4cd4887cc, 0xbfeb76b37be53474, 0x3fe9a663899fa232, 
0x3fabf3ab54d8f940, 0x3feb13bc8d2ebe92, 0x3fe2577fc525f1c0, @@ -76,20 +76,20 @@ void TEST_CASE1(void) { // -0.7302340801247744, 0.1579280396497211, -0.6128023516491234, // 0.2706272563647967, -0.7982929669593624, -0.2521632643799878, // 0.9025785865542095 - VLOAD_64(v6, 0xbfeb866abced1b2e, 0xbfc9e5a5e5d1f648, 0x3fe68ce5791f15e4, + VLOAD_64(v24, 0xbfeb866abced1b2e, 0xbfc9e5a5e5d1f648, 0x3fe68ce5791f15e4, 0x3fb121ba83e404a0, 0xbfee62f88b14a294, 0x3fdccf2004e2dd30, 0x3fc3905c3a38c700, 0xbfd19add326bc2ac, 0xbfe1ba450e13ef3a, 0xbfe75e13dc91f006, 0x3fc436fc6ab55e68, 0xbfe39c13ad67d608, 0x3fd151f4fbdf8d78, 0xbfe98b9db136f3e0, 0xbfd023716370f004, 0x3fece1ec7cea3f5e); - asm volatile("vfsgnjn.vv v2, v4, v6"); + asm volatile("vfsgnjn.vv v8, v16, v24"); // 0.3054868811191440, 0.2848737407493320, -0.8796894022735833, // -0.2053728688878902, 0.3336030943630310, // -0.2807217618714037, -0.4723331455917303, 0.8582398814993568, // 0.8015611350975347, 0.0545934239457773, -0.8461592442963186, // 0.5731810427237676, -0.9004228762726765, 0.5815114412549589, // 0.4481603571708770, -0.3109452697316515 - VCMP_U64(3, v2, 0x3fd38d18d8f0e180, 0x3fd23b5f12007bec, 0xbfec266a63ace3f8, + VCMP_U64(3, v8, 0x3fd38d18d8f0e180, 0x3fd23b5f12007bec, 0xbfec266a63ace3f8, 0xbfca49a87dadf9c0, 0x3fd559c0cb088d3c, 0xbfd1f75868a0d7ec, 0xbfde3ab4cd4887cc, 0x3feb76b37be53474, 0x3fe9a663899fa232, 0x3fabf3ab54d8f940, 0xbfeb13bc8d2ebe92, 0x3fe2577fc525f1c0, @@ -101,32 +101,32 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.5278, -0.6548, 0.2776, 0.8730, 0.2180, 0.6172, -0.8408, // 0.9922, 0.4250, 0.7393, 0.2549, 0.4998, 0.4609, -0.6348, // 0.1127, -0.1804 - VLOAD_16(v4, 0x3839, 0xb93d, 0x3471, 0x3afc, 0x32fa, 0x38f0, 0xbaba, 0x3bf0, + VLOAD_16(v16, 0x3839, 0xb93d, 0x3471, 0x3afc, 0x32fa, 0x38f0, 0xbaba, 0x3bf0, 0x36cd, 0x39ea, 0x3414, 0x37ff, 0x3760, 0xb914, 0x2f36, 
0xb1c6); // -0.6348, -0.4368, -0.1896, 0.9419, -0.6108, -0.3594, -0.5166, // -0.1266, -0.9233, 0.2368, 0.1243, 0.3745, 0.0945, -0.3088, // 0.0190, -0.6289 - VLOAD_16(v6, 0xb914, 0xb6fd, 0xb211, 0x3b89, 0xb8e3, 0xb5c0, 0xb822, 0xb00d, + VLOAD_16(v24, 0xb914, 0xb6fd, 0xb211, 0x3b89, 0xb8e3, 0xb5c0, 0xb822, 0xb00d, 0xbb63, 0x3394, 0x2ff5, 0x35fe, 0x2e0c, 0xb4f1, 0x24da, 0xb908); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjn.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfsgnjn.vv v8, v16, v24, v0.t"); // 0.0000, 0.6548, 0.0000, -0.8730, 0.0000, 0.6172, 0.0000, // 0.9922, 0.0000, -0.7393, 0.0000, -0.4998, 0.0000, 0.6348, // 0.0000, 0.1804 - VCMP_U16(4, v2, 0x0, 0x393d, 0x0, 0xbafc, 0x0, 0x38f0, 0x0, 0x3bf0, 0x0, + VCMP_U16(4, v8, 0x0, 0x393d, 0x0, 0xbafc, 0x0, 0x38f0, 0x0, 0x3bf0, 0x0, 0xb9ea, 0x0, 0xb7ff, 0x0, 0x3914, 0x0, 0x31c6); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.64186704, 0.87601262, -0.93132722, 0.53574133, // 0.17954259, -0.80486834, -0.95272040, -0.45182621, 0.20335940, // 0.96179944, 0.80393785, 0.06180594, 0.86447370, // -0.24008171, -0.42264909, -0.01868468 - VLOAD_32(v4, 0xbf245166, 0x3f60425d, 0xbf6e6b76, 0x3f092658, 0x3e37da03, + VLOAD_32(v16, 0xbf245166, 0x3f60425d, 0xbf6e6b76, 0x3f092658, 0x3e37da03, 0xbf4e0bda, 0xbf73e57c, 0xbee755c4, 0x3e503d72, 0x3f76387d, 0x3f4dcedf, 0x3d7d283b, 0x3f5d4e26, 0xbe75d7fb, 0xbed86576, 0xbc99109c); @@ -134,30 +134,30 @@ void TEST_CASE2(void) { // 0.44768164, 0.66998041, 0.39474848, -0.39895460, // -0.06065369, 0.53388232, -0.60164928, -0.09839682, // -0.38704434, 0.47123700, 0.40912241, -0.54495376 - VLOAD_32(v6, 0x3ea8e123, 0xbf48664e, 0x3f3afa0d, 0x3f7d982e, 0x3ee53687, + VLOAD_32(v24, 0x3ea8e123, 0xbf48664e, 0x3f3afa0d, 0x3f7d982e, 0x3ee53687, 0x3f2b83d6, 0x3eca1c79, 0xbecc43c7, 0xbd787002, 0x3f08ac83, 0xbf1a05b0, 0xbdc98446, 0xbec62aad, 0x3ef145fa, 0x3ed1787e, 0xbf0b8217); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjn.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm 
volatile("vfsgnjn.vv v8, v16, v24, v0.t"); // 0.00000000, 0.87601262, 0.00000000, -0.53574133, // 0.00000000, -0.80486834, 0.00000000, 0.45182621, // 0.00000000, -0.96179944, 0.00000000, 0.06180594, // 0.00000000, -0.24008171, 0.00000000, 0.01868468 - VCMP_U32(5, v2, 0x0, 0x3f60425d, 0x0, 0xbf092658, 0x0, 0xbf4e0bda, 0x0, + VCMP_U32(5, v8, 0x0, 0x3f60425d, 0x0, 0xbf092658, 0x0, 0xbf4e0bda, 0x0, 0x3ee755c4, 0x0, 0xbf76387d, 0x0, 0x3d7d283b, 0x0, 0xbe75d7fb, 0x0, 0x3c99109c); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // 0.3054868811191440, -0.2848737407493320, 0.8796894022735833, // -0.2053728688878902, -0.3336030943630310, 0.2807217618714037, // 0.4723331455917303, -0.8582398814993568, 0.8015611350975347, // 0.0545934239457773, 0.8461592442963186, 0.5731810427237676, // 0.9004228762726765, -0.5815114412549589, -0.4481603571708770, // -0.3109452697316515 - VLOAD_64(v4, 0x3fd38d18d8f0e180, 0xbfd23b5f12007bec, 0x3fec266a63ace3f8, + VLOAD_64(v16, 0x3fd38d18d8f0e180, 0xbfd23b5f12007bec, 0x3fec266a63ace3f8, 0xbfca49a87dadf9c0, 0xbfd559c0cb088d3c, 0x3fd1f75868a0d7ec, 0x3fde3ab4cd4887cc, 0xbfeb76b37be53474, 0x3fe9a663899fa232, 0x3fabf3ab54d8f940, 0x3feb13bc8d2ebe92, 0x3fe2577fc525f1c0, @@ -169,22 +169,22 @@ void TEST_CASE2(void) { // -0.7302340801247744, 0.1579280396497211, -0.6128023516491234, // 0.2706272563647967, -0.7982929669593624, -0.2521632643799878, // 0.9025785865542095 - VLOAD_64(v6, 0xbfeb866abced1b2e, 0xbfc9e5a5e5d1f648, 0x3fe68ce5791f15e4, + VLOAD_64(v24, 0xbfeb866abced1b2e, 0xbfc9e5a5e5d1f648, 0x3fe68ce5791f15e4, 0x3fb121ba83e404a0, 0xbfee62f88b14a294, 0x3fdccf2004e2dd30, 0x3fc3905c3a38c700, 0xbfd19add326bc2ac, 0xbfe1ba450e13ef3a, 0xbfe75e13dc91f006, 0x3fc436fc6ab55e68, 0xbfe39c13ad67d608, 0x3fd151f4fbdf8d78, 0xbfe98b9db136f3e0, 0xbfd023716370f004, 0x3fece1ec7cea3f5e); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjn.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfsgnjn.vv v8, v16, v24, v0.t"); // 0.0000000000000000, 
0.2848737407493320, 0.0000000000000000, // -0.2053728688878902, 0.0000000000000000, // -0.2807217618714037, 0.0000000000000000, 0.8582398814993568, // 0.0000000000000000, 0.0545934239457773, 0.0000000000000000, // 0.5731810427237676, 0.0000000000000000, 0.5815114412549589, // 0.0000000000000000, -0.3109452697316515 - VCMP_U64(6, v2, 0x0, 0x3fd23b5f12007bec, 0x0, 0xbfca49a87dadf9c0, 0x0, + VCMP_U64(6, v8, 0x0, 0x3fd23b5f12007bec, 0x0, 0xbfca49a87dadf9c0, 0x0, 0xbfd1f75868a0d7ec, 0x0, 0x3feb76b37be53474, 0x0, 0x3fabf3ab54d8f940, 0x0, 0x3fe2577fc525f1c0, 0x0, 0x3fe29bbde1ce1372, 0x0, 0xbfd3e686fd15f950); @@ -193,24 +193,24 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // -0.6143 BOX_HALF_IN_FLOAT(fscalar_16, 0xb8ea); // -0.9351, 0.6538, -0.6743, -0.4695, -0.1439, 0.6250, -0.1511, // -0.7476, 0.8496, 0.6279, 0.5234, 0.2610, 0.6299, -0.0123, // -0.9995, -0.3872 - VLOAD_16(v4, 0xbb7b, 0x393b, 0xb965, 0xb783, 0xb09b, 0x3900, 0xb0d6, 0xb9fb, + VLOAD_16(v16, 0xbb7b, 0x393b, 0xb965, 0xb783, 0xb09b, 0x3900, 0xb0d6, 0xb9fb, 0x3acc, 0x3906, 0x3830, 0x342d, 0x390a, 0xa24d, 0xbbff, 0xb632); - asm volatile("vfsgnjn.vf v2, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfsgnjn.vf v8, v16, %[A]" ::[A] "f"(fscalar_16)); // 0.9351, 0.6538, 0.6743, 0.4695, 0.1439, 0.6250, 0.1511, // 0.7476, 0.8496, 0.6279, 0.5234, 0.2610, 0.6299, 0.0123, // 0.9995, 0.3872 - VCMP_U16(7, v2, 0x3b7b, 0x393b, 0x3965, 0x3783, 0x309b, 0x3900, 0x30d6, + VCMP_U16(7, v8, 0x3b7b, 0x393b, 0x3965, 0x3783, 0x309b, 0x3900, 0x30d6, 0x39fb, 0x3acc, 0x3906, 0x3830, 0x342d, 0x390a, 0x224d, 0x3bff, 0x3632); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // 0.56259364 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x3f100623); @@ -218,22 +218,22 @@ void TEST_CASE3(void) { // 0.90217608, 0.57424510, 0.05995686, -0.00974263, // -0.45620662, -0.36967716, -0.56535333, -0.93745488, // -0.55570704, 
0.04399948, -0.57520008, -0.05702910 - VLOAD_32(v4, 0xbc053c4a, 0xbec60586, 0x3f719cc7, 0x3eca7d19, 0x3f66f503, + VLOAD_32(v16, 0xbc053c4a, 0xbec60586, 0x3f719cc7, 0x3eca7d19, 0x3f66f503, 0x3f1301ba, 0x3d759554, 0xbc1f9f8d, 0xbee993ea, 0xbebd4653, 0xbf10baff, 0xbf6ffd0b, 0xbf0e42d1, 0x3d3438cd, 0xbf134050, 0xbd699758); - asm volatile("vfsgnjn.vf v2, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfsgnjn.vf v8, v16, %[A]" ::[A] "f"(fscalar_32)); // -0.00813205, -0.38676089, -0.94379848, -0.39548567, // -0.90217608, -0.57424510, -0.05995686, -0.00974263, // -0.45620662, -0.36967716, -0.56535333, -0.93745488, // -0.55570704, -0.04399948, -0.57520008, -0.05702910 - VCMP_U32(8, v2, 0xbc053c4a, 0xbec60586, 0xbf719cc7, 0xbeca7d19, 0xbf66f503, + VCMP_U32(8, v8, 0xbc053c4a, 0xbec60586, 0xbf719cc7, 0xbeca7d19, 0xbf66f503, 0xbf1301ba, 0xbd759554, 0xbc1f9f8d, 0xbee993ea, 0xbebd4653, 0xbf10baff, 0xbf6ffd0b, 0xbf0e42d1, 0xbd3438cd, 0xbf134050, 0xbd699758); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.1909501680714165 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fc8710e1b8426e8); @@ -243,20 +243,20 @@ void TEST_CASE3(void) { // -0.0001555883762874, 0.5283267089670276, 0.5439688283816015, // -0.2866314604291811, -0.0576946087921848, 0.7960283598249005, // -0.8999056473475127, 0.2142070697411482 - VLOAD_64(v4, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0x3fead67ca8cd9566, + VLOAD_64(v16, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0x3fead67ca8cd9566, 0xbfe214d9ba40b584, 0xbfe47283a0c1e25c, 0x3fe10547bd8d051e, 0xbfe904219ee4fb76, 0xbfe22c593425cec0, 0xbf2464adf9bfe000, 0x3fe0e80d6a13bbf4, 0x3fe1683150fe2844, 0xbfd2582b7b231344, 0xbfad8a25d3d5fd40, 0x3fe9791077845df2, 0xbfeccc06ed9afc1e, 0x3fcb6b23238e1bc8); - asm volatile("vfsgnjn.vf v2, v4, %[A]" ::[A] "f"(dscalar_64)); + asm volatile("vfsgnjn.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); // -0.2692390874696449, -0.3268380231167121, -0.8386824891028197, // -0.5650452268361481, -0.6389787807266418, 
-0.5318945600667211, // -0.7817543128402196, -0.5679136293897145, -0.0001555883762874, // -0.5283267089670276, -0.5439688283816015, -0.2866314604291811, // -0.0576946087921848, -0.7960283598249005, -0.8999056473475127, // -0.2142070697411482 - VCMP_U64(9, v2, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0xbfead67ca8cd9566, + VCMP_U64(9, v8, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0xbfead67ca8cd9566, 0xbfe214d9ba40b584, 0xbfe47283a0c1e25c, 0xbfe10547bd8d051e, 0xbfe904219ee4fb76, 0xbfe22c593425cec0, 0xbf2464adf9bfe000, 0xbfe0e80d6a13bbf4, 0xbfe1683150fe2844, 0xbfd2582b7b231344, @@ -267,25 +267,25 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // -0.6143 BOX_HALF_IN_FLOAT(fscalar_16, 0xb8ea); // -0.9351, 0.6538, -0.6743, -0.4695, -0.1439, 0.6250, // -0.1511, -0.7476, 0.8496, 0.6279, 0.5234, 0.2610, 0.6299, // -0.0123, -0.9995, -0.3872 - VLOAD_16(v4, 0xbb7b, 0x393b, 0xb965, 0xb783, 0xb09b, 0x3900, 0xb0d6, 0xb9fb, + VLOAD_16(v16, 0xbb7b, 0x393b, 0xb965, 0xb783, 0xb09b, 0x3900, 0xb0d6, 0xb9fb, 0x3acc, 0x3906, 0x3830, 0x342d, 0x390a, 0xa24d, 0xbbff, 0xb632); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjn.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v8); + asm volatile("vfsgnjn.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.0000, 0.6538, 0.0000, 0.4695, 0.0000, 0.6250, 0.0000, // 0.7476, 0.0000, 0.6279, 0.0000, 0.2610, 0.0000, 0.0123, // 0.0000, 0.3872 - VCMP_U16(10, v2, 0x0, 0x393b, 0x0, 0x3783, 0x0, 0x3900, 0x0, 0x39fb, 0x0, + VCMP_U16(10, v8, 0x0, 0x393b, 0x0, 0x3783, 0x0, 0x3900, 0x0, 0x39fb, 0x0, 0x3906, 0x0, 0x342d, 0x0, 0x224d, 0x0, 0x3632); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // 0.56259364 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x3f100623); @@ -293,23 +293,23 @@ void TEST_CASE4(void) { // 0.90217608, 0.57424510, 0.05995686, -0.00974263, // -0.45620662, -0.36967716, -0.56535333, -0.93745488, // 
-0.55570704, 0.04399948, -0.57520008, -0.05702910 - VLOAD_32(v4, 0xbc053c4a, 0xbec60586, 0x3f719cc7, 0x3eca7d19, 0x3f66f503, + VLOAD_32(v16, 0xbc053c4a, 0xbec60586, 0x3f719cc7, 0x3eca7d19, 0x3f66f503, 0x3f1301ba, 0x3d759554, 0xbc1f9f8d, 0xbee993ea, 0xbebd4653, 0xbf10baff, 0xbf6ffd0b, 0xbf0e42d1, 0x3d3438cd, 0xbf134050, 0xbd699758); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjn.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v8); + asm volatile("vfsgnjn.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.00000000, -0.38676089, 0.00000000, -0.39548567, // 0.00000000, -0.57424510, 0.00000000, -0.00974263, // 0.00000000, -0.36967716, 0.00000000, -0.93745488, // 0.00000000, -0.04399948, 0.00000000, -0.05702910 - VCMP_U32(11, v2, 0x0, 0xbec60586, 0x0, 0xbeca7d19, 0x0, 0xbf1301ba, 0x0, + VCMP_U32(11, v8, 0x0, 0xbec60586, 0x0, 0xbeca7d19, 0x0, 0xbf1301ba, 0x0, 0xbc1f9f8d, 0x0, 0xbebd4653, 0x0, 0xbf6ffd0b, 0x0, 0xbd3438cd, 0x0, 0xbd699758); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // 0.1909501680714165 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fc8710e1b8426e8); @@ -321,15 +321,15 @@ void TEST_CASE4(void) { // 0.5439688283816015, -0.2866314604291811, // -0.0576946087921848, 0.7960283598249005, // -0.8999056473475127, 0.2142070697411482 - VLOAD_64(v4, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0x3fead67ca8cd9566, + VLOAD_64(v16, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0x3fead67ca8cd9566, 0xbfe214d9ba40b584, 0xbfe47283a0c1e25c, 0x3fe10547bd8d051e, 0xbfe904219ee4fb76, 0xbfe22c593425cec0, 0xbf2464adf9bfe000, 0x3fe0e80d6a13bbf4, 0x3fe1683150fe2844, 0xbfd2582b7b231344, 0xbfad8a25d3d5fd40, 0x3fe9791077845df2, 0xbfeccc06ed9afc1e, 0x3fcb6b23238e1bc8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjn.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCLEAR(v8); + asm volatile("vfsgnjn.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); // 0.0000000000000000, -0.3268380231167121, 0.0000000000000000, // -0.5650452268361481, 
0.0000000000000000, // -0.5318945600667211, 0.0000000000000000, @@ -338,7 +338,7 @@ void TEST_CASE4(void) { // -0.2866314604291811, 0.0000000000000000, // -0.7960283598249005, 0.0000000000000000, // -0.2142070697411482 - VCMP_U64(12, v2, 0x0, 0xbfd4eaea07180958, 0x0, 0xbfe214d9ba40b584, 0x0, + VCMP_U64(12, v8, 0x0, 0xbfd4eaea07180958, 0x0, 0xbfe214d9ba40b584, 0x0, 0xbfe10547bd8d051e, 0x0, 0xbfe22c593425cec0, 0x0, 0xbfe0e80d6a13bbf4, 0x0, 0xbfd2582b7b231344, 0x0, 0xbfe9791077845df2, 0x0, 0xbfcb6b23238e1bc8); diff --git a/sw/riscvTests/isa/rv64uv/vfsgnjx.c b/sw/riscvTests/isa/rv64uv/vfsgnjx.c index 68a0e505..5d844248 100644 --- a/sw/riscvTests/isa/rv64uv/vfsgnjx.c +++ b/sw/riscvTests/isa/rv64uv/vfsgnjx.c @@ -10,31 +10,31 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.0371, 0.8374, 0.7183, 0.8086, -0.8940, 0.4626, 0.5449, // 0.6831, 0.4661, 0.2981, 0.5615, -0.6167, -0.7075, -0.7603, // 0.8438, -0.2742 - VLOAD_16(v4, 0xa8c1, 0x3ab3, 0x39bf, 0x3a78, 0xbb27, 0x3767, 0x385c, 0x3977, + VLOAD_16(v16, 0xa8c1, 0x3ab3, 0x39bf, 0x3a78, 0xbb27, 0x3767, 0x385c, 0x3977, 0x3775, 0x34c5, 0x387e, 0xb8ef, 0xb9a9, 0xba15, 0x3ac0, 0xb463); // 0.3516, 0.7925, -0.5034, -0.1672, 0.0703, -0.9731, 0.0361, // -0.4077, 0.8965, 0.8242, -0.7822, 0.0265, -0.5361, 0.1226, // -0.9917, 0.5415 - VLOAD_16(v6, 0x35a0, 0x3a57, 0xb807, 0xb15a, 0x2c7f, 0xbbc9, 0x289f, 0xb686, + VLOAD_16(v24, 0x35a0, 0x3a57, 0xb807, 0xb15a, 0x2c7f, 0xbbc9, 0x289f, 0xb686, 0x3b2c, 0x3a98, 0xba42, 0x26cb, 0xb84a, 0x2fd8, 0xbbef, 0x3855); - asm volatile("vfsgnjx.vv v2, v4, v6"); + asm volatile("vfsgnjx.vv v8, v16, v24"); // -0.0371, 0.8374, -0.7183, -0.8086, -0.8940, -0.4626, 0.5449, // -0.6831, 0.4661, 0.2981, -0.5615, -0.6167, 0.7075, -0.7603, // -0.8438, -0.2742 - VCMP_U16(1, v2, 0xa8c1, 0x3ab3, 0xb9bf, 0xba78, 0xbb27, 0xb767, 0x385c, + VCMP_U16(1, v8, 0xa8c1, 0x3ab3, 0xb9bf, 0xba78, 0xbb27, 0xb767, 0x385c, 0xb977, 0x3775, 0x34c5, 0xb87e, 0xb8ef, 0x39a9, 
0xba15, 0xbac0, 0xb463); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.00918692, -0.23372029, 0.42919466, 0.95128548, // 0.05014091, 0.08194520, 0.65458435, 0.38167605, // -0.52784044, 0.46330592, 0.66792834, 0.94584799, // -0.11679628, 0.12139154, 0.61421394, -0.71422517 - VLOAD_32(v4, 0xbc1684ba, 0xbe6f545f, 0x3edbbf67, 0x3f738772, 0x3d4d608d, + VLOAD_32(v16, 0xbc1684ba, 0xbe6f545f, 0x3edbbf67, 0x3f738772, 0x3d4d608d, 0x3da7d2e2, 0x3f2792d7, 0x3ec36b0b, 0xbf07208d, 0x3eed366f, 0x3f2afd5a, 0x3f722318, 0xbdef32e4, 0x3df89c21, 0x3f1d3d20, 0xbf36d776); @@ -42,29 +42,29 @@ void TEST_CASE1(void) { // 0.23285799, 0.19133335, 0.78484982, -0.40654737, // -0.40144378, -0.94419461, 0.60990387, -0.37662670, // 0.75369638, -0.82297397, 0.24545205, -0.75572032 - VLOAD_32(v6, 0xbf771aaf, 0xbf543b53, 0xbf7c3bb6, 0x3ebd01e4, 0x3e6e7253, + VLOAD_32(v24, 0xbf771aaf, 0xbf543b53, 0xbf7c3bb6, 0x3ebd01e4, 0x3e6e7253, 0x3e43ece4, 0x3f48ebeb, 0xbed026fa, 0xbecd8a0a, 0xbf71b6bd, 0x3f1c22a9, 0xbec0d537, 0x3f40f23f, 0xbf52ae6c, 0x3e7b57c8, 0xbf4176e3); - asm volatile("vfsgnjx.vv v2, v4, v6"); + asm volatile("vfsgnjx.vv v8, v16, v24"); // 0.00918692, 0.23372029, -0.42919466, 0.95128548, // 0.05014091, 0.08194520, 0.65458435, -0.38167605, // 0.52784044, -0.46330592, 0.66792834, -0.94584799, // -0.11679628, -0.12139154, 0.61421394, 0.71422517 - VCMP_U32(2, v2, 0x3c1684ba, 0x3e6f545f, 0xbedbbf67, 0x3f738772, 0x3d4d608d, + VCMP_U32(2, v8, 0x3c1684ba, 0x3e6f545f, 0xbedbbf67, 0x3f738772, 0x3d4d608d, 0x3da7d2e2, 0x3f2792d7, 0xbec36b0b, 0x3f07208d, 0xbeed366f, 0x3f2afd5a, 0xbf722318, 0xbdef32e4, 0xbdf89c21, 0x3f1d3d20, 0x3f36d776); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.4085246287477386, 0.8681744372264055, -0.9782992825101422, // 0.9959576051606904, -0.7910104167136705, 0.0799315061445605, // 0.2562329212571202, -0.0401280831920132, -0.6164331117742006, // 0.0314794700215042, -0.2391312835511448, 0.2944948324466776, // -0.3469257666022745, 0.3129356083924371, 
0.1418123916338592, // -0.2697778839142546 - VLOAD_64(v4, 0xbfda25447c0540c8, 0x3febc815c1e38a2c, 0xbfef4e3a4c029a38, + VLOAD_64(v16, 0xbfda25447c0540c8, 0x3febc815c1e38a2c, 0xbfef4e3a4c029a38, 0x3fefdee27bcbc3c2, 0xbfe94ff513d293d6, 0x3fb4766424cf97d0, 0x3fd0661ec43d4dd0, 0xbfa48bab09ebf660, 0xbfe3b9d1eee7bc0a, 0x3fa01e13bc79bd60, 0xbfce9bda9926bde0, 0x3fd2d900da8cc448, @@ -76,13 +76,13 @@ void TEST_CASE1(void) { // -0.1361158534833962, 0.1558021548512183, 0.9766583762298613, // 0.2768845956890595, 0.6672273199701737, -0.4444943981200347, // 0.5095574851608440 - VLOAD_64(v6, 0xbfecef7ffd03691e, 0x3fd2e79c5b6133f0, 0xbfdbfe36b251f164, + VLOAD_64(v24, 0xbfecef7ffd03691e, 0x3fd2e79c5b6133f0, 0xbfdbfe36b251f164, 0x3fc2bf4992d91480, 0xbfe0840fa43663a0, 0xbfd68f847062a774, 0xbfb1b9459f0cf460, 0xbfefaea9bfed2a32, 0x3fe3eb8fa49aeb32, 0xbfc16c3e8996d300, 0x3fc3f15333ddbc58, 0x3fef40c91128b1ea, 0x3fd1b87a2ad00b5c, 0x3fe559ed1bc8a0c2, 0xbfdc7298a1cb9174, 0x3fe04e4b7fc654a0); - asm volatile("vfsgnjx.vv v2, v4, v6"); + asm volatile("vfsgnjx.vv v8, v16, v24"); // 0.4085246287477386, 0.8681744372264055, 0.9782992825101422, // 0.9959576051606904, 0.7910104167136705, -0.0799315061445605, // -0.2562329212571202, 0.0401280831920132, @@ -90,7 +90,7 @@ void TEST_CASE1(void) { // -0.2391312835511448, 0.2944948324466776, // -0.3469257666022745, 0.3129356083924371, // -0.1418123916338592, -0.2697778839142546 - VCMP_U64(3, v2, 0x3fda25447c0540c8, 0x3febc815c1e38a2c, 0x3fef4e3a4c029a38, + VCMP_U64(3, v8, 0x3fda25447c0540c8, 0x3febc815c1e38a2c, 0x3fef4e3a4c029a38, 0x3fefdee27bcbc3c2, 0x3fe94ff513d293d6, 0xbfb4766424cf97d0, 0xbfd0661ec43d4dd0, 0x3fa48bab09ebf660, 0xbfe3b9d1eee7bc0a, 0xbfa01e13bc79bd60, 0xbfce9bda9926bde0, 0x3fd2d900da8cc448, @@ -102,32 +102,32 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.0371, 0.8374, 0.7183, 0.8086, -0.8940, 
0.4626, 0.5449, // 0.6831, 0.4661, 0.2981, 0.5615, -0.6167, -0.7075, -0.7603, // 0.8438, -0.2742 - VLOAD_16(v4, 0xa8c1, 0x3ab3, 0x39bf, 0x3a78, 0xbb27, 0x3767, 0x385c, 0x3977, + VLOAD_16(v16, 0xa8c1, 0x3ab3, 0x39bf, 0x3a78, 0xbb27, 0x3767, 0x385c, 0x3977, 0x3775, 0x34c5, 0x387e, 0xb8ef, 0xb9a9, 0xba15, 0x3ac0, 0xb463); // 0.3516, 0.7925, -0.5034, -0.1672, 0.0703, -0.9731, 0.0361, // -0.4077, 0.8965, 0.8242, -0.7822, 0.0265, -0.5361, 0.1226, // -0.9917, 0.5415 - VLOAD_16(v6, 0x35a0, 0x3a57, 0xb807, 0xb15a, 0x2c7f, 0xbbc9, 0x289f, 0xb686, + VLOAD_16(v24, 0x35a0, 0x3a57, 0xb807, 0xb15a, 0x2c7f, 0xbbc9, 0x289f, 0xb686, 0x3b2c, 0x3a98, 0xba42, 0x26cb, 0xb84a, 0x2fd8, 0xbbef, 0x3855); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjx.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfsgnjx.vv v8, v16, v24, v0.t"); // 0.0000, 0.8374, 0.0000, -0.8086, 0.0000, -0.4626, 0.0000, // -0.6831, 0.0000, 0.2981, 0.0000, -0.6167, 0.0000, // -0.7603, 0.0000, -0.2742 - VCMP_U16(4, v2, 0x0, 0x3ab3, 0x0, 0xba78, 0x0, 0xb767, 0x0, 0xb977, 0x0, + VCMP_U16(4, v8, 0x0, 0x3ab3, 0x0, 0xba78, 0x0, 0xb767, 0x0, 0xb977, 0x0, 0x34c5, 0x0, 0xb8ef, 0x0, 0xba15, 0x0, 0xb463); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.00918692, -0.23372029, 0.42919466, 0.95128548, // 0.05014091, 0.08194520, 0.65458435, 0.38167605, // -0.52784044, 0.46330592, 0.66792834, 0.94584799, // -0.11679628, 0.12139154, 0.61421394, -0.71422517 - VLOAD_32(v4, 0xbc1684ba, 0xbe6f545f, 0x3edbbf67, 0x3f738772, 0x3d4d608d, + VLOAD_32(v16, 0xbc1684ba, 0xbe6f545f, 0x3edbbf67, 0x3f738772, 0x3d4d608d, 0x3da7d2e2, 0x3f2792d7, 0x3ec36b0b, 0xbf07208d, 0x3eed366f, 0x3f2afd5a, 0x3f722318, 0xbdef32e4, 0x3df89c21, 0x3f1d3d20, 0xbf36d776); @@ -135,30 +135,30 @@ void TEST_CASE2(void) { // 0.23285799, 0.19133335, 0.78484982, -0.40654737, // -0.40144378, -0.94419461, 0.60990387, -0.37662670, // 0.75369638, -0.82297397, 0.24545205, -0.75572032 - VLOAD_32(v6, 0xbf771aaf, 0xbf543b53, 0xbf7c3bb6, 0x3ebd01e4, 0x3e6e7253, + 
VLOAD_32(v24, 0xbf771aaf, 0xbf543b53, 0xbf7c3bb6, 0x3ebd01e4, 0x3e6e7253, 0x3e43ece4, 0x3f48ebeb, 0xbed026fa, 0xbecd8a0a, 0xbf71b6bd, 0x3f1c22a9, 0xbec0d537, 0x3f40f23f, 0xbf52ae6c, 0x3e7b57c8, 0xbf4176e3); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjx.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfsgnjx.vv v8, v16, v24, v0.t"); // 0.00000000, 0.23372029, 0.00000000, 0.95128548, // 0.00000000, 0.08194520, 0.00000000, -0.38167605, // 0.00000000, -0.46330592, 0.00000000, -0.94584799, // 0.00000000, -0.12139154, 0.00000000, 0.71422517 - VCMP_U32(5, v2, 0x0, 0x3e6f545f, 0x0, 0x3f738772, 0x0, 0x3da7d2e2, 0x0, + VCMP_U32(5, v8, 0x0, 0x3e6f545f, 0x0, 0x3f738772, 0x0, 0x3da7d2e2, 0x0, 0xbec36b0b, 0x0, 0xbeed366f, 0x0, 0xbf722318, 0x0, 0xbdf89c21, 0x0, 0x3f36d776); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.4085246287477386, 0.8681744372264055, -0.9782992825101422, // 0.9959576051606904, -0.7910104167136705, 0.0799315061445605, // 0.2562329212571202, -0.0401280831920132, -0.6164331117742006, // 0.0314794700215042, -0.2391312835511448, 0.2944948324466776, // -0.3469257666022745, 0.3129356083924371, 0.1418123916338592, // -0.2697778839142546 - VLOAD_64(v4, 0xbfda25447c0540c8, 0x3febc815c1e38a2c, 0xbfef4e3a4c029a38, + VLOAD_64(v16, 0xbfda25447c0540c8, 0x3febc815c1e38a2c, 0xbfef4e3a4c029a38, 0x3fefdee27bcbc3c2, 0xbfe94ff513d293d6, 0x3fb4766424cf97d0, 0x3fd0661ec43d4dd0, 0xbfa48bab09ebf660, 0xbfe3b9d1eee7bc0a, 0x3fa01e13bc79bd60, 0xbfce9bda9926bde0, 0x3fd2d900da8cc448, @@ -170,22 +170,22 @@ void TEST_CASE2(void) { // -0.1361158534833962, 0.1558021548512183, 0.9766583762298613, // 0.2768845956890595, 0.6672273199701737, -0.4444943981200347, // 0.5095574851608440 - VLOAD_64(v6, 0xbfecef7ffd03691e, 0x3fd2e79c5b6133f0, 0xbfdbfe36b251f164, + VLOAD_64(v24, 0xbfecef7ffd03691e, 0x3fd2e79c5b6133f0, 0xbfdbfe36b251f164, 0x3fc2bf4992d91480, 0xbfe0840fa43663a0, 0xbfd68f847062a774, 0xbfb1b9459f0cf460, 0xbfefaea9bfed2a32, 0x3fe3eb8fa49aeb32, 
0xbfc16c3e8996d300, 0x3fc3f15333ddbc58, 0x3fef40c91128b1ea, 0x3fd1b87a2ad00b5c, 0x3fe559ed1bc8a0c2, 0xbfdc7298a1cb9174, 0x3fe04e4b7fc654a0); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjx.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfsgnjx.vv v8, v16, v24, v0.t"); // 0.0000000000000000, 0.8681744372264055, 0.0000000000000000, // 0.9959576051606904, 0.0000000000000000, -0.0799315061445605, // 0.0000000000000000, 0.0401280831920132, 0.0000000000000000, // -0.0314794700215042, 0.0000000000000000, 0.2944948324466776, // 0.0000000000000000, 0.3129356083924371, 0.0000000000000000, // -0.2697778839142546 - VCMP_U64(6, v2, 0x0, 0x3febc815c1e38a2c, 0x0, 0x3fefdee27bcbc3c2, 0x0, + VCMP_U64(6, v8, 0x0, 0x3febc815c1e38a2c, 0x0, 0x3fefdee27bcbc3c2, 0x0, 0xbfb4766424cf97d0, 0x0, 0x3fa48bab09ebf660, 0x0, 0xbfa01e13bc79bd60, 0x0, 0x3fd2d900da8cc448, 0x0, 0x3fd4072312f3290c, 0x0, 0xbfd1440a752621b8); @@ -194,24 +194,24 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // -0.9766 BOX_HALF_IN_FLOAT(fscalar_16, 0xbbd0); // 0.8535, -0.3750, -0.8066, -0.9097, -0.2216, -0.5645, -0.3149, // -0.4512, 0.5981, 0.6587, 0.9546, -0.3040, -0.6157, 0.5723, // 0.8438, -0.1544 - VLOAD_16(v4, 0x3ad4, 0xb600, 0xba74, 0xbb47, 0xb317, 0xb884, 0xb50a, 0xb738, + VLOAD_16(v16, 0x3ad4, 0xb600, 0xba74, 0xbb47, 0xb317, 0xb884, 0xb50a, 0xb738, 0x38c9, 0x3945, 0x3ba3, 0xb4dd, 0xb8ed, 0x3894, 0x3ac0, 0xb0f1); - asm volatile("vfsgnjx.vf v2, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfsgnjx.vf v8, v16, %[A]" ::[A] "f"(fscalar_16)); // -0.8535, 0.3750, 0.8066, 0.9097, 0.2216, 0.5645, 0.3149, // 0.4512, -0.5981, -0.6587, -0.9546, 0.3040, 0.6157, -0.5723, // -0.8438, 0.1544 - VCMP_U16(7, v2, 0xbad4, 0x3600, 0x3a74, 0x3b47, 0x3317, 0x3884, 0x350a, + VCMP_U16(7, v8, 0xbad4, 0x3600, 0x3a74, 0x3b47, 0x3317, 0x3884, 0x350a, 0x3738, 0xb8c9, 0xb945, 0xbba3, 0x34dd, 0x38ed, 
0xb894, 0xbac0, 0x30f1); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.71056527 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbf35e79b); @@ -219,22 +219,22 @@ void TEST_CASE3(void) { // 0.00040700, -0.59566921, -0.88075870, 0.08096603, 0.94059193, // -0.29601631, -0.54263371, -0.86016685, -0.57158113, // 0.85538357, -0.76839548, 0.28374606 - VLOAD_32(v4, 0xbe08b4c6, 0xbe3ee62f, 0x3e9a619a, 0xbf1eeaa9, 0x39d561f4, + VLOAD_32(v16, 0xbe08b4c6, 0xbe3ee62f, 0x3e9a619a, 0xbf1eeaa9, 0x39d561f4, 0xbf187dc7, 0xbf617967, 0x3da5d185, 0x3f70caa2, 0xbe978f73, 0xbf0aea0b, 0xbf5c33e5, 0xbf125324, 0x3f5afa6b, 0xbf44b591, 0x3e91472a); - asm volatile("vfsgnjx.vf v2, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfsgnjx.vf v8, v16, %[A]" ::[A] "f"(fscalar_32)); // 0.13350210, 0.18642496, -0.30152589, 0.62076813, // -0.00040700, 0.59566921, 0.88075870, -0.08096603, // -0.94059193, 0.29601631, 0.54263371, 0.86016685, // 0.57158113, -0.85538357, 0.76839548, -0.28374606 - VCMP_U32(8, v2, 0x3e08b4c6, 0x3e3ee62f, 0xbe9a619a, 0x3f1eeaa9, 0xb9d561f4, + VCMP_U32(8, v8, 0x3e08b4c6, 0x3e3ee62f, 0xbe9a619a, 0x3f1eeaa9, 0xb9d561f4, 0x3f187dc7, 0x3f617967, 0xbda5d185, 0xbf70caa2, 0x3e978f73, 0x3f0aea0b, 0x3f5c33e5, 0x3f125324, 0xbf5afa6b, 0x3f44b591, 0xbe91472a); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // -0.1599292306617626 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfc4788f9faef060); @@ -244,20 +244,20 @@ void TEST_CASE3(void) { // -0.3241690978469995, 0.9848431705043186, 0.5835571766262024, // 0.6934128987139432, -0.8499240402166686, 0.9392758702585176, // 0.8754505566292561, -0.4187493105472220, -0.7967172481248119 - VLOAD_64(v4, 0xbfd8216314b1d540, 0x3fe3152420f10f90, 0xbfdb0fb918f3a4fc, + VLOAD_64(v16, 0xbfd8216314b1d540, 0x3fe3152420f10f90, 0xbfdb0fb918f3a4fc, 0xbfeadd7aa9f60146, 0x3fd275a6c6712e84, 0xbfeddcd9cc23cf06, 0xbfd3b2a7ff2d8ea0, 0xbfd4bf2fbe681ba4, 0x3fef83d5d32028f6, 0x3fe2ac80199e9490, 0x3fe630703f533af4, 0xbfeb3293e69a12ae, 
0x3fee0e8c4515d52c, 0x3fec03b0e2bf9ad6, 0xbfdaccc9e88176a4, 0xbfe97eb52b9b5dac); - asm volatile("vfsgnjx.vf v2, v4, %[A]" ::[A] "f"(dscalar_64)); + asm volatile("vfsgnjx.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); // 0.3770377828689853, -0.5963307040587882, 0.4228346580189990, // 0.8395360297727528, -0.2884308755790033, 0.9332093226534830, // 0.3077793113682024, 0.3241690978469995, -0.9848431705043186, // -0.5835571766262024, -0.6934128987139432, 0.8499240402166686, // -0.9392758702585176, -0.8754505566292561, 0.4187493105472220, // 0.7967172481248119 - VCMP_U64(9, v2, 0x3fd8216314b1d540, 0xbfe3152420f10f90, 0x3fdb0fb918f3a4fc, + VCMP_U64(9, v8, 0x3fd8216314b1d540, 0xbfe3152420f10f90, 0x3fdb0fb918f3a4fc, 0x3feadd7aa9f60146, 0xbfd275a6c6712e84, 0x3feddcd9cc23cf06, 0x3fd3b2a7ff2d8ea0, 0x3fd4bf2fbe681ba4, 0xbfef83d5d32028f6, 0xbfe2ac80199e9490, 0xbfe630703f533af4, 0x3feb3293e69a12ae, @@ -268,25 +268,25 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); float fscalar_16; // -0.9766 BOX_HALF_IN_FLOAT(fscalar_16, 0xbbd0); // 0.8535, -0.3750, -0.8066, -0.9097, -0.2216, -0.5645, // -0.3149, -0.4512, 0.5981, 0.6587, 0.9546, -0.3040, // -0.6157, 0.5723, 0.8438, -0.1544 - VLOAD_16(v4, 0x3ad4, 0xb600, 0xba74, 0xbb47, 0xb317, 0xb884, 0xb50a, 0xb738, + VLOAD_16(v16, 0x3ad4, 0xb600, 0xba74, 0xbb47, 0xb317, 0xb884, 0xb50a, 0xb738, 0x38c9, 0x3945, 0x3ba3, 0xb4dd, 0xb8ed, 0x3894, 0x3ac0, 0xb0f1); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjx.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v8); + asm volatile("vfsgnjx.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.0000, 0.3750, 0.0000, 0.9097, 0.0000, 0.5645, 0.0000, // 0.4512, 0.0000, -0.6587, 0.0000, 0.3040, 0.0000, // -0.5723, 0.0000, 0.1544 - VCMP_U16(10, v2, 0x0, 0x3600, 0x0, 0x3b47, 0x0, 0x3884, 0x0, 0x3738, 0x0, + VCMP_U16(10, v8, 0x0, 0x3600, 0x0, 0x3b47, 0x0, 0x3884, 0x0, 0x3738, 0x0, 
0xb945, 0x0, 0x34dd, 0x0, 0xb894, 0x0, 0x30f1); - VSET(16, e32, m2); + VSET(16, e32, m8); float fscalar_32; // -0.71056527 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbf35e79b); @@ -294,23 +294,23 @@ void TEST_CASE4(void) { // 0.00040700, -0.59566921, -0.88075870, 0.08096603, // 0.94059193, -0.29601631, -0.54263371, -0.86016685, // -0.57158113, 0.85538357, -0.76839548, 0.28374606 - VLOAD_32(v4, 0xbe08b4c6, 0xbe3ee62f, 0x3e9a619a, 0xbf1eeaa9, 0x39d561f4, + VLOAD_32(v16, 0xbe08b4c6, 0xbe3ee62f, 0x3e9a619a, 0xbf1eeaa9, 0x39d561f4, 0xbf187dc7, 0xbf617967, 0x3da5d185, 0x3f70caa2, 0xbe978f73, 0xbf0aea0b, 0xbf5c33e5, 0xbf125324, 0x3f5afa6b, 0xbf44b591, 0x3e91472a); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjx.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v8); + asm volatile("vfsgnjx.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.00000000, 0.18642496, 0.00000000, 0.62076813, // 0.00000000, 0.59566921, 0.00000000, -0.08096603, // 0.00000000, 0.29601631, 0.00000000, 0.86016685, // 0.00000000, -0.85538357, 0.00000000, -0.28374606 - VCMP_U32(11, v2, 0x0, 0x3e3ee62f, 0x0, 0x3f1eeaa9, 0x0, 0x3f187dc7, 0x0, + VCMP_U32(11, v8, 0x0, 0x3e3ee62f, 0x0, 0x3f1eeaa9, 0x0, 0x3f187dc7, 0x0, 0xbda5d185, 0x0, 0x3e978f73, 0x0, 0x3f5c33e5, 0x0, 0xbf5afa6b, 0x0, 0xbe91472a); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); double dscalar_64; // -0.1599292306617626 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfc4788f9faef060); @@ -321,22 +321,22 @@ void TEST_CASE4(void) { // 0.9848431705043186, 0.5835571766262024, 0.6934128987139432, // -0.8499240402166686, 0.9392758702585176, // 0.8754505566292561, -0.4187493105472220, -0.7967172481248119 - VLOAD_64(v4, 0xbfd8216314b1d540, 0x3fe3152420f10f90, 0xbfdb0fb918f3a4fc, + VLOAD_64(v16, 0xbfd8216314b1d540, 0x3fe3152420f10f90, 0xbfdb0fb918f3a4fc, 0xbfeadd7aa9f60146, 0x3fd275a6c6712e84, 0xbfeddcd9cc23cf06, 0xbfd3b2a7ff2d8ea0, 0xbfd4bf2fbe681ba4, 0x3fef83d5d32028f6, 0x3fe2ac80199e9490, 0x3fe630703f533af4, 0xbfeb3293e69a12ae, 
0x3fee0e8c4515d52c, 0x3fec03b0e2bf9ad6, 0xbfdaccc9e88176a4, 0xbfe97eb52b9b5dac); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsgnjx.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCLEAR(v8); + asm volatile("vfsgnjx.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); // 0.0000000000000000, -0.5963307040587882, 0.0000000000000000, // 0.8395360297727528, 0.0000000000000000, 0.9332093226534830, // 0.0000000000000000, 0.3241690978469995, 0.0000000000000000, // -0.5835571766262024, 0.0000000000000000, // 0.8499240402166686, 0.0000000000000000, // -0.8754505566292561, 0.0000000000000000, 0.7967172481248119 - VCMP_U64(12, v2, 0x0, 0xbfe3152420f10f90, 0x0, 0x3feadd7aa9f60146, 0x0, + VCMP_U64(12, v8, 0x0, 0xbfe3152420f10f90, 0x0, 0x3feadd7aa9f60146, 0x0, 0x3feddcd9cc23cf06, 0x0, 0x3fd4bf2fbe681ba4, 0x0, 0xbfe2ac80199e9490, 0x0, 0x3feb3293e69a12ae, 0x0, 0xbfec03b0e2bf9ad6, 0x0, 0x3fe97eb52b9b5dac); diff --git a/sw/riscvTests/isa/rv64uv/vfsub.c b/sw/riscvTests/isa/rv64uv/vfsub.c index c5ec7a09..75363d65 100644 --- a/sw/riscvTests/isa/rv64uv/vfsub.c +++ b/sw/riscvTests/isa/rv64uv/vfsub.c @@ -10,31 +10,31 @@ // Simple random test with similar values + 1 subnormal void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.2161, 0.7432, 0.7871, 0.7583, -0.4546, -0.0478, 0.1260, // -0.4824, 0.9282, -0.6221, 0.6543, 0.3025, -0.1420, -0.7236, // 0.2333, -0.0269 - VLOAD_16(v4, 0xb2ea, 0x39f2, 0x3a4c, 0x3a11, 0xb746, 0xaa1f, 0x3008, 0xb7b8, + VLOAD_16(v16, 0xb2ea, 0x39f2, 0x3a4c, 0x3a11, 0xb746, 0xaa1f, 0x3008, 0xb7b8, 0x3b6d, 0xb8fa, 0x393c, 0x34d7, 0xb08b, 0xb9ca, 0x3377, 0xa6e5); // -0.3289, -0.8408, -0.1754, -0.8472, 0.7739, -0.9111, -0.3152, // 0.4519, -0.2537, 0.9287, -0.7163, -0.2318, 0.0615, -0.2563, // 0.1448, 0.6606 - VLOAD_16(v6, 0xb543, 0xbaba, 0xb19d, 0xbac7, 0x3a31, 0xbb4a, 0xb50b, 0x373b, + VLOAD_16(v24, 0xb543, 0xbaba, 0xb19d, 0xbac7, 0x3a31, 0xbb4a, 0xb50b, 0x373b, 0xb40f, 0x3b6e, 0xb9bb, 0xb36b, 0x2bde, 0xb41a, 0x30a2, 0x3949); - asm 
volatile("vfsub.vv v2, v4, v6"); + asm volatile("vfsub.vv v8, v16, v24"); // 0.1128, 1.5840, 0.9624, 1.6055, -1.2285, 0.8633, 0.4412, // -0.9346, 1.1816, -1.5508, 1.3711, 0.5342, -0.2034, // -0.4673, 0.0885, -0.6875 - VCMP_U16(1, v2, 0x2f38, 0x3e56, 0x3bb3, 0x3e6c, 0xbcea, 0x3ae8, 0x370f, + VCMP_U16(1, v8, 0x2f38, 0x3e56, 0x3bb3, 0x3e6c, 0xbcea, 0x3ae8, 0x370f, 0xbb7a, 0x3cba, 0xbe34, 0x3d7c, 0x3846, 0xb282, 0xb77a, 0x2daa, 0xb980); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.12869358, 0.96847999, -0.85811919, -0.21122381, // -0.05195865, 0.43910158, 0.86828148, -0.90407801, // 0.62089461, -0.65907389, 0.91886526, -0.57595438, // -0.35377914, -0.26657876, 0.49153560, 0.42637765 - VLOAD_32(v4, 0xbe03c840, 0x3f77ee4e, 0xbf5badb3, 0xbe584b0e, 0xbd54d298, + VLOAD_32(v16, 0xbe03c840, 0x3f77ee4e, 0xbf5badb3, 0xbe584b0e, 0xbd54d298, 0x3ee0d1ec, 0x3f5e47b2, 0xbf6771a8, 0x3f1ef2f3, 0xbf28b911, 0x3f6b3ac1, 0xbf1371bf, 0xbeb5228a, 0xbe887d03, 0x3efbaa8e, 0x3eda4e2c); @@ -42,30 +42,30 @@ void TEST_CASE1(void) { // -0.00441748, 0.72763014, 0.81834352, -0.49977919, // -0.94507313, -0.60766727, 0.21069343, 0.35644454, // -0.51639801, -0.74812186, -0.97028691, 0.42650157 - VLOAD_32(v6, 0xbf021a25, 0xbf11ab20, 0xbf62ecf7, 0x3f6949f4, 0xbb90c083, + VLOAD_32(v24, 0xbf021a25, 0xbf11ab20, 0xbf62ecf7, 0x3f6949f4, 0xbb90c083, 0x3f3a45f8, 0x3f517ef6, 0xbeffe30f, 0xbf71f050, 0xbf1b9015, 0x3e57c005, 0x3eb67fe6, 0xbf0432a9, 0xbf3f84ea, 0xbf7864b9, 0x3eda5e6a); - asm volatile("vfsub.vv v2, v4, v6"); + asm volatile("vfsub.vv v8, v16, v24"); // 0.37951785, 1.53749740, 0.02830911, -1.12250853, // -0.04754117, -0.28852856, 0.04993796, // -0.40429881, 1.56596780, -0.05140662, 0.70817184, // -0.93239892, 0.16261888, 0.48154309, 1.46182251, // -0.00012392 - VCMP_U32(2, v2, 0x3ec2502a, 0x3fc4ccb7, 0x3ce7e880, 0xbf8fae5c, 0xbd42ba88, + VCMP_U32(2, v8, 0x3ec2502a, 0x3fc4ccb7, 0x3ce7e880, 0xbf8fae5c, 0xbd42ba88, 0xbe93ba04, 0x3d4c8bc0, 0xbecf0041, 0x3fc871a2, 0xbd528fc0, 0x3f354ac0, 0xbf6eb1b2, 
0x3e268590, 0x3ef68cd1, 0x3fbb1d00, 0xb901f000); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.5053356652713634, -0.6291854947278097, 0.6181258713941662, // -0.6097328085365348, 0.8960683065358290, 0.1233825892982841, // -0.7071646124826323, -0.6783334309218909, 0.3533001486660008, // 0.4732651306122215, -0.7335080825789513, -0.9296500813876505, // 0.5349827137885166, -0.0621174552558810, -0.8122743533756343, // -0.8908485518923974 - VLOAD_64(v4, 0xbfe02bb5b37af91c, 0xbfe422499e5f271a, 0x3fe3c7afe84e61dc, + VLOAD_64(v16, 0xbfe02bb5b37af91c, 0xbfe422499e5f271a, 0x3fe3c7afe84e61dc, 0xbfe382ee60fece00, 0x3fecac9770f1b62e, 0x3fbf960059ee92f0, 0xbfe6a117ae700ba0, 0xbfe5b4e84fb2f9d4, 0x3fd69c783a0c5078, 0x3fde49f9d4944428, 0xbfe778e5f140e788, 0xbfedbfb18709140c, @@ -77,13 +77,13 @@ void TEST_CASE1(void) { // 0.8778238674058336, 0.9294006140978470, -0.8775508592745904, // 0.7472392658861982, -0.3880038279796372, -0.6483706997783654, // -0.1530785884604509 - VLOAD_64(v6, 0xbfe3556b82731260, 0x3fc5a6ff3fe2c608, 0xbfd8acfaee5fcdc0, + VLOAD_64(v24, 0xbfe3556b82731260, 0x3fc5a6ff3fe2c608, 0xbfd8acfaee5fcdc0, 0xbfed762b3b913c28, 0x3fe67b0770a53a4a, 0xbfefb2aaa9ceeb06, 0x3fee6f68a5fe3800, 0x3fd1a071594983a8, 0x3fe0dec527d80c9a, 0x3fec172214450060, 0x3fedbda65b4dd79c, 0xbfec14e58a252770, 0x3fe7e962522895aa, 0xbfd8d50e01f94d70, 0xbfe4bf73e8e77264, 0xbfc398144593e6c0); - asm volatile("vfsub.vv v2, v4, v6"); + asm volatile("vfsub.vv v8, v16, v24"); // 0.0988415758481911, -0.7983443408145550, 1.0036837449172462, // 0.3109421032890554, 0.1935501104197752, 1.1139424835327358, // -1.6582643174207199, -0.9537510803764819, @@ -91,7 +91,7 @@ void TEST_CASE1(void) { // -1.6629086966767983, -0.0520992221130601, // -0.2122565520976816, 0.3258863727237562, // -0.1639036535972689, -0.7377699634319466 - VCMP_U64(3, v2, 0x3fb94dae77c0ca20, 0xbfe98c096e57d89c, 0x3ff00f16afbf245e, + VCMP_U64(3, v8, 0x3fb94dae77c0ca20, 0xbfe98c096e57d89c, 0x3ff00f16afbf245e, 0x3fd3e679b524dc50, 
0x3fc8c6400131ef90, 0x3ff1d2b55a865eb2, 0xbffa88402a3721d0, 0xbfee8520fc57bba8, 0xbfc642242b479178, 0xbfd9e44a53f5bc98, 0xbffa9b4626475f92, 0xbfaaacbfce3ec9c0, @@ -103,32 +103,32 @@ void TEST_CASE1(void) { // Simple random test with similar values + 1 subnormal (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // -0.2161, 0.7432, 0.7871, 0.7583, -0.4546, -0.0478, 0.1260, // -0.4824, 0.9282, -0.6221, 0.6543, 0.3025, -0.1420, -0.7236, // 0.2333, -0.0269 - VLOAD_16(v4, 0xb2ea, 0x39f2, 0x3a4c, 0x3a11, 0xb746, 0xaa1f, 0x3008, 0xb7b8, + VLOAD_16(v16, 0xb2ea, 0x39f2, 0x3a4c, 0x3a11, 0xb746, 0xaa1f, 0x3008, 0xb7b8, 0x3b6d, 0xb8fa, 0x393c, 0x34d7, 0xb08b, 0xb9ca, 0x3377, 0xa6e5); // -0.3289, -0.8408, -0.1754, -0.8472, 0.7739, -0.9111, -0.3152, // 0.4519, -0.2537, 0.9287, -0.7163, -0.2318, 0.0615, -0.2563, // 0.1448, 0.6606 - VLOAD_16(v6, 0xb543, 0xbaba, 0xb19d, 0xbac7, 0x3a31, 0xbb4a, 0xb50b, 0x373b, + VLOAD_16(v24, 0xb543, 0xbaba, 0xb19d, 0xbac7, 0x3a31, 0xbb4a, 0xb50b, 0x373b, 0xb40f, 0x3b6e, 0xb9bb, 0xb36b, 0x2bde, 0xb41a, 0x30a2, 0x3949); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsub.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfsub.vv v8, v16, v24, v0.t"); // 0.0000, 1.5840, 0.0000, 1.6055, 0.0000, 0.8633, 0.0000, // -0.9346, 0.0000, -1.5508, 0.0000, 0.5342, 0.0000, // -0.4673, 0.0000, -0.6875 - VCMP_U16(4, v2, 0x0, 0x3e56, 0x0, 0x3e6c, 0x0, 0x3ae8, 0x0, 0xbb7a, 0x0, + VCMP_U16(4, v8, 0x0, 0x3e56, 0x0, 0x3e6c, 0x0, 0x3ae8, 0x0, 0xbb7a, 0x0, 0xbe34, 0x0, 0x3846, 0x0, 0xb77a, 0x0, 0xb980); - VSET(16, e32, m2); + VSET(16, e32, m8); // -0.12869358, 0.96847999, -0.85811919, -0.21122381, // -0.05195865, 0.43910158, 0.86828148, -0.90407801, // 0.62089461, -0.65907389, 0.91886526, -0.57595438, // -0.35377914, -0.26657876, 0.49153560, 0.42637765 - VLOAD_32(v4, 0xbe03c840, 0x3f77ee4e, 0xbf5badb3, 0xbe584b0e, 0xbd54d298, + VLOAD_32(v16, 0xbe03c840, 0x3f77ee4e, 0xbf5badb3, 0xbe584b0e, 
0xbd54d298, 0x3ee0d1ec, 0x3f5e47b2, 0xbf6771a8, 0x3f1ef2f3, 0xbf28b911, 0x3f6b3ac1, 0xbf1371bf, 0xbeb5228a, 0xbe887d03, 0x3efbaa8e, 0x3eda4e2c); @@ -136,30 +136,30 @@ void TEST_CASE2(void) { // -0.00441748, 0.72763014, 0.81834352, -0.49977919, // -0.94507313, -0.60766727, 0.21069343, 0.35644454, // -0.51639801, -0.74812186, -0.97028691, 0.42650157 - VLOAD_32(v6, 0xbf021a25, 0xbf11ab20, 0xbf62ecf7, 0x3f6949f4, 0xbb90c083, + VLOAD_32(v24, 0xbf021a25, 0xbf11ab20, 0xbf62ecf7, 0x3f6949f4, 0xbb90c083, 0x3f3a45f8, 0x3f517ef6, 0xbeffe30f, 0xbf71f050, 0xbf1b9015, 0x3e57c005, 0x3eb67fe6, 0xbf0432a9, 0xbf3f84ea, 0xbf7864b9, 0x3eda5e6a); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsub.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfsub.vv v8, v16, v24, v0.t"); // 0.00000000, 1.53749740, 0.00000000, -1.12250853, // 0.00000000, -0.28852856, 0.00000000, -0.40429881, // 0.00000000, -0.05140662, 0.00000000, -0.93239892, // 0.00000000, 0.48154309, 0.00000000, -0.00012392 - VCMP_U32(5, v2, 0x0, 0x3fc4ccb7, 0x0, 0xbf8fae5c, 0x0, 0xbe93ba04, 0x0, + VCMP_U32(5, v8, 0x0, 0x3fc4ccb7, 0x0, 0xbf8fae5c, 0x0, 0xbe93ba04, 0x0, 0xbecf0041, 0x0, 0xbd528fc0, 0x0, 0xbf6eb1b2, 0x0, 0x3ef68cd1, 0x0, 0xb901f000); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.5053356652713634, -0.6291854947278097, 0.6181258713941662, // -0.6097328085365348, 0.8960683065358290, 0.1233825892982841, // -0.7071646124826323, -0.6783334309218909, 0.3533001486660008, // 0.4732651306122215, -0.7335080825789513, -0.9296500813876505, // 0.5349827137885166, -0.0621174552558810, -0.8122743533756343, // -0.8908485518923974 - VLOAD_64(v4, 0xbfe02bb5b37af91c, 0xbfe422499e5f271a, 0x3fe3c7afe84e61dc, + VLOAD_64(v16, 0xbfe02bb5b37af91c, 0xbfe422499e5f271a, 0x3fe3c7afe84e61dc, 0xbfe382ee60fece00, 0x3fecac9770f1b62e, 0x3fbf960059ee92f0, 0xbfe6a117ae700ba0, 0xbfe5b4e84fb2f9d4, 0x3fd69c783a0c5078, 0x3fde49f9d4944428, 0xbfe778e5f140e788, 0xbfedbfb18709140c, @@ -171,22 +171,22 @@ void TEST_CASE2(void) { 
// 0.8778238674058336, 0.9294006140978470, -0.8775508592745904, // 0.7472392658861982, -0.3880038279796372, -0.6483706997783654, // -0.1530785884604509 - VLOAD_64(v6, 0xbfe3556b82731260, 0x3fc5a6ff3fe2c608, 0xbfd8acfaee5fcdc0, + VLOAD_64(v24, 0xbfe3556b82731260, 0x3fc5a6ff3fe2c608, 0xbfd8acfaee5fcdc0, 0xbfed762b3b913c28, 0x3fe67b0770a53a4a, 0xbfefb2aaa9ceeb06, 0x3fee6f68a5fe3800, 0x3fd1a071594983a8, 0x3fe0dec527d80c9a, 0x3fec172214450060, 0x3fedbda65b4dd79c, 0xbfec14e58a252770, 0x3fe7e962522895aa, 0xbfd8d50e01f94d70, 0xbfe4bf73e8e77264, 0xbfc398144593e6c0); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsub.vv v2, v4, v6, v0.t"); + VCLEAR(v8); + asm volatile("vfsub.vv v8, v16, v24, v0.t"); // 0.0000000000000000, -0.7983443408145550, 0.0000000000000000, // 0.3109421032890554, 0.0000000000000000, 1.1139424835327358, // 0.0000000000000000, -0.9537510803764819, 0.0000000000000000, // -0.4045587367936121, 0.0000000000000000, // -0.0520992221130601, 0.0000000000000000, 0.3258863727237562, // 0.0000000000000000, -0.7377699634319466 - VCMP_U64(6, v2, 0x0, 0xbfe98c096e57d89c, 0x0, 0x3fd3e679b524dc50, 0x0, + VCMP_U64(6, v8, 0x0, 0xbfe98c096e57d89c, 0x0, 0x3fd3e679b524dc50, 0x0, 0x3ff1d2b55a865eb2, 0x0, 0xbfee8520fc57bba8, 0x0, 0xbfd9e44a53f5bc98, 0x0, 0xbfaaacbfce3ec9c0, 0x0, 0x3fd4db528443e0dc, 0x0, 0xbfe79bcfc11d2996); @@ -195,55 +195,55 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.9727, 0.7676, 0.0876, -0.4526, -0.1158, 0.6221, 0.7612, // -0.7539, 0.3875, -0.2002, 0.2168, -0.1055, -0.4348, 0.9795, // 0.3650, 0.5171 - VLOAD_16(v4, 0x3bc8, 0x3a24, 0x2d9c, 0xb73e, 0xaf6a, 0x38fa, 0x3a17, 0xba08, + VLOAD_16(v16, 0x3bc8, 0x3a24, 0x2d9c, 0xb73e, 0xaf6a, 0x38fa, 0x3a17, 0xba08, 0x3633, 0xb268, 0x32f0, 0xaec0, 0xb6f5, 0x3bd6, 0x35d7, 0x3823); float fscalar_16; // -0.8667 BOX_HALF_IN_FLOAT(fscalar_16, 0xbaef); - asm volatile("vfsub.vf v2, v4, %[A]" ::[A] 
"f"(fscalar_16)); + asm volatile("vfsub.vf v8, v16, %[A]" ::[A] "f"(fscalar_16)); // 1.8398, 1.6348, 0.9541, 0.4141, 0.7510, 1.4883, 1.6279, // 0.1128, 1.2539, 0.6665, 1.0840, 0.7612, // 0.4319, 1.8457, 1.2314, 1.3838 - VCMP_U16(7, v2, 0x3f5c, 0x3e8a, 0x3ba2, 0x36a0, 0x3a02, 0x3df4, 0x3e83, + VCMP_U16(7, v8, 0x3f5c, 0x3e8a, 0x3ba2, 0x36a0, 0x3a02, 0x3df4, 0x3e83, 0x2f38, 0x3d04, 0x3955, 0x3c56, 0x3a17, 0x36e9, 0x3f62, 0x3ced, 0x3d89); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.85933530, -0.31821987, 0.18340160, -0.58902484, // -0.83326858, -0.98716992, -0.74268776, -0.50486410, // 0.91496444, -0.46108878, -0.75265163, -0.17853038, // 0.09909800, -0.22828153, 0.31248060, 0.70940411 - VLOAD_32(v4, 0x3f5bfd66, 0xbea2edb7, 0x3e3bcda1, 0xbf16ca55, 0xbf555117, + VLOAD_32(v16, 0x3f5bfd66, 0xbea2edb7, 0x3e3bcda1, 0xbf16ca55, 0xbf555117, 0xbf7cb72b, 0xbf3e20c9, 0xbf013ec6, 0x3f6a3b1c, 0xbeec13d4, 0xbf40adc7, 0xbe36d0ab, 0x3dcaf3e5, 0xbe69c2a2, 0x3e9ffd75, 0x3f359b82); float fscalar_32; // -0.16449618 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbe2871b0); - asm volatile("vfsub.vf v2, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfsub.vf v8, v16, %[A]" ::[A] "f"(fscalar_32)); // 1.02383149, -0.15372369, 0.34789777, -0.42452866, // -0.66877240, -0.82267374, -0.57819158, // -0.34036791, 1.07946062, -0.29659259, -0.58815545, // -0.01403420, 0.26359418, -0.06378534, 0.47697678, // 0.87390029 - VCMP_U32(8, v2, 0x3f830ce9, 0xbe1d69be, 0x3eb21fa8, 0xbed95bd2, 0xbf2b34ab, + VCMP_U32(8, v8, 0x3f830ce9, 0xbe1d69be, 0x3eb21fa8, 0xbed95bd2, 0xbf2b34ab, 0xbf529abf, 0xbf14045d, 0xbeae44b4, 0x3f8a2bc4, 0xbe97dafc, 0xbf16915b, 0xbc65efb0, 0x3e86f5d1, 0xbd82a1e4, 0x3ef4364d, 0x3f5fb7ee); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.5270370833343294, -0.3892108170289901, 0.3278104985181656, // 0.8978904717616114, 0.2838388271052681, 0.1890152734369528, // -0.5587120809764872, 0.0329118609761476, 0.2661042157694802, // 0.4284631655495406, 0.3525175873513684, -0.7218762878034530, 
// -0.1902187411555145, 0.0621279131630217, // -0.3175600204168794, 0.2653267716685161 - VLOAD_64(v4, 0xbfe0dd7cdf9667ce, 0xbfd8e8d47c98e498, 0x3fd4fad8e29af14c, + VLOAD_64(v16, 0xbfe0dd7cdf9667ce, 0xbfd8e8d47c98e498, 0x3fd4fad8e29af14c, 0x3fecbb84cc736570, 0x3fd22a6a53f022d0, 0x3fc831a708ed9848, 0xbfe1e0f82875925c, 0x3fa0d9d2cd160b00, 0x3fd107d9fa03b074, 0x3fdb6bf0c4e4dbb8, 0x3fd68fa5ed3c17c4, 0xbfe7199c4cfbf578, @@ -252,14 +252,14 @@ void TEST_CASE3(void) { double dscalar_64; // -0.3447987329466446 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd6112eb46d5120); - asm volatile("vfsub.vf v2, v4, %[A]" ::[A] "f"(dscalar_64)); + asm volatile("vfsub.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); // -0.1822383503876848, -0.0444120840823454, // 0.6726092314648102, 1.2426892047082561, 0.6286375600519127, // 0.5338140063835974, -0.2139133480298425, 0.3777105939227923, // 0.6109029487161248, 0.7732618984961852, 0.6973163202980130, // -0.3770775548568084, 0.1545799917911301, 0.4069266461096663, // 0.0272387125297653, 0.6101255046151608 - VCMP_U64(9, v2, 0xbfc75396157efcf8, 0xbfa6bd2e415c9bc0, 0x3fe58603cb842136, + VCMP_U64(9, v8, 0xbfc75396157efcf8, 0xbfa6bd2e415c9bc0, 0x3fe58603cb842136, 0x3ff3e20e13550700, 0x3fe41dcc842eb9f8, 0x3fe115011c720ea2, 0xbfcb618338fba730, 0x3fd82c690e101280, 0x3fe38c84573880ca, 0x3fe8be8fbca9166c, 0x3fe6506a50d4b472, 0xbfd82209e58a99d0, @@ -270,30 +270,30 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m8); // 0.9727, 0.7676, 0.0876, -0.4526, -0.1158, 0.6221, 0.7612, // -0.7539, 0.3875, -0.2002, 0.2168, -0.1055, -0.4348, // 0.9795, 0.3650, 0.5171 - VLOAD_16(v4, 0x3bc8, 0x3a24, 0x2d9c, 0xb73e, 0xaf6a, 0x38fa, 0x3a17, 0xba08, + VLOAD_16(v16, 0x3bc8, 0x3a24, 0x2d9c, 0xb73e, 0xaf6a, 0x38fa, 0x3a17, 0xba08, 0x3633, 0xb268, 0x32f0, 0xaec0, 0xb6f5, 0x3bd6, 0x35d7, 0x3823); float fscalar_16; // -0.8667 BOX_HALF_IN_FLOAT(fscalar_16, 0xbaef); VLOAD_8(v0, 0xAA, 
0xAA); - VCLEAR(v2); - asm volatile("vfsub.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v8); + asm volatile("vfsub.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.0000, 1.6348, 0.0000, 0.4141, 0.0000, 1.4883, 0.0000, // 0.1128, 0.0000, 0.6665, 0.0000, 0.7612, 0.0000, 1.8457, // 0.0000, 1.3838 - VCMP_U16(10, v2, 0x0, 0x3e8a, 0x0, 0x36a0, 0x0, 0x3df4, 0x0, 0x2f38, 0x0, + VCMP_U16(10, v8, 0x0, 0x3e8a, 0x0, 0x36a0, 0x0, 0x3df4, 0x0, 0x2f38, 0x0, 0x3955, 0x0, 0x3a17, 0x0, 0x3f62, 0x0, 0x3d89); - VSET(16, e32, m2); + VSET(16, e32, m8); // 0.85933530, -0.31821987, 0.18340160, -0.58902484, // -0.83326858, -0.98716992, -0.74268776, -0.50486410, // 0.91496444, -0.46108878, -0.75265163, -0.17853038, // 0.09909800, -0.22828153, 0.31248060, 0.70940411 - VLOAD_32(v4, 0x3f5bfd66, 0xbea2edb7, 0x3e3bcda1, 0xbf16ca55, 0xbf555117, + VLOAD_32(v16, 0x3f5bfd66, 0xbea2edb7, 0x3e3bcda1, 0xbf16ca55, 0xbf555117, 0xbf7cb72b, 0xbf3e20c9, 0xbf013ec6, 0x3f6a3b1c, 0xbeec13d4, 0xbf40adc7, 0xbe36d0ab, 0x3dcaf3e5, 0xbe69c2a2, 0x3e9ffd75, 0x3f359b82); @@ -301,25 +301,25 @@ void TEST_CASE4(void) { // -0.16449618 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xbe2871b0); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsub.vf v2, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v8); + asm volatile("vfsub.vf v8, v16, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.00000000, -0.15372369, 0.00000000, -0.42452866, // 0.00000000, -0.82267374, 0.00000000, -0.34036791, // 0.00000000, -0.29659259, 0.00000000, -0.01403420, // 0.00000000, -0.06378534, 0.00000000, 0.87390029 - VCMP_U32(11, v2, 0x0, 0xbe1d69be, 0x0, 0xbed95bd2, 0x0, 0xbf529abf, 0x0, + VCMP_U32(11, v8, 0x0, 0xbe1d69be, 0x0, 0xbed95bd2, 0x0, 0xbf529abf, 0x0, 0xbeae44b4, 0x0, 0xbe97dafc, 0x0, 0xbc65efb0, 0x0, 0xbd82a1e4, 0x0, 0x3f5fb7ee); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); // -0.5270370833343294, -0.3892108170289901, // 0.3278104985181656, 0.8978904717616114, 0.2838388271052681, // 0.1890152734369528, -0.5587120809764872, 
0.0329118609761476, // 0.2661042157694802, 0.4284631655495406, 0.3525175873513684, // -0.7218762878034530, -0.1902187411555145, // 0.0621279131630217, -0.3175600204168794, 0.2653267716685161 - VLOAD_64(v4, 0xbfe0dd7cdf9667ce, 0xbfd8e8d47c98e498, 0x3fd4fad8e29af14c, + VLOAD_64(v16, 0xbfe0dd7cdf9667ce, 0xbfd8e8d47c98e498, 0x3fd4fad8e29af14c, 0x3fecbb84cc736570, 0x3fd22a6a53f022d0, 0x3fc831a708ed9848, 0xbfe1e0f82875925c, 0x3fa0d9d2cd160b00, 0x3fd107d9fa03b074, 0x3fdb6bf0c4e4dbb8, 0x3fd68fa5ed3c17c4, 0xbfe7199c4cfbf578, @@ -329,15 +329,15 @@ void TEST_CASE4(void) { // -0.3447987329466446 BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd6112eb46d5120); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vfsub.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCLEAR(v8); + asm volatile("vfsub.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); // 0.0000000000000000, -0.0444120840823454, 0.0000000000000000, // 1.2426892047082561, 0.0000000000000000, 0.5338140063835974, // 0.0000000000000000, 0.3777105939227923, 0.0000000000000000, // 0.7732618984961852, 0.0000000000000000, // -0.3770775548568084, 0.0000000000000000, // 0.4069266461096663, 0.0000000000000000, 0.6101255046151608 - VCMP_U64(12, v2, 0x0, 0xbfa6bd2e415c9bc0, 0x0, 0x3ff3e20e13550700, 0x0, + VCMP_U64(12, v8, 0x0, 0xbfa6bd2e415c9bc0, 0x0, 0x3ff3e20e13550700, 0x0, 0x3fe115011c720ea2, 0x0, 0x3fd82c690e101280, 0x0, 0x3fe8be8fbca9166c, 0x0, 0xbfd82209e58a99d0, 0x0, 0x3fda0b160f3a5dec, 0x0, 0x3fe38625ec18e234); diff --git a/sw/riscvTests/isa/rv64uv/vfwadd.c b/sw/riscvTests/isa/rv64uv/vfwadd.c index e99c42cd..71e37f6b 100644 --- a/sw/riscvTests/isa/rv64uv/vfwadd.c +++ b/sw/riscvTests/isa/rv64uv/vfwadd.c @@ -11,7 +11,7 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // 10.5312, 79.6250, 12.2891, 89.5000, 62.5938, 53.5625, // -37.3438, -48.3750, 49.3438, 1.1475, -79.6250, 52.0000, // -61.0312, 5.1641, 48.7500, -25.6250 @@ -21,20 +21,20 @@ void TEST_CASE1(void) { // 
-7.0742, 20.2656, 72.3125, 88.5625, 36.0000, 96.6250, 70.4375, // -87.6875, 87.6875, 8.7891, -18.8906, -40.1562, 88.8750, // -55.5312, 2.1875 - VLOAD_16(v6, 0x5443, 0xc713, 0x4d11, 0x5485, 0x5589, 0x5080, 0x560a, 0x5467, + VLOAD_16(v8, 0x5443, 0xc713, 0x4d11, 0x5485, 0x5589, 0x5080, 0x560a, 0x5467, 0xd57b, 0x557b, 0x4865, 0xccb9, 0xd105, 0x558e, 0xd2f1, 0x4060); - asm volatile("vfwadd.vv v8, v4, v6"); + asm volatile("vfwadd.vv v16, v4, v8"); // 78.71875000, 72.55078125, 32.55468750, 161.81250000, // 151.15625000, 89.56250000, 59.28125000, 22.06250000, // -38.34375000, 88.83496094, -70.83593750, 33.10937500, // -101.18750000, 94.03906250, -6.78125000, -23.43750000 - VCMP_U32(1, v8, 0x429d7000, 0x42911a00, 0x42023800, 0x4321d000, 0x43172800, + VCMP_U32(1, v16, 0x429d7000, 0x42911a00, 0x42023800, 0x4321d000, 0x43172800, 0x42b32000, 0x426d2000, 0x41b08000, 0xc2196000, 0x42b1ab80, 0xc28dac00, 0x42047000, 0xc2ca6000, 0x42bc1400, 0xc0d90000, 0xc1bb8000); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // -35386.17187500, -52670.69531250, 69391.31250000, // 3219.84130859, 74596.35156250, -45488.69921875, // 6598.72949219, 20221.24609375, 75105.62500000, @@ -51,11 +51,11 @@ void TEST_CASE1(void) { // 38042.19140625, -61343.24218750, 76844.01562500, // 26642.50390625, 91893.05468750, 88349.72656250, // 29134.96093750 - VLOAD_32(v6, 0xc7b19c9c, 0xc78d3489, 0x46936664, 0xc40100fa, 0xc64fd475, + VLOAD_32(v8, 0xc7b19c9c, 0xc78d3489, 0x46936664, 0xc40100fa, 0xc64fd475, 0x47a65ab5, 0xc6fa9eb6, 0xc6b9b538, 0x47197f6f, 0x47149a31, 0xc76f9f3e, 0x47961602, 0x46d02502, 0x47b37a87, 0x47ac8edd, 0x46e39dec); - asm volatile("vfwadd.vv v8, v4, v6"); + asm volatile("vfwadd.vv v16, v4, v8"); // -126323.3906250000000000, -124967.7656250000000000, // 88258.5078125000000000, 2703.8260498046875000, // 61295.2373046875000000, 39684.7148437500000000, @@ -64,7 +64,7 @@ void TEST_CASE1(void) { // -28499.0507812500000000, 3373.8281250000000000, // -1735.1699218750000000, 
153891.1640625000000000, // 113041.2695312500000000, -738.4941406250000000 - VCMP_U64(2, v8, 0xc0fed73640000000, 0xc0fe827c40000000, 0x40f58c2820000000, + VCMP_U64(2, v16, 0xc0fed73640000000, 0xc0fe827c40000000, 0x40f58c2820000000, 0x40a51fa6f0000000, 0x40edede798000000, 0x40e36096e0000000, 0xc0d8e22810000000, 0xc0abbaba00000000, 0x40fbee10f0000000, 0x40f9bb5030000000, 0xc0dbd4c340000000, 0x40aa5ba800000000, @@ -76,7 +76,7 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // 10.5312, 79.6250, 12.2891, 89.5000, 62.5938, 53.5625, // -37.3438, -48.3750, 49.3438, 1.1475, -79.6250, 52.0000, // -61.0312, 5.1641, 48.7500, -25.6250 @@ -86,21 +86,21 @@ void TEST_CASE2(void) { // -7.0742, 20.2656, 72.3125, 88.5625, 36.0000, 96.6250, 70.4375, // -87.6875, 87.6875, 8.7891, -18.8906, -40.1562, 88.8750, // -55.5312, 2.1875 - VLOAD_16(v6, 0x5443, 0xc713, 0x4d11, 0x5485, 0x5589, 0x5080, 0x560a, 0x5467, + VLOAD_16(v8, 0x5443, 0xc713, 0x4d11, 0x5485, 0x5589, 0x5080, 0x560a, 0x5467, 0xd57b, 0x557b, 0x4865, 0xccb9, 0xd105, 0x558e, 0xd2f1, 0x4060); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwadd.vv v8, v4, v6, v0.t"); + VCLEAR(v16); + asm volatile("vfwadd.vv v16, v4, v8, v0.t"); // 0.00000000, 72.55078125, 0.00000000, 161.81250000, // 0.00000000, 89.56250000, 0.00000000, 22.06250000, // 0.00000000, 88.83496094, 0.00000000, 33.10937500, // 0.00000000, 94.03906250, 0.00000000, -23.43750000 - VCMP_U32(3, v8, 0x0, 0x42911a00, 0x0, 0x4321d000, 0x0, 0x42b32000, 0x0, + VCMP_U32(3, v16, 0x0, 0x42911a00, 0x0, 0x4321d000, 0x0, 0x42b32000, 0x0, 0x41b08000, 0x0, 0x42b1ab80, 0x0, 0x42047000, 0x0, 0x42bc1400, 0x0, 0xc1bb8000); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // -35386.17187500, -52670.69531250, 69391.31250000, // 3219.84130859, 74596.35156250, -45488.69921875, // 6598.72949219, 20221.24609375, 75105.62500000, @@ -117,13 +117,13 
@@ void TEST_CASE2(void) { // 38042.19140625, -61343.24218750, 76844.01562500, // 26642.50390625, 91893.05468750, 88349.72656250, // 29134.96093750 - VLOAD_32(v6, 0xc7b19c9c, 0xc78d3489, 0x46936664, 0xc40100fa, 0xc64fd475, + VLOAD_32(v8, 0xc7b19c9c, 0xc78d3489, 0x46936664, 0xc40100fa, 0xc64fd475, 0x47a65ab5, 0xc6fa9eb6, 0xc6b9b538, 0x47197f6f, 0x47149a31, 0xc76f9f3e, 0x47961602, 0x46d02502, 0x47b37a87, 0x47ac8edd, 0x46e39dec); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwadd.vv v8, v4, v6, v0.t"); + VCLEAR(v16); + asm volatile("vfwadd.vv v16, v4, v8, v0.t"); // 0.0000000000000000, -124967.7656250000000000, // 0.0000000000000000, 2703.8260498046875000, // 0.0000000000000000, 39684.7148437500000000, @@ -132,7 +132,7 @@ void TEST_CASE2(void) { // 0.0000000000000000, 3373.8281250000000000, // 0.0000000000000000, 153891.1640625000000000, // 0.0000000000000000, -738.4941406250000000 - VCMP_U64(4, v8, 0x0, 0xc0fe827c40000000, 0x0, 0x40a51fa6f0000000, 0x0, + VCMP_U64(4, v16, 0x0, 0xc0fe827c40000000, 0x0, 0x40a51fa6f0000000, 0x0, 0x40e36096e0000000, 0x0, 0xc0abbaba00000000, 0x0, 0x40f9bb5030000000, 0x0, 0x40aa5ba800000000, 0x0, 0x4102c91950000000, 0x0, 0xc08713f400000000); @@ -141,7 +141,7 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // 12.5859 BOX_HALF_IN_FLOAT(fscalar_16, 0x4a4b); @@ -151,19 +151,19 @@ void TEST_CASE3(void) { // -83.6875 VLOAD_16(v4, 0x4038, 0x5367, 0x5597, 0x54b4, 0xc413, 0x540c, 0xd52f, 0xd54f, 0xd202, 0xc61a, 0x4f7e, 0x494f, 0x5293, 0xc840, 0x5468, 0xd53b); - asm volatile("vfwadd.vf v8, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfwadd.vf v16, v4, %[A]" ::[A] "f"(fscalar_16)); // 14.69531250, 71.80468750, // 102.02343750, 87.83593750, 8.51171875, 77.33593750, // -70.35156250, -72.35156250, // -35.47656250, 6.48437500, 42.55468750, 23.20312500, 65.17968750, // 4.08593750, 83.08593750, -71.10156250 - VCMP_U32(5, 
v8, 0x416b2000, 0x428f9c00, 0x42cc0c00, 0x42afac00, 0x41083000, + VCMP_U32(5, v16, 0x416b2000, 0x428f9c00, 0x42cc0c00, 0x42afac00, 0x41083000, 0x429aac00, 0xc28cb400, 0xc290b400, 0xc20de800, 0x40cf8000, 0x422a3800, 0x41b9a000, 0x42825c00, 0x4082c000, 0x42a62c00, 0xc28e3400); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // -497871.25000000 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xc8f319e8); @@ -177,7 +177,7 @@ void TEST_CASE3(void) { 0x488da31d, 0x4901961c, 0x485d9f5c, 0x488b6ed5, 0xc970fed5, 0xc8cec087, 0xc8095e14, 0xc94e3244, 0x490643c8, 0x4894ddfc, 0xc92009e7); - asm volatile("vfwadd.vf v8, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfwadd.vf v16, v4, %[A]" ::[A] "f"(fscalar_32)); // -1066378.2500000000000000, -39875.2812500000000000, // -88318.0625000000000000, -1295738.1250000000000000, // -234350.7500000000000000, -207798.3437500000000000, @@ -186,7 +186,7 @@ void TEST_CASE3(void) { // -921299.4687500000000000, -638535.5625000000000000, // -1342451.5000000000000000, 52077.2500000000000000, // -192991.3750000000000000, -1153389.6875000000000000 - VCMP_U64(6, v8, 0xc130458a40000000, 0xc0e3786900000000, 0xc0f58fe100000000, + VCMP_U64(6, v16, 0xc130458a40000000, 0xc0e3786900000000, 0xc0f58fe100000000, 0xc133c57a20000000, 0xc10c9b7600000000, 0xc1095db2c0000000, 0x40e0125000000000, 0xc110894740000000, 0xc109eac4c0000000, 0xc136a8bc90000000, 0xc12c1da6f0000000, 0xc1237c8f20000000, @@ -197,7 +197,7 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // 12.5859 BOX_HALF_IN_FLOAT(fscalar_16, 0x4a4b); @@ -208,18 +208,18 @@ void TEST_CASE4(void) { VLOAD_16(v4, 0x4038, 0x5367, 0x5597, 0x54b4, 0xc413, 0x540c, 0xd52f, 0xd54f, 0xd202, 0xc61a, 0x4f7e, 0x494f, 0x5293, 0xc840, 0x5468, 0xd53b); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwadd.vf v8, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v16); + asm 
volatile("vfwadd.vf v16, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.00000000, 71.80468750, 0.00000000, 87.83593750, // 0.00000000, 77.33593750, 0.00000000, -72.35156250, // 0.00000000, 6.48437500, 0.00000000, 23.20312500, // 0.00000000, 4.08593750, 0.00000000, -71.10156250 - VCMP_U32(7, v8, 0x0, 0x428f9c00, 0x0, 0x42afac00, 0x0, 0x429aac00, 0x0, + VCMP_U32(7, v16, 0x0, 0x428f9c00, 0x0, 0x42afac00, 0x0, 0x429aac00, 0x0, 0xc290b400, 0x0, 0x40cf8000, 0x0, 0x41b9a000, 0x0, 0x4082c000, 0x0, 0xc28e3400); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // -497871.25000000 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xc8f319e8); @@ -234,8 +234,8 @@ void TEST_CASE4(void) { 0xc8cec087, 0xc8095e14, 0xc94e3244, 0x490643c8, 0x4894ddfc, 0xc92009e7); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwadd.vf v8, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v16); + asm volatile("vfwadd.vf v16, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.0000000000000000, -39875.2812500000000000, // 0.0000000000000000, -1295738.1250000000000000, // 0.0000000000000000, -207798.3437500000000000, @@ -244,7 +244,7 @@ void TEST_CASE4(void) { // 0.0000000000000000, -638535.5625000000000000, // 0.0000000000000000, 52077.2500000000000000, // 0.0000000000000000, -1153389.6875000000000000 - VCMP_U64(8, v8, 0x0, 0xc0e3786900000000, 0x0, 0xc133c57a20000000, 0x0, + VCMP_U64(8, v16, 0x0, 0xc0e3786900000000, 0x0, 0xc133c57a20000000, 0x0, 0xc1095db2c0000000, 0x0, 0xc110894740000000, 0x0, 0xc136a8bc90000000, 0x0, 0xc1237c8f20000000, 0x0, 0x40e96da800000000, 0x0, 0xc131996db0000000); @@ -253,7 +253,7 @@ void TEST_CASE4(void) { // Simple random test with similar values void TEST_CASE5(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -4.22890615, 94.35176849, -2.66183305, 81.53971863, // -30.80995941, -61.45680237, 53.70308304, 26.43629074, // -50.49792862, 12.57134342, -18.77090454, -0.50017655, @@ -266,20 +266,20 @@ void TEST_CASE5(void) { // -22.7500, 51.3438, 8.8594, 23.0938, 
32.6875, 71.7500, 7.6758, // -12.8047, 98.0625, -8.7734, -73.5625, // -59.0312, 44.9688, 63.8438, 30.1406 - VLOAD_16(v6, 0xd0c6, 0xcdb0, 0x526b, 0x486e, 0x4dc6, 0x5016, 0x547c, 0x47ad, + VLOAD_16(v8, 0xd0c6, 0xcdb0, 0x526b, 0x486e, 0x4dc6, 0x5016, 0x547c, 0x47ad, 0xca67, 0x5621, 0xc863, 0xd499, 0xd361, 0x519f, 0x53fb, 0x4f89); - asm volatile("vfwadd.wv v8, v4, v6"); + asm volatile("vfwadd.wv v16, v4, v8"); // -42.41640472, 71.60176849, 48.68191528, 90.39909363, // -7.71620941, -28.76930237, 125.45307922, 34.11207199, // -63.30261612, 110.63384247, -27.54434204, -74.06267548, // -92.74699402, 59.61531448, 153.41766357, 124.46500397 - VCMP_U32(9, v8, 0xc229aa66, 0x428f341b, 0x4242ba48, 0x42b4cc56, 0xc0f6eb30, + VCMP_U32(9, v16, 0xc229aa66, 0x428f341b, 0x4242ba48, 0x42b4cc56, 0xc0f6eb30, 0xc1e62788, 0x42fae7fa, 0x420872c3, 0xc27d35e1, 0x42dd4487, 0xc1dc5ad0, 0xc2942017, 0xc2b97e76, 0x426e7615, 0x43196aec, 0x42f8ee15); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // -55997.9824854041071376, 64501.1750668793683872, // -29542.1742966430028901, -97235.1376669598394074, // -76290.1568635256844573, -53719.7602741207738291, @@ -300,11 +300,11 @@ void TEST_CASE5(void) { // -46430.30078125, 30247.85937500, -94111.64843750, // 89016.30468750, -52090.74609375, 72764.65625000, // -47109.86328125 - VLOAD_32(v6, 0xc78fa064, 0x47606ea4, 0xc6ff6be9, 0xc75b8d18, 0x464386d6, + VLOAD_32(v8, 0xc78fa064, 0x47606ea4, 0xc6ff6be9, 0xc75b8d18, 0x464386d6, 0x46a2f5a5, 0x463ff3c6, 0x4770b95a, 0xc639d572, 0xc7355e4d, 0x46ec4fb8, 0xc7b7cfd3, 0x47addc27, 0xc74b7abf, 0x478e1e54, 0xc73805dd); - asm volatile("vfwadd.wv v8, v4, v6"); + asm volatile("vfwadd.wv v16, v4, v8"); // -129534.7637354041071376, 121955.8156918793683872, // -62236.1293747680028901, -153440.2314169598394074, // -63776.4478791506844573, -32860.9380084957738291, @@ -313,7 +313,7 @@ void TEST_CASE5(void) { // -56940.3199660585087258, -74256.3294054225261789, // 106525.8939759960630909, -21905.3437088813807350, // 
126968.0232140090665780, -104994.2581431879953016 - VCMP_U64(10, v8, 0xc0ff9fec38429d77, 0x40fdc63d0d12ed98, 0xc0ee638423d68db0, + VCMP_U64(10, v16, 0xc0ff9fec38429d77, 0x40fdc63d0d12ed98, 0xc0ee638423d68db0, 0xc102bb01d9f12292, 0xc0ef240e5506a818, 0xc0e00b9e042a6497, 0xc0ed07c03cf1f908, 0x4100cf868295a558, 0x40f2a13ce859ab28, 0xc0ebe25e8231134e, 0xc0ebcd8a3d2975a4, 0xc0f22105453e9ece, @@ -325,7 +325,7 @@ void TEST_CASE5(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE6(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -4.22890615, 94.35176849, -2.66183305, 81.53971863, // -30.80995941, -61.45680237, 53.70308304, 26.43629074, // -50.49792862, 12.57134342, -18.77090454, -0.50017655, @@ -338,21 +338,21 @@ void TEST_CASE6(void) { // -22.7500, 51.3438, 8.8594, 23.0938, 32.6875, 71.7500, 7.6758, // -12.8047, 98.0625, -8.7734, -73.5625, // -59.0312, 44.9688, 63.8438, 30.1406 - VLOAD_16(v6, 0xd0c6, 0xcdb0, 0x526b, 0x486e, 0x4dc6, 0x5016, 0x547c, 0x47ad, + VLOAD_16(v8, 0xd0c6, 0xcdb0, 0x526b, 0x486e, 0x4dc6, 0x5016, 0x547c, 0x47ad, 0xca67, 0x5621, 0xc863, 0xd499, 0xd361, 0x519f, 0x53fb, 0x4f89); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwadd.wv v8, v4, v6, v0.t"); + VCLEAR(v16); + asm volatile("vfwadd.wv v16, v4, v8, v0.t"); // 0.00000000, 71.60176849, 0.00000000, 90.39909363, // 0.00000000, -28.76930237, 0.00000000, 34.11207199, // 0.00000000, 110.63384247, 0.00000000, -74.06267548, // 0.00000000, 59.61531448, 0.00000000, 124.46500397 - VCMP_U32(11, v8, 0x0, 0x428f341b, 0x0, 0x42b4cc56, 0x0, 0xc1e62788, 0x0, + VCMP_U32(11, v16, 0x0, 0x428f341b, 0x0, 0x42b4cc56, 0x0, 0xc1e62788, 0x0, 0x420872c3, 0x0, 0x42dd4487, 0x0, 0xc2942017, 0x0, 0x426e7615, 0x0, 0x42f8ee15); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // -55997.9824854041071376, 64501.1750668793683872, // -29542.1742966430028901, -97235.1376669598394074, // -76290.1568635256844573, -53719.7602741207738291, @@ -373,13 
+373,13 @@ void TEST_CASE6(void) { // -46430.30078125, 30247.85937500, -94111.64843750, // 89016.30468750, -52090.74609375, 72764.65625000, // -47109.86328125 - VLOAD_32(v6, 0xc78fa064, 0x47606ea4, 0xc6ff6be9, 0xc75b8d18, 0x464386d6, + VLOAD_32(v8, 0xc78fa064, 0x47606ea4, 0xc6ff6be9, 0xc75b8d18, 0x464386d6, 0x46a2f5a5, 0x463ff3c6, 0x4770b95a, 0xc639d572, 0xc7355e4d, 0x46ec4fb8, 0xc7b7cfd3, 0x47addc27, 0xc74b7abf, 0x478e1e54, 0xc73805dd); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwadd.wv v8, v4, v6, v0.t"); + VCLEAR(v16); + asm volatile("vfwadd.wv v16, v4, v8, v0.t"); // 0.0000000000000000, 121955.8156918793683872, // 0.0000000000000000, -153440.2314169598394074, // 0.0000000000000000, -32860.9380084957738291, @@ -388,7 +388,7 @@ void TEST_CASE6(void) { // 0.0000000000000000, -74256.3294054225261789, // 0.0000000000000000, -21905.3437088813807350, // 0.0000000000000000, -104994.2581431879953016 - VCMP_U64(12, v8, 0x0, 0x40fdc63d0d12ed98, 0x0, 0xc102bb01d9f12292, 0x0, + VCMP_U64(12, v16, 0x0, 0x40fdc63d0d12ed98, 0x0, 0xc102bb01d9f12292, 0x0, 0xc0e00b9e042a6497, 0x0, 0x4100cf868295a558, 0x0, 0xc0ebe25e8231134e, 0x0, 0xc0f22105453e9ece, 0x0, 0xc0d56455ff538938, 0x0, 0xc0f9a224215ac062); @@ -397,7 +397,7 @@ void TEST_CASE6(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE7(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // -16.64103889, 69.17821503, 38.24327850, // -60.26666641, 97.95110321, -47.38455200, 94.12043762, @@ -410,19 +410,19 @@ void TEST_CASE7(void) { 0x425e44d8); // 53.8750 BOX_HALF_IN_FLOAT(fscalar_16, 0x52bc); - asm volatile("vfwadd.wf v8, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfwadd.wf v16, v4, %[A]" ::[A] "f"(fscalar_16)); // 37.23396301, 123.05321503, 92.11827850, -6.39166641, // 151.82611084, 6.49044800, 147.99543762, // -36.52123260, 36.85481644, 81.97482300, -32.04139709, // 127.47602081, -44.73761749, -27.88364410, 93.95490265, // 109.44223022 - VCMP_U32(13, v8, 
0x4214ef94, 0x42f61b3f, 0x42b83c8f, 0xc0cc8888, 0x4317d37c, + VCMP_U32(13, v16, 0x4214ef94, 0x42f61b3f, 0x42b83c8f, 0xc0cc8888, 0x4317d37c, 0x40cfb1c0, 0x4313fed5, 0xc21215be, 0x42136b55, 0x42a3f31c, 0xc2002a64, 0x42fef3b9, 0xc232f352, 0xc1df11b4, 0x42bbe8e9, 0x42dae26c); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // 366783.2934919928666204, -648147.5638866436202079, // 24949.3815817765425891, -211759.8585660880198702, @@ -440,7 +440,7 @@ void TEST_CASE7(void) { 0x411dfc51be066c64); // 572932.37500000 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x490be046); - asm volatile("vfwadd.wf v8, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfwadd.wf v16, v4, %[A]" ::[A] "f"(fscalar_32)); // 939715.6684919928666204, -75215.1888866436202079, // 597881.7565817765425891, 361172.5164339119801298, // 910672.7464956413023174, 44373.0132952487329021, @@ -449,7 +449,7 @@ void TEST_CASE7(void) { // 565911.6885785305639729, 1412211.0259590207133442, // 129607.2289743639994413, 670221.7175237806513906, // 792941.4536798361223191, 1064216.8105713783297688 - VCMP_U64(14, v8, 0x412cad875644951e, 0xc0f25cf305ae0050, 0x41223ef3835eafc6, + VCMP_U64(14, v16, 0x412cad875644951e, 0xc0f25cf305ae0050, 0x41223ef3835eafc6, 0x41160b5210d40d2a, 0x412bcaa17e34ad3c, 0x40e5aaa06cea2850, 0xc111c31f94f4acd4, 0x40faa1bc6e3d0400, 0x41375f8fae943e83, 0xc111242cc019fb2c, 0x4121452f608d5d7b, 0x41358c7306a54019, @@ -460,7 +460,7 @@ void TEST_CASE7(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE8(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // -16.64103889, 69.17821503, 38.24327850, // -60.26666641, 97.95110321, -47.38455200, 94.12043762, @@ -474,18 +474,18 @@ void TEST_CASE8(void) { // 53.8750 BOX_HALF_IN_FLOAT(fscalar_16, 0x52bc); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwadd.wf v8, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v16); + asm volatile("vfwadd.wf v16, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); 
// 0.00000000, 123.05321503, 0.00000000, -6.39166641, // 0.00000000, 6.49044800, 0.00000000, -36.52123260, // 0.00000000, 81.97482300, 0.00000000, 127.47602081, // 0.00000000, -27.88364410, 0.00000000, 109.44223022 - VCMP_U32(15, v8, 0x0, 0x42f61b3f, 0x0, 0xc0cc8888, 0x0, 0x40cfb1c0, 0x0, + VCMP_U32(15, v16, 0x0, 0x42f61b3f, 0x0, 0xc0cc8888, 0x0, 0x40cfb1c0, 0x0, 0xc21215be, 0x0, 0x42a3f31c, 0x0, 0x42fef3b9, 0x0, 0xc1df11b4, 0x0, 0x42dae26c); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // 366783.2934919928666204, -648147.5638866436202079, // 24949.3815817765425891, -211759.8585660880198702, @@ -504,8 +504,8 @@ void TEST_CASE8(void) { // 572932.37500000 BOX_FLOAT_IN_FLOAT(fscalar_32, 0x490be046); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwadd.wf v8, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v16); + asm volatile("vfwadd.wf v16, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.0000000000000000, -75215.1888866436202079, // 0.0000000000000000, 361172.5164339119801298, // 0.0000000000000000, 44373.0132952487329021, @@ -514,7 +514,7 @@ void TEST_CASE8(void) { // 0.0000000000000000, 1412211.0259590207133442, // 0.0000000000000000, 670221.7175237806513906, // 0.0000000000000000, 1064216.8105713783297688 - VCMP_U64(16, v8, 0x0, 0xc0f25cf305ae0050, 0x0, 0x41160b5210d40d2a, 0x0, + VCMP_U64(16, v16, 0x0, 0xc0f25cf305ae0050, 0x0, 0x41160b5210d40d2a, 0x0, 0x40e5aaa06cea2850, 0x0, 0x40faa1bc6e3d0400, 0x0, 0xc111242cc019fb2c, 0x0, 0x41358c7306a54019, 0x0, 0x4124741b6f5f46e8, 0x0, 0x41303d18cf819b19); diff --git a/sw/riscvTests/isa/rv64uv/vfwmacc.c b/sw/riscvTests/isa/rv64uv/vfwmacc.c index 83e87093..3306c7da 100644 --- a/sw/riscvTests/isa/rv64uv/vfwmacc.c +++ b/sw/riscvTests/isa/rv64uv/vfwmacc.c @@ -11,7 +11,7 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // 52.7812, 74.1875, 0.3564, 97.5000, 7.8477, 53.2188, // -26.4688, -48.1250, -32.9688, 7.8750, // -38.4375, 
51.5625, 46.2188, -75.1875, 23.5625, -77.8125 @@ -20,29 +20,29 @@ void TEST_CASE1(void) { // 49.6875, 28.5312, 13.4766, -90.5625, 58.0000, // -63.8125, 49.0625, 0.3325, 30.8906, 11.2266, -93.9375, // -54.6875, 61.7500, 38.3438, 95.8125, 10.0938 - VLOAD_16(v6, 0x5236, 0x4f22, 0x4abd, 0xd5a9, 0x5340, 0xd3fa, 0x5222, 0x3552, + VLOAD_16(v8, 0x5236, 0x4f22, 0x4abd, 0xd5a9, 0x5340, 0xd3fa, 0x5222, 0x3552, 0x4fb9, 0x499d, 0xd5df, 0xd2d6, 0x53b8, 0x50cb, 0x55fd, 0x490c); // -83.87223053, -48.34465408, 70.48658752, -1.26614821, // -24.13150024, -65.13838196, // 0.84671319, 34.34510040, 72.80049896, // -86.23424530, 25.52654839, -68.44364929, 9.81109142, // -85.20966339, -81.00300598, 16.25512505 - VLOAD_32(v8, 0xc2a7be95, 0xc24160ed, 0x428cf922, 0xbfa21125, 0xc1c10d50, + VLOAD_32(v16, 0xc2a7be95, 0xc24160ed, 0x428cf922, 0xbfa21125, 0xc1c10d50, 0xc28246da, 0x3f58c232, 0x42096162, 0x429199db, 0xc2ac77ef, 0x41cc365f, 0xc288e326, 0x411cfa3b, 0xc2aa6b59, 0xc2a2018a, 0x41820a7f); - asm volatile("vfwmacc.vv v8, v4, v6"); + asm volatile("vfwmacc.vv v16, v4, v8"); // 2538.69604492, 2068.31738281, 75.29024506, -8831.11035156, // 431.03256226, -3461.15991211, -1297.77636719, 18.34259796, // -945.62481689, 2.17493439, 3636.24926758, -2888.26782227, // 2863.81884766, -2968.18041992, 2176.57910156, -769.16479492 - VCMP_U32(1, v8, 0x451eab23, 0x45014514, 0x4296949b, 0xc609fc71, 0x43d7842b, + VCMP_U32(1, v16, 0x451eab23, 0x45014514, 0x4296949b, 0xc609fc71, 0x43d7842b, 0xc558528f, 0xc4a238d8, 0x4192bda4, 0xc46c67fd, 0x400b3220, 0x456343fd, 0xc5348449, 0x4532fd1a, 0xc53982e3, 0x45080944, 0xc4404a8c); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // -3306.98510742, -33314.88281250, 64578.31250000, // 11648.08203125, -92704.16406250, 33998.11328125, // 23406.90429688, 44169.36718750, -1206.53601074, @@ -59,7 +59,7 @@ void TEST_CASE1(void) { // -47420.98437500, -40235.07421875, 44342.39453125, // 90261.61718750, 76035.55468750, -92912.59375000, // 40474.20703125 - VLOAD_32(v6, 
0xc74ca10e, 0xc6f48a33, 0x44e8d318, 0x47a8a448, 0x46156046, + VLOAD_32(v8, 0xc74ca10e, 0xc6f48a33, 0x44e8d318, 0x47a8a448, 0x46156046, 0xc7b72376, 0xc7864a70, 0x4726833b, 0xc7af0272, 0xc7393cfc, 0xc71d2b13, 0x472d3665, 0x47b04acf, 0x479481c7, 0xc7b5784c, 0x471e1a35); @@ -71,13 +71,13 @@ void TEST_CASE1(void) { // 53604.5772563865466509, -30101.3490022116457112, // 80638.7360704737366177, -75019.8948306038219016, // 63887.5576457676361315, 1225.3713199536578031 - VLOAD_64(v8, 0xc0c39e04b6396548, 0x40c97877ac90a6f8, 0x40f5b541d179217e, + VLOAD_64(v16, 0xc0c39e04b6396548, 0x40c97877ac90a6f8, 0x40f5b541d179217e, 0x40f35406fd087c82, 0x40d51ac614b2f890, 0xc0d9d5eb37ccffac, 0x40b75bacee1f5340, 0xc0f859574a437b9d, 0x40f4c951e845a8f0, 0xc0f571e03f956903, 0x40ea2c9278e262b4, 0xc0dd6556560d5f50, 0x40f3afebc6f1d544, 0xc0f250be5139e52e, 0x40ef31f1d83befd8, 0x4093257c3b4c4540); - asm volatile("vfwmacc.vv v8, v4, v6"); + asm volatile("vfwmacc.vv v16, v4, v8"); // 173226551.6662319302558899, 1042805506.3236714601516724, // 120372266.5559626817703247, 1005827715.3891682624816895, // -886236534.5412018299102783, -3187924887.6156492233276367, @@ -86,7 +86,7 @@ void TEST_CASE1(void) { // 3608622049.7550821304321289, 2122430044.9128692150115967, // -6517279175.0161266326904297, 3100051665.0599727630615234, // -8910693010.2487506866455078, 3887273396.3922243118286133 - VCMP_U64(2, v8, 0x41a4a6746f551c5a, 0x41cf13f981296e11, 0x419cb2f0aa394e48, + VCMP_U64(2, v16, 0x41a4a6746f551c5a, 0x41cf13f981296e11, 0x419cb2f0aa394e48, 0x41cdf9db41b1d044, 0xc1ca6972bb45461a, 0xc1e7c07bf2f3b366, 0xc1d7fb4bc74ce878, 0x41dc0dffdb538172, 0x4199cbccf19668ea, 0xc1a9d55ecb9731ad, 0x41eae2e67c3829a2, 0x41dfa06d973a6c73, @@ -98,7 +98,7 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // 52.7812, 74.1875, 0.3564, 97.5000, 7.8477, 53.2188, // -26.4688, -48.1250, -32.9688, 
7.8750, // -38.4375, 51.5625, 46.2188, -75.1875, 23.5625, -77.8125 @@ -107,7 +107,7 @@ void TEST_CASE2(void) { // 49.6875, 28.5312, 13.4766, -90.5625, 58.0000, // -63.8125, 49.0625, 0.3325, 30.8906, 11.2266, -93.9375, // -54.6875, 61.7500, 38.3438, 95.8125, 10.0938 - VLOAD_16(v6, 0x5236, 0x4f22, 0x4abd, 0xd5a9, 0x5340, 0xd3fa, 0x5222, 0x3552, + VLOAD_16(v8, 0x5236, 0x4f22, 0x4abd, 0xd5a9, 0x5340, 0xd3fa, 0x5222, 0x3552, 0x4fb9, 0x499d, 0xd5df, 0xd2d6, 0x53b8, 0x50cb, 0x55fd, 0x490c); VLOAD_8(v0, 0xAA, 0xAA); // -83.87223053, -48.34465408, 70.48658752, -1.26614821, @@ -115,23 +115,23 @@ void TEST_CASE2(void) { // 0.84671319, 34.34510040, 72.80049896, // -86.23424530, 25.52654839, -68.44364929, 9.81109142, // -85.20966339, -81.00300598, 16.25512505 - VLOAD_32(v8, 0xc2a7be95, 0xc24160ed, 0x428cf922, 0xbfa21125, 0xc1c10d50, + VLOAD_32(v16, 0xc2a7be95, 0xc24160ed, 0x428cf922, 0xbfa21125, 0xc1c10d50, 0xc28246da, 0x3f58c232, 0x42096162, 0x429199db, 0xc2ac77ef, 0x41cc365f, 0xc288e326, 0x411cfa3b, 0xc2aa6b59, 0xc2a2018a, 0x41820a7f); - asm volatile("vfwmacc.vv v8, v4, v6, v0.t"); + asm volatile("vfwmacc.vv v16, v4, v8, v0.t"); // -83.87223053, 2068.31738281, 70.48658752, -8831.11035156, // -24.13150024, -3461.15991211, // 0.84671319, 18.34259796, 72.80049896, 2.17493439, 25.52654839, // -2888.26782227, 9.81109142, -2968.18041992, -81.00300598, // -769.16479492 - VCMP_U32(3, v8, 0xc2a7be95, 0x45014514, 0x428cf922, 0xc609fc71, 0xc1c10d50, + VCMP_U32(3, v16, 0xc2a7be95, 0x45014514, 0x428cf922, 0xc609fc71, 0xc1c10d50, 0xc558528f, 0x3f58c232, 0x4192bda4, 0x429199db, 0x400b3220, 0x41cc365f, 0xc5348449, 0x411cfa3b, 0xc53982e3, 0xc2a2018a, 0xc4404a8c); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // -3306.98510742, -33314.88281250, 64578.31250000, // 11648.08203125, -92704.16406250, 33998.11328125, // 23406.90429688, 44169.36718750, -1206.53601074, @@ -148,7 +148,7 @@ void TEST_CASE2(void) { // -47420.98437500, -40235.07421875, 44342.39453125, // 90261.61718750, 
76035.55468750, -92912.59375000, // 40474.20703125 - VLOAD_32(v6, 0xc74ca10e, 0xc6f48a33, 0x44e8d318, 0x47a8a448, 0x46156046, + VLOAD_32(v8, 0xc74ca10e, 0xc6f48a33, 0x44e8d318, 0x47a8a448, 0x46156046, 0xc7b72376, 0xc7864a70, 0x4726833b, 0xc7af0272, 0xc7393cfc, 0xc71d2b13, 0x472d3665, 0x47b04acf, 0x479481c7, 0xc7b5784c, 0x471e1a35); @@ -161,13 +161,13 @@ void TEST_CASE2(void) { // 53604.5772563865466509, -30101.3490022116457112, // 80638.7360704737366177, -75019.8948306038219016, // 63887.5576457676361315, 1225.3713199536578031 - VLOAD_64(v8, 0xc0c39e04b6396548, 0x40c97877ac90a6f8, 0x40f5b541d179217e, + VLOAD_64(v16, 0xc0c39e04b6396548, 0x40c97877ac90a6f8, 0x40f5b541d179217e, 0x40f35406fd087c82, 0x40d51ac614b2f890, 0xc0d9d5eb37ccffac, 0x40b75bacee1f5340, 0xc0f859574a437b9d, 0x40f4c951e845a8f0, 0xc0f571e03f956903, 0x40ea2c9278e262b4, 0xc0dd6556560d5f50, 0x40f3afebc6f1d544, 0xc0f250be5139e52e, 0x40ef31f1d83befd8, 0x4093257c3b4c4540); - asm volatile("vfwmacc.vv v8, v4, v6, v0.t"); + asm volatile("vfwmacc.vv v16, v4, v8, v0.t"); // -10044.0368110413110116, 1042805506.3236714601516724, // 88916.1136409099854063, 1005827715.3891682624816895, // 21611.0950133731239475, -3187924887.6156492233276367, @@ -176,7 +176,7 @@ void TEST_CASE2(void) { // 53604.5772563865466509, 2122430044.9128692150115967, // 80638.7360704737366177, 3100051665.0599727630615234, // 63887.5576457676361315, 3887273396.3922243118286133 - VCMP_U64(4, v8, 0xc0c39e04b6396548, 0x41cf13f981296e11, 0x40f5b541d179217e, + VCMP_U64(4, v16, 0xc0c39e04b6396548, 0x41cf13f981296e11, 0x40f5b541d179217e, 0x41cdf9db41b1d044, 0x40d51ac614b2f890, 0xc1e7c07bf2f3b366, 0x40b75bacee1f5340, 0x41dc0dffdb538172, 0x40f4c951e845a8f0, 0xc1a9d55ecb9731ad, 0x40ea2c9278e262b4, 0x41dfa06d973a6c73, @@ -187,7 +187,7 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // -15.3750, 11.9375, // -31.7656, 27.0625, 3.0684, 
71.2500, 63.2500, -95.6875, @@ -202,23 +202,23 @@ void TEST_CASE3(void) { // -92.09814453, 92.33961487, 42.48206329, 99.15431976, // -5.94871950, -55.92549133, 59.99367523, -45.05080032, // -68.93397522, 55.13935089, -80.23659515 - VLOAD_32(v8, 0xc28c7596, 0x4288b9ff, 0xc28b8355, 0xc2b90586, 0xc256459e, + VLOAD_32(v16, 0xc28c7596, 0x4288b9ff, 0xc28b8355, 0xc2b90586, 0xc256459e, 0xc2b83240, 0x42b8ade2, 0x4229eda2, 0x42c64f03, 0xc0be5be9, 0xc25fb3b4, 0x426ff986, 0xc2343405, 0xc289de32, 0x425c8eb2, 0xc2a07923); - asm volatile("vfwmacc.vf v8, %[A], v4" ::[A] "f"(fscalar_16)); + asm volatile("vfwmacc.vf v16, %[A], v4" ::[A] "f"(fscalar_16)); // -566.55389404, 453.72070312, -1095.19055176, // 781.10052490, 45.48249054, 2207.94091797, 2134.12866211, // -3046.42993164, -1904.30078125, -901.24902344, 1736.69262695, // -1947.49658203, -1406.91601562, -3145.74072266, 931.27264404, // -626.49584961 - VCMP_U32(5, v8, 0xc40da373, 0x43e2dc40, 0xc488e619, 0x4443466f, 0x4235ee12, + VCMP_U32(5, v16, 0xc40da373, 0x43e2dc40, 0xc488e619, 0x4443466f, 0x4235ee12, 0x4509ff0e, 0x4505620f, 0xc53e66e1, 0xc4ee09a0, 0xc4614ff0, 0x44d9162a, 0xc4f36fe4, 0xc4afdd50, 0xc5449bda, 0x4468d173, 0xc41c9fbc); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // -260866.17187500, -221967.43750000, -907157.25000000, // 754760.87500000, -585546.12500000, 260611.84375000, @@ -240,13 +240,13 @@ void TEST_CASE3(void) { // -602138.5575914122164249, 253718.7884360067546368, // 7255.4825419568223879, 957493.0229552322998643, // -446793.8022573012858629, -757660.7323241395642981 - VLOAD_64(v8, 0xc1264913d6b92745, 0xc12b09866c1c7afb, 0xc12692f023f9fc22, + VLOAD_64(v16, 0xc1264913d6b92745, 0xc12b09866c1c7afb, 0xc12692f023f9fc22, 0x412e4029e4afcdba, 0xc120956863b251fa, 0x412a37fec6d2858e, 0x412a5066f3e1f4bc, 0x41143879910d5c64, 0x412c95f62ac3038c, 0xc128d534d9539c30, 0xc12260351d7c9f20, 0x410ef8b64eb78980, 0x40bc577b87dea380, 0x412d386a0bc0c9c8, 0xc11b45273582f020, 0xc1271f3976f3308b); - asm 
volatile("vfwmacc.vf v8, %[A], v4" ::[A] "f"(fscalar_32)); + asm volatile("vfwmacc.vf v16, %[A], v4" ::[A] "f"(fscalar_32)); // 42998415581.0217819213867188, 36586495789.9245910644531250, // 149527988394.2461547851562500, // -124407906605.8560791015625000, 96516314402.8619232177734375, @@ -256,7 +256,7 @@ void TEST_CASE3(void) { // -87875091201.5038757324218750, 131558913220.3411712646484375, // -10993283369.2012958526611328, -40577380186.7228927612304688, // -120034697981.6674957275390625, 123503190798.8887634277343750 - VCMP_U64(6, v8, 0x422405cf81ba0b27, 0x422109733e5bd964, 0x4241684804551f82, + VCMP_U64(6, v16, 0x422405cf81ba0b27, 0x422109733e5bd964, 0x4241684804551f82, 0xc23cf74a012ddb28, 0x423678d21522dca7, 0xc22400cc3b5cc8ed, 0x423d7dec86478af1, 0x42120c7671f50d4d, 0x423206ce01931202, 0xc23dbf2b74cb7eaa, 0xc23475c37b0180fe, 0x423ea185b4c45757, @@ -267,7 +267,7 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // -15.3750, 11.9375, // -31.7656, 27.0625, 3.0684, 71.2500, 63.2500, -95.6875, @@ -283,23 +283,23 @@ void TEST_CASE4(void) { // -92.09814453, 92.33961487, 42.48206329, 99.15431976, // -5.94871950, -55.92549133, 59.99367523, -45.05080032, // -68.93397522, 55.13935089, -80.23659515 - VLOAD_32(v8, 0xc28c7596, 0x4288b9ff, 0xc28b8355, 0xc2b90586, 0xc256459e, + VLOAD_32(v16, 0xc28c7596, 0x4288b9ff, 0xc28b8355, 0xc2b90586, 0xc256459e, 0xc2b83240, 0x42b8ade2, 0x4229eda2, 0x42c64f03, 0xc0be5be9, 0xc25fb3b4, 0x426ff986, 0xc2343405, 0xc289de32, 0x425c8eb2, 0xc2a07923); - asm volatile("vfwmacc.vf v8, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); + asm volatile("vfwmacc.vf v16, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); // -70.22966003, 453.72070312, -69.75650787, 781.10052490, // -53.56798553, 2207.94091797, 92.33961487, // -3046.42993164, 99.15431976, -901.24902344, -55.92549133, // -1947.49658203, -45.05080032, -3145.74072266, 55.13935089, // 
-626.49584961 - VCMP_U32(7, v8, 0xc28c7596, 0x43e2dc40, 0xc28b8355, 0x4443466f, 0xc256459e, + VCMP_U32(7, v16, 0xc28c7596, 0x43e2dc40, 0xc28b8355, 0x4443466f, 0xc256459e, 0x4509ff0e, 0x42b8ade2, 0xc53e66e1, 0x42c64f03, 0xc4614ff0, 0xc25fb3b4, 0xc4f36fe4, 0xc2343405, 0xc5449bda, 0x425c8eb2, 0xc41c9fbc); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // -260866.17187500, -221967.43750000, -907157.25000000, // 754760.87500000, -585546.12500000, 260611.84375000, @@ -322,13 +322,13 @@ void TEST_CASE4(void) { // -602138.5575914122164249, 253718.7884360067546368, // 7255.4825419568223879, 957493.0229552322998643, // -446793.8022573012858629, -757660.7323241395642981 - VLOAD_64(v8, 0xc1264913d6b92745, 0xc12b09866c1c7afb, 0xc12692f023f9fc22, + VLOAD_64(v16, 0xc1264913d6b92745, 0xc12b09866c1c7afb, 0xc12692f023f9fc22, 0x412e4029e4afcdba, 0xc120956863b251fa, 0x412a37fec6d2858e, 0x412a5066f3e1f4bc, 0x41143879910d5c64, 0x412c95f62ac3038c, 0xc128d534d9539c30, 0xc12260351d7c9f20, 0x410ef8b64eb78980, 0x40bc577b87dea380, 0x412d386a0bc0c9c8, 0xc11b45273582f020, 0xc1271f3976f3308b); - asm volatile("vfwmacc.vf v8, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); + asm volatile("vfwmacc.vf v16, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); // -730249.9193813583115116, 36586495789.9245910644531250, // -739704.0702666083816439, -124407906605.8560791015625000, // -543412.1947198503185064, -42956365230.3924331665039062, @@ -337,7 +337,7 @@ void TEST_CASE4(void) { // -602138.5575914122164249, 131558913220.3411712646484375, // 7255.4825419568223879, -40577380186.7228927612304688, // -446793.8022573012858629, 123503190798.8887634277343750 - VCMP_U64(8, v8, 0xc1264913d6b92745, 0x422109733e5bd964, 0xc12692f023f9fc22, + VCMP_U64(8, v16, 0xc1264913d6b92745, 0x422109733e5bd964, 0xc12692f023f9fc22, 0xc23cf74a012ddb28, 0xc120956863b251fa, 0xc22400cc3b5cc8ed, 0x412a5066f3e1f4bc, 0x42120c7671f50d4d, 0x412c95f62ac3038c, 0xc23dbf2b74cb7eaa, 0xc12260351d7c9f20, 0x423ea185b4c45757, diff --git 
a/sw/riscvTests/isa/rv64uv/vfwmsac.c b/sw/riscvTests/isa/rv64uv/vfwmsac.c index dcbd793c..492273d0 100644 --- a/sw/riscvTests/isa/rv64uv/vfwmsac.c +++ b/sw/riscvTests/isa/rv64uv/vfwmsac.c @@ -11,7 +11,7 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -93.0000, -55.1250, -68.5625, 76.3125, -61.2188, 48.9375, // -56.3125, 71.0000, -74.5625, -38.7188, // -50.3438, 93.3750, 80.2500, -7.4141, 93.8125, 83.1875 @@ -20,29 +20,29 @@ void TEST_CASE1(void) { // -60.0312, -31.7188, -74.2500, -0.9077, 30.4844, -56.2500, // -4.8320, 34.2812, 66.6875, 37.9375, 78.1875, 5.6094, // -81.8125, 67.6250, 29.4531, -64.9375 - VLOAD_16(v6, 0xd381, 0xcfee, 0xd4a4, 0xbb43, 0x4f9f, 0xd308, 0xc4d5, 0x5049, + VLOAD_16(v8, 0xd381, 0xcfee, 0xd4a4, 0xbb43, 0x4f9f, 0xd308, 0xc4d5, 0x5049, 0x542b, 0x50be, 0x54e3, 0x459c, 0xd51d, 0x543a, 0x4f5d, 0xd40f); // 31.29529381, -66.12346649, // -48.59321213, 21.66906929, 92.08473206, 1.95985305, // -96.55027771, 77.65225220, -82.48660278, // -35.32508850, 42.91923141, // -76.65069580, 25.13817024, 72.89311981, 21.44047737, 69.71634674 - VLOAD_32(v8, 0x41fa5cc3, 0xc2843f37, 0xc2425f73, 0x41ad5a41, 0x42b82b62, + VLOAD_32(v16, 0x41fa5cc3, 0xc2843f37, 0xc2425f73, 0x41ad5a41, 0x42b82b62, 0x3ffadc77, 0xc2c119be, 0x429b4df4, 0xc2a4f924, 0xc20d4ce4, 0x422bad4b, 0xc2994d28, 0x41c91af9, 0x4291c947, 0x41ab8619, 0x428b6ec5); - asm volatile("vfwmsac.vv v8, v4, v6"); + asm volatile("vfwmsac.vv v16, v4, v8"); // 5551.61083984, 1814.61950684, 5139.35888672, -90.93905640, // -1958.30004883, -2754.69433594, 368.65405273, 2356.31640625, // -4889.89990234, -1433.56750488, -3979.17114258, 600.42608643, // -6590.59130859, -574.26910400, 2741.63085938, -5471.70458984 - VCMP_U32(1, v8, 0x45ad7ce3, 0x44e2d3d3, 0x45a09adf, 0xc2b5e0cc, 0xc4f4c99a, + VCMP_U32(1, v16, 0x45ad7ce3, 0x44e2d3d3, 0x45a09adf, 0xc2b5e0cc, 0xc4f4c99a, 0xc52c2b1c, 0x43b853b8, 0x45134510, 0xc598cf33, 0xc4b33229, 0xc578b2bd, 0x44161b45, 0xc5cdf4bb, 
0xc40f9139, 0x452b5a18, 0xc5aafda3); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // -71423.96093750, -46625.21875000, -59851.39453125, // -43461.99218750, -10255.72753906, 37671.59765625, // 96842.05468750, 33293.05859375, 27126.79296875, @@ -59,7 +59,7 @@ void TEST_CASE1(void) { // -84006.54687500, 31506.48437500, 2731.77905273, // -20272.41992188, 53550.01953125, -85441.62500000, // -33418.07031250 - VLOAD_32(v6, 0x47b6ec94, 0xc6dda234, 0xc724b9ea, 0xc6a703e9, 0x467ed187, + VLOAD_32(v8, 0x47b6ec94, 0xc6dda234, 0xc724b9ea, 0xc6a703e9, 0x467ed187, 0x477c05d8, 0xc642d6de, 0xc7966144, 0xc70397d6, 0xc7a41346, 0x46f624f8, 0x452abc77, 0xc69e60d7, 0x47512e05, 0xc7a6e0d0, 0xc7028a12); @@ -71,13 +71,13 @@ void TEST_CASE1(void) { // 58906.1301468654128257, -84146.7844421620393405, // -23969.5482366856886074, 92255.7186088700836990, // -35519.3091108352309675, -65623.9480113173485734 - VLOAD_64(v8, 0x40cd6718fdfdcea0, 0xc0e3104c8d763c4c, 0xc0f46625a9c52662, + VLOAD_64(v16, 0x40cd6718fdfdcea0, 0xc0e3104c8d763c4c, 0xc0f46625a9c52662, 0x40e1797d8df4a4ac, 0xc0e0f368e8fd81b0, 0x40e1851120d4d47c, 0xc0e99a3f0b1a1b69, 0xc0e3efea13a0e433, 0xc0abe35c94436520, 0x40f2cda2435b507a, 0x40ecc3442a29c254, 0xc0f48b2c8d133979, 0xc0d76863164f52e0, 0x40f685fb7f6c03ba, 0xc0e157e9e43c6805, 0xc0f0057f2b0dea44); - asm volatile("vfwmsac.vv v8, v4, v6"); + asm volatile("vfwmsac.vv v16, v4, v8"); // -6689380123.7125854492187500, 1322754608.4847974777221680, // 2524011718.2575426101684570, 929092728.6666160821914673, // -167219605.4339296817779541, 2430454370.8635458946228027, @@ -86,7 +86,7 @@ void TEST_CASE1(void) { // 844792333.7375594377517700, 78362512.5631930530071259, // 115575080.3336923867464066, 3779577314.6337165832519531, // 507279209.1484052538871765, -1452948809.7560596466064453 - VCMP_U64(2, v8, 0xc1f8eb7d71bb66c0, 0x41d3b5e88c1f06ec, 0x41e2ce2b98c83dca, + VCMP_U64(2, v16, 0xc1f8eb7d71bb66c0, 0x41d3b5e88c1f06ec, 0x41e2ce2b98c83dca, 0x41cbb06a3c5553ad, 0xc1a3ef232ade2c08, 
0x41e21bb94c5ba22b, 0xc1d1fe66d480ac0c, 0xc1e31930221fab5f, 0xc1cb3c0a8aae33ef, 0x41e11d15572a5b49, 0x41c92d4106de6859, 0x4192aede4240b5ae, @@ -98,7 +98,7 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -93.0000, -55.1250, -68.5625, 76.3125, -61.2188, 48.9375, // -56.3125, 71.0000, -74.5625, -38.7188, // -50.3438, 93.3750, 80.2500, -7.4141, 93.8125, 83.1875 @@ -107,7 +107,7 @@ void TEST_CASE2(void) { // -60.0312, -31.7188, -74.2500, -0.9077, 30.4844, -56.2500, // -4.8320, 34.2812, 66.6875, 37.9375, 78.1875, 5.6094, // -81.8125, 67.6250, 29.4531, -64.9375 - VLOAD_16(v6, 0xd381, 0xcfee, 0xd4a4, 0xbb43, 0x4f9f, 0xd308, 0xc4d5, 0x5049, + VLOAD_16(v8, 0xd381, 0xcfee, 0xd4a4, 0xbb43, 0x4f9f, 0xd308, 0xc4d5, 0x5049, 0x542b, 0x50be, 0x54e3, 0x459c, 0xd51d, 0x543a, 0x4f5d, 0xd40f); VLOAD_8(v0, 0xAA, 0xAA); // 31.29529381, -66.12346649, @@ -115,23 +115,23 @@ void TEST_CASE2(void) { // -96.55027771, 77.65225220, -82.48660278, // -35.32508850, 42.91923141, // -76.65069580, 25.13817024, 72.89311981, 21.44047737, 69.71634674 - VLOAD_32(v8, 0x41fa5cc3, 0xc2843f37, 0xc2425f73, 0x41ad5a41, 0x42b82b62, + VLOAD_32(v16, 0x41fa5cc3, 0xc2843f37, 0xc2425f73, 0x41ad5a41, 0x42b82b62, 0x3ffadc77, 0xc2c119be, 0x429b4df4, 0xc2a4f924, 0xc20d4ce4, 0x422bad4b, 0xc2994d28, 0x41c91af9, 0x4291c947, 0x41ab8619, 0x428b6ec5); - asm volatile("vfwmsac.vv v8, v4, v6, v0.t"); + asm volatile("vfwmsac.vv v16, v4, v8, v0.t"); // 31.29529381, 1814.61950684, -48.59321213, // -90.93905640, 92.08473206, -2754.69433594, -96.55027771, // 2356.31640625, -82.48660278, -1433.56750488, 42.91923141, // 600.42608643, 25.13817024, -574.26910400, 21.44047737, // -5471.70458984 - VCMP_U32(3, v8, 0x41fa5cc3, 0x44e2d3d3, 0xc2425f73, 0xc2b5e0cc, 0x42b82b62, + VCMP_U32(3, v16, 0x41fa5cc3, 0x44e2d3d3, 0xc2425f73, 0xc2b5e0cc, 0x42b82b62, 0xc52c2b1c, 0xc2c119be, 0x45134510, 0xc2a4f924, 
0xc4b33229, 0x422bad4b, 0x44161b45, 0x41c91af9, 0xc40f9139, 0x41ab8619, 0xc5aafda3); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // -71423.96093750, -46625.21875000, -59851.39453125, // -43461.99218750, -10255.72753906, 37671.59765625, // 96842.05468750, 33293.05859375, 27126.79296875, @@ -148,7 +148,7 @@ void TEST_CASE2(void) { // -84006.54687500, 31506.48437500, 2731.77905273, // -20272.41992188, 53550.01953125, -85441.62500000, // -33418.07031250 - VLOAD_32(v6, 0x47b6ec94, 0xc6dda234, 0xc724b9ea, 0xc6a703e9, 0x467ed187, + VLOAD_32(v8, 0x47b6ec94, 0xc6dda234, 0xc724b9ea, 0xc6a703e9, 0x467ed187, 0x477c05d8, 0xc642d6de, 0xc7966144, 0xc70397d6, 0xc7a41346, 0x46f624f8, 0x452abc77, 0xc69e60d7, 0x47512e05, 0xc7a6e0d0, 0xc7028a12); @@ -161,13 +161,13 @@ void TEST_CASE2(void) { // 58906.1301468654128257, -84146.7844421620393405, // -23969.5482366856886074, 92255.7186088700836990, // -35519.3091108352309675, -65623.9480113173485734 - VLOAD_64(v8, 0x40cd6718fdfdcea0, 0xc0e3104c8d763c4c, 0xc0f46625a9c52662, + VLOAD_64(v16, 0x40cd6718fdfdcea0, 0xc0e3104c8d763c4c, 0xc0f46625a9c52662, 0x40e1797d8df4a4ac, 0xc0e0f368e8fd81b0, 0x40e1851120d4d47c, 0xc0e99a3f0b1a1b69, 0xc0e3efea13a0e433, 0xc0abe35c94436520, 0x40f2cda2435b507a, 0x40ecc3442a29c254, 0xc0f48b2c8d133979, 0xc0d76863164f52e0, 0x40f685fb7f6c03ba, 0xc0e157e9e43c6805, 0xc0f0057f2b0dea44); - asm volatile("vfwmsac.vv v8, v4, v6, v0.t"); + asm volatile("vfwmsac.vv v16, v4, v8, v0.t"); // 15054.1952512034331448, 1322754608.4847974777221680, // -83554.3539477824524511, 929092728.6666160821914673, // -34715.2784411938628182, 2430454370.8635458946228027, @@ -176,7 +176,7 @@ void TEST_CASE2(void) { // 58906.1301468654128257, 78362512.5631930530071259, // -23969.5482366856886074, 3779577314.6337165832519531, // -35519.3091108352309675, -1452948809.7560596466064453 - VCMP_U64(4, v8, 0x40cd6718fdfdcea0, 0x41d3b5e88c1f06ec, 0xc0f46625a9c52662, + VCMP_U64(4, v16, 0x40cd6718fdfdcea0, 0x41d3b5e88c1f06ec, 0xc0f46625a9c52662, 
0x41cbb06a3c5553ad, 0xc0e0f368e8fd81b0, 0x41e21bb94c5ba22b, 0xc0e99a3f0b1a1b69, 0xc1e31930221fab5f, 0xc0abe35c94436520, 0x41e11d15572a5b49, 0x40ecc3442a29c254, 0x4192aede4240b5ae, @@ -187,7 +187,7 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // 8.1562, 2.6836, 56.7188, 38.4688, 33.8125, // -83.0625, 37.7812, -28.0938, -33.0625, 61.1562, 13.0859, @@ -201,23 +201,23 @@ void TEST_CASE3(void) { // -6.17113161, 55.22229004, 7.96844339, -92.84493256, // -90.90106201, 78.59468842, -58.67407608, 39.90958405, // -93.58789825 - VLOAD_32(v8, 0x4262adf8, 0x426f58b7, 0xc10361a2, 0xc199626a, 0xc2bab748, + VLOAD_32(v16, 0x4262adf8, 0x426f58b7, 0xc10361a2, 0xc199626a, 0xc2bab748, 0xc2090508, 0x423bfb61, 0xc0c579e9, 0x425ce3a0, 0x40fefd7d, 0xc2b9b09b, 0xc2b5cd58, 0x429d307b, 0xc26ab241, 0x421fa36a, 0xc2bb2d01); - asm volatile("vfwmsac.vf v8, %[A], v4" ::[A] "f"(fscalar_16)); + asm volatile("vfwmsac.vf v16, %[A], v4" ::[A] "f"(fscalar_16)); // 268.05081177, 47.00394058, 2266.32666016, 1550.71020508, // 1439.51806641, -3272.67089844, 1457.17053223, // -1112.31127930, -1371.52307129, 2426.81469727, 613.82879639, // -3114.00512695, 3039.22167969, -1456.68920898, // 1156.95373535, -3036.66992188 - VCMP_U32(5, v8, 0x43860681, 0x423c0409, 0x450da53a, 0x44c1d6ba, 0x44b3f094, + VCMP_U32(5, v16, 0x43860681, 0x423c0409, 0x450da53a, 0x44c1d6ba, 0x44b3f094, 0xc54c8abc, 0x44b62575, 0xc48b09f6, 0xc4ab70bd, 0x4517ad09, 0x4419750b, 0xc542a015, 0x453df38c, 0xc4b6160e, 0x44909e85, 0xc53dcab8); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // 580253.06250000, -300331.93750000, 485801.21875000, // -751037.87500000, -360868.65625000, 893035.68750000, @@ -239,13 +239,13 @@ void TEST_CASE3(void) { // -520724.0386287728324533, -616193.5881990450434387, // 79952.4583538805600256, -869849.3916852036491036, // 535808.2751473840326071, -306070.6657954099355265 
- VLOAD_64(v8, 0x4121ac782445c8ae, 0x411fd06534e7065c, 0x411902cc1eee8218, + VLOAD_64(v16, 0x4121ac782445c8ae, 0x411fd06534e7065c, 0x411902cc1eee8218, 0x411bc45dc1f5fbb4, 0xc1238e7e27bbe00c, 0x4110059bbe7ba1fc, 0xc1001eedcb11c418, 0xc11ff065b604bcf3, 0x41284fb7e8343a7c, 0x4104ded328699cd0, 0xc11fc850278e4d10, 0xc122ce032d286cdc, 0x40f38507556ae0f0, 0xc12a8bb2c88af688, 0x41205a008ce01e30, 0xc112ae5aa9c6459e); - asm volatile("vfwmsac.vf v8, %[A], v4" ::[A] "f"(fscalar_32)); + asm volatile("vfwmsac.vf v16, %[A], v4" ::[A] "f"(fscalar_32)); // 389346650932.4642944335937500, // -201521918580.8290100097656250, // 325970052259.3115844726562500, @@ -259,7 +259,7 @@ void TEST_CASE3(void) { // -622875321499.4388427734375000, // 201786282974.1514587402343750, // -432857718253.1149902343750000, 68797853286.6418762207031250 - VCMP_U64(6, v8, 0x4256a9b79acd1db7, 0xc24775d2393a6a1d, 0x4252f9551128d3f1, + VCMP_U64(6, v16, 0x4256a9b79acd1db7, 0xc24775d2393a6a1d, 0x4252f9551128d3f1, 0xc25d555807b2779f, 0xc24c305a5e770151, 0x4261708ea338766c, 0x425522df13d3d659, 0x42504fa86f178920, 0xc26239cb6e8c51e3, 0xc25edba5e2f2ddc3, 0x42536932b980cb59, 0x4251c60c36a04a85, @@ -270,7 +270,7 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // 8.1562, 2.6836, 56.7188, 38.4688, 33.8125, // -83.0625, 37.7812, -28.0938, -33.0625, 61.1562, 13.0859, @@ -285,23 +285,23 @@ void TEST_CASE4(void) { // -6.17113161, 55.22229004, 7.96844339, -92.84493256, // -90.90106201, 78.59468842, -58.67407608, 39.90958405, // -93.58789825 - VLOAD_32(v8, 0x4262adf8, 0x426f58b7, 0xc10361a2, 0xc199626a, 0xc2bab748, + VLOAD_32(v16, 0x4262adf8, 0x426f58b7, 0xc10361a2, 0xc199626a, 0xc2bab748, 0xc2090508, 0x423bfb61, 0xc0c579e9, 0x425ce3a0, 0x40fefd7d, 0xc2b9b09b, 0xc2b5cd58, 0x429d307b, 0xc26ab241, 0x421fa36a, 0xc2bb2d01); - asm volatile("vfwmsac.vf v8, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); + asm 
volatile("vfwmsac.vf v16, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); // 56.66989136, 47.00394058, -8.21133614, 1550.71020508, // -93.35797119, -3272.67089844, 46.99548721, // -1112.31127930, 55.22229004, 2426.81469727, -92.84493256, // -3114.00512695, 78.59468842, -1456.68920898, 39.90958405, // -3036.66992188 - VCMP_U32(7, v8, 0x4262adf8, 0x423c0409, 0xc10361a2, 0x44c1d6ba, 0xc2bab748, + VCMP_U32(7, v16, 0x4262adf8, 0x423c0409, 0xc10361a2, 0x44c1d6ba, 0xc2bab748, 0xc54c8abc, 0x423bfb61, 0xc48b09f6, 0x425ce3a0, 0x4517ad09, 0xc2b9b09b, 0xc542a015, 0x429d307b, 0xc4b6160e, 0x421fa36a, 0xc53dcab8); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // 580253.06250000, -300331.93750000, 485801.21875000, // -751037.87500000, -360868.65625000, 893035.68750000, @@ -324,13 +324,13 @@ void TEST_CASE4(void) { // -520724.0386287728324533, -616193.5881990450434387, // 79952.4583538805600256, -869849.3916852036491036, // 535808.2751473840326071, -306070.6657954099355265 - VLOAD_64(v8, 0x4121ac782445c8ae, 0x411fd06534e7065c, 0x411902cc1eee8218, + VLOAD_64(v16, 0x4121ac782445c8ae, 0x411fd06534e7065c, 0x411902cc1eee8218, 0x411bc45dc1f5fbb4, 0xc1238e7e27bbe00c, 0x4110059bbe7ba1fc, 0xc1001eedcb11c418, 0xc11ff065b604bcf3, 0x41284fb7e8343a7c, 0x4104ded328699cd0, 0xc11fc850278e4d10, 0xc122ce032d286cdc, 0x40f38507556ae0f0, 0xc12a8bb2c88af688, 0x41205a008ce01e30, 0xc112ae5aa9c6459e); - asm volatile("vfwmsac.vf v8, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); + asm volatile("vfwmsac.vf v16, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); // 579132.0708449089433998, -201521918580.8290100097656250, // 409779.0302067114971578, -503943536329.8690795898437500, // -640831.0776052488945425, 599222720963.7006835937500000, @@ -339,7 +339,7 @@ void TEST_CASE4(void) { // -520724.0386287728324533, 305348532865.1643676757812500, // 79952.4583538805600256, 201786282974.1514587402343750, // 535808.2751473840326071, 68797853286.6418762207031250 - VCMP_U64(8, v8, 0x4121ac782445c8ae, 
0xc24775d2393a6a1d, 0x411902cc1eee8218, + VCMP_U64(8, v16, 0x4121ac782445c8ae, 0xc24775d2393a6a1d, 0x411902cc1eee8218, 0xc25d555807b2779f, 0xc1238e7e27bbe00c, 0x4261708ea338766c, 0xc1001eedcb11c418, 0x42504fa86f178920, 0x41284fb7e8343a7c, 0xc25edba5e2f2ddc3, 0xc11fc850278e4d10, 0x4251c60c36a04a85, diff --git a/sw/riscvTests/isa/rv64uv/vfwmul.c b/sw/riscvTests/isa/rv64uv/vfwmul.c index 83bedd38..3ea2f5d9 100644 --- a/sw/riscvTests/isa/rv64uv/vfwmul.c +++ b/sw/riscvTests/isa/rv64uv/vfwmul.c @@ -11,7 +11,7 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -56.5312, 95.3750, 86.3750, -33.4375, 4.7656, 58.8438, // -80.4375, -96.4375, 74.3750, -92.7500, -57.2812, -90.0625, // -93.2500, 40.6875, -32.2812, -36.8125 @@ -21,20 +21,20 @@ void TEST_CASE1(void) { // -5.9180, 32.8750, 32.8750, -74.8125, // -10.3750, 39.5938, 43.2812, 15.0547, -31.9062, // -11.2500, 16.3594, 28.6094 - VLOAD_16(v6, 0x5607, 0xd62d, 0xd224, 0xd4ee, 0xc5eb, 0x501c, 0x501c, 0xd4ad, + VLOAD_16(v8, 0x5607, 0xd62d, 0xd224, 0xd4ee, 0xc5eb, 0x501c, 0x501c, 0xd4ad, 0xc930, 0x50f3, 0x5169, 0x4b87, 0xcffa, 0xc9a0, 0x4c17, 0x4f27); - asm volatile("vfwmul.vv v8, v4, v6"); + asm volatile("vfwmul.vv v16, v4, v8"); // -5451.73242188, -9424.24218750, -4243.17187500, 2637.38281250, // -28.20281982, 1934.48828125, -2644.38281250, 7214.73046875, // -771.64062500, -3672.32031250, -2479.20410156, -1355.86279297, // 2975.25781250, -457.73437500, -528.10107422, -1053.18261719 - VCMP_U32(1, v8, 0xc5aa5ddc, 0xc61340f8, 0xc5849960, 0x4524d620, 0xc1e19f60, + VCMP_U32(1, v16, 0xc5aa5ddc, 0xc61340f8, 0xc5849960, 0x4524d620, 0xc1e19f60, 0x44f1cfa0, 0xc5254620, 0x45e175d8, 0xc440e900, 0xc5658520, 0xc51af344, 0xc4a97b9c, 0x4539f420, 0xc3e4de00, 0xc4040678, 0xc483a5d8); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // -89875.40625000, 87678.49218750, -37342.58593750, // -47507.81640625, -80717.72656250, 2230.02978516, // -68805.99218750, 79032.60156250, 
-43338.95703125, @@ -51,11 +51,11 @@ void TEST_CASE1(void) { // 85817.48437500, -23627.74023438, -79522.11718750, // 51590.63671875, 7574.55957031, -93117.57812500, // 28056.31054688 - VLOAD_32(v6, 0x47c29733, 0x4710d4bc, 0xc7812302, 0xc7c2faed, 0xc79a0fe0, + VLOAD_32(v8, 0x47c29733, 0x4710d4bc, 0xc7812302, 0xc7c2faed, 0xc79a0fe0, 0x4793b8ee, 0xc7b0e214, 0xc7a556be, 0x470746cf, 0x47a79cbe, 0xc6b8977b, 0xc79b510f, 0x474986a3, 0x45ecb47a, 0xc7b5deca, 0x46db309f); - asm volatile("vfwmul.vv v8, v4, v6"); + asm volatile("vfwmul.vv v16, v4, v8"); // -8954322534.4196777343750000, 3250832165.2364501953125000, // 2469017680.4935302734375000, 4742698259.8944396972656250, // 6366994091.8183593750000000, 168665759.1725692749023438, @@ -64,7 +64,7 @@ void TEST_CASE1(void) { // 152328975.7866010665893555, -2031330201.5839996337890625, // 306722104.6965751647949219, -359104602.5425643920898438, // 4042715658.8806152343750000, 2599960105.6150360107421875 - VCMP_U64(2, v8, 0xc200adc0f3335b80, 0x41e8387864a79100, 0x41e265470a0fcb00, + VCMP_U64(2, v16, 0xc200adc0f3335b80, 0x41e8387864a79100, 0x41e265470a0fcb00, 0x41f1aafd513e4fa0, 0x41f7b809eabd1800, 0x41a41b453e585b00, 0x41f736af4d367b00, 0xc1f8ec72ccd7c980, 0xc1d65d56a56686c0, 0x41eb03cc41e5e700, 0x41a228b61f92bd60, 0xc1de44e8e6656040, @@ -76,7 +76,7 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -56.5312, 95.3750, 86.3750, -33.4375, 4.7656, 58.8438, // -80.4375, -96.4375, 74.3750, -92.7500, -57.2812, -90.0625, // -93.2500, 40.6875, -32.2812, -36.8125 @@ -86,21 +86,21 @@ void TEST_CASE2(void) { // -5.9180, 32.8750, 32.8750, -74.8125, // -10.3750, 39.5938, 43.2812, 15.0547, -31.9062, // -11.2500, 16.3594, 28.6094 - VLOAD_16(v6, 0x5607, 0xd62d, 0xd224, 0xd4ee, 0xc5eb, 0x501c, 0x501c, 0xd4ad, + VLOAD_16(v8, 0x5607, 0xd62d, 0xd224, 0xd4ee, 0xc5eb, 0x501c, 0x501c, 0xd4ad, 0xc930, 0x50f3, 0x5169, 
0x4b87, 0xcffa, 0xc9a0, 0x4c17, 0x4f27); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwmul.vv v8, v4, v6, v0.t"); + VCLEAR(v16); + asm volatile("vfwmul.vv v16, v4, v8, v0.t"); // 0.00000000, -9424.24218750, 0.00000000, 2637.38281250, // 0.00000000, 1934.48828125, 0.00000000, 7214.73046875, // 0.00000000, -3672.32031250, 0.00000000, -1355.86279297, // 0.00000000, -457.73437500, 0.00000000, -1053.18261719 - VCMP_U32(3, v8, 0x0, 0xc61340f8, 0x0, 0x4524d620, 0x0, 0x44f1cfa0, 0x0, + VCMP_U32(3, v16, 0x0, 0xc61340f8, 0x0, 0x4524d620, 0x0, 0x44f1cfa0, 0x0, 0x45e175d8, 0x0, 0xc5658520, 0x0, 0xc4a97b9c, 0x0, 0xc3e4de00, 0x0, 0xc483a5d8); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // -89875.40625000, 87678.49218750, -37342.58593750, // -47507.81640625, -80717.72656250, 2230.02978516, // -68805.99218750, 79032.60156250, -43338.95703125, @@ -117,13 +117,13 @@ void TEST_CASE2(void) { // 85817.48437500, -23627.74023438, -79522.11718750, // 51590.63671875, 7574.55957031, -93117.57812500, // 28056.31054688 - VLOAD_32(v6, 0x47c29733, 0x4710d4bc, 0xc7812302, 0xc7c2faed, 0xc79a0fe0, + VLOAD_32(v8, 0x47c29733, 0x4710d4bc, 0xc7812302, 0xc7c2faed, 0xc79a0fe0, 0x4793b8ee, 0xc7b0e214, 0xc7a556be, 0x470746cf, 0x47a79cbe, 0xc6b8977b, 0xc79b510f, 0x474986a3, 0x45ecb47a, 0xc7b5deca, 0x46db309f); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwmul.vv v8, v4, v6, v0.t"); + VCLEAR(v16); + asm volatile("vfwmul.vv v16, v4, v8, v0.t"); // 0.0000000000000000, 3250832165.2364501953125000, // 0.0000000000000000, 4742698259.8944396972656250, // 0.0000000000000000, 168665759.1725692749023438, @@ -132,7 +132,7 @@ void TEST_CASE2(void) { // 0.0000000000000000, -2031330201.5839996337890625, // 0.0000000000000000, -359104602.5425643920898438, // 0.0000000000000000, 2599960105.6150360107421875 - VCMP_U64(4, v8, 0x0, 0x41e8387864a79100, 0x0, 0x41f1aafd513e4fa0, 0x0, + VCMP_U64(4, v16, 0x0, 0x41e8387864a79100, 0x0, 0x41f1aafd513e4fa0, 0x0, 0x41a41b453e585b00, 0x0, 
0xc1f8ec72ccd7c980, 0x0, 0x41eb03cc41e5e700, 0x0, 0xc1de44e8e6656040, 0x0, 0xc1b567805a8ae580, 0x0, 0x41e35f07c533ae60); @@ -141,7 +141,7 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // -44.4062, -27.0781, -21.6562, 75.5625, -84.5000, // -1.0713, 72.5625, -84.6250, 83.9375, -52.3438, @@ -150,19 +150,19 @@ void TEST_CASE3(void) { 0x553f, 0xd28b, 0xd112, 0x3e9c, 0x54fb, 0xd089, 0x5033, 0xd487); // -58.9688 BOX_HALF_IN_FLOAT(fscalar_16, 0xd35f); - asm volatile("vfwmul.vf v8, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfwmul.vf v16, v4, %[A]" ::[A] "f"(fscalar_16)); // 2618.58105469, 1596.76318359, 1277.04199219, // -4455.82617188, 4982.85937500, 63.17257690, -4278.91992188, // 4990.23046875, -4949.68945312, 3086.64550781, 2391.91992188, // -97.43664551, -4699.07226562, 2139.45996094, -1980.98144531, // 4271.54882812 - VCMP_U32(5, v8, 0x4523a94c, 0x44c7986c, 0x449fa158, 0xc58b3e9c, 0x459bb6e0, + VCMP_U32(5, v16, 0x4523a94c, 0x44c7986c, 0x449fa158, 0xc58b3e9c, 0x459bb6e0, 0x427cb0b8, 0xc585b75c, 0x459bf1d8, 0xc59aad84, 0x4540ea54, 0x45157eb8, 0xc2c2df90, 0xc592d894, 0x4505b75c, 0xc4f79f68, 0x45857c64); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // -187018.20312500, -714032.18750000, -891429.25000000, // -378265.00000000, 211566.90625000, 231934.78125000, @@ -176,7 +176,7 @@ void TEST_CASE3(void) { 0x495f02eb); // -50557.21484375 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xc7457d37); - asm volatile("vfwmul.vf v8, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfwmul.vf v16, v4, %[A]" ::[A] "f"(fscalar_32)); // 9455119475.0827026367187500, 36099478708.7902832031250000, // 45068180110.2529296875000000, 19124024872.8710937500000000, // -10696233533.1087646484375000, -11725976565.3944091796875000, @@ -185,7 +185,7 @@ void TEST_CASE3(void) { // 41326042501.6000976562500000, 26399370819.9219970703125000, // 
33773309655.5700683593750000, -37618062611.2260742187500000, // 31363405215.2648925781250000, -46181724885.9680175781250000 - VCMP_U64(6, v8, 0x42019c8d6398a960, 0x4220cf64a96994a0, 0x4224fc8bb51c8180, + VCMP_U64(6, v16, 0x42019c8d6398a960, 0x4220cf64a96994a0, 0x4224fc8bb51c8180, 0x4211cf85d8a37c00, 0xc203ec5c91e8dec0, 0xc205d7619fab27c0, 0xc2264bc096681480, 0x4206c8b43b6a45c0, 0x42170e3d0f7f7d00, 0x42228d33006859a0, 0x42233e72bb0b3340, 0x42189619b90fb020, @@ -196,7 +196,7 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // -44.4062, -27.0781, -21.6562, 75.5625, -84.5000, // -1.0713, 72.5625, -84.6250, 83.9375, -52.3438, @@ -206,18 +206,18 @@ void TEST_CASE4(void) { // -58.9688 BOX_HALF_IN_FLOAT(fscalar_16, 0xd35f); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwmul.vf v8, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v16); + asm volatile("vfwmul.vf v16, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.00000000, 1596.76318359, 0.00000000, -4455.82617188, // 0.00000000, 63.17257690, 0.00000000, 4990.23046875, // 0.00000000, 3086.64550781, 0.00000000, -97.43664551, // 0.00000000, 2139.45996094, 0.00000000, 4271.54882812 - VCMP_U32(7, v8, 0x0, 0x44c7986c, 0x0, 0xc58b3e9c, 0x0, 0x427cb0b8, 0x0, + VCMP_U32(7, v16, 0x0, 0x44c7986c, 0x0, 0xc58b3e9c, 0x0, 0x427cb0b8, 0x0, 0x459bf1d8, 0x0, 0x4540ea54, 0x0, 0xc2c2df90, 0x0, 0x4505b75c, 0x0, 0x45857c64); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // -187018.20312500, -714032.18750000, -891429.25000000, // -378265.00000000, 211566.90625000, 231934.78125000, @@ -232,8 +232,8 @@ void TEST_CASE4(void) { // -50557.21484375 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xc7457d37); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwmul.vf v8, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v16); + asm volatile("vfwmul.vf v16, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); // 
0.0000000000000000, 36099478708.7902832031250000, // 0.0000000000000000, 19124024872.8710937500000000, // 0.0000000000000000, -11725976565.3944091796875000, @@ -242,7 +242,7 @@ void TEST_CASE4(void) { // 0.0000000000000000, 26399370819.9219970703125000, // 0.0000000000000000, -37618062611.2260742187500000, // 0.0000000000000000, -46181724885.9680175781250000 - VCMP_U64(8, v8, 0x0, 0x4220cf64a96994a0, 0x0, 0x4211cf85d8a37c00, 0x0, + VCMP_U64(8, v16, 0x0, 0x4220cf64a96994a0, 0x0, 0x4211cf85d8a37c00, 0x0, 0xc205d7619fab27c0, 0x0, 0x4206c8b43b6a45c0, 0x0, 0x42228d33006859a0, 0x0, 0x42189619b90fb020, 0x0, 0xc221846c2a2673c0, 0x0, 0xc225814a65abefa0); diff --git a/sw/riscvTests/isa/rv64uv/vfwnmacc.c b/sw/riscvTests/isa/rv64uv/vfwnmacc.c index c91db595..981f30c9 100644 --- a/sw/riscvTests/isa/rv64uv/vfwnmacc.c +++ b/sw/riscvTests/isa/rv64uv/vfwnmacc.c @@ -11,7 +11,7 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // 33.9375, 31.7344, -56.0000, -62.0625, 77.6875, -7.7383, // -75.3750, 4.1953, 79.5625, -87.3750, // -37.2188, 90.5000, 68.0625, 69.0625, 54.0312, -64.6875 @@ -21,29 +21,29 @@ void TEST_CASE1(void) { // -79.6875, 3.9375, 37.2812, 50.7812, -3.9375, -55.9688, // -31.5312, 76.0000, 69.1875, -8.2578, -52.5000, // -98.4375, 40.3438 - VLOAD_16(v6, 0xd5c5, 0xd4bf, 0x4e87, 0xd4fb, 0x43e0, 0x50a9, 0x5259, 0xc3e0, + VLOAD_16(v8, 0xd5c5, 0xd4bf, 0x4e87, 0xd4fb, 0x43e0, 0x50a9, 0x5259, 0xc3e0, 0xd2ff, 0xcfe2, 0x54c0, 0x5453, 0xc821, 0xd290, 0xd627, 0x510b); // 75.62483215, 29.19676971, 69.45310211, -70.36167145, // -0.92180759, -77.84928131, 86.66299438, -43.34124756, // -3.36894345, 7.33576536, -64.43717194, -80.48993683, // -5.57641745, 89.34833527, -39.19780731, -55.64332581 - VLOAD_32(v8, 0x42973fea, 0x41e992fc, 0x428ae7fd, 0xc28cb92d, 0xbf6bfb95, + VLOAD_32(v16, 0x42973fea, 0x41e992fc, 0x428ae7fd, 0xc28cb92d, 0xbf6bfb95, 0xc29bb2d5, 0x42ad5374, 0xc22d5d70, 0xc0579cc5, 0x40eabe97, 0xc280dfd5, 0xc2a0fad9, 
0xc0b27203, 0x42b2b259, 0xc21cca8e, 0xc25e92c4); - asm volatile("vfwnmacc.vv v8, v4, v6"); + asm volatile("vfwnmacc.vv v16, v4, v8"); // 3057.23071289, 2380.63232422, 1392.67187500, // -4875.24365234, -304.97271729, 366.34207153, // 3740.97363281, 59.86029053, 4456.38281250, -2762.37866211, // 2893.06225586, -6180.97900391, 567.62377930, 3536.43286133, // 5357.89892578, 2665.37963867 - VCMP_U32(1, v8, 0x453f13b1, 0x4514ca1e, 0x44ae1580, 0xc59859f3, 0xc3987c82, + VCMP_U32(1, v16, 0x453f13b1, 0x4514ca1e, 0x44ae1580, 0xc59859f3, 0xc3987c82, 0x43b72bc9, 0x4569cf94, 0x426f70f0, 0x458b4310, 0xc52ca60f, 0x4534d0ff, 0xc5c127d5, 0x440de7ec, 0x455d06ed, 0x45a76f31, 0x45269613); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // 24686.12304688, 45012.43359375, 5708.16113281, // -32777.98828125, 74121.31250000, -74877.15625000, // -60082.02734375, 46400.20312500, -45509.65234375, @@ -60,7 +60,7 @@ void TEST_CASE1(void) { // -91567.60156250, -25929.78515625, -88250.83593750, // -49992.60156250, 34217.12500000, 49765.98046875, // 8088.22802734 - VLOAD_32(v6, 0xc7aa2211, 0xc5f5885c, 0xc71d75ee, 0xc797355e, 0x46e2353e, + VLOAD_32(v8, 0xc7aa2211, 0xc5f5885c, 0xc71d75ee, 0xc797355e, 0x46e2353e, 0xc6ddc243, 0x47258058, 0xc6ff097f, 0x47ae7783, 0xc7b2d7cd, 0xc6ca9392, 0xc7ac5d6b, 0xc743489a, 0x4705a920, 0x474265fb, 0x45fcc1d3); @@ -72,13 +72,13 @@ void TEST_CASE1(void) { // 21060.8546093095501419, -76483.1707763712329324, // 83261.7813338561973069, -99608.0446094776270911, // 32602.1877863906847779, 52037.0826651407405734 - VLOAD_64(v8, 0xc0f73b7b4184cd41, 0xc0d92483139dacd4, 0xc0f5cb85138e8ec3, + VLOAD_64(v16, 0xc0f73b7b4184cd41, 0xc0d92483139dacd4, 0xc0f5cb85138e8ec3, 0xc0ec05d1a74c6a5f, 0x40f7f5bfacdd39bc, 0xc0f042fad9eb5535, 0x40f08ccc2a0135e2, 0xc0f61e1c762fe5e5, 0xc0f09d8034c5c7e1, 0xc0e220ff29d6512c, 0x40d49136b1eb3ed8, 0xc0f2ac32bb800116, 0x40f453dc8057edfa, 0xc0f85180b6b86d78, 0x40dfd68c04b135a8, 0x40e968a2a5315d80); - asm volatile("vfwnmacc.vv v8, v4, v6"); + asm 
volatile("vfwnmacc.vv v16, v4, v8"); // 2150457244.6964006423950195, 353690458.8370813727378845, // 230184846.2258668541908264, -2537572977.5412845611572266, // -2146252658.3886387348175049, -2125331270.8559157848358154, @@ -87,7 +87,7 @@ void TEST_CASE1(void) { // -225446839.1319110989570618, 5090653244.5816955566406250, // 3520603131.4329605102539062, -3112554462.7102732658386230, // -804160451.3248255252838135, 734216828.7275727987289429 - VCMP_U64(2, v8, 0x41e005abf39648ea, 0x41b514e35ad64af7, 0x41ab70af1c73a4d2, + VCMP_U64(2, v16, 0x41e005abf39648ea, 0x41b514e35ad64af7, 0x41ab70af1c73a4d2, 0xc1e2e8094e315234, 0xc1dffb4ddc98df75, 0xc1dfab7ed1b6c753, 0x41e2f72becddf8fe, 0x41d6928e17879240, 0x41ee49f691b1c6cc, 0xc1f5d45553c90855, 0xc1aae0176e4389da, 0x41f2f6d343c94ea0, @@ -99,7 +99,7 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // 33.9375, 31.7344, -56.0000, -62.0625, 77.6875, -7.7383, // -75.3750, 4.1953, 79.5625, -87.3750, // -37.2188, 90.5000, 68.0625, 69.0625, 54.0312, -64.6875 @@ -109,29 +109,29 @@ void TEST_CASE2(void) { // -79.6875, 3.9375, 37.2812, 50.7812, -3.9375, -55.9688, // -31.5312, 76.0000, 69.1875, -8.2578, -52.5000, // -98.4375, 40.3438 - VLOAD_16(v6, 0xd5c5, 0xd4bf, 0x4e87, 0xd4fb, 0x43e0, 0x50a9, 0x5259, 0xc3e0, + VLOAD_16(v8, 0xd5c5, 0xd4bf, 0x4e87, 0xd4fb, 0x43e0, 0x50a9, 0x5259, 0xc3e0, 0xd2ff, 0xcfe2, 0x54c0, 0x5453, 0xc821, 0xd290, 0xd627, 0x510b); VLOAD_8(v0, 0xAA, 0xAA); // 75.62483215, 29.19676971, 69.45310211, -70.36167145, // -0.92180759, -77.84928131, 86.66299438, -43.34124756, // -3.36894345, 7.33576536, -64.43717194, -80.48993683, // -5.57641745, 89.34833527, -39.19780731, -55.64332581 - VLOAD_32(v8, 0x42973fea, 0x41e992fc, 0x428ae7fd, 0xc28cb92d, 0xbf6bfb95, + VLOAD_32(v16, 0x42973fea, 0x41e992fc, 0x428ae7fd, 0xc28cb92d, 0xbf6bfb95, 0xc29bb2d5, 0x42ad5374, 0xc22d5d70, 0xc0579cc5, 0x40eabe97, 
0xc280dfd5, 0xc2a0fad9, 0xc0b27203, 0x42b2b259, 0xc21cca8e, 0xc25e92c4); - asm volatile("vfwnmacc.vv v8, v4, v6, v0.t"); + asm volatile("vfwnmacc.vv v16, v4, v8, v0.t"); // 75.62483215, 2380.63232422, 69.45310211, -4875.24365234, // -0.92180759, 366.34207153, 86.66299438, 59.86029053, // -3.36894345, -2762.37866211, -64.43717194, -6180.97900391, // -5.57641745, 3536.43286133, -39.19780731, 2665.37963867 - VCMP_U32(3, v8, 0x42973fea, 0x4514ca1e, 0x428ae7fd, 0xc59859f3, 0xbf6bfb95, + VCMP_U32(3, v16, 0x42973fea, 0x4514ca1e, 0x428ae7fd, 0xc59859f3, 0xbf6bfb95, 0x43b72bc9, 0x42ad5374, 0x426f70f0, 0xc0579cc5, 0xc52ca60f, 0xc280dfd5, 0xc5c127d5, 0xc0b27203, 0x455d06ed, 0xc21cca8e, 0x45269613); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // 24686.12304688, 45012.43359375, 5708.16113281, // -32777.98828125, 74121.31250000, -74877.15625000, // -60082.02734375, 46400.20312500, -45509.65234375, @@ -148,7 +148,7 @@ void TEST_CASE2(void) { // -91567.60156250, -25929.78515625, -88250.83593750, // -49992.60156250, 34217.12500000, 49765.98046875, // 8088.22802734 - VLOAD_32(v6, 0xc7aa2211, 0xc5f5885c, 0xc71d75ee, 0xc797355e, 0x46e2353e, + VLOAD_32(v8, 0xc7aa2211, 0xc5f5885c, 0xc71d75ee, 0xc797355e, 0x46e2353e, 0xc6ddc243, 0x47258058, 0xc6ff097f, 0x47ae7783, 0xc7b2d7cd, 0xc6ca9392, 0xc7ac5d6b, 0xc743489a, 0x4705a920, 0x474265fb, 0x45fcc1d3); @@ -161,13 +161,13 @@ void TEST_CASE2(void) { // 21060.8546093095501419, -76483.1707763712329324, // 83261.7813338561973069, -99608.0446094776270911, // 32602.1877863906847779, 52037.0826651407405734 - VLOAD_64(v8, 0xc0f73b7b4184cd41, 0xc0d92483139dacd4, 0xc0f5cb85138e8ec3, + VLOAD_64(v16, 0xc0f73b7b4184cd41, 0xc0d92483139dacd4, 0xc0f5cb85138e8ec3, 0xc0ec05d1a74c6a5f, 0x40f7f5bfacdd39bc, 0xc0f042fad9eb5535, 0x40f08ccc2a0135e2, 0xc0f61e1c762fe5e5, 0xc0f09d8034c5c7e1, 0xc0e220ff29d6512c, 0x40d49136b1eb3ed8, 0xc0f2ac32bb800116, 0x40f453dc8057edfa, 0xc0f85180b6b86d78, 0x40dfd68c04b135a8, 0x40e968a2a5315d80); - asm volatile("vfwnmacc.vv 
v8, v4, v6, v0.t"); + asm volatile("vfwnmacc.vv v16, v4, v8, v0.t"); // -95159.7034957902651513, 353690458.8370813727378845, // -89272.3172746254567755, -2537572977.5412845611572266, // 98139.9797031646012329, -2125331270.8559157848358154, @@ -176,7 +176,7 @@ void TEST_CASE2(void) { // 21060.8546093095501419, 5090653244.5816955566406250, // 83261.7813338561973069, -3112554462.7102732658386230, // 32602.1877863906847779, 734216828.7275727987289429 - VCMP_U64(4, v8, 0xc0f73b7b4184cd41, 0x41b514e35ad64af7, 0xc0f5cb85138e8ec3, + VCMP_U64(4, v16, 0xc0f73b7b4184cd41, 0x41b514e35ad64af7, 0xc0f5cb85138e8ec3, 0xc1e2e8094e315234, 0x40f7f5bfacdd39bc, 0xc1dfab7ed1b6c753, 0x40f08ccc2a0135e2, 0x41d6928e17879240, 0xc0f09d8034c5c7e1, 0xc1f5d45553c90855, 0x40d49136b1eb3ed8, 0x41f2f6d343c94ea0, @@ -187,7 +187,7 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // 85.2500, -7.6602, -81.8125, -37.2500, // -48.0000, 14.9531, 25.9844, 96.1875, 46.5000, @@ -202,23 +202,23 @@ void TEST_CASE3(void) { // -82.18785095, 3.87765026, -34.03960037, // -92.34690857, 46.98464203, 28.09385681, 58.44809723, 57.04935455, // -44.62148285, 83.52678680 - VLOAD_32(v8, 0x40c62ff4, 0xc2a28694, 0xc29b7f46, 0x41761cdd, 0x41daf76d, + VLOAD_32(v16, 0x40c62ff4, 0xc2a28694, 0xc29b7f46, 0x41761cdd, 0x41daf76d, 0x42433e74, 0xc2a4602e, 0x40782b6c, 0xc208288d, 0xc2b8b19e, 0x423bf046, 0x41e0c038, 0x4269cada, 0x4264328a, 0xc2327c66, 0x42a70db7); - asm volatile("vfwnmacc.vf v8, %[A], v4" ::[A] "f"(fscalar_16)); + asm volatile("vfwnmacc.vf v16, %[A], v4" ::[A] "f"(fscalar_16)); // 4080.47851562, -285.94589233, -3844.13818359, -1801.05395508, // -2328.37084961, 668.00445557, 1327.81384277, 4607.11083984, // 2263.13330078, -3619.81323242, 2135.66967773, // -3323.79687500, 2762.37426758, -3439.63916016, // -2157.50732422, -4421.87060547 - VCMP_U32(5, v8, 0x457f07a8, 0xc38ef913, 0xc5704236, 0xc4e121ba, 
0xc51185ef, + VCMP_U32(5, v16, 0x457f07a8, 0xc38ef913, 0xc5704236, 0xc4e121ba, 0xc51185ef, 0x44270049, 0x44a5fa0b, 0x458ff8e3, 0x450d7222, 0xc5623d03, 0x45057ab7, 0xc54fbcc0, 0x452ca5fd, 0xc556fa3a, 0xc506d81e, 0xc58a2ef7); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // 415907.75000000, 16644.92773438, -320087.15625000, // -560497.81250000, 51200.66406250, 175961.67187500, @@ -240,13 +240,13 @@ void TEST_CASE3(void) { // 97767.4262132318690419, -208046.9794710964197293, // -303699.6372622016351670, -710697.5104083393234760, // -907884.7086961114546284, -326406.2730544115183875 - VLOAD_64(v8, 0xc0ad62e3ae9e7200, 0x410ea4eb56509b38, 0xc129662b849df069, + VLOAD_64(v16, 0xc0ad62e3ae9e7200, 0x410ea4eb56509b38, 0xc129662b849df069, 0xc128f6256c256024, 0xc1238e5bb5417d8a, 0xc0f54479c1acb530, 0xc126db6acbf67002, 0x410fabde5f5c7320, 0xc11ca2b22fd69018, 0xc109032e93c94df0, 0x40f7de76d1c4f740, 0xc1096577d5f4f134, 0xc112894e8c8e766c, 0xc125b05305543dea, 0xc12bb4d96ada377b, 0xc113ec19179b935e); - asm volatile("vfwnmacc.vf v8, %[A], v4" ::[A] "f"(fscalar_32)); + asm volatile("vfwnmacc.vf v16, %[A], v4" ::[A] "f"(fscalar_32)); // 269632972092.2103271484375000, 10790654572.4701824188232422, // -207511651113.6687316894531250, // -363369878873.9254760742187500, 33194028125.5312614440917969, @@ -257,7 +257,7 @@ void TEST_CASE3(void) { // -31275968950.3095932006835938, 444146336412.5474243164062500, // 176955637312.9947814941406250, 335876029304.3336791992187500, // -226662309098.3402404785156250 - VCMP_U64(6, v8, 0x424f63b0529e1aec, 0x420419629363c2ef, 0xc24828544194d599, + VCMP_U64(6, v16, 0x424f63b0529e1aec, 0x420419629363c2ef, 0xc24828544194d599, 0xc25526a215567b3b, 0x421eea12b1762003, 0x423a8f760c56915b, 0xc222cc8c6659986a, 0xc2183b8b6ce755ee, 0x4224852ec0739f2b, 0x425f6a693fc0285e, 0xc24514c310654d4e, 0xc21d20c5a6d93d06, @@ -268,7 +268,7 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void 
TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // 85.2500, -7.6602, -81.8125, -37.2500, // -48.0000, 14.9531, 25.9844, 96.1875, 46.5000, @@ -284,23 +284,23 @@ void TEST_CASE4(void) { // -82.18785095, 3.87765026, -34.03960037, // -92.34690857, 46.98464203, 28.09385681, 58.44809723, 57.04935455, // -44.62148285, 83.52678680 - VLOAD_32(v8, 0x40c62ff4, 0xc2a28694, 0xc29b7f46, 0x41761cdd, 0x41daf76d, + VLOAD_32(v16, 0x40c62ff4, 0xc2a28694, 0xc29b7f46, 0x41761cdd, 0x41daf76d, 0x42433e74, 0xc2a4602e, 0x40782b6c, 0xc208288d, 0xc2b8b19e, 0x423bf046, 0x41e0c038, 0x4269cada, 0x4264328a, 0xc2327c66, 0x42a70db7); - asm volatile("vfwnmacc.vf v8, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); + asm volatile("vfwnmacc.vf v16, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); // 6.19335365, -285.94589233, -77.74858093, // -1801.05395508, 27.37081337, 668.00445557, -82.18785095, // 4607.11083984, -34.03960037, -3619.81323242, 46.98464203, // -3323.79687500, 58.44809723, -3439.63916016, -44.62148285, // -4421.87060547 - VCMP_U32(7, v8, 0x40c62ff4, 0xc38ef913, 0xc29b7f46, 0xc4e121ba, 0x41daf76d, + VCMP_U32(7, v16, 0x40c62ff4, 0xc38ef913, 0xc29b7f46, 0xc4e121ba, 0x41daf76d, 0x44270049, 0xc2a4602e, 0x458ff8e3, 0xc208288d, 0xc5623d03, 0x423bf046, 0xc54fbcc0, 0x4269cada, 0xc556fa3a, 0xc2327c66, 0xc58a2ef7); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // 415907.75000000, 16644.92773438, -320087.15625000, // -560497.81250000, 51200.66406250, 175961.67187500, @@ -323,13 +323,13 @@ void TEST_CASE4(void) { // 97767.4262132318690419, -208046.9794710964197293, // -303699.6372622016351670, -710697.5104083393234760, // -907884.7086961114546284, -326406.2730544115183875 - VLOAD_64(v8, 0xc0ad62e3ae9e7200, 0x410ea4eb56509b38, 0xc129662b849df069, + VLOAD_64(v16, 0xc0ad62e3ae9e7200, 0x410ea4eb56509b38, 0xc129662b849df069, 0xc128f6256c256024, 0xc1238e5bb5417d8a, 0xc0f54479c1acb530, 0xc126db6acbf67002, 0x410fabde5f5c7320, 0xc11ca2b22fd69018, 
0xc109032e93c94df0, 0x40f7de76d1c4f740, 0xc1096577d5f4f134, 0xc112894e8c8e766c, 0xc125b05305543dea, 0xc12bb4d96ada377b, 0xc113ec19179b935e); - asm volatile("vfwnmacc.vf v8, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); + asm volatile("vfwnmacc.vf v16, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); // -3761.4446916116867214, 10790654572.4701824188232422, // -832277.7590174797223881, -363369878873.9254760742187500, // -640813.8540152770001441, 114076027990.5677947998046875, @@ -338,7 +338,7 @@ void TEST_CASE4(void) { // 97767.4262132318690419, -31275968950.3095932006835938, // -303699.6372622016351670, 176955637312.9947814941406250, // -907884.7086961114546284, -226662309098.3402404785156250 - VCMP_U64(8, v8, 0xc0ad62e3ae9e7200, 0x420419629363c2ef, 0xc129662b849df069, + VCMP_U64(8, v16, 0xc0ad62e3ae9e7200, 0x420419629363c2ef, 0xc129662b849df069, 0xc25526a215567b3b, 0xc1238e5bb5417d8a, 0x423a8f760c56915b, 0xc126db6acbf67002, 0xc2183b8b6ce755ee, 0xc11ca2b22fd69018, 0x425f6a693fc0285e, 0x40f7de76d1c4f740, 0xc21d20c5a6d93d06, diff --git a/sw/riscvTests/isa/rv64uv/vfwnmsac.c b/sw/riscvTests/isa/rv64uv/vfwnmsac.c index c35d1863..7eccfc2e 100644 --- a/sw/riscvTests/isa/rv64uv/vfwnmsac.c +++ b/sw/riscvTests/isa/rv64uv/vfwnmsac.c @@ -11,7 +11,7 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -27.1719, 16.3438, -76.1250, 73.7500, 39.2500, 32.8438, // -48.0312, -62.9062, -52.3125, 50.8750, -32.1562, -86.3750, // -42.7812, 97.2500, -83.6250, 46.6250 @@ -21,29 +21,29 @@ void TEST_CASE1(void) { // -78.0625, 13.7344, 6.3164, 19.1250, 23.3125, 72.4375, // -53.2812, -16.3438, -95.0625, -96.2500, 10.4141, // -44.4688, 42.5938 - VLOAD_16(v6, 0xcc8b, 0xd1d1, 0xd48c, 0xd4e1, 0x4ade, 0x4651, 0x4cc8, 0x4dd4, + VLOAD_16(v8, 0xcc8b, 0xd1d1, 0xd48c, 0xd4e1, 0x4ade, 0x4651, 0x4cc8, 0x4dd4, 0x5487, 0xd2a9, 0xcc16, 0xd5f1, 0xd604, 0x4935, 0xd18f, 0x5153); // 69.72727966, 14.41778183, // -64.82620239, 5.66590357, 73.33881378, // -23.97786140, 
94.91672516, 17.38204765, -39.07393646, // -50.71182251, -11.98221493, -36.07648849, // -86.86090088, 55.96418381, 61.43484116, -88.02533722 - VLOAD_32(v8, 0x428b745e, 0x4166af3c, 0xc281a704, 0x40b54f15, 0x4292ad79, + VLOAD_32(v16, 0x428b745e, 0x4166af3c, 0xc281a704, 0x40b54f15, 0x4292ad79, 0xc1bfd2a9, 0x42bdd55d, 0x418b0e6f, 0xc21c4bb6, 0xc24ad8e8, 0xc13fb727, 0xc2104e53, 0xc2adb8c8, 0x425fdb53, 0x4275bd47, 0xc2b00cf9); - asm volatile("vfwnmsac.vv v8, v4, v6"); + asm volatile("vfwnmsac.vv v16, v4, v8"); // -424.03662109, 774.91290283, -5602.91992188, 5762.77539062, // -465.73541260, -231.43232727, 1013.51440430, 1483.88403320, // 3750.31274414, 2659.97167969, -537.53594971, -8247.09960938, // -4204.55615234, -956.80340576, -3657.26440430, -2073.95898438 - VCMP_U32(1, v8, 0xc3d404b0, 0x4441ba6d, 0xc5af175c, 0x45b41634, 0xc3e8de22, + VCMP_U32(1, v16, 0xc3d404b0, 0x4441ba6d, 0xc5af175c, 0x45b41634, 0xc3e8de22, 0xc3676ead, 0x447d60ec, 0x44b97c4a, 0x456a6501, 0x45263f8c, 0xc406624d, 0xc600dc66, 0xc5836473, 0xc46f336b, 0xc564943b, 0xc5019f58); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // 76109.13281250, 56176.41406250, -69127.14843750, // -80327.49218750, 42920.59375000, -22857.18164062, // -74227.70312500, -2650.23828125, 34254.71093750, @@ -59,7 +59,7 @@ void TEST_CASE1(void) { // 6145.43457031, -31624.23242188, -69962.68750000, 468.94165039, // 10443.93554688, -6054.45410156, -26090.46093750, // 83534.57031250, 49878.42968750, -62082.53125000 - VLOAD_32(v6, 0xc70f0e8b, 0xc76de453, 0xc70f03b3, 0xc7915587, 0xc6ca1338, + VLOAD_32(v8, 0xc70f0e8b, 0xc76de453, 0xc70f03b3, 0xc7915587, 0xc6ca1338, 0xc7832be2, 0x45c00b7a, 0xc6f71077, 0xc788a558, 0x43ea7888, 0x46232fbe, 0xc5bd33a2, 0xc6cbd4ec, 0x47a32749, 0x4742d66e, 0xc7728288); @@ -71,13 +71,13 @@ void TEST_CASE1(void) { // 25915.8242703938303748, 82619.8738822988234460, // 36865.7501246419560630, 41236.4660055586136878, // -5735.0030344506667461, 97965.1847665070963558 - VLOAD_64(v8, 0x40f0f91647b040e6, 
0x40f7fd7ad092e40e, 0xc0f7ec74490921f9, + VLOAD_64(v16, 0x40f0f91647b040e6, 0x40f7fd7ad092e40e, 0xc0f7ec74490921f9, 0xc0ef02de4f5de1b8, 0x40efe63fc51a00c4, 0xc0e1163812edb722, 0xc0ceeba02257b050, 0xc0ab6cc617554220, 0x40e6fb9e2131a44c, 0xc0d356993b5a8e58, 0x40d94ef4c0d89c24, 0x40f42bbdfb6c0160, 0x40e20038010564a4, 0x40e4228ee9847d40, 0xc0b66700c6dda260, 0x40f7ead2f4cdb996); - asm volatile("vfwnmsac.vv v8, v4, v6"); + asm volatile("vfwnmsac.vv v16, v4, v8"); // 2787379508.1325840950012207, 3421260093.5289182662963867, // -2530960357.7114648818969727, -5977316925.0209798812866211, // 1110232642.0733766555786133, -1535117955.5847384929656982, @@ -86,7 +86,7 @@ void TEST_CASE1(void) { // -170625991.9771288335323334, -290729271.1712532043457031, // 1329539.2864317980129272, 3989167748.8218097686767578, // 4771532019.5777149200439453, 5122659058.9813976287841797 - VCMP_U64(2, v8, 0x41e4c48126843e21, 0x41e97d8927b0ece6, 0xc1e2db6c7cb6c452, + VCMP_U64(2, v16, 0x41e4c48126843e21, 0x41e97d8927b0ece6, 0xc1e2db6c7cb6c452, 0xc1f64469e3d055ef, 0x41d08b339084b234, 0xc1d6e002a0e56c5b, 0x41bb303afd9bb451, 0xc193fbad7710e4ab, 0x41e1db2636354532, 0x41747cdc1763715c, 0xc1a457178ff44a3b, 0xc1b1542d372bd740, @@ -98,7 +98,7 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -27.1719, 16.3438, -76.1250, 73.7500, 39.2500, 32.8438, // -48.0312, -62.9062, -52.3125, 50.8750, -32.1562, -86.3750, // -42.7812, 97.2500, -83.6250, 46.6250 @@ -108,7 +108,7 @@ void TEST_CASE2(void) { // -78.0625, 13.7344, 6.3164, 19.1250, 23.3125, 72.4375, // -53.2812, -16.3438, -95.0625, -96.2500, 10.4141, // -44.4688, 42.5938 - VLOAD_16(v6, 0xcc8b, 0xd1d1, 0xd48c, 0xd4e1, 0x4ade, 0x4651, 0x4cc8, 0x4dd4, + VLOAD_16(v8, 0xcc8b, 0xd1d1, 0xd48c, 0xd4e1, 0x4ade, 0x4651, 0x4cc8, 0x4dd4, 0x5487, 0xd2a9, 0xcc16, 0xd5f1, 0xd604, 0x4935, 0xd18f, 0x5153); VLOAD_8(v0, 0xAA, 0xAA); // 
69.72727966, 14.41778183, @@ -116,23 +116,23 @@ void TEST_CASE2(void) { // -23.97786140, 94.91672516, 17.38204765, -39.07393646, // -50.71182251, -11.98221493, -36.07648849, // -86.86090088, 55.96418381, 61.43484116, -88.02533722 - VLOAD_32(v8, 0x428b745e, 0x4166af3c, 0xc281a704, 0x40b54f15, 0x4292ad79, + VLOAD_32(v16, 0x428b745e, 0x4166af3c, 0xc281a704, 0x40b54f15, 0x4292ad79, 0xc1bfd2a9, 0x42bdd55d, 0x418b0e6f, 0xc21c4bb6, 0xc24ad8e8, 0xc13fb727, 0xc2104e53, 0xc2adb8c8, 0x425fdb53, 0x4275bd47, 0xc2b00cf9); - asm volatile("vfwnmsac.vv v8, v4, v6, v0.t"); + asm volatile("vfwnmsac.vv v16, v4, v8, v0.t"); // 69.72727966, 774.91290283, -64.82620239, // 5762.77539062, 73.33881378, -231.43232727, 94.91672516, // 1483.88403320, -39.07393646, 2659.97167969, -11.98221493, // -8247.09960938, -86.86090088, -956.80340576, 61.43484116, // -2073.95898438 - VCMP_U32(3, v8, 0x428b745e, 0x4441ba6d, 0xc281a704, 0x45b41634, 0x4292ad79, + VCMP_U32(3, v16, 0x428b745e, 0x4441ba6d, 0xc281a704, 0x45b41634, 0x4292ad79, 0xc3676ead, 0x42bdd55d, 0x44b97c4a, 0xc21c4bb6, 0x45263f8c, 0xc13fb727, 0xc600dc66, 0xc2adb8c8, 0xc46f336b, 0x4275bd47, 0xc5019f58); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // 76109.13281250, 56176.41406250, -69127.14843750, // -80327.49218750, 42920.59375000, -22857.18164062, // -74227.70312500, -2650.23828125, 34254.71093750, @@ -148,7 +148,7 @@ void TEST_CASE2(void) { // 6145.43457031, -31624.23242188, -69962.68750000, 468.94165039, // 10443.93554688, -6054.45410156, -26090.46093750, // 83534.57031250, 49878.42968750, -62082.53125000 - VLOAD_32(v6, 0xc70f0e8b, 0xc76de453, 0xc70f03b3, 0xc7915587, 0xc6ca1338, + VLOAD_32(v8, 0xc70f0e8b, 0xc76de453, 0xc70f03b3, 0xc7915587, 0xc6ca1338, 0xc7832be2, 0x45c00b7a, 0xc6f71077, 0xc788a558, 0x43ea7888, 0x46232fbe, 0xc5bd33a2, 0xc6cbd4ec, 0x47a32749, 0x4742d66e, 0xc7728288); @@ -161,13 +161,13 @@ void TEST_CASE2(void) { // 25915.8242703938303748, 82619.8738822988234460, // 36865.7501246419560630, 
41236.4660055586136878, // -5735.0030344506667461, 97965.1847665070963558 - VLOAD_64(v8, 0x40f0f91647b040e6, 0x40f7fd7ad092e40e, 0xc0f7ec74490921f9, + VLOAD_64(v16, 0x40f0f91647b040e6, 0x40f7fd7ad092e40e, 0xc0f7ec74490921f9, 0xc0ef02de4f5de1b8, 0x40efe63fc51a00c4, 0xc0e1163812edb722, 0xc0ceeba02257b050, 0xc0ab6cc617554220, 0x40e6fb9e2131a44c, 0xc0d356993b5a8e58, 0x40d94ef4c0d89c24, 0x40f42bbdfb6c0160, 0x40e20038010564a4, 0x40e4228ee9847d40, 0xc0b66700c6dda260, 0x40f7ead2f4cdb996); - asm volatile("vfwnmsac.vv v8, v4, v6, v0.t"); + asm volatile("vfwnmsac.vv v16, v4, v8, v0.t"); // 69521.3925020728202071, 3421260093.5289182662963867, // -97991.2678309752518544, -5977316925.0209798812866211, // 65329.9928102507547010, -1535117955.5847384929656982, @@ -176,7 +176,7 @@ void TEST_CASE2(void) { // 25915.8242703938303748, -290729271.1712532043457031, // 36865.7501246419560630, 3989167748.8218097686767578, // -5735.0030344506667461, 5122659058.9813976287841797 - VCMP_U64(4, v8, 0x40f0f91647b040e6, 0x41e97d8927b0ece6, 0xc0f7ec74490921f9, + VCMP_U64(4, v16, 0x40f0f91647b040e6, 0x41e97d8927b0ece6, 0xc0f7ec74490921f9, 0xc1f64469e3d055ef, 0x40efe63fc51a00c4, 0xc1d6e002a0e56c5b, 0xc0ceeba02257b050, 0xc193fbad7710e4ab, 0x40e6fb9e2131a44c, 0x41747cdc1763715c, 0x40d94ef4c0d89c24, 0xc1b1542d372bd740, @@ -187,7 +187,7 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // 15.1797, -57.5312, -39.9688, 95.8125, 22.3906, // -30.2344, 61.3438, 67.1250, -80.6250, -20.6875, -34.1250, @@ -201,22 +201,22 @@ void TEST_CASE3(void) { // -61.63031769, -55.39078903, 21.99703789, 29.49930191, // -64.56553650, -17.54965782, 84.51310730, -88.96613312, // -6.75917578 - VLOAD_32(v8, 0xc2b4f48b, 0xc2b3f2af, 0xc208d482, 0xc2bb782b, 0x42a3831b, + VLOAD_32(v16, 0xc2b4f48b, 0xc2b3f2af, 0xc208d482, 0xc2bb782b, 0x42a3831b, 0x42a134b8, 0x4292e873, 0xc2768572, 0xc25d902b, 0x41aff9ef, 0x41ebfe92, 
0xc281218e, 0xc18c65b3, 0x42a906b6, 0xc2b1eea9, 0xc0d84b2b); - asm volatile("vfwnmsac.vf v8, %[A], v4" ::[A] "f"(fscalar_16)); + asm volatile("vfwnmsac.vf v16, %[A], v4" ::[A] "f"(fscalar_16)); // 330.52151489, -1685.56726074, -1142.71582031, 2563.56518555, // 702.74603271, -757.92852783, 1774.78454590, 1800.03955078, // -2291.47485352, -551.75787354, -916.93621826, -277.44854736, // -715.24255371, 1882.04724121, 688.02972412, -1448.07995605 - VCMP_U32(5, v8, 0x43a542c1, 0xc4d2b227, 0xc48ed6e8, 0x4520390b, 0x442fafbf, + VCMP_U32(5, v16, 0x43a542c1, 0xc4d2b227, 0xc48ed6e8, 0x4520390b, 0x442fafbf, 0xc43d7b6d, 0x44ddd91b, 0x44e10144, 0xc50f3799, 0xc409f081, 0xc4653beb, 0xc38ab96a, 0xc432cf86, 0x44eb4183, 0x442c01e7, 0xc4b5028f); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // 467373.87500000, -160965.29687500, 883060.25000000, // -737665.37500000, -482502.81250000, -983579.31250000, @@ -238,13 +238,13 @@ void TEST_CASE3(void) { // -388606.5700500296661630, 47697.1169114386430010, // -665347.3327810273040086, 976438.6193965608254075, // -498588.0437998892739415, 793291.0511387982405722 - VLOAD_64(v8, 0xc11c1ed27139f9a2, 0xc12cd6554cbddf2f, 0xc1128659ecb82f10, + VLOAD_64(v16, 0xc11c1ed27139f9a2, 0xc12cd6554cbddf2f, 0xc1128659ecb82f10, 0xc126d3f3549612d1, 0x4117a9d8000c4d34, 0xc12b49af53e9b790, 0x40f804b11f89b0f0, 0xc12d03c239c68719, 0xc126de2b77b7d27e, 0x412b6bb412c65e12, 0xc117b7fa47bb31ea, 0x40e74a23bdbd0eb0, 0xc1244e06aa62465a, 0x412dcc6d3d218bc8, 0xc11e6e702cd9e0d0, 0x412835961a2edd54); - asm volatile("vfwnmsac.vf v8, %[A], v4" ::[A] "f"(fscalar_32)); + asm volatile("vfwnmsac.vf v16, %[A], v4" ::[A] "f"(fscalar_32)); // -109912672670.3254089355468750, 37853228716.2851715087890625, // -207669389333.5514831542968750, 173475856848.7839965820312500, // 113470468570.1348114013671875, 231307237594.9865112304687500, @@ -253,7 +253,7 @@ void TEST_CASE3(void) { // -37859206864.6847991943359375, 2145428350.5620102882385254, // 
166549954299.5226745605468750, -179467181235.7380371093750000, // 80153027927.0138244628906250, 155923943542.1058349609375000 - VCMP_U64(6, v8, 0xc239974e499e534e, 0x4221a074dd589202, 0xc2482d07b40ac697, + VCMP_U64(6, v16, 0xc239974e499e534e, 0x4221a074dd589202, 0xc2482d07b40ac697, 0x424431fbc0e8645a, 0x423a6b5df1da2283, 0x424aed7e2c6d7e46, 0x4236505f071a24b9, 0xc23eee3ac1143363, 0x421a884888b36990, 0xc24463f954175e4e, 0xc221a12b4da15e9e, 0x41dff828dfa3f7fa, @@ -264,7 +264,7 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // 15.1797, -57.5312, -39.9688, 95.8125, 22.3906, // -30.2344, 61.3438, 67.1250, -80.6250, -20.6875, -34.1250, @@ -279,23 +279,23 @@ void TEST_CASE4(void) { // -61.63031769, -55.39078903, 21.99703789, 29.49930191, // -64.56553650, -17.54965782, 84.51310730, -88.96613312, // -6.75917578 - VLOAD_32(v8, 0xc2b4f48b, 0xc2b3f2af, 0xc208d482, 0xc2bb782b, 0x42a3831b, + VLOAD_32(v16, 0xc2b4f48b, 0xc2b3f2af, 0xc208d482, 0xc2bb782b, 0x42a3831b, 0x42a134b8, 0x4292e873, 0xc2768572, 0xc25d902b, 0x41aff9ef, 0x41ebfe92, 0xc281218e, 0xc18c65b3, 0x42a906b6, 0xc2b1eea9, 0xc0d84b2b); - asm volatile("vfwnmsac.vf v8, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); + asm volatile("vfwnmsac.vf v16, %[A], v4, v0.t" ::[A] "f"(fscalar_16)); // -90.47762299, -1685.56726074, -34.20752716, // 2563.56518555, 81.75606537, -757.92852783, 73.45400238, // 1800.03955078, -55.39078903, -551.75787354, 29.49930191, // -277.44854736, -17.54965782, 1882.04724121, -88.96613312, // -1448.07995605 - VCMP_U32(7, v8, 0xc2b4f48b, 0xc4d2b227, 0xc208d482, 0x4520390b, 0x42a3831b, + VCMP_U32(7, v16, 0xc2b4f48b, 0xc4d2b227, 0xc208d482, 0x4520390b, 0x42a3831b, 0xc43d7b6d, 0x4292e873, 0x44e10144, 0xc25d902b, 0xc409f081, 0x41ebfe92, 0xc38ab96a, 0xc18c65b3, 0x44eb4183, 0xc2b1eea9, 0xc4b5028f); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // 
467373.87500000, -160965.29687500, 883060.25000000, // -737665.37500000, -482502.81250000, -983579.31250000, @@ -318,13 +318,13 @@ void TEST_CASE4(void) { // -388606.5700500296661630, 47697.1169114386430010, // -665347.3327810273040086, 976438.6193965608254075, // -498588.0437998892739415, 793291.0511387982405722 - VLOAD_64(v8, 0xc11c1ed27139f9a2, 0xc12cd6554cbddf2f, 0xc1128659ecb82f10, + VLOAD_64(v16, 0xc11c1ed27139f9a2, 0xc12cd6554cbddf2f, 0xc1128659ecb82f10, 0xc126d3f3549612d1, 0x4117a9d8000c4d34, 0xc12b49af53e9b790, 0x40f804b11f89b0f0, 0xc12d03c239c68719, 0xc126de2b77b7d27e, 0x412b6bb412c65e12, 0xc117b7fa47bb31ea, 0x40e74a23bdbd0eb0, 0xc1244e06aa62465a, 0x412dcc6d3d218bc8, 0xc11e6e702cd9e0d0, 0x412835961a2edd54); - asm volatile("vfwnmsac.vf v8, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); + asm volatile("vfwnmsac.vf v16, %[A], v4, v0.t" ::[A] "f"(fscalar_32)); // -460724.6105727200629190, 37853228716.2851715087890625, // -303510.4811713555827737, 173475856848.7839965820312500, // 387702.0000469267833978, 231307237594.9865112304687500, @@ -333,7 +333,7 @@ void TEST_CASE4(void) { // -388606.5700500296661630, 2145428350.5620102882385254, // -665347.3327810273040086, -179467181235.7380371093750000, // -498588.0437998892739415, 155923943542.1058349609375000 - VCMP_U64(8, v8, 0xc11c1ed27139f9a2, 0x4221a074dd589202, 0xc1128659ecb82f10, + VCMP_U64(8, v16, 0xc11c1ed27139f9a2, 0x4221a074dd589202, 0xc1128659ecb82f10, 0x424431fbc0e8645a, 0x4117a9d8000c4d34, 0x424aed7e2c6d7e46, 0x40f804b11f89b0f0, 0xc23eee3ac1143363, 0xc126de2b77b7d27e, 0xc24463f954175e4e, 0xc117b7fa47bb31ea, 0x41dff828dfa3f7fa, diff --git a/sw/riscvTests/isa/rv64uv/vfwsub.c b/sw/riscvTests/isa/rv64uv/vfwsub.c index 9110889b..400a06ab 100644 --- a/sw/riscvTests/isa/rv64uv/vfwsub.c +++ b/sw/riscvTests/isa/rv64uv/vfwsub.c @@ -11,7 +11,7 @@ // Simple random test with similar values void TEST_CASE1(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -15.5625, 95.7500, -42.4375, 30.7188, -50.7500, -90.2500, // 
-95.5000, 29.5938, -41.4062, -94.0000, 34.3438, // -69.5625, 31.5625, -75.0625, 46.2500, -63.6875 @@ -20,21 +20,21 @@ void TEST_CASE1(void) { // 57.2500, 43.2812, -49.4062, -53.5625, -54.7812, // -12.1406, 92.1875, 67.1875, -19.7656, -41.2812, 98.0625, // -41.9062, 10.1719, -84.6250, -7.1016, 62.8750 - VLOAD_16(v6, 0x5328, 0x5169, 0xd22d, 0xd2b2, 0xd2d9, 0xca12, 0x55c3, 0x5433, + VLOAD_16(v8, 0x5328, 0x5169, 0xd22d, 0xd2b2, 0xd2d9, 0xca12, 0x55c3, 0x5433, 0xccf1, 0xd129, 0x5621, 0xd13d, 0x4916, 0xd54a, 0xc71a, 0x53dc); - asm volatile("vfwsub.vv v8, v4, v6"); + asm volatile("vfwsub.vv v16, v4, v8"); // -72.81250000, 52.46875000, 6.96875000, 84.28125000, 4.03125000, // -78.10937500, -187.68750000, -37.59375000, -21.64062500, // -52.71875000, -63.71875000, // -27.65625000, 21.39062500, 9.56250000, 53.35156250, // -126.56250000 - VCMP_U32(1, v8, 0xc291a000, 0x4251e000, 0x40df0000, 0x42a89000, 0x40810000, + VCMP_U32(1, v16, 0xc291a000, 0x4251e000, 0x40df0000, 0x42a89000, 0x40810000, 0xc29c3800, 0xc33bb000, 0xc2166000, 0xc1ad2000, 0xc252e000, 0xc27ee000, 0xc1dd4000, 0x41ab2000, 0x41190000, 0x42556800, 0xc2fd2000); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // 74632.77343750, -65636.60937500, 16165.84765625, // -17815.85937500, -85604.03125000, -76754.03125000, // 21778.01171875, -70512.52343750, 85301.90625000, @@ -51,11 +51,11 @@ void TEST_CASE1(void) { // 57701.78906250, -81581.38281250, 53319.19531250, // -86229.57031250, 44376.69531250, 46809.38671875, // -92887.27343750 - VLOAD_32(v6, 0x45a87dc7, 0xc7b1442f, 0xc6b36264, 0x46469280, 0x47aa21cc, + VLOAD_32(v8, 0x45a87dc7, 0xc7b1442f, 0xc6b36264, 0x46469280, 0x47aa21cc, 0x4756537c, 0x47588065, 0xc78b8601, 0xc7704177, 0x476165ca, 0xc79f56b1, 0x47504732, 0xc7a86ac9, 0x472d58b2, 0x4736d963, 0xc7b56ba3); - asm volatile("vfwsub.vv v8, v4, v6"); + asm volatile("vfwsub.vv v16, v4, v8"); // 69241.0512695312500000, 25123.7578125000000000, // 39127.0429687500000000, -30524.4843750000000000, // 
-172711.6250000000000000, -131621.5156250000000000, @@ -64,7 +64,7 @@ void TEST_CASE1(void) { // 179839.4375000000000000, -103740.7265625000000000, // 156072.1093750000000000, -109596.6562500000000000, // -112075.4765625000000000, 2146.6718750000000000 - VCMP_U64(2, v8, 0x40f0e790d2000000, 0x40d888f080000000, 0x40e31ae160000000, + VCMP_U64(2, v16, 0x40f0e790d2000000, 0x40d888f080000000, 0x40e31ae160000000, 0xc0ddcf1f00000000, 0xc105153d00000000, 0xc100112c20000000, 0xc0e06dcc40000000, 0x408cdbe000000000, 0x4101ebbaf8000000, 0xc0f0226e64000000, 0x4105f3fb80000000, 0xc0f953cba0000000, @@ -76,7 +76,7 @@ void TEST_CASE1(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE2(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -15.5625, 95.7500, -42.4375, 30.7188, -50.7500, -90.2500, // -95.5000, 29.5938, -41.4062, -94.0000, 34.3438, // -69.5625, 31.5625, -75.0625, 46.2500, -63.6875 @@ -85,21 +85,21 @@ void TEST_CASE2(void) { // 57.2500, 43.2812, -49.4062, -53.5625, -54.7812, // -12.1406, 92.1875, 67.1875, -19.7656, -41.2812, 98.0625, // -41.9062, 10.1719, -84.6250, -7.1016, 62.8750 - VLOAD_16(v6, 0x5328, 0x5169, 0xd22d, 0xd2b2, 0xd2d9, 0xca12, 0x55c3, 0x5433, + VLOAD_16(v8, 0x5328, 0x5169, 0xd22d, 0xd2b2, 0xd2d9, 0xca12, 0x55c3, 0x5433, 0xccf1, 0xd129, 0x5621, 0xd13d, 0x4916, 0xd54a, 0xc71a, 0x53dc); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwsub.vv v8, v4, v6, v0.t"); + VCLEAR(v16); + asm volatile("vfwsub.vv v16, v4, v8, v0.t"); // 0.00000000, 52.46875000, 0.00000000, 84.28125000, // 0.00000000, -78.10937500, 0.00000000, -37.59375000, // 0.00000000, -52.71875000, 0.00000000, -27.65625000, // 0.00000000, 9.56250000, 0.00000000, -126.56250000 - VCMP_U32(3, v8, 0x0, 0x4251e000, 0x0, 0x42a89000, 0x0, 0xc29c3800, 0x0, + VCMP_U32(3, v16, 0x0, 0x4251e000, 0x0, 0x42a89000, 0x0, 0xc29c3800, 0x0, 0xc2166000, 0x0, 0xc252e000, 0x0, 0xc1dd4000, 0x0, 0x41190000, 0x0, 0xc2fd2000); #if ELEN == 64 - VSET(16, 
e32, m2); + VSET(16, e32, m4); // 74632.77343750, -65636.60937500, 16165.84765625, // -17815.85937500, -85604.03125000, -76754.03125000, // 21778.01171875, -70512.52343750, 85301.90625000, @@ -116,13 +116,13 @@ void TEST_CASE2(void) { // 57701.78906250, -81581.38281250, 53319.19531250, // -86229.57031250, 44376.69531250, 46809.38671875, // -92887.27343750 - VLOAD_32(v6, 0x45a87dc7, 0xc7b1442f, 0xc6b36264, 0x46469280, 0x47aa21cc, + VLOAD_32(v8, 0x45a87dc7, 0xc7b1442f, 0xc6b36264, 0x46469280, 0x47aa21cc, 0x4756537c, 0x47588065, 0xc78b8601, 0xc7704177, 0x476165ca, 0xc79f56b1, 0x47504732, 0xc7a86ac9, 0x472d58b2, 0x4736d963, 0xc7b56ba3); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwsub.vv v8, v4, v6, v0.t"); + VCLEAR(v16); + asm volatile("vfwsub.vv v16, v4, v8, v0.t"); // 0.0000000000000000, 25123.7578125000000000, // 0.0000000000000000, -30524.4843750000000000, // 0.0000000000000000, -131621.5156250000000000, @@ -131,7 +131,7 @@ void TEST_CASE2(void) { // 0.0000000000000000, -103740.7265625000000000, // 0.0000000000000000, -109596.6562500000000000, // 0.0000000000000000, 2146.6718750000000000 - VCMP_U64(4, v8, 0x0, 0x40d888f080000000, 0x0, 0xc0ddcf1f00000000, 0x0, + VCMP_U64(4, v16, 0x0, 0x40d888f080000000, 0x0, 0xc0ddcf1f00000000, 0x0, 0xc100112c20000000, 0x0, 0x408cdbe000000000, 0x0, 0xc0f0226e64000000, 0x0, 0xc0f953cba0000000, 0x0, 0xc0fac1ca80000000, 0x0, 0x40a0c55800000000); @@ -140,7 +140,7 @@ void TEST_CASE2(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE3(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // 36.4375 BOX_HALF_IN_FLOAT(fscalar_16, 0x508e); @@ -150,19 +150,19 @@ void TEST_CASE3(void) { // -96.8750 VLOAD_16(v4, 0x545d, 0xd0aa, 0xd4d4, 0x5017, 0xd530, 0x54c5, 0x4b78, 0x5488, 0x50f4, 0xd3a7, 0x508b, 0x55d8, 0xd573, 0xc6ed, 0x4e47, 0xd60e); - asm volatile("vfwsub.vf v8, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfwsub.vf v16, v4, %[A]" ::[A] "f"(fscalar_16)); // 33.37500000, 
-73.75000000, -113.68750000, -3.71875000, // -119.43750000, 39.87500000, // -21.50000000, 36.06250000, 3.18750000, -97.65625000, // -0.09375000, 57.06250000, -123.62500000, -43.36328125, // -11.32812500, -133.31250000 - VCMP_U32(5, v8, 0x42058000, 0xc2938000, 0xc2e36000, 0xc06e0000, 0xc2eee000, + VCMP_U32(5, v16, 0x42058000, 0xc2938000, 0xc2e36000, 0xc06e0000, 0xc2eee000, 0x421f8000, 0xc1ac0000, 0x42104000, 0x404c0000, 0xc2c35000, 0xbdc00000, 0x42644000, 0xc2f74000, 0xc22d7400, 0xc1354000, 0xc3055000); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // -138614.20312500 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xc8075d8d); @@ -176,7 +176,7 @@ void TEST_CASE3(void) { 0xc915db06, 0xc83281a2, 0x47ec8d39, 0xc96a049d, 0x4890262d, 0x488948fb, 0xc9337dbe, 0xc93f66e9, 0x48cd8ef5, 0x496903a2, 0xc8910b91); - asm volatile("vfwsub.vf v8, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfwsub.vf v16, v4, %[A]" ::[A] "f"(fscalar_32)); // 611722.1406250000000000, 300589.2812500000000000, // -34430.6875000000000000, -183431.8906250000000000, // -346993.3593750000000000, -475194.1718750000000000, @@ -185,7 +185,7 @@ void TEST_CASE3(void) { // 419774.0468750000000000, -596581.6718750000000000, // -645368.3593750000000000, 559597.8593750000000000, // 1093040.3281250000000000, -158438.3281250000000000 - VCMP_U64(6, v8, 0x4122ab1448000000, 0x411258b520000000, 0xc0e0cfd600000000, + VCMP_U64(6, v16, 0x4122ab1448000000, 0x411258b520000000, 0xc0e0cfd600000000, 0xc106643f20000000, 0xc1152dc570000000, 0xc11d00e8b0000000, 0xc0e5920a80000000, 0x410fb48530000000, 0xc12905a738000000, 0x411a7a9e70000000, 0x41199ef830000000, 0xc12234cb58000000, @@ -196,7 +196,7 @@ void TEST_CASE3(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE4(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // 36.4375 BOX_HALF_IN_FLOAT(fscalar_16, 0x508e); @@ -207,18 +207,18 @@ void TEST_CASE4(void) { VLOAD_16(v4, 0x545d, 0xd0aa, 0xd4d4, 0x5017, 
0xd530, 0x54c5, 0x4b78, 0x5488, 0x50f4, 0xd3a7, 0x508b, 0x55d8, 0xd573, 0xc6ed, 0x4e47, 0xd60e); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwsub.vf v8, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v16); + asm volatile("vfwsub.vf v16, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.00000000, -73.75000000, 0.00000000, -3.71875000, // 0.00000000, 39.87500000, 0.00000000, 36.06250000, // 0.00000000, -97.65625000, 0.00000000, 57.06250000, // 0.00000000, -43.36328125, 0.00000000, -133.31250000 - VCMP_U32(7, v8, 0x0, 0xc2938000, 0x0, 0xc06e0000, 0x0, 0x421f8000, 0x0, + VCMP_U32(7, v16, 0x0, 0xc2938000, 0x0, 0xc06e0000, 0x0, 0x421f8000, 0x0, 0x42104000, 0x0, 0xc2c35000, 0x0, 0x42644000, 0x0, 0xc22d7400, 0x0, 0xc3055000); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // -138614.20312500 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xc8075d8d); @@ -233,8 +233,8 @@ void TEST_CASE4(void) { 0x488948fb, 0xc9337dbe, 0xc93f66e9, 0x48cd8ef5, 0x496903a2, 0xc8910b91); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwsub.vf v8, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v16); + asm volatile("vfwsub.vf v16, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.0000000000000000, 300589.2812500000000000, // 0.0000000000000000, -183431.8906250000000000, // 0.0000000000000000, -475194.1718750000000000, @@ -243,7 +243,7 @@ void TEST_CASE4(void) { // 0.0000000000000000, -596581.6718750000000000, // 0.0000000000000000, 559597.8593750000000000, // 0.0000000000000000, -158438.3281250000000000 - VCMP_U64(8, v8, 0x0, 0x411258b520000000, 0x0, 0xc106643f20000000, 0x0, + VCMP_U64(8, v16, 0x0, 0x411258b520000000, 0x0, 0xc106643f20000000, 0x0, 0xc11d00e8b0000000, 0x0, 0x410fb48530000000, 0x0, 0x411a7a9e70000000, 0x0, 0xc12234cb58000000, 0x0, 0x412113dbb8000000, 0x0, 0xc1035732a0000000); @@ -251,7 +251,7 @@ void TEST_CASE4(void) { }; // Simple random test with similar values void TEST_CASE5(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -92.15529633, 
27.66998672, // -5.68499708, 78.95133209, 57.52299500, 15.45270920, 50.26883316, // 46.63587189, 71.16806793, -80.68485260, @@ -264,21 +264,21 @@ void TEST_CASE5(void) { // -72.5625, -83.4375, 28.8281, 33.5938, // -85.7500, 67.5000, 91.0625, -91.8750, -9.2578, -64.2500, // -58.6250, 50.3438, -70.5000, 36.6250, 5.7930, 86.6875 - VLOAD_16(v6, 0xd489, 0xd537, 0x4f35, 0x5033, 0xd55c, 0x5438, 0x55b1, 0xd5be, + VLOAD_16(v8, 0xd489, 0xd537, 0x4f35, 0x5033, 0xd55c, 0x5438, 0x55b1, 0xd5be, 0xc8a1, 0xd404, 0xd354, 0x524b, 0xd468, 0x5094, 0x45cb, 0x556b); - asm volatile("vfwsub.wv v8, v4, v6"); + asm volatile("vfwsub.wv v16, v4, v8"); // -19.59279633, 111.10748291, -34.51312256, 45.35758209, // 143.27299500, -52.04729080, -40.79366684, // 138.51086426, 80.42588043, -16.43485260, 36.28306580, // -10.17347717, 164.04611206, -10.76483917, 36.03541946, // -4.18495178 - VCMP_U32(9, v8, 0xc19cbe0c, 0x42de3708, 0xc20a0d70, 0x42356e2a, 0x430f45e3, + VCMP_U32(9, v16, 0xc19cbe0c, 0x42de3708, 0xc20a0d70, 0x42356e2a, 0x430f45e3, 0xc250306d, 0xc2232cb7, 0x430a82c8, 0x42a0da0d, 0xc1837a94, 0x421121dc, 0xc122c690, 0x43240bce, 0xc12c3cc8, 0x42102445, 0xc085eb20); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); // -79494.9435096215456724, 81629.4152202270051930, // 60506.1876363231276628, -81020.4028176319407066, // -6814.2587861350475578, 11974.4045779409498209, @@ -299,11 +299,11 @@ void TEST_CASE5(void) { // 39975.67578125, -71197.53125000, -66640.12500000, // 47459.75390625, -34899.84375000, -21371.85937500, // 17582.65820312 - VLOAD_32(v6, 0x47bb2751, 0x46aa3aff, 0xc725a96e, 0x476b0ce4, 0xc7351794, + VLOAD_32(v8, 0x47bb2751, 0x46aa3aff, 0xc725a96e, 0x476b0ce4, 0xc7351794, 0xc78b222b, 0x4580e2df, 0xc79d4780, 0x46d64d69, 0x471c27ad, 0xc78b0ec4, 0xc7822810, 0x473963c1, 0xc70853d8, 0xc6a6f7b8, 0x46895d51); - asm volatile("vfwsub.wv v8, v4, v6"); + asm volatile("vfwsub.wv v16, v4, v8"); // -175317.5763221215456724, 59839.9171733520051930, // 102915.6173238231276628, 
-141193.2934426319552585, // 39545.3193388649524422, 83210.7405154409498209, @@ -312,7 +312,7 @@ void TEST_CASE5(void) { // 100155.9076395476586185, 30252.7584680835425388, // -137859.5532296942838002, -43872.2568954367889091, // -41482.7561000282003079, 20275.9804473698022775 - VCMP_U64(10, v8, 0xc10566ac9c4ec5c0, 0x40ed37fd597bedf4, 0x40f92039e08ef1f6, + VCMP_U64(10, v16, 0xc10566ac9c4ec5c0, 0x40ed37fd597bedf4, 0x40f92039e08ef1f6, 0xc1013c4a58f8735c, 0x40e34f2a380623aa, 0x40f450abd926b811, 0x40f6e9b5904af21e, 0xc0c90f706042ba00, 0x40f0bb0bc1192628, 0xc0f80c81aba217f7, 0x40f873be85b10bdc, 0x40dd8b308abdb778, @@ -324,7 +324,7 @@ void TEST_CASE5(void) { // Simple random test with similar values (masked) // The numbers are the same of TEST_CASE1 void TEST_CASE6(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); // -92.15529633, 27.66998672, // -5.68499708, 78.95133209, 57.52299500, 15.45270920, 50.26883316, // 46.63587189, 71.16806793, -80.68485260, @@ -337,21 +337,21 @@ void TEST_CASE6(void) { // -72.5625, -83.4375, 28.8281, 33.5938, // -85.7500, 67.5000, 91.0625, -91.8750, -9.2578, -64.2500, // -58.6250, 50.3438, -70.5000, 36.6250, 5.7930, 86.6875 - VLOAD_16(v6, 0xd489, 0xd537, 0x4f35, 0x5033, 0xd55c, 0x5438, 0x55b1, 0xd5be, + VLOAD_16(v8, 0xd489, 0xd537, 0x4f35, 0x5033, 0xd55c, 0x5438, 0x55b1, 0xd5be, 0xc8a1, 0xd404, 0xd354, 0x524b, 0xd468, 0x5094, 0x45cb, 0x556b); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwsub.wv v8, v4, v6, v0.t"); + VCLEAR(v16); + asm volatile("vfwsub.wv v16, v4, v8, v0.t"); // 0.00000000, 111.10748291, 0.00000000, 45.35758209, // 0.00000000, -52.04729080, 0.00000000, 138.51086426, // 0.00000000, -16.43485260, 0.00000000, -10.17347717, // 0.00000000, -10.76483917, 0.00000000, -4.18495178 - VCMP_U32(11, v8, 0x0, 0x42de3708, 0x0, 0x42356e2a, 0x0, 0xc250306d, 0x0, + VCMP_U32(11, v16, 0x0, 0x42de3708, 0x0, 0x42356e2a, 0x0, 0xc250306d, 0x0, 0x430a82c8, 0x0, 0xc1837a94, 0x0, 0xc122c690, 0x0, 0xc12c3cc8, 0x0, 0xc085eb20); #if ELEN == 64 
- VSET(16, e32, m2); + VSET(16, e32, m4); // -79494.9435096215456724, 81629.4152202270051930, // 60506.1876363231276628, -81020.4028176319407066, // -6814.2587861350475578, 11974.4045779409498209, @@ -372,13 +372,13 @@ void TEST_CASE6(void) { // 39975.67578125, -71197.53125000, -66640.12500000, // 47459.75390625, -34899.84375000, -21371.85937500, // 17582.65820312 - VLOAD_32(v6, 0x47bb2751, 0x46aa3aff, 0xc725a96e, 0x476b0ce4, 0xc7351794, + VLOAD_32(v8, 0x47bb2751, 0x46aa3aff, 0xc725a96e, 0x476b0ce4, 0xc7351794, 0xc78b222b, 0x4580e2df, 0xc79d4780, 0x46d64d69, 0x471c27ad, 0xc78b0ec4, 0xc7822810, 0x473963c1, 0xc70853d8, 0xc6a6f7b8, 0x46895d51); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwsub.wv v8, v4, v6, v0.t"); + VCLEAR(v16); + asm volatile("vfwsub.wv v16, v4, v8, v0.t"); // 0.0000000000000000, 59839.9171733520051930, // 0.0000000000000000, -141193.2934426319552585, // 0.0000000000000000, 83210.7405154409498209, @@ -387,7 +387,7 @@ void TEST_CASE6(void) { // 0.0000000000000000, 30252.7584680835425388, // 0.0000000000000000, -43872.2568954367889091, // 0.0000000000000000, 20275.9804473698022775 - VCMP_U64(12, v8, 0x0, 0x40ed37fd597bedf4, 0x0, 0xc1013c4a58f8735c, 0x0, + VCMP_U64(12, v16, 0x0, 0x40ed37fd597bedf4, 0x0, 0xc1013c4a58f8735c, 0x0, 0x40f450abd926b811, 0x0, 0xc0c90f706042ba00, 0x0, 0xc0f80c81aba217f7, 0x0, 0x40dd8b308abdb778, 0x0, 0xc0e56c08387cc770, 0x0, 0x40d3ccfebfa65330); @@ -396,7 +396,7 @@ void TEST_CASE6(void) { // Simple random test with similar values (vector-scalar) void TEST_CASE7(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // -8.76965809, 55.45920181, 71.29286957, -84.65414429, // -81.93881226, 75.13192749, -75.44019318, -48.81898499, @@ -408,18 +408,18 @@ void TEST_CASE7(void) { 0xc25c49f3); // 34.7812 BOX_HALF_IN_FLOAT(fscalar_16, 0x5059); - asm volatile("vfwsub.wf v8, v4, %[A]" ::[A] "f"(fscalar_16)); + asm volatile("vfwsub.wf v16, v4, %[A]" ::[A] "f"(fscalar_16)); // -43.55090714, 20.67795181, 
36.51161957, -119.43539429, // -116.72006226, 40.35067749, -110.22144318, -83.60023499, // -34.67818832, -59.97023392, 14.89881516, 37.88153076, // -59.69005966, -67.37556458, -20.19248962, -89.85346985 - VCMP_U32(13, v8, 0xc22e3421, 0x41a56c72, 0x42120be6, 0xc2eedeec, 0xc2e970ac, + VCMP_U32(13, v16, 0xc22e3421, 0x41a56c72, 0x42120be6, 0xc2eedeec, 0xc2e970ac, 0x42216718, 0xc2dc7161, 0xc2a73352, 0xc20ab677, 0xc26fe185, 0x416e618c, 0x421786b0, 0xc26ec29f, 0xc286c04a, 0xc1a18a38, 0xc2b3b4fa); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // 322189.5706008458510041, 914899.9451866354793310, // -620811.0881863175891340, -456926.2657179111847654, @@ -437,7 +437,7 @@ void TEST_CASE7(void) { 0x4128f812f63066de); // -83388.08593750 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xc7a2de0b); - asm volatile("vfwsub.wf v8, v4, %[A]" ::[A] "f"(fscalar_32)); + asm volatile("vfwsub.wf v16, v4, %[A]" ::[A] "f"(fscalar_32)); // 405577.6565383458510041, 998288.0311241354793310, // -537423.0022488175891340, -373538.1797804111847654, // -466557.7857936944346875, -303426.8900513321859762, @@ -446,7 +446,7 @@ void TEST_CASE7(void) { // -699427.3162755169905722, -478041.7009790195152164, // 838760.0551206718664616, -871480.0901815977413207, // -522879.0126630428712815, 901573.5667755266185850 - VCMP_U64(14, v8, 0x4118c126a04b9690, 0x412e77200fef80b0, 0xc120669e0126c1cc, + VCMP_U64(14, v16, 0x4118c126a04b9690, 0x412e77200fef80b0, 0xc120669e0126c1cc, 0xc116cc88b8185b2a, 0xc11c79f724a71a2c, 0xc112850b8f699dce, 0x412964833c5f5108, 0x412b9bddba2eeaae, 0x41300e592553b4fb, 0xc10fc693bfd2c18c, 0xc1255846a1eedd54, 0xc11d2d66cdcd71b0, @@ -457,7 +457,7 @@ void TEST_CASE7(void) { // Simple random test with similar values (vector-scalar) (masked) void TEST_CASE8(void) { - VSET(16, e16, m2); + VSET(16, e16, m4); float fscalar_16; // -8.76965809, 55.45920181, 71.29286957, -84.65414429, // -81.93881226, 75.13192749, -75.44019318, -48.81898499, @@ -470,18 +470,18 @@ void TEST_CASE8(void) { 
// 34.7812 BOX_HALF_IN_FLOAT(fscalar_16, 0x5059); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwsub.wf v8, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); + VCLEAR(v16); + asm volatile("vfwsub.wf v16, v4, %[A], v0.t" ::[A] "f"(fscalar_16)); // 0.00000000, 20.67795181, 0.00000000, -119.43539429, // 0.00000000, 40.35067749, 0.00000000, -83.60023499, // 0.00000000, -59.97023392, 0.00000000, 37.88153076, // 0.00000000, -67.37556458, 0.00000000, -89.85346985 - VCMP_U32(15, v8, 0x0, 0x41a56c72, 0x0, 0xc2eedeec, 0x0, 0x42216718, 0x0, + VCMP_U32(15, v16, 0x0, 0x41a56c72, 0x0, 0xc2eedeec, 0x0, 0x42216718, 0x0, 0xc2a73352, 0x0, 0xc26fe185, 0x0, 0x421786b0, 0x0, 0xc286c04a, 0x0, 0xc2b3b4fa); #if ELEN == 64 - VSET(16, e32, m2); + VSET(16, e32, m4); float fscalar_32; // 322189.5706008458510041, 914899.9451866354793310, // -620811.0881863175891340, -456926.2657179111847654, @@ -500,8 +500,8 @@ void TEST_CASE8(void) { // -83388.08593750 BOX_FLOAT_IN_FLOAT(fscalar_32, 0xc7a2de0b); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v8); - asm volatile("vfwsub.wf v8, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); + VCLEAR(v16); + asm volatile("vfwsub.wf v16, v4, %[A], v0.t" ::[A] "f"(fscalar_32)); // 0.0000000000000000, 998288.0311241354793310, // 0.0000000000000000, -373538.1797804111847654, // 0.0000000000000000, -303426.8900513321859762, @@ -510,7 +510,7 @@ void TEST_CASE8(void) { // 0.0000000000000000, -478041.7009790195152164, // 0.0000000000000000, -871480.0901815977413207, // 0.0000000000000000, 901573.5667755266185850 - VCMP_U64(16, v8, 0x0, 0x412e77200fef80b0, 0x0, 0xc116cc88b8185b2a, 0x0, + VCMP_U64(16, v16, 0x0, 0x412e77200fef80b0, 0x0, 0xc116cc88b8185b2a, 0x0, 0xc112850b8f699dce, 0x0, 0x412b9bddba2eeaae, 0x0, 0xc10fc693bfd2c18c, 0x0, 0xc11d2d66cdcd71b0, 0x0, 0xc12a98702e2c484a, 0x0, 0x412b838b223066de); diff --git a/sw/riscvTests/isa/rv64uv/vmacc.c b/sw/riscvTests/isa/rv64uv/vmacc.c index 4758f198..e24d7323 100644 --- a/sw/riscvTests/isa/rv64uv/vmacc.c +++ b/sw/riscvTests/isa/rv64uv/vmacc.c 
@@ -8,70 +8,70 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(16, e8, m2); - VLOAD_8(v6, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, + VSET(16, e8, m8); + VLOAD_8(v24, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, 0xea, 0x14, 0xce, 0xb0, 0x37); - VLOAD_8(v4, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + VLOAD_8(v16, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, 0x42, 0x52, 0x40, 0xa8, 0x53); - VLOAD_8(v2, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + VLOAD_8(v8, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, 0x70, 0xc2, 0x62, 0xe0, 0x99); - asm volatile("vmacc.vv v2, v4, v6"); - VCMP_U8(1, v2, 0xee, 0x42, 0xae, 0x96, 0xfd, 0xc7, 0x00, 0xae, 0xe4, 0x29, + asm volatile("vmacc.vv v8, v16, v24"); + VCMP_U8(1, v8, 0xee, 0x42, 0xae, 0x96, 0xfd, 0xc7, 0x00, 0xae, 0xe4, 0x29, 0x17, 0xc4, 0x2a, 0xe2, 0x60, 0x6e); - VSET(16, e16, m2); - VLOAD_16(v6, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + VSET(16, e16, m8); + VLOAD_16(v24, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); - VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + VLOAD_16(v16, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); - VLOAD_16(v2, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + VLOAD_16(v8, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); - asm volatile("vmacc.vv v2, v4, v6"); - VCMP_U16(2, v2, 0x8d70, 0x6dcb, 0xb74e, 0x6761, 0xa639, 0xf452, 0x22f6, + asm volatile("vmacc.vv v8, v16, v24"); + VCMP_U16(2, v8, 0x8d70, 0x6dcb, 0xb74e, 0x6761, 0xa639, 0xf452, 0x22f6, 0x86f2, 0x4e5f, 0x378a, 0xc4a3, 0x561a, 0xb8da, 0x5e42, 0xf4fd, 0xa35d); - VSET(16, e32, m2); - 
VLOAD_32(v6, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + VSET(16, e32, m8); + VLOAD_32(v24, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, 0xc41fd55a); - VLOAD_32(v4, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, + VLOAD_32(v16, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, 0xea615f0a); - VLOAD_32(v2, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, + VLOAD_32(v8, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, 0xfcb24a2d); - asm volatile("vmacc.vv v2, v4, v6"); - VCMP_U32(3, v2, 0x448bd85e, 0xf2cbc4a8, 0x5cd02119, 0xf69b4268, 0x3c60ee0c, + asm volatile("vmacc.vv v8, v16, v24"); + VCMP_U32(3, v8, 0x448bd85e, 0xf2cbc4a8, 0x5cd02119, 0xf69b4268, 0x3c60ee0c, 0xa233b25d, 0x4c72c95c, 0xe2b1a595, 0xefb7d755, 0x95d6b28a, 0xd3be5a47, 0x6338471d, 0xfb1a117e, 0xabe00fef, 0xbede88b0, 0x913705b1); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, + VSET(16, e64, m8); + VLOAD_64(v24, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, 0xc22568276f1dcdd0); - VLOAD_64(v4, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, 0x68ba6d050ffcba1e, 
0x94448979a2b854e6, 0x84bf5d544f97f739, 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, 0xe04b73fdf1b61f9c); - VLOAD_64(v2, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + VLOAD_64(v8, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 0xa8fe3bf12b7c95e8, 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, 0x79803b24efa2caa3); - asm volatile("vmacc.vv v2, v4, v6"); - VCMP_U64(4, v2, 0xf7c2044aeebff5e8, 0xad447a1b99a48a53, 0x78676efbe1b5763a, + asm volatile("vmacc.vv v8, v16, v24"); + VCMP_U64(4, v8, 0xf7c2044aeebff5e8, 0xad447a1b99a48a53, 0x78676efbe1b5763a, 0x813582af4d75d09e, 0x483adf8d811ecb64, 0x36d90fe4df2f2b2c, 0xf833b173685307a8, 0x955c2ac405b724e1, 0xdcf9681f074b0d2d, 0x10277404741c4ca8, 0x25d9bca0245d9fbf, 0x58439c4175d7f582, @@ -81,74 +81,74 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(16, e8, m2); - VLOAD_8(v6, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, + VSET(16, e8, m8); + VLOAD_8(v24, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, 0xea, 0x14, 0xce, 0xb0, 0x37); - VLOAD_8(v4, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + VLOAD_8(v16, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, 0x42, 0x52, 0x40, 0xa8, 0x53); - VLOAD_8(v2, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + VLOAD_8(v8, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, 0x70, 0xc2, 0x62, 0xe0, 0x99); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmacc.vv v2, v4, v6, v0.t"); - VCMP_U8(5, v2, 0x30, 0x42, 0xb4, 0x96, 0x6d, 0xc7, 0x2c, 0xae, 0xf0, 0x29, + asm volatile("vmacc.vv v8, v16, v24, v0.t"); + VCMP_U8(5, v8, 0x30, 0x42, 0xb4, 0x96, 0x6d, 0xc7, 0x2c, 0xae, 0xf0, 0x29, 0xd7, 0xc4, 0xc2, 0xe2, 0xe0, 0x6e); - VSET(16, e16, m2); - VLOAD_16(v6, 0x1c20, 0x11e4, 0xde38, 
0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + VSET(16, e16, m8); + VLOAD_16(v24, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); - VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + VLOAD_16(v16, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); - VLOAD_16(v2, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + VLOAD_16(v8, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmacc.vv v2, v4, v6, v0.t"); - VCMP_U16(6, v2, 0xe3f0, 0x6dcb, 0x2fde, 0x6761, 0x910c, 0xf452, 0x82aa, + asm volatile("vmacc.vv v8, v16, v24, v0.t"); + VCMP_U16(6, v8, 0xe3f0, 0x6dcb, 0x2fde, 0x6761, 0x910c, 0xf452, 0x82aa, 0x86f2, 0x4631, 0x378a, 0x68c3, 0x561a, 0x3b5c, 0x5e42, 0x2db1, 0xa35d); - VSET(16, e32, m2); - VLOAD_32(v6, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + VSET(16, e32, m8); + VLOAD_32(v24, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, 0xc41fd55a); - VLOAD_32(v4, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, + VLOAD_32(v16, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, 0xea615f0a); - VLOAD_32(v2, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, + VLOAD_32(v8, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, 0xfcb24a2d); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmacc.vv v2, v4, v6, v0.t"); - VCMP_U32(7, v2, 0xa055bbb6, 0xf2cbc4a8, 
0x0be640c9, 0xf69b4268, 0xca121638, + asm volatile("vmacc.vv v8, v16, v24, v0.t"); + VCMP_U32(7, v8, 0xa055bbb6, 0xf2cbc4a8, 0x0be640c9, 0xf69b4268, 0xca121638, 0xa233b25d, 0xe7c83142, 0xe2b1a595, 0x8eb340e3, 0x95d6b28a, 0xffef4a03, 0x6338471d, 0xd0922181, 0xabe00fef, 0xf86a7c06, 0x913705b1); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, + VSET(16, e64, m8); + VLOAD_64(v24, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, 0xc22568276f1dcdd0); - VLOAD_64(v4, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, 0x68ba6d050ffcba1e, 0x94448979a2b854e6, 0x84bf5d544f97f739, 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, 0xe04b73fdf1b61f9c); - VLOAD_64(v2, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + VLOAD_64(v8, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 0xa8fe3bf12b7c95e8, 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, 0x79803b24efa2caa3); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmacc.vv v2, v4, v6, v0.t"); - VCMP_U64(8, v2, 0x32a4c1edbbfe5591, 0xad447a1b99a48a53, 0x3a29727ae38b9b92, + asm volatile("vmacc.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0x32a4c1edbbfe5591, 0xad447a1b99a48a53, 0x3a29727ae38b9b92, 0x813582af4d75d09e, 0xaab9d34e4aeaa57a, 0x36d90fe4df2f2b2c, 0xc4bd99b066821092, 0x955c2ac405b724e1, 0xa8b041a876aabcae, 0x10277404741c4ca8, 
0x8bdf55954f50101d, 0x58439c4175d7f582, @@ -158,62 +158,62 @@ void TEST_CASE2() { } void TEST_CASE3() { - VSET(16, e8, m2); + VSET(16, e8, m8); int64_t scalar = 5; - VLOAD_8(v4, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + VLOAD_8(v16, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, 0x01, 0xe7, 0x51, 0x53, 0x29); - VLOAD_8(v2, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + VLOAD_8(v8, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, 0xaa, 0xd2, 0x93, 0x83, 0xa8); - asm volatile("vmacc.vx v2, %[A], v4" ::[A] "r"(scalar)); - VCMP_U8(9, v2, 0xdb, 0x8a, 0xe0, 0xc9, 0xb0, 0x8f, 0xf7, 0x0b, 0x32, 0x06, + asm volatile("vmacc.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U8(9, v8, 0xdb, 0x8a, 0xe0, 0xc9, 0xb0, 0x8f, 0xf7, 0x0b, 0x32, 0x06, 0x74, 0xaf, 0x55, 0x28, 0x22, 0x75); - VSET(16, e16, m2); + VSET(16, e16, m8); scalar = -5383; - VLOAD_16(v4, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + VLOAD_16(v16, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, 0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); - VLOAD_16(v2, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, + VLOAD_16(v8, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); - asm volatile("vmacc.vx v2, %[A], v4" ::[A] "r"(scalar)); - VCMP_U16(10, v2, 0x145d, 0xb5af, 0x54f9, 0x342e, 0x78a8, 0x4cb6, 0xa9ce, + asm volatile("vmacc.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U16(10, v8, 0x145d, 0xb5af, 0x54f9, 0x342e, 0x78a8, 0x4cb6, 0xa9ce, 0x8131, 0x7b60, 0x9c21, 0xd43f, 0x9759, 0x0e53, 0x109f, 0x71b4, 0xcd08); - VSET(16, e32, m2); + VSET(16, e32, m8); scalar = 6474219; - VLOAD_32(v4, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + VLOAD_32(v16, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, 0x842ba667, 0x53360ec0, 
0xd85d7415, 0xf20de61f, 0x153e7e16, 0xec5512e4); - VLOAD_32(v2, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + VLOAD_32(v8, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, 0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, 0x7ca33236); - asm volatile("vmacc.vx v2, %[A], v4" ::[A] "r"(scalar)); - VCMP_U32(11, v2, 0x8e0d1d47, 0xf29d4830, 0xb5213626, 0xb21bb5a3, 0xbc2f367d, + asm volatile("vmacc.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0x8e0d1d47, 0xf29d4830, 0xb5213626, 0xb21bb5a3, 0xbc2f367d, 0x18eb9d88, 0x91c53550, 0x69a6ceb2, 0xc09822e9, 0x66c98b96, 0xf6b125ab, 0xef3fae1e, 0x4c40925e, 0x6b652c20, 0x998385c4, 0x75d88d82); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ + /* VSET(16, e64, m8); */ /* scalar = -598189234597999223; */ - /* VLOAD_64(v4, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + /* VLOAD_64(v16, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, */ /* 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, */ /* 0x97860fd5fba018c0, 0x724cecf178bd2125, 0x866d16f96d3d8b67, */ /* 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, */ /* 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, */ /* 0x935ac02069fe54ce); */ - /* VLOAD_64(v2, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, + /* VLOAD_64(v8, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, */ /* 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, */ /* 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, */ /* 0x5ff488a4187bd3f3, 0x6a259fe666091333, 0x5afc4de057de51c4, */ /* 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, */ /* 0xdab377ddbdfb2df7); */ - /* asm volatile("vmacc.vx v2, %[A], v4" ::[A] "r"(scalar)); */ - /* VCMP_U64(12, v2, 0x093861b79ac45352, 0xfd3c909decf66b5b, + /* asm volatile("vmacc.vx v8, %[A], v16" ::[A] "r"(scalar)); */ + /* VCMP_U64(12, v8, 0x093861b79ac45352, 
0xfd3c909decf66b5b, * 0x04eb13132ce4267b, */ /* 0xb258e6b065bbf956, 0x62775181e33422f3, 0xdc0ae0e371686968, */ /* 0xf8db06270cad2c71, 0x6c3cc52cd1fb49c2, 0x41c19c0ac1b5a2fa, */ @@ -224,57 +224,57 @@ void TEST_CASE3() { } void TEST_CASE4() { - VSET(16, e8, m2); + VSET(16, e8, m8); int64_t scalar = 5; - VLOAD_8(v4, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + VLOAD_8(v16, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, 0x01, 0xe7, 0x51, 0x53, 0x29); - VLOAD_8(v2, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + VLOAD_8(v8, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, 0xaa, 0xd2, 0x93, 0x83, 0xa8); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmacc.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); - VCMP_U8(13, v2, 0xfb, 0x8a, 0xc0, 0xc9, 0xa7, 0x8f, 0xc8, 0x0b, 0x57, 0x06, + asm volatile("vmacc.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v8, 0xfb, 0x8a, 0xc0, 0xc9, 0xa7, 0x8f, 0xc8, 0x0b, 0x57, 0x06, 0x51, 0xaf, 0xd2, 0x28, 0x83, 0x75); - VSET(16, e16, m2); + VSET(16, e16, m8); scalar = -5383; - VLOAD_16(v4, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + VLOAD_16(v16, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, 0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); - VLOAD_16(v2, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, + VLOAD_16(v8, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmacc.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); - VCMP_U16(14, v2, 0x0a9f, 0xb5af, 0x494e, 0x342e, 0x394c, 0x4cb6, 0xc117, + asm volatile("vmacc.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v8, 0x0a9f, 0xb5af, 0x494e, 0x342e, 0x394c, 0x4cb6, 0xc117, 0x8131, 0xb1af, 0x9c21, 0x22ab, 0x9759, 0xf1c9, 0x109f, 0x9bed, 0xcd08); - VSET(16, e32, m2); + VSET(16, e32, m8); scalar = 6474219; - VLOAD_32(v4, 
0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + VLOAD_32(v16, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, 0x842ba667, 0x53360ec0, 0xd85d7415, 0xf20de61f, 0x153e7e16, 0xec5512e4); - VLOAD_32(v2, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + VLOAD_32(v8, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, 0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, 0x7ca33236); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmacc.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); - VCMP_U32(15, v2, 0xb2436fad, 0xf29d4830, 0xd94eebe7, 0xb21bb5a3, 0xb80f178d, + asm volatile("vmacc.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0xb2436fad, 0xf29d4830, 0xd94eebe7, 0xb21bb5a3, 0xb80f178d, 0x18eb9d88, 0x7764b8a3, 0x69a6ceb2, 0xb0dff3a6, 0x66c98b96, 0xa98a861e, 0xef3fae1e, 0xde488617, 0x6b652c20, 0xc3ba8192, 0x75d88d82); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ + /* VSET(16, e64, m8); */ /* scalar = -598189234597999223; */ - /* VLOAD_64(v4, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + /* VLOAD_64(v16, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, */ /* 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, */ /* 0x97860fd5fba018c0, 0x724cecf178bd2125, 0x866d16f96d3d8b67, */ /* 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, */ /* 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, */ /* 0x935ac02069fe54ce); */ - /* VLOAD_64(v2, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, + /* VLOAD_64(v8, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, */ /* 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, */ /* 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, */ @@ -282,8 +282,8 @@ void TEST_CASE4() { /* 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, */ /* 0xdab377ddbdfb2df7); */ /* VLOAD_8(v0, 
0xAA, 0xAA); */ - /* asm volatile("vmacc.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); */ - /* VCMP_U64(16, v2, 0x0dc8fa1b817237e5, 0xfd3c909decf66b5b, + /* asm volatile("vmacc.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); */ + /* VCMP_U64(16, v8, 0x0dc8fa1b817237e5, 0xfd3c909decf66b5b, * 0xb015bdbf0f39ec01, */ /* 0xb258e6b065bbf956, 0x80c45834a5026c02, 0xdc0ae0e371686968, */ /* 0x9d31b9b802ae2db1, 0x6c3cc52cd1fb49c2, 0x8732f75adf268ddb, */ diff --git a/sw/riscvTests/isa/rv64uv/vmadd.c b/sw/riscvTests/isa/rv64uv/vmadd.c index 5241d0b7..dd2e68ee 100644 --- a/sw/riscvTests/isa/rv64uv/vmadd.c +++ b/sw/riscvTests/isa/rv64uv/vmadd.c @@ -8,70 +8,70 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(16, e8, m2); - VLOAD_8(v2, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, + VSET(16, e8, m8); + VLOAD_8(v8, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, 0xea, 0x14, 0xce, 0xb0, 0x37); - VLOAD_8(v4, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + VLOAD_8(v16, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, 0x42, 0x52, 0x40, 0xa8, 0x53); - VLOAD_8(v6, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + VLOAD_8(v24, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, 0x70, 0xc2, 0x62, 0xe0, 0x99); - asm volatile("vmadd.vv v2, v4, v6"); - VCMP_U8(1, v2, 0xee, 0x42, 0xae, 0x96, 0xfd, 0xc7, 0x00, 0xae, 0xe4, 0x29, + asm volatile("vmadd.vv v8, v16, v24"); + VCMP_U8(1, v8, 0xee, 0x42, 0xae, 0x96, 0xfd, 0xc7, 0x00, 0xae, 0xe4, 0x29, 0x17, 0xc4, 0x2a, 0xe2, 0x60, 0x6e); - VSET(16, e16, m2); - VLOAD_16(v2, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + VSET(16, e16, m8); + VLOAD_16(v8, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); - VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + VLOAD_16(v16, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 
0x17cc, 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); - VLOAD_16(v6, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + VLOAD_16(v24, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); - asm volatile("vmadd.vv v2, v4, v6"); - VCMP_U16(2, v2, 0x8d70, 0x6dcb, 0xb74e, 0x6761, 0xa639, 0xf452, 0x22f6, + asm volatile("vmadd.vv v8, v16, v24"); + VCMP_U16(2, v8, 0x8d70, 0x6dcb, 0xb74e, 0x6761, 0xa639, 0xf452, 0x22f6, 0x86f2, 0x4e5f, 0x378a, 0xc4a3, 0x561a, 0xb8da, 0x5e42, 0xf4fd, 0xa35d); - VSET(16, e32, m2); - VLOAD_32(v2, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + VSET(16, e32, m8); + VLOAD_32(v8, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, 0xc41fd55a); - VLOAD_32(v4, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, + VLOAD_32(v16, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, 0xea615f0a); - VLOAD_32(v6, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, + VLOAD_32(v24, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, 0xfcb24a2d); - asm volatile("vmadd.vv v2, v4, v6"); - VCMP_U32(3, v2, 0x448bd85e, 0xf2cbc4a8, 0x5cd02119, 0xf69b4268, 0x3c60ee0c, + asm volatile("vmadd.vv v8, v16, v24"); + VCMP_U32(3, v8, 0x448bd85e, 0xf2cbc4a8, 0x5cd02119, 0xf69b4268, 0x3c60ee0c, 0xa233b25d, 0x4c72c95c, 0xe2b1a595, 0xefb7d755, 0x95d6b28a, 0xd3be5a47, 0x6338471d, 0xfb1a117e, 0xabe00fef, 0xbede88b0, 0x913705b1); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, + VSET(16, e64, m8); + 
VLOAD_64(v8, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, 0xc22568276f1dcdd0); - VLOAD_64(v4, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, 0x68ba6d050ffcba1e, 0x94448979a2b854e6, 0x84bf5d544f97f739, 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, 0xe04b73fdf1b61f9c); - VLOAD_64(v6, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + VLOAD_64(v24, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 0xa8fe3bf12b7c95e8, 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, 0x79803b24efa2caa3); - asm volatile("vmadd.vv v2, v4, v6"); - VCMP_U64(4, v2, 0xf7c2044aeebff5e8, 0xad447a1b99a48a53, 0x78676efbe1b5763a, + asm volatile("vmadd.vv v8, v16, v24"); + VCMP_U64(4, v8, 0xf7c2044aeebff5e8, 0xad447a1b99a48a53, 0x78676efbe1b5763a, 0x813582af4d75d09e, 0x483adf8d811ecb64, 0x36d90fe4df2f2b2c, 0xf833b173685307a8, 0x955c2ac405b724e1, 0xdcf9681f074b0d2d, 0x10277404741c4ca8, 0x25d9bca0245d9fbf, 0x58439c4175d7f582, @@ -81,74 +81,74 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(16, e8, m2); - VLOAD_8(v2, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, + VSET(16, e8, m8); + VLOAD_8(v8, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, 0xea, 0x14, 0xce, 0xb0, 0x37); - VLOAD_8(v4, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + VLOAD_8(v16, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 
0x2c, 0x58, 0xa1, 0xc4, 0x40, 0x42, 0x52, 0x40, 0xa8, 0x53); - VLOAD_8(v6, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + VLOAD_8(v24, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, 0x70, 0xc2, 0x62, 0xe0, 0x99); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmadd.vv v2, v4, v6, v0.t"); - VCMP_U8(5, v2, 0x21, 0x42, 0x7f, 0x96, 0x50, 0xc7, 0x3f, 0xae, 0x74, 0x29, + asm volatile("vmadd.vv v8, v16, v24, v0.t"); + VCMP_U8(5, v8, 0x21, 0x42, 0x7f, 0x96, 0x50, 0xc7, 0x3f, 0xae, 0x74, 0x29, 0x29, 0xc4, 0x14, 0xe2, 0xb0, 0x6e); - VSET(16, e16, m2); - VLOAD_16(v2, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + VSET(16, e16, m8); + VLOAD_16(v8, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); - VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + VLOAD_16(v16, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); - VLOAD_16(v6, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + VLOAD_16(v24, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmadd.vv v2, v4, v6, v0.t"); - VCMP_U16(6, v2, 0x1c20, 0x6dcb, 0xde38, 0x6761, 0x3eb5, 0xf452, 0x48e1, + asm volatile("vmadd.vv v8, v16, v24, v0.t"); + VCMP_U16(6, v8, 0x1c20, 0x6dcb, 0xde38, 0x6761, 0x3eb5, 0xf452, 0x48e1, 0x86f2, 0x3d2a, 0x378a, 0x3f07, 0x561a, 0x8812, 0x5e42, 0x56f4, 0xa35d); - VSET(16, e32, m2); - VLOAD_32(v2, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + VSET(16, e32, m8); + VLOAD_32(v8, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, 0xc41fd55a); - VLOAD_32(v4, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 
0xe5aba5e4, + VLOAD_32(v16, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, 0xea615f0a); - VLOAD_32(v6, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, + VLOAD_32(v24, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, 0xfcb24a2d); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmadd.vv v2, v4, v6, v0.t"); - VCMP_U32(7, v2, 0x0401c584, 0xf2cbc4a8, 0x4a71aa0c, 0xf69b4268, 0x273fcd5d, + asm volatile("vmadd.vv v8, v16, v24, v0.t"); + VCMP_U32(7, v8, 0x0401c584, 0xf2cbc4a8, 0x4a71aa0c, 0xf69b4268, 0x273fcd5d, 0xa233b25d, 0x599c994e, 0xe2b1a595, 0x4710afae, 0x95d6b28a, 0x96ee5026, 0x6338471d, 0xd95da451, 0xabe00fef, 0xbe990e4b, 0x913705b1); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, + VSET(16, e64, m8); + VLOAD_64(v8, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, 0xc22568276f1dcdd0); - VLOAD_64(v4, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, 0x68ba6d050ffcba1e, 0x94448979a2b854e6, 0x84bf5d544f97f739, 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, 0xe04b73fdf1b61f9c); - VLOAD_64(v6, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + VLOAD_64(v24, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 
0xa8fe3bf12b7c95e8, 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, 0x79803b24efa2caa3); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmadd.vv v2, v4, v6, v0.t"); - VCMP_U64(8, v2, 0x9cffef345b95f00b, 0xad447a1b99a48a53, 0xadfda1d2464c6433, + asm volatile("vmadd.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0x9cffef345b95f00b, 0xad447a1b99a48a53, 0xadfda1d2464c6433, 0x813582af4d75d09e, 0x8a0c6e4bc950e81f, 0x36d90fe4df2f2b2c, 0x27d7ec90ba159756, 0x955c2ac405b724e1, 0xbfd90c33e58a8fe3, 0x10277404741c4ca8, 0x1a7b72dd8ac39fab, 0x58439c4175d7f582, @@ -158,62 +158,62 @@ void TEST_CASE2() { } void TEST_CASE3() { - VSET(16, e8, m2); + VSET(16, e8, m8); int64_t scalar = 5; - VLOAD_8(v2, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + VLOAD_8(v8, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, 0x01, 0xe7, 0x51, 0x53, 0x29); - VLOAD_8(v4, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + VLOAD_8(v16, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, 0xaa, 0xd2, 0x93, 0x83, 0xa8); - asm volatile("vmadd.vx v2, %[A], v4" ::[A] "r"(scalar)); - VCMP_U8(9, v2, 0xdb, 0x8a, 0xe0, 0xc9, 0xb0, 0x8f, 0xf7, 0x0b, 0x32, 0x06, + asm volatile("vmadd.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U8(9, v8, 0xdb, 0x8a, 0xe0, 0xc9, 0xb0, 0x8f, 0xf7, 0x0b, 0x32, 0x06, 0x74, 0xaf, 0x55, 0x28, 0x22, 0x75); - VSET(16, e16, m2); + VSET(16, e16, m8); scalar = -5383; - VLOAD_16(v2, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + VLOAD_16(v8, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, 0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); - VLOAD_16(v4, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, + VLOAD_16(v16, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); - 
asm volatile("vmadd.vx v2, %[A], v4" ::[A] "r"(scalar)); - VCMP_U16(10, v2, 0x145d, 0xb5af, 0x54f9, 0x342e, 0x78a8, 0x4cb6, 0xa9ce, + asm volatile("vmadd.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U16(10, v8, 0x145d, 0xb5af, 0x54f9, 0x342e, 0x78a8, 0x4cb6, 0xa9ce, 0x8131, 0x7b60, 0x9c21, 0xd43f, 0x9759, 0x0e53, 0x109f, 0x71b4, 0xcd08); - VSET(16, e32, m2); + VSET(16, e32, m8); scalar = 6474219; - VLOAD_32(v2, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + VLOAD_32(v8, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, 0x842ba667, 0x53360ec0, 0xd85d7415, 0xf20de61f, 0x153e7e16, 0xec5512e4); - VLOAD_32(v4, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + VLOAD_32(v16, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, 0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, 0x7ca33236); - asm volatile("vmadd.vx v2, %[A], v4" ::[A] "r"(scalar)); - VCMP_U32(11, v2, 0x8e0d1d47, 0xf29d4830, 0xb5213626, 0xb21bb5a3, 0xbc2f367d, + asm volatile("vmadd.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0x8e0d1d47, 0xf29d4830, 0xb5213626, 0xb21bb5a3, 0xbc2f367d, 0x18eb9d88, 0x91c53550, 0x69a6ceb2, 0xc09822e9, 0x66c98b96, 0xf6b125ab, 0xef3fae1e, 0x4c40925e, 0x6b652c20, 0x998385c4, 0x75d88d82); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ + /* VSET(16, e64, m8); */ /* scalar = -598189234597999223; */ - /* VLOAD_64(v2, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + /* VLOAD_64(v8, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, */ /* 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, */ /* 0x97860fd5fba018c0, 0x724cecf178bd2125, 0x866d16f96d3d8b67, */ /* 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, */ /* 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, */ /* 0x935ac02069fe54ce); */ - /* VLOAD_64(v4, 0x0dc8fa1b817237e5, 0xc817934370de904d, 
0xb015bdbf0f39ec01, + /* VLOAD_64(v16, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, */ /* 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, */ /* 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, */ /* 0x5ff488a4187bd3f3, 0x6a259fe666091333, 0x5afc4de057de51c4, */ /* 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, */ /* 0xdab377ddbdfb2df7); */ - /* asm volatile("vmadd.vx v2, %[A], v4" ::[A] "r"(scalar)); */ - /* VCMP_U64(12, v2, 0x093861b79ac45352, 0xfd3c909decf66b5b, + /* asm volatile("vmadd.vx v8, %[A], v16" ::[A] "r"(scalar)); */ + /* VCMP_U64(12, v8, 0x093861b79ac45352, 0xfd3c909decf66b5b, * 0x04eb13132ce4267b, */ /* 0xb258e6b065bbf956, 0x62775181e33422f3, 0xdc0ae0e371686968, */ /* 0xf8db06270cad2c71, 0x6c3cc52cd1fb49c2, 0x41c19c0ac1b5a2fa, */ @@ -224,57 +224,57 @@ void TEST_CASE3() { } void TEST_CASE4() { - VSET(16, e8, m2); + VSET(16, e8, m8); int64_t scalar = 5; - VLOAD_8(v2, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + VLOAD_8(v8, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, 0x01, 0xe7, 0x51, 0x53, 0x29); - VLOAD_8(v4, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + VLOAD_8(v16, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, 0xaa, 0xd2, 0x93, 0x83, 0xa8); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmadd.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); - VCMP_U8(13, v2, 0x60, 0x8a, 0xa0, 0xc9, 0x35, 0x8f, 0xa3, 0x0b, 0x5f, 0x06, + asm volatile("vmadd.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v8, 0x60, 0x8a, 0xa0, 0xc9, 0x35, 0x8f, 0xa3, 0x0b, 0x5f, 0x06, 0x07, 0xaf, 0xe7, 0x28, 0x53, 0x75); - VSET(16, e16, m2); + VSET(16, e16, m8); scalar = -5383; - VLOAD_16(v2, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + VLOAD_16(v8, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, 0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); - VLOAD_16(v4, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 
0xc8e7, 0xc117, 0x8108, + VLOAD_16(v16, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmadd.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); - VCMP_U16(14, v2, 0x992e, 0xb5af, 0x90c3, 0x342e, 0xd53c, 0x4cb6, 0x2d2f, + asm volatile("vmadd.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v8, 0x992e, 0xb5af, 0x90c3, 0x342e, 0xd53c, 0x4cb6, 0x2d2f, 0x8131, 0x0a79, 0x9c21, 0x6f34, 0x9759, 0xc95a, 0x109f, 0x36bf, 0xcd08); - VSET(16, e32, m2); + VSET(16, e32, m8); scalar = 6474219; - VLOAD_32(v2, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + VLOAD_32(v8, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, 0x842ba667, 0x53360ec0, 0xd85d7415, 0xf20de61f, 0x153e7e16, 0xec5512e4); - VLOAD_32(v4, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + VLOAD_32(v16, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, 0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, 0x7ca33236); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vmadd.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); - VCMP_U32(15, v2, 0x709e784e, 0xf29d4830, 0xad5df7fd, 0xb21bb5a3, 0x0a0030d0, + asm volatile("vmadd.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0x709e784e, 0xf29d4830, 0xad5df7fd, 0xb21bb5a3, 0x0a0030d0, 0x18eb9d88, 0x507fd5c7, 0x69a6ceb2, 0x0bf1c209, 0x66c98b96, 0x842ba667, 0xef3fae1e, 0xd85d7415, 0x6b652c20, 0x153e7e16, 0x75d88d82); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ + /* VSET(16, e64, m8); */ /* scalar = -598189234597999223; */ - /* VLOAD_64(v2, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + /* VLOAD_64(v8, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, */ /* 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, */ /* 0x97860fd5fba018c0, 
0x724cecf178bd2125, 0x866d16f96d3d8b67, */ /* 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, */ /* 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, */ /* 0x935ac02069fe54ce); */ - /* VLOAD_64(v4, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, + /* VLOAD_64(v16, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, */ /* 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, */ /* 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, */ @@ -282,8 +282,8 @@ void TEST_CASE4() { /* 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, */ /* 0xdab377ddbdfb2df7); */ /* VLOAD_8(v0, 0xAA, 0xAA); */ - /* asm volatile("vmadd.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); */ - /* VCMP_U64(16, v2, 0x2a47beb4fd7729c5, 0xfd3c909decf66b5b, + /* asm volatile("vmadd.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); */ + /* VCMP_U64(16, v8, 0x2a47beb4fd7729c5, 0xfd3c909decf66b5b, * 0xbbaf5fe50c41f22a, */ /* 0xb258e6b065bbf956, 0x609cbc4a78316c29, 0xdc0ae0e371686968, */ /* 0x97860fd5fba018c0, 0x6c3cc52cd1fb49c2, 0x866d16f96d3d8b67, */ diff --git a/sw/riscvTests/isa/rv64uv/vmax.c b/sw/riscvTests/isa/rv64uv/vmax.c index 945e22be..35b4266c 100644 --- a/sw/riscvTests/isa/rv64uv/vmax.c +++ b/sw/riscvTests/isa/rv64uv/vmax.c @@ -8,79 +8,79 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e16, m2); - VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_16(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm volatile("vmax.vv v2, v4, v6"); - VCMP_I16(1, v2, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + asm volatile("vmax.vv v8, v16, v24"); + VCMP_I16(1, v8, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, 
7000, 2560, 19901, 12345, 7000, 2560, 19901); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_32(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_32(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm volatile("vmax.vv v2, v4, v6"); - VCMP_I32(2, v2, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + asm volatile("vmax.vv v8, v16, v24"); + VCMP_I32(2, v8, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_64(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm volatile("vmax.vv v2, v4, v6"); - VCMP_I64(3, v2, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + asm volatile("vmax.vv v8, v16, v24"); + VCMP_I64(3, v8, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901); #endif }; void TEST_CASE2(void) { - VSET(16, e16, m2); - VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_16(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 
0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vmax.vv v2, v4, v6, v0.t"); - VCMP_I16(4, v2, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901, + asm volatile("vmax.vv v8, v16, v24, v0.t"); + VCMP_I16(4, v8, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_32(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_32(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vmax.vv v2, v4, v6, v0.t"); - VCMP_I32(5, v2, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef, + asm volatile("vmax.vv v8, v16, v24, v0.t"); + VCMP_I32(5, v8, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef, 2560, 19901); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_64(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); 
VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vmax.vv v2, v4, v6, v0.t"); - VCMP_I64(6, v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, + asm volatile("vmax.vv v8, v16, v24, v0.t"); + VCMP_I64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901); @@ -90,33 +90,33 @@ void TEST_CASE2(void) { void TEST_CASE3(void) { const uint64_t scalar = 40; - VSET(16, e8, m2); - VLOAD_8(v4, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + VSET(16, e8, m8); + VLOAD_8(v16, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99); - asm volatile("vmax.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I8(7, v2, 123, 40, 40, 99, 123, 40, 40, 99, 123, 40, 40, 99, 123, 40, 40, + asm volatile("vmax.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I8(7, v8, 123, 40, 40, 99, 123, 40, 40, 99, 123, 40, 40, 99, 123, 40, 40, 99); - VSET(16, e16, m2); - VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199); - asm volatile("vmax.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I16(8, v2, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + asm volatile("vmax.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I16(8, v8, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, -8, -25, 199, 12345, -8, 
-25, 199, 12345, -8, -25, 199, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199); - asm volatile("vmax.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I32(9, v2, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + asm volatile("vmax.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I32(9, v8, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199); - asm volatile("vmax.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I64(10, v2, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + asm volatile("vmax.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(10, v8, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199); #endif }; @@ -124,52 +124,52 @@ void TEST_CASE3(void) { void TEST_CASE4(void) { const uint64_t scalar = 40; - VSET(16, e8, m2); - VLOAD_8(v4, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + VSET(16, e8, m8); + VLOAD_8(v16, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vmax.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I8(11, v2, 0xef, 0xef, 40, 99, 0xef, 0xef, 40, 99, 0xef, 0xef, 40, 99, + asm volatile("vmax.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(11, v8, 0xef, 0xef, 40, 99, 0xef, 0xef, 40, 99, 0xef, 0xef, 40, 99, 0xef, 0xef, 40, 99); - VSET(16, e16, m2); - VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, 
-8, -25, 199, 12345, -8, -25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vmax.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I16(12, v2, 0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199, 0xbeef, + asm volatile("vmax.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(12, v8, 0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vmax.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I32(13, v2, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, + asm volatile("vmax.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(13, v8, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, 199); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vmax.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I64(14, v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, + asm volatile("vmax.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199); diff --git a/sw/riscvTests/isa/rv64uv/vmaxu.c b/sw/riscvTests/isa/rv64uv/vmaxu.c index 2f7c320f..d6204fe9 100644 --- a/sw/riscvTests/isa/rv64uv/vmaxu.c +++ b/sw/riscvTests/isa/rv64uv/vmaxu.c @@ -8,79 +8,79 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e16, m2); - VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900); - VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_16(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm volatile("vmaxu.vv v2, v4, v6"); - VCMP_U16(1, v2, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + asm volatile("vmaxu.vv v8, v16, v24"); + VCMP_U16(1, v8, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900); - VLOAD_32(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_32(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm 
volatile("vmaxu.vv v2, v4, v6"); - VCMP_U32(2, v2, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + asm volatile("vmaxu.vv v8, v16, v24"); + VCMP_U32(2, v8, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900); - VLOAD_64(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm volatile("vmaxu.vv v2, v4, v6"); - VCMP_U64(3, v2, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + asm volatile("vmaxu.vv v8, v16, v24"); + VCMP_U64(3, v8, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901); #endif }; void TEST_CASE2(void) { - VSET(16, e16, m2); - VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900); - VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_16(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vmaxu.vv v2, v4, v6, v0.t"); - VCMP_U16(4, v2, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901, + asm volatile("vmaxu.vv v8, v16, v24, v0.t"); + VCMP_U16(4, v8, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901); - 
VSET(16, e32, m2); - VLOAD_32(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900); - VLOAD_32(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_32(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vmaxu.vv v2, v4, v6, v0.t"); - VCMP_U32(5, v2, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef, + asm volatile("vmaxu.vv v8, v16, v24, v0.t"); + VCMP_U32(5, v8, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef, 2560, 19901); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900); - VLOAD_64(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vmaxu.vv v2, v4, v6, v0.t"); - VCMP_U64(6, v2, 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, + asm volatile("vmaxu.vv v8, v16, v24, v0.t"); + VCMP_U64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901); @@ -90,33 +90,33 @@ void TEST_CASE2(void) { void TEST_CASE3(void) { const uint64_t scalar = 40; - VSET(16, e8, m2); - VLOAD_8(v4, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + VSET(16, e8, m8); + VLOAD_8(v16, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199); - asm volatile("vmaxu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U8(7, v2, 123, 40, 40, 199, 123, 40, 40, 199, 123, 40, 40, 199, 123, 40, + asm volatile("vmaxu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U8(7, v8, 123, 40, 40, 199, 123, 40, 40, 199, 123, 40, 40, 199, 123, 40, 40, 199); - VSET(16, e16, m2); - VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); - asm volatile("vmaxu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U16(8, v2, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + asm volatile("vmaxu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U16(8, v8, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); - asm volatile("vmaxu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U32(9, v2, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + asm volatile("vmaxu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U32(9, v8, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, 8, 
25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); - asm volatile("vmaxu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U64(10, v2, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + asm volatile("vmaxu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(10, v8, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199); #endif }; @@ -124,52 +124,52 @@ void TEST_CASE3(void) { void TEST_CASE4(void) { const uint64_t scalar = 40; - VSET(16, e8, m2); - VLOAD_8(v4, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + VSET(16, e8, m8); + VLOAD_8(v16, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vmaxu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(11, v2, 0xef, 0xef, 40, 199, 0xef, 0xef, 40, 199, 0xef, 0xef, 40, 199, + asm volatile("vmaxu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(11, v8, 0xef, 0xef, 40, 199, 0xef, 0xef, 40, 199, 0xef, 0xef, 40, 199, 0xef, 0xef, 40, 199); - VSET(16, e16, m2); - VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vmaxu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U16(12, v2, 0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199, 0xbeef, + asm volatile("vmaxu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(12, v8, 
0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vmaxu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(13, v2, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, + asm volatile("vmaxu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(13, v8, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, 199); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vmaxu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(14, v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, + asm volatile("vmaxu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199); diff --git a/sw/riscvTests/isa/rv64uv/vmin.c b/sw/riscvTests/isa/rv64uv/vmin.c index 1e29d145..58b3255f 100644 --- a/sw/riscvTests/isa/rv64uv/vmin.c +++ b/sw/riscvTests/isa/rv64uv/vmin.c @@ -8,79 +8,79 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e16, m2); - VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_16(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm volatile("vmin.vv v2, v4, v6"); - VCMP_I16(1, v2, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, + asm volatile("vmin.vv v8, v16, v24"); + VCMP_I16(1, v8, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, -19900); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_32(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_32(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm volatile("vmin.vv v2, v4, v6"); - VCMP_I32(2, v2, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, + asm volatile("vmin.vv v8, v16, v24"); + VCMP_I32(2, v8, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, -19900); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_64(v6, 50, 7000, 400, 
19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm volatile("vmin.vv v2, v4, v6"); - VCMP_I64(3, v2, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, + asm volatile("vmin.vv v8, v16, v24"); + VCMP_I64(3, v8, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, -19900); #endif }; void TEST_CASE2(void) { - VSET(16, e16, m2); - VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_16(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vmin.vv v2, v4, v6, v0.t"); - VCMP_I16(4, v2, 0xbeef, 0xbeef, 400, -19900, 0xbeef, 0xbeef, 400, -19900, + asm volatile("vmin.vv v8, v16, v24, v0.t"); + VCMP_I16(4, v8, 0xbeef, 0xbeef, 400, -19900, 0xbeef, 0xbeef, 400, -19900, 0xbeef, 0xbeef, 400, -19900, 0xbeef, 0xbeef, 400, -19900); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_32(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_32(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vmin.vv v2, v4, v6, v0.t"); - VCMP_I32(5, v2, 0xdeadbeef, 0xdeadbeef, 400, -19900, 0xdeadbeef, 0xdeadbeef, + asm volatile("vmin.vv v8, v16, v24, v0.t"); + VCMP_I32(5, v8, 0xdeadbeef, 0xdeadbeef, 400, -19900, 0xdeadbeef, 0xdeadbeef, 400, -19900, 0xdeadbeef, 0xdeadbeef, 400, -19900, 0xdeadbeef, 0xdeadbeef, 400, -19900); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900); - VLOAD_64(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vmin.vv v2, v4, v6, v0.t"); - VCMP_I64(6, v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900, + asm volatile("vmin.vv v8, v16, v24, v0.t"); + VCMP_I64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900); @@ -90,33 +90,33 @@ void TEST_CASE2(void) { void TEST_CASE3(void) { const uint64_t scalar = 40; - VSET(16, e8, m2); - VLOAD_8(v4, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + VSET(16, e8, m8); + 
VLOAD_8(v16, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99); - asm volatile("vmin.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I8(7, v2, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, + asm volatile("vmin.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I8(7, v8, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40); - VSET(16, e16, m2); - VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199); - asm volatile("vmin.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I16(8, v2, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, + asm volatile("vmin.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I16(8, v8, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199); - asm volatile("vmin.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I32(9, v2, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, + asm volatile("vmin.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I32(9, v8, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199); - asm volatile("vmin.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I64(10, v2, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, + asm volatile("vmin.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(10, v8, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40); #endif }; @@ -124,52 +124,52 @@ void TEST_CASE3(void) { void TEST_CASE4(void) { const uint64_t scalar = 
40; - VSET(16, e8, m2); - VLOAD_8(v4, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + VSET(16, e8, m8); + VLOAD_8(v16, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vmin.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I8(11, v2, 0xef, 0xef, -25, 40, 0xef, 0xef, -25, 40, 0xef, 0xef, -25, 40, + asm volatile("vmin.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(11, v8, 0xef, 0xef, -25, 40, 0xef, 0xef, -25, 40, 0xef, 0xef, -25, 40, 0xef, 0xef, -25, 40); - VSET(16, e16, m2); - VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vmin.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I16(12, v2, 0xbeef, 0xbeef, -25, 40, 0xbeef, 0xbeef, -25, 40, 0xbeef, + asm volatile("vmin.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(12, v8, 0xbeef, 0xbeef, -25, 40, 0xbeef, 0xbeef, -25, 40, 0xbeef, 0xbeef, -25, 40, 0xbeef, 0xbeef, -25, 40); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vmin.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I32(13, v2, 0xdeadbeef, 0xdeadbeef, -25, 40, 0xdeadbeef, 0xdeadbeef, -25, + asm volatile("vmin.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(13, v8, 0xdeadbeef, 0xdeadbeef, -25, 40, 0xdeadbeef, 0xdeadbeef, -25, 40, 0xdeadbeef, 0xdeadbeef, -25, 40, 0xdeadbeef, 0xdeadbeef, -25, 40); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vmin.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I64(14, v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, -25, 40, + asm volatile("vmin.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, -25, 40, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, -25, 40, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, -25, 40, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, -25, 40); diff --git a/sw/riscvTests/isa/rv64uv/vminu.c b/sw/riscvTests/isa/rv64uv/vminu.c index 8c2e3a94..65be4114 100644 --- a/sw/riscvTests/isa/rv64uv/vminu.c +++ b/sw/riscvTests/isa/rv64uv/vminu.c @@ -8,79 +8,79 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e16, m2); - VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 
80, 2560, 19900); - VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_16(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm volatile("vminu.vv v2, v4, v6"); - VCMP_U16(1, v2, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900, + asm volatile("vminu.vv v8, v16, v24"); + VCMP_U16(1, v8, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900); - VLOAD_32(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_32(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm volatile("vminu.vv v2, v4, v6"); - VCMP_U32(2, v2, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900, + asm volatile("vminu.vv v8, v16, v24"); + VCMP_U32(2, v8, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900); - VLOAD_64(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); - asm volatile("vminu.vv v2, v4, v6"); - VCMP_U64(3, v2, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900, + asm volatile("vminu.vv v8, v16, v24"); + VCMP_U64(3, v8, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900); #endif }; void TEST_CASE2(void) { - VSET(16, e16, m2); - VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + VSET(16, e16, m8); + 
VLOAD_16(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900); - VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_16(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vminu.vv v2, v4, v6, v0.t"); - VCMP_U16(4, v2, 0xbeef, 0xbeef, 400, 19900, 0xbeef, 0xbeef, 400, 19900, + asm volatile("vminu.vv v8, v16, v24, v0.t"); + VCMP_U16(4, v8, 0xbeef, 0xbeef, 400, 19900, 0xbeef, 0xbeef, 400, 19900, 0xbeef, 0xbeef, 400, 19900, 0xbeef, 0xbeef, 400, 19900); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900); - VLOAD_32(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_32(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vminu.vv v2, v4, v6, v0.t"); - VCMP_U32(5, v2, 0xdeadbeef, 0xdeadbeef, 400, 19900, 0xdeadbeef, 0xdeadbeef, + asm volatile("vminu.vv v8, v16, v24, v0.t"); + VCMP_U32(5, v8, 0xdeadbeef, 0xdeadbeef, 400, 19900, 0xdeadbeef, 0xdeadbeef, 400, 19900, 0xdeadbeef, 0xdeadbeef, 400, 19900, 0xdeadbeef, 0xdeadbeef, 400, 19900); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 
19900, 12345, 80, 2560, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900); - VLOAD_64(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vminu.vv v2, v4, v6, v0.t"); - VCMP_U64(6, v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900, + asm volatile("vminu.vv v8, v16, v24, v0.t"); + VCMP_U64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900); @@ -90,81 +90,81 @@ void TEST_CASE2(void) { void TEST_CASE3(void) { const uint64_t scalar = 40; - VSET(16, e8, m2); - VLOAD_8(v4, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + VSET(16, e8, m8); + VLOAD_8(v16, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199); - asm volatile("vminu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U8(7, v2, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); + asm volatile("vminu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U8(7, v8, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); - VSET(16, e16, m2); - VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); - asm volatile("vminu.vx v2, v4, %[A]" ::[A] 
"r"(scalar)); - VCMP_U16(8, v2, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); + asm volatile("vminu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U16(8, v8, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); - asm volatile("vminu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U32(9, v2, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); + asm volatile("vminu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U32(9, v8, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); - asm volatile("vminu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U64(10, v2, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); + asm volatile("vminu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(10, v8, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); #endif }; void TEST_CASE4(void) { const uint64_t scalar = 40; - VSET(16, e8, m2); - VLOAD_8(v4, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + VSET(16, e8, m8); + VLOAD_8(v16, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vminu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(11, v2, 0xef, 0xef, 25, 40, 0xef, 0xef, 25, 40, 0xef, 0xef, 25, 40, + asm volatile("vminu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(11, v8, 0xef, 0xef, 25, 40, 0xef, 0xef, 25, 40, 0xef, 0xef, 25, 40, 
0xef, 0xef, 25, 40); - VSET(16, e16, m2); - VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e16, m8); + VLOAD_16(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vminu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U16(12, v2, 0xbeef, 0xbeef, 25, 40, 0xbeef, 0xbeef, 25, 40, 0xbeef, + asm volatile("vminu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(12, v8, 0xbeef, 0xbeef, 25, 40, 0xbeef, 0xbeef, 25, 40, 0xbeef, 0xbeef, 25, 40, 0xbeef, 0xbeef, 25, 40); - VSET(16, e32, m2); - VLOAD_32(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e32, m8); + VLOAD_32(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vminu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(13, v2, 0xdeadbeef, 0xdeadbeef, 25, 40, 0xdeadbeef, 0xdeadbeef, 25, + asm volatile("vminu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(13, v8, 0xdeadbeef, 0xdeadbeef, 25, 40, 0xdeadbeef, 0xdeadbeef, 25, 40, 0xdeadbeef, 0xdeadbeef, 25, 40, 0xdeadbeef, 0xdeadbeef, 25, 40); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199); VLOAD_8(v0, 0xCC, 0xCC); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vminu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(14, v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 25, 40, + asm volatile("vminu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 25, 40, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 25, 40, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 25, 40, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 25, 40); diff --git a/sw/riscvTests/isa/rv64uv/vmul.c b/sw/riscvTests/isa/rv64uv/vmul.c index 43937857..32c1cc06 100644 --- a/sw/riscvTests/isa/rv64uv/vmul.c +++ b/sw/riscvTests/isa/rv64uv/vmul.c @@ -8,56 +8,56 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0xb3, 0x5d, 0x3d, 0xa4, 0xbf, 0xc7, 0x6b, 0x95, 0xf9, 0x64, 0x52, + VSET(16, e8, m8); + VLOAD_8(v16, 0xb3, 0x5d, 0x3d, 0xa4, 0xbf, 0xc7, 0x6b, 0x95, 0xf9, 0x64, 0x52, 0x57, 0xbc, 0x1f, 0xd5, 0x13); - VLOAD_8(v6, 0x46, 0x37, 0xf5, 0x2b, 0x55, 0x05, 0xcb, 0x76, 0x31, 0x30, 0x78, + VLOAD_8(v24, 0x46, 0x37, 0xf5, 0x2b, 0x55, 0x05, 0xcb, 0x76, 0x31, 0x30, 0x78, 0xb3, 0x6a, 0xae, 0x5a, 0xe1); - asm volatile("vmul.vv v2, v4, v6"); - VCMP_I8(1, v2, 0xf2, 0xfb, 0x61, 0x8c, 0x6b, 0xe3, 0xd9, 0xae, 0xa9, 0xc0, + asm volatile("vmul.vv v8, v16, v24"); + VCMP_I8(1, v8, 0xf2, 0xfb, 0x61, 0x8c, 0x6b, 0xe3, 0xd9, 0xae, 0xa9, 0xc0, 0x70, 0xd5, 0xd8, 0x12, 0xe2, 0xb3); - VSET(16, e16, m2); - VLOAD_16(v4, 0x8132, 0x94b5, 0x245c, 0xd15d, 0xbfca, 0x18b6, 0xd5ba, 0x9299, + VSET(16, e16, m8); + VLOAD_16(v16, 0x8132, 0x94b5, 0x245c, 0xd15d, 0xbfca, 0x18b6, 0xd5ba, 0x9299, 0xccbd, 0x9ad4, 0xce46, 0xfbba, 0x10cc, 0xc463, 0x5298, 0x7b3e); - VLOAD_16(v6, 
0xede6, 0x010b, 0xa570, 0x21c5, 0xfe5a, 0x5386, 0x16c9, 0x45fb, + VLOAD_16(v24, 0xede6, 0x010b, 0xa570, 0x21c5, 0xfe5a, 0x5386, 0x16c9, 0x45fb, 0x1439, 0x436f, 0x6f56, 0x90f7, 0x77c0, 0x0751, 0x64c3, 0x36e8); - asm volatile("vmul.vv v2, v4, v6"); - VCMP_I16(2, v2, 0x5cec, 0x18c7, 0x3440, 0x1991, 0xd904, 0xf144, 0xcb0a, + asm volatile("vmul.vv v8, v16, v24"); + VCMP_I16(2, v8, 0x5cec, 0x18c7, 0x3440, 0x1991, 0xd904, 0xf144, 0xcb0a, 0xf903, 0x5a15, 0x9dec, 0xa584, 0x8076, 0x6d00, 0xd853, 0x49c8, 0xc430); - VSET(16, e32, m2); - VLOAD_32(v4, 0x9c2bdc58, 0xe3995899, 0xbbbc0eda, 0x0729ff92, 0xa757a2c5, + VSET(16, e32, m8); + VLOAD_32(v16, 0x9c2bdc58, 0xe3995899, 0xbbbc0eda, 0x0729ff92, 0xa757a2c5, 0xd5f3a23e, 0x9a295390, 0xb2367b2e, 0xfee5b6a2, 0x07cb59c1, 0x6bf5cf9a, 0x7d75506b, 0x013c1e90, 0x600f9ca8, 0x6d4d0522, 0x0a225ff0); - VLOAD_32(v6, 0xc51e02f8, 0xae06b334, 0x397b1ec7, 0xc46f34fe, 0x4f9db2ab, + VLOAD_32(v24, 0xc51e02f8, 0xae06b334, 0x397b1ec7, 0xc46f34fe, 0x4f9db2ab, 0x957c2534, 0x70f8e127, 0xa79ebcec, 0x0a542044, 0x20e6ac3e, 0xd61caed7, 0x6f4e7820, 0x27c56901, 0x0aaf1d61, 0xa95c6f5c, 0x5b7aedf3); - asm volatile("vmul.vv v2, v4, v6"); - VCMP_I32(3, v2, 0x2c862540, 0x85aefa14, 0xa5ab1776, 0x3be33adc, 0x5487b397, + asm volatile("vmul.vv v8, v16, v24"); + VCMP_I32(3, v8, 0x2c862540, 0x85aefa14, 0xa5ab1776, 0x3be33adc, 0x5487b397, 0x57f7ea98, 0xc23d4af0, 0x55135668, 0xad00c308, 0x46f368be, 0x2f640656, 0x91f63560, 0x1e952e90, 0xd18163a8, 0xf71f9638, 0x79d240d0); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x077de9270ce00632, 0x2dadf4e020f3d47a, 0xb54ca84f9fe0573b, + VSET(16, e64, m8); + VLOAD_64(v16, 0x077de9270ce00632, 0x2dadf4e020f3d47a, 0xb54ca84f9fe0573b, 0x7be639dfdb02db6f, 0x61bb44569da93eff, 0xcd7f973ce822182b, 0x5434a22e7432397c, 0xcaadfd89d8dd1ad5, 0x5474c56d9089d672, 0x700e415c07b99bf9, 0xb89d409d4323a9c8, 0x68ccc7411db0ab09, 0xdf4fe3fa4e113e98, 0xa98a2e2575b04c41, 0x26ddf248ccb4a7aa, 0xdda1822d4972ee47); - VLOAD_64(v6, 0xcd2888c8bb07b736, 
0xf20013627ac47d4f, 0xc05dbcd989ef329c, + VLOAD_64(v24, 0xcd2888c8bb07b736, 0xf20013627ac47d4f, 0xc05dbcd989ef329c, 0xc272db2ebcf7cfad, 0x8869302db041176d, 0xd3d90968a9ee01c2, 0x9cdb9f91a3381f51, 0x99ad38b70907ee8d, 0xf7d629b266c67cf1, 0x706f9b996cdd60f2, 0x4caa2335622bd6a0, 0x94171c9dfbbb186f, 0x7b6e42290f54ecc6, 0xa545b8670143bfbc, 0x9f430bf94b2805c9, 0xb45030fc2b4cef12); - asm volatile("vmul.vv v2, v4, v6"); - VCMP_I64(4, v2, 0x90d27e278d0d0c8c, 0x5ea9d3e60b6623a6, 0x6823b3e240d3adf4, + asm volatile("vmul.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x90d27e278d0d0c8c, 0x5ea9d3e60b6623a6, 0x6823b3e240d3adf4, 0xc0dcea378c760b03, 0x17692726a477bb93, 0x784c7f2ee6e87b96, 0xd1aae9975ffa343c, 0xfdcd46ca398ccd51, 0x405f01791dce1952, 0x16063fbe99e7d162, 0xc9d244cddacf4d00, 0x22024848323600e7, @@ -67,99 +67,99 @@ void TEST_CASE1(void) { }; void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0xb3, 0x5d, 0x3d, 0xa4, 0xbf, 0xc7, 0x6b, 0x95, 0xf9, 0x64, 0x52, + VSET(16, e8, m8); + VLOAD_8(v16, 0xb3, 0x5d, 0x3d, 0xa4, 0xbf, 0xc7, 0x6b, 0x95, 0xf9, 0x64, 0x52, 0x57, 0xbc, 0x1f, 0xd5, 0x13); - VLOAD_8(v6, 0x46, 0x37, 0xf5, 0x2b, 0x55, 0x05, 0xcb, 0x76, 0x31, 0x30, 0x78, + VLOAD_8(v24, 0x46, 0x37, 0xf5, 0x2b, 0x55, 0x05, 0xcb, 0x76, 0x31, 0x30, 0x78, 0xb3, 0x6a, 0xae, 0x5a, 0xe1); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmul.vv v2, v4, v6, v0.t"); - VCMP_I8(5, v2, 0, 0xfb, 0, 0x8c, 0, 0xe3, 0, 0xae, 0, 0xc0, 0, 0xd5, 0, 0x12, + VCLEAR(v8); + asm volatile("vmul.vv v8, v16, v24, v0.t"); + VCMP_I8(5, v8, 0, 0xfb, 0, 0x8c, 0, 0xe3, 0, 0xae, 0, 0xc0, 0, 0xd5, 0, 0x12, 0, 0xb3); - VSET(16, e16, m2); - VLOAD_16(v4, 0x8132, 0x94b5, 0x245c, 0xd15d, 0xbfca, 0x18b6, 0xd5ba, 0x9299, + VSET(16, e16, m8); + VLOAD_16(v16, 0x8132, 0x94b5, 0x245c, 0xd15d, 0xbfca, 0x18b6, 0xd5ba, 0x9299, 0xccbd, 0x9ad4, 0xce46, 0xfbba, 0x10cc, 0xc463, 0x5298, 0x7b3e); - VLOAD_16(v6, 0xede6, 0x010b, 0xa570, 0x21c5, 0xfe5a, 0x5386, 0x16c9, 0x45fb, + VLOAD_16(v24, 0xede6, 0x010b, 0xa570, 0x21c5, 0xfe5a, 
0x5386, 0x16c9, 0x45fb, 0x1439, 0x436f, 0x6f56, 0x90f7, 0x77c0, 0x0751, 0x64c3, 0x36e8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmul.vv v2, v4, v6, v0.t"); - VCMP_I16(6, v2, 0, 0x18c7, 0, 0x1991, 0, 0xf144, 0, 0xf903, 0, 0x9dec, 0, + VCLEAR(v8); + asm volatile("vmul.vv v8, v16, v24, v0.t"); + VCMP_I16(6, v8, 0, 0x18c7, 0, 0x1991, 0, 0xf144, 0, 0xf903, 0, 0x9dec, 0, 0x8076, 0, 0xd853, 0, 0xc430); - VSET(16, e32, m2); - VLOAD_32(v4, 0x9c2bdc58, 0xe3995899, 0xbbbc0eda, 0x0729ff92, 0xa757a2c5, + VSET(16, e32, m8); + VLOAD_32(v16, 0x9c2bdc58, 0xe3995899, 0xbbbc0eda, 0x0729ff92, 0xa757a2c5, 0xd5f3a23e, 0x9a295390, 0xb2367b2e, 0xfee5b6a2, 0x07cb59c1, 0x6bf5cf9a, 0x7d75506b, 0x013c1e90, 0x600f9ca8, 0x6d4d0522, 0x0a225ff0); - VLOAD_32(v6, 0xc51e02f8, 0xae06b334, 0x397b1ec7, 0xc46f34fe, 0x4f9db2ab, + VLOAD_32(v24, 0xc51e02f8, 0xae06b334, 0x397b1ec7, 0xc46f34fe, 0x4f9db2ab, 0x957c2534, 0x70f8e127, 0xa79ebcec, 0x0a542044, 0x20e6ac3e, 0xd61caed7, 0x6f4e7820, 0x27c56901, 0x0aaf1d61, 0xa95c6f5c, 0x5b7aedf3); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmul.vv v2, v4, v6, v0.t"); - VCMP_I32(7, v2, 0, 0x85aefa14, 0, 0x3be33adc, 0, 0x57f7ea98, 0, 0x55135668, 0, + VCLEAR(v8); + asm volatile("vmul.vv v8, v16, v24, v0.t"); + VCMP_I32(7, v8, 0, 0x85aefa14, 0, 0x3be33adc, 0, 0x57f7ea98, 0, 0x55135668, 0, 0x46f368be, 0, 0x91f63560, 0, 0xd18163a8, 0, 0x79d240d0); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x077de9270ce00632, 0x2dadf4e020f3d47a, 0xb54ca84f9fe0573b, + VSET(16, e64, m8); + VLOAD_64(v16, 0x077de9270ce00632, 0x2dadf4e020f3d47a, 0xb54ca84f9fe0573b, 0x7be639dfdb02db6f, 0x61bb44569da93eff, 0xcd7f973ce822182b, 0x5434a22e7432397c, 0xcaadfd89d8dd1ad5, 0x5474c56d9089d672, 0x700e415c07b99bf9, 0xb89d409d4323a9c8, 0x68ccc7411db0ab09, 0xdf4fe3fa4e113e98, 0xa98a2e2575b04c41, 0x26ddf248ccb4a7aa, 0xdda1822d4972ee47); - VLOAD_64(v6, 0xcd2888c8bb07b736, 0xf20013627ac47d4f, 0xc05dbcd989ef329c, + VLOAD_64(v24, 0xcd2888c8bb07b736, 0xf20013627ac47d4f, 
0xc05dbcd989ef329c, 0xc272db2ebcf7cfad, 0x8869302db041176d, 0xd3d90968a9ee01c2, 0x9cdb9f91a3381f51, 0x99ad38b70907ee8d, 0xf7d629b266c67cf1, 0x706f9b996cdd60f2, 0x4caa2335622bd6a0, 0x94171c9dfbbb186f, 0x7b6e42290f54ecc6, 0xa545b8670143bfbc, 0x9f430bf94b2805c9, 0xb45030fc2b4cef12); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmul.vv v2, v4, v6, v0.t"); - VCMP_I64(8, v2, 0, 0x5ea9d3e60b6623a6, 0, 0xc0dcea378c760b03, 0, + VCLEAR(v8); + asm volatile("vmul.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x5ea9d3e60b6623a6, 0, 0xc0dcea378c760b03, 0, 0x784c7f2ee6e87b96, 0, 0xfdcd46ca398ccd51, 0, 0x16063fbe99e7d162, 0, 0x22024848323600e7, 0, 0x566db9e82c5f7ebc, 0, 0xdaab68ca209d09fe); #endif }; void TEST_CASE3(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x80, 0xb2, 0xb6, 0xd7, 0x4f, 0xbe, 0xee, 0x53, 0xab, 0x57, 0xe4, + VSET(16, e8, m8); + VLOAD_8(v16, 0x80, 0xb2, 0xb6, 0xd7, 0x4f, 0xbe, 0xee, 0x53, 0xab, 0x57, 0xe4, 0x28, 0x6a, 0x91, 0x14, 0x4f); int64_t scalar = 5; - asm volatile("vmul.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I8(9, v2, 0x80, 0x7a, 0x8e, 0x33, 0x8b, 0xb6, 0xa6, 0x9f, 0x57, 0xb3, + asm volatile("vmul.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v8, 0x80, 0x7a, 0x8e, 0x33, 0x8b, 0xb6, 0xa6, 0x9f, 0x57, 0xb3, 0x74, 0xc8, 0x12, 0xd5, 0x64, 0x8b); - VSET(16, e16, m2); - VLOAD_16(v4, 0xbab0, 0x83a5, 0x06b6, 0x22ba, 0x91b8, 0x7720, 0xc2c7, 0x3494, + VSET(16, e16, m8); + VLOAD_16(v16, 0xbab0, 0x83a5, 0x06b6, 0x22ba, 0x91b8, 0x7720, 0xc2c7, 0x3494, 0xd281, 0x6d38, 0x378d, 0xa91d, 0xd731, 0xa4c7, 0x4d8f, 0x2422); scalar = -5383; - asm volatile("vmul.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I16(10, v2, 0x7530, 0xdd7d, 0xe306, 0xcaea, 0xebf8, 0x1e20, 0x598f, + asm volatile("vmul.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v8, 0x7530, 0xdd7d, 0xe306, 0xcaea, 0xebf8, 0x1e20, 0x598f, 0x6bf4, 0xa979, 0x6b78, 0xea25, 0xff35, 0x18a9, 0x2b8f, 0x2617, 0x3912); - VSET(16, e32, m2); - VLOAD_32(v4, 0x8bb4a8bc, 0x9799b344, 0xcd8c1672, 0xeb2d7c0f, 
0x55474d7d, + VSET(16, e32, m8); + VLOAD_32(v16, 0x8bb4a8bc, 0x9799b344, 0xcd8c1672, 0xeb2d7c0f, 0x55474d7d, 0x3dae9eaf, 0xc19a3519, 0x6922f03c, 0x42edfa01, 0x1f60b344, 0x82f31d5e, 0x0faa2e5c, 0x74e95cfa, 0x9fcdae3b, 0xe6c4e0a0, 0x45549cbc); scalar = 6474219; - asm volatile("vmul.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I32(11, v2, 0xaf4a8094, 0x77dff36c, 0x44dc1ca6, 0x16e6a8c5, 0xee2546bf, + asm volatile("vmul.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v8, 0xaf4a8094, 0x77dff36c, 0x44dc1ca6, 0x16e6a8c5, 0xee2546bf, 0x78e111a5, 0x1fd15ef3, 0xe8a9a314, 0xfe2147eb, 0x5a8cf36c, 0x5536c34a, 0xbed6ca74, 0x23eca37e, 0xe2314329, 0x6857d2e0, 0x13b37c94); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0xc238e0a3df21299c, 0xb642655c3ab064d5, 0xd19f84bab77e5602, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0xc238e0a3df21299c, 0xb642655c3ab064d5, 0xd19f84bab77e5602, */ /* 0x4e6e3c114a19f160, 0xfd403cbcc59407a1, 0xef3e81a68ae0e48c, */ /* 0xd93a7b1ab54d024e, 0x5f7460aa9f4c4920, 0x4c91150cd4b54f60, */ @@ -167,8 +167,8 @@ void TEST_CASE3(void) { /* 0xcd0e6874555602d4, 0xb70264bd366ff52f, 0xc0b1fa64cec9368d, */ /* 0x13e86249a0235941); */ /* scalar = -598189234597999223; */ - /* asm volatile("vmul.vx v2, v4, %[A]" ::[A] "r"(scalar)); */ - /* VCMP_I64(12, v2, 0x61ead1213f09307c, 0x7d03f4c84c5e86fd, + /* asm volatile("vmul.vx v8, v16, %[A]" ::[A] "r"(scalar)); */ + /* VCMP_I64(12, v8, 0x61ead1213f09307c, 0x7d03f4c84c5e86fd, * 0x4aa0acc4e01fa112, */ /* 0x77bc957fdeec0c60, 0x762b14c112e60229, 0xbac65562e2366aec, */ /* 0xcc243dd1e80ab1be, 0xa871135122a1c220, 0x3d0db00992575a60, */ @@ -179,41 +179,41 @@ void TEST_CASE3(void) { }; void TEST_CASE4(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x80, 0xb2, 0xb6, 0xd7, 0x4f, 0xbe, 0xee, 0x53, 0xab, 0x57, 0xe4, + VSET(16, e8, m8); + VLOAD_8(v16, 0x80, 0xb2, 0xb6, 0xd7, 0x4f, 0xbe, 0xee, 0x53, 0xab, 0x57, 0xe4, 0x28, 0x6a, 0x91, 0x14, 0x4f); int64_t scalar = 5; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm 
volatile("vmul.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I8(13, v2, 0, 0x7a, 0, 0x33, 0, 0xb6, 0, 0x9f, 0, 0xb3, 0, 0xc8, 0, 0xd5, + VCLEAR(v8); + asm volatile("vmul.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v8, 0, 0x7a, 0, 0x33, 0, 0xb6, 0, 0x9f, 0, 0xb3, 0, 0xc8, 0, 0xd5, 0, 0x8b); - VSET(16, e16, m2); - VLOAD_16(v4, 0xbab0, 0x83a5, 0x06b6, 0x22ba, 0x91b8, 0x7720, 0xc2c7, 0x3494, + VSET(16, e16, m8); + VLOAD_16(v16, 0xbab0, 0x83a5, 0x06b6, 0x22ba, 0x91b8, 0x7720, 0xc2c7, 0x3494, 0xd281, 0x6d38, 0x378d, 0xa91d, 0xd731, 0xa4c7, 0x4d8f, 0x2422); scalar = -5383; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmul.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I16(14, v2, 0, 0xdd7d, 0, 0xcaea, 0, 0x1e20, 0, 0x6bf4, 0, 0x6b78, 0, + VCLEAR(v8); + asm volatile("vmul.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v8, 0, 0xdd7d, 0, 0xcaea, 0, 0x1e20, 0, 0x6bf4, 0, 0x6b78, 0, 0xff35, 0, 0x2b8f, 0, 0x3912); - VSET(16, e32, m2); - VLOAD_32(v4, 0x8bb4a8bc, 0x9799b344, 0xcd8c1672, 0xeb2d7c0f, 0x55474d7d, + VSET(16, e32, m8); + VLOAD_32(v16, 0x8bb4a8bc, 0x9799b344, 0xcd8c1672, 0xeb2d7c0f, 0x55474d7d, 0x3dae9eaf, 0xc19a3519, 0x6922f03c, 0x42edfa01, 0x1f60b344, 0x82f31d5e, 0x0faa2e5c, 0x74e95cfa, 0x9fcdae3b, 0xe6c4e0a0, 0x45549cbc); scalar = 6474219; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmul.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I32(15, v2, 0, 0x77dff36c, 0, 0x16e6a8c5, 0, 0x78e111a5, 0, 0xe8a9a314, + VCLEAR(v8); + asm volatile("vmul.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v8, 0, 0x77dff36c, 0, 0x16e6a8c5, 0, 0x78e111a5, 0, 0xe8a9a314, 0, 0x5a8cf36c, 0, 0xbed6ca74, 0, 0xe2314329, 0, 0x13b37c94); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0xc238e0a3df21299c, 0xb642655c3ab064d5, 0xd19f84bab77e5602, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0xc238e0a3df21299c, 0xb642655c3ab064d5, 0xd19f84bab77e5602, */ /* 0x4e6e3c114a19f160, 0xfd403cbcc59407a1, 
0xef3e81a68ae0e48c, */ /* 0xd93a7b1ab54d024e, 0x5f7460aa9f4c4920, 0x4c91150cd4b54f60, */ @@ -222,9 +222,9 @@ void TEST_CASE4(void) { /* 0x13e86249a0235941); */ /* scalar = -598189234597999223; */ /* VLOAD_8(v0, 0xAA, 0xAA); */ - /* VCLEAR(v2); */ - /* asm volatile("vmul.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); */ - /* VCMP_I64(16, v2, 0, 0x7d03f4c84c5e86fd, 0, 0x77bc957fdeec0c60, 0, */ + /* VCLEAR(v8); */ + /* asm volatile("vmul.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); */ + /* VCMP_I64(16, v8, 0, 0x7d03f4c84c5e86fd, 0, 0x77bc957fdeec0c60, 0, */ /* 0xbac65562e2366aec, 0, 0xa871135122a1c220, 0, * 0x8b07763affcd8fb7, 0, */ /* 0xd8827c839711c0aa, 0, 0x243fd844e74ed927, 0, diff --git a/sw/riscvTests/isa/rv64uv/vmulh.c b/sw/riscvTests/isa/rv64uv/vmulh.c index eda29bd6..869bfef9 100644 --- a/sw/riscvTests/isa/rv64uv/vmulh.c +++ b/sw/riscvTests/isa/rv64uv/vmulh.c @@ -8,56 +8,56 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0xc2, 0xf6, 0xad, 0x0f, 0xc6, 0xeb, 0xca, 0xf3, 0xf3, 0xd9, 0xf4, + VSET(16, e8, m8); + VLOAD_8(v16, 0xc2, 0xf6, 0xad, 0x0f, 0xc6, 0xeb, 0xca, 0xf3, 0xf3, 0xd9, 0xf4, 0xf6, 0x27, 0x57, 0x4f, 0xef); - VLOAD_8(v6, 0xf9, 0x0c, 0xa8, 0x05, 0x23, 0xff, 0x48, 0x74, 0xd4, 0x6b, 0x5b, + VLOAD_8(v24, 0xf9, 0x0c, 0xa8, 0x05, 0x23, 0xff, 0x48, 0x74, 0xd4, 0x6b, 0x5b, 0x07, 0x8b, 0x2e, 0x9e, 0x5f); - asm volatile("vmulh.vv v2, v4, v6"); - VCMP_I8(1, v2, 0x01, 0xff, 0x1c, 0x00, 0xf8, 0x00, 0xf0, 0xfa, 0x02, 0xef, + asm volatile("vmulh.vv v8, v16, v24"); + VCMP_I8(1, v8, 0x01, 0xff, 0x1c, 0x00, 0xf8, 0x00, 0xf0, 0xfa, 0x02, 0xef, 0xfb, 0xff, 0xee, 0x0f, 0xe1, 0xf9); - VSET(16, e16, m2); - VLOAD_16(v4, 0x911a, 0x9f44, 0x3d2a, 0xa2a8, 0x5aae, 0x0231, 0x56fa, 0xb8b8, + VSET(16, e16, m8); + VLOAD_16(v16, 0x911a, 0x9f44, 0x3d2a, 0xa2a8, 0x5aae, 0x0231, 0x56fa, 0xb8b8, 0x55df, 0x8a78, 0x413c, 0xeb32, 0x6bc4, 0x3e47, 0x3d79, 0x2c8f); - VLOAD_16(v6, 0x89fd, 0x6bb7, 0x4a94, 0x770c, 0x7c87, 0x8b01, 0xbb6a, 0x900d, + VLOAD_16(v24, 
0x89fd, 0x6bb7, 0x4a94, 0x770c, 0x7c87, 0x8b01, 0xbb6a, 0x900d, 0xb589, 0x709e, 0xc75d, 0xafa5, 0x4fd5, 0x2d77, 0x8dbf, 0x3a0a); - asm volatile("vmulh.vv v2, v4, v6"); - VCMP_I16(2, v2, 0x331f, 0xd74c, 0x11d1, 0xd497, 0x2c1c, 0xfeff, 0xe8b2, + asm volatile("vmulh.vv v8, v16, v24"); + VCMP_I16(2, v8, 0x331f, 0xd74c, 0x11d1, 0xd497, 0x2c1c, 0xfeff, 0xe8b2, 0x1f2b, 0xe705, 0xcc4b, 0xf191, 0x0687, 0x219b, 0x0b0f, 0xe490, 0x0a1a); - VSET(16, e32, m2); - VLOAD_32(v4, 0xc66253f4, 0x710c314c, 0xa6fe579b, 0xa7947b70, 0xbf94259f, + VSET(16, e32, m8); + VLOAD_32(v16, 0xc66253f4, 0x710c314c, 0xa6fe579b, 0xa7947b70, 0xbf94259f, 0x211088fe, 0x64bfd390, 0x1d49c8d8, 0x7a12a08a, 0x190ee9ae, 0x361172f8, 0x52457515, 0x05d4b17b, 0x7bb6d43b, 0x96270cc7, 0x62d35f88); - VLOAD_32(v6, 0xd14a266e, 0xe4f43ca5, 0x1c067312, 0xa1909d51, 0x35b8d1aa, + VLOAD_32(v24, 0xd14a266e, 0xe4f43ca5, 0x1c067312, 0xa1909d51, 0x35b8d1aa, 0xdcd3e2ea, 0x05cec46d, 0xbe70ebd4, 0xe15e49c5, 0x81be068b, 0x49fd9ad8, 0x6c2a5abd, 0x26216dd6, 0x9e3188ac, 0x14af13c4, 0xd98c6d7f); - asm volatile("vmulh.vv v2, v4, v6"); - VCMP_I32(3, v2, 0x0a83425c, 0xf40e8502, 0xf6419389, 0x209df360, 0xf27b2982, + asm volatile("vmulh.vv v8, v16, v24"); + VCMP_I32(3, v8, 0x0a83425c, 0xf40e8502, 0xf6419389, 0x209df360, 0xf27b2982, 0xfb750aac, 0x02491ecb, 0xf87fe57b, 0xf164b493, 0xf3a433c8, 0x0fa089bb, 0x22c2e9f3, 0x00de5543, 0xd0bbf2cc, 0xf772a985, 0xf128024f); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x9def74822cdf1a42, 0x29307e854b225449, 0x071cdf51785d150e, + VSET(16, e64, m8); + VLOAD_64(v16, 0x9def74822cdf1a42, 0x29307e854b225449, 0x071cdf51785d150e, 0xe8ced2e9009d363f, 0xa88c741be4e81893, 0x4a7655ec12afe587, 0x50c5efa017138cb9, 0x88e076b6ef49619d, 0x5745683769adf333, 0x5b3b01f4b1c4fd42, 0x8a3d55e48864d144, 0x2eac97fae4174cac, 0xdb8804ccf6f55686, 0xf7bea87bac575241, 0x250ed7ddade1432d, 0x06ae542295f32453); - VLOAD_64(v6, 0xc5c41e47b5f3de5f, 0xa03833fb95a7e7e2, 0x74f0573dba05b058, + VLOAD_64(v24, 0xc5c41e47b5f3de5f, 
0xa03833fb95a7e7e2, 0x74f0573dba05b058, 0x687968e9ba2a98ad, 0x29f4aaf3e5e4f2b6, 0x2c40a650d53f6f08, 0x491da2c816388b78, 0x2822d8207421ec15, 0x5dd8d394b292512a, 0x4169844eea56920d, 0x97183b6e1e85fd70, 0x224077bf8899614c, 0x3a9c0520417d4f32, 0xee47b09a33f49fca, 0x3f9f1140fbd02e0a, 0x6106ad88eabfc3e2); - asm volatile("vmulh.vv v2, v4, v6"); - VCMP_I64(4, v2, 0x164eafe1cab0639c, 0xf096db86d4d06824, 0x033fc2aecddc0dd7, + asm volatile("vmulh.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x164eafe1cab0639c, 0xf096db86d4d06824, 0x033fc2aecddc0dd7, 0xf68905ef31703000, 0xf1aaea11162383ae, 0x0cdf24ba4cf3be38, 0x1711cb1d2f008de9, 0xed52dbcaa3de5ca2, 0x1ffe218cf60b6bf9, 0x174f95d97aff7bf9, 0x3041b22ecc97909a, 0x063ead2a7756c9da, @@ -67,99 +67,99 @@ void TEST_CASE1(void) { }; void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0xc2, 0xf6, 0xad, 0x0f, 0xc6, 0xeb, 0xca, 0xf3, 0xf3, 0xd9, 0xf4, + VSET(16, e8, m8); + VLOAD_8(v16, 0xc2, 0xf6, 0xad, 0x0f, 0xc6, 0xeb, 0xca, 0xf3, 0xf3, 0xd9, 0xf4, 0xf6, 0x27, 0x57, 0x4f, 0xef); - VLOAD_8(v6, 0xf9, 0x0c, 0xa8, 0x05, 0x23, 0xff, 0x48, 0x74, 0xd4, 0x6b, 0x5b, + VLOAD_8(v24, 0xf9, 0x0c, 0xa8, 0x05, 0x23, 0xff, 0x48, 0x74, 0xd4, 0x6b, 0x5b, 0x07, 0x8b, 0x2e, 0x9e, 0x5f); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulh.vv v2, v4, v6, v0.t"); - VCMP_I8(5, v2, 0, 0xff, 0, 0x00, 0, 0x00, 0, 0xfa, 0, 0xef, 0, 0xff, 0, 0x0f, + VCLEAR(v8); + asm volatile("vmulh.vv v8, v16, v24, v0.t"); + VCMP_I8(5, v8, 0, 0xff, 0, 0x00, 0, 0x00, 0, 0xfa, 0, 0xef, 0, 0xff, 0, 0x0f, 0, 0xf9); - VSET(16, e16, m2); - VLOAD_16(v4, 0x911a, 0x9f44, 0x3d2a, 0xa2a8, 0x5aae, 0x0231, 0x56fa, 0xb8b8, + VSET(16, e16, m8); + VLOAD_16(v16, 0x911a, 0x9f44, 0x3d2a, 0xa2a8, 0x5aae, 0x0231, 0x56fa, 0xb8b8, 0x55df, 0x8a78, 0x413c, 0xeb32, 0x6bc4, 0x3e47, 0x3d79, 0x2c8f); - VLOAD_16(v6, 0x89fd, 0x6bb7, 0x4a94, 0x770c, 0x7c87, 0x8b01, 0xbb6a, 0x900d, + VLOAD_16(v24, 0x89fd, 0x6bb7, 0x4a94, 0x770c, 0x7c87, 0x8b01, 0xbb6a, 0x900d, 0xb589, 0x709e, 0xc75d, 0xafa5, 0x4fd5, 0x2d77, 
0x8dbf, 0x3a0a); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulh.vv v2, v4, v6, v0.t"); - VCMP_I16(6, v2, 0, 0xd74c, 0, 0xd497, 0, 0xfeff, 0, 0x1f2b, 0, 0xcc4b, 0, + VCLEAR(v8); + asm volatile("vmulh.vv v8, v16, v24, v0.t"); + VCMP_I16(6, v8, 0, 0xd74c, 0, 0xd497, 0, 0xfeff, 0, 0x1f2b, 0, 0xcc4b, 0, 0x0687, 0, 0x0b0f, 0, 0x0a1a); - VSET(16, e32, m2); - VLOAD_32(v4, 0xc66253f4, 0x710c314c, 0xa6fe579b, 0xa7947b70, 0xbf94259f, + VSET(16, e32, m8); + VLOAD_32(v16, 0xc66253f4, 0x710c314c, 0xa6fe579b, 0xa7947b70, 0xbf94259f, 0x211088fe, 0x64bfd390, 0x1d49c8d8, 0x7a12a08a, 0x190ee9ae, 0x361172f8, 0x52457515, 0x05d4b17b, 0x7bb6d43b, 0x96270cc7, 0x62d35f88); - VLOAD_32(v6, 0xd14a266e, 0xe4f43ca5, 0x1c067312, 0xa1909d51, 0x35b8d1aa, + VLOAD_32(v24, 0xd14a266e, 0xe4f43ca5, 0x1c067312, 0xa1909d51, 0x35b8d1aa, 0xdcd3e2ea, 0x05cec46d, 0xbe70ebd4, 0xe15e49c5, 0x81be068b, 0x49fd9ad8, 0x6c2a5abd, 0x26216dd6, 0x9e3188ac, 0x14af13c4, 0xd98c6d7f); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulh.vv v2, v4, v6, v0.t"); - VCMP_I32(7, v2, 0, 0xf40e8502, 0, 0x209df360, 0, 0xfb750aac, 0, 0xf87fe57b, 0, + VCLEAR(v8); + asm volatile("vmulh.vv v8, v16, v24, v0.t"); + VCMP_I32(7, v8, 0, 0xf40e8502, 0, 0x209df360, 0, 0xfb750aac, 0, 0xf87fe57b, 0, 0xf3a433c8, 0, 0x22c2e9f3, 0, 0xd0bbf2cc, 0, 0xf128024f); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x9def74822cdf1a42, 0x29307e854b225449, 0x071cdf51785d150e, + VSET(16, e64, m8); + VLOAD_64(v16, 0x9def74822cdf1a42, 0x29307e854b225449, 0x071cdf51785d150e, 0xe8ced2e9009d363f, 0xa88c741be4e81893, 0x4a7655ec12afe587, 0x50c5efa017138cb9, 0x88e076b6ef49619d, 0x5745683769adf333, 0x5b3b01f4b1c4fd42, 0x8a3d55e48864d144, 0x2eac97fae4174cac, 0xdb8804ccf6f55686, 0xf7bea87bac575241, 0x250ed7ddade1432d, 0x06ae542295f32453); - VLOAD_64(v6, 0xc5c41e47b5f3de5f, 0xa03833fb95a7e7e2, 0x74f0573dba05b058, + VLOAD_64(v24, 0xc5c41e47b5f3de5f, 0xa03833fb95a7e7e2, 0x74f0573dba05b058, 0x687968e9ba2a98ad, 0x29f4aaf3e5e4f2b6, 0x2c40a650d53f6f08, 
0x491da2c816388b78, 0x2822d8207421ec15, 0x5dd8d394b292512a, 0x4169844eea56920d, 0x97183b6e1e85fd70, 0x224077bf8899614c, 0x3a9c0520417d4f32, 0xee47b09a33f49fca, 0x3f9f1140fbd02e0a, 0x6106ad88eabfc3e2); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulh.vv v2, v4, v6, v0.t"); - VCMP_I64(8, v2, 0, 0xf096db86d4d06824, 0, 0xf68905ef31703000, 0, + VCLEAR(v8); + asm volatile("vmulh.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0xf096db86d4d06824, 0, 0xf68905ef31703000, 0, 0x0cdf24ba4cf3be38, 0, 0xed52dbcaa3de5ca2, 0, 0x174f95d97aff7bf9, 0, 0x063ead2a7756c9da, 0, 0x0092485623082173, 0, 0x02883a7e75391040); #endif }; void TEST_CASE3(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x92, 0xce, 0xdd, 0x64, 0x60, 0x29, 0xa6, 0xd5, 0x07, 0x8c, 0x71, + VSET(16, e8, m8); + VLOAD_8(v16, 0x92, 0xce, 0xdd, 0x64, 0x60, 0x29, 0xa6, 0xd5, 0x07, 0x8c, 0x71, 0x94, 0x95, 0xf6, 0xd4, 0xbd); int64_t scalar = 5; - asm volatile("vmulh.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I8(9, v2, 0xfd, 0xff, 0xff, 0x01, 0x01, 0x00, 0xfe, 0xff, 0x00, 0xfd, + asm volatile("vmulh.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v8, 0xfd, 0xff, 0xff, 0x01, 0x01, 0x00, 0xfe, 0xff, 0x00, 0xfd, 0x02, 0xfd, 0xfd, 0xff, 0xff, 0xfe); - VSET(16, e16, m2); - VLOAD_16(v4, 0x3b9b, 0x7758, 0x030f, 0x9f60, 0x13e2, 0x8f0d, 0xfc9d, 0x3922, + VSET(16, e16, m8); + VLOAD_16(v16, 0x3b9b, 0x7758, 0x030f, 0x9f60, 0x13e2, 0x8f0d, 0xfc9d, 0x3922, 0x3a43, 0x58b5, 0xb9e9, 0xa4e8, 0x4bac, 0x5636, 0x9f4a, 0xbd52); scalar = -5383; - asm volatile("vmulh.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I16(10, v2, 0xfb1a, 0xf632, 0xffbf, 0x07ef, 0xfe5d, 0x0947, 0x0047, + asm volatile("vmulh.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v8, 0xfb1a, 0xf632, 0xffbf, 0x07ef, 0xfe5d, 0x0947, 0x0047, 0xfb4e, 0xfb36, 0xf8b6, 0x05c1, 0x077b, 0xf9c8, 0xf8eb, 0x07f1, 0x057a); - VSET(16, e32, m2); - VLOAD_32(v4, 0x7efca225, 0xdbc0a9ca, 0x0cf02cf8, 0xc19bdc84, 0x7fa3ca90, + VSET(16, e32, m8); + VLOAD_32(v16, 0x7efca225, 0xdbc0a9ca, 
0x0cf02cf8, 0xc19bdc84, 0x7fa3ca90, 0x3d878c29, 0x15809928, 0x7b0b7421, 0x48b872f5, 0xafbfeab4, 0xe79dc9ba, 0xe60a8fc0, 0x1fd7e866, 0xed7df17c, 0x0684a7ee, 0xb2b01a61); scalar = 6474219; - asm volatile("vmulh.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I32(11, v2, 0x003100de, 0xfff20329, 0x0004fe25, 0xffe7ec74, 0x00314160, + asm volatile("vmulh.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v8, 0x003100de, 0xfff20329, 0x0004fe25, 0xffe7ec74, 0x00314160, 0x0017be6b, 0x00084c30, 0x002f7b6f, 0x001c0ff7, 0xffe1082b, 0xfff6972b, 0xfff5fb91, 0x000c49c4, 0xfff8db9d, 0x000283ec, 0xffe22a6f); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0x42e9b386e7453715, 0xd6aae3fda4b2f3e8, 0xfcbec1ad7996a0b2, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0x42e9b386e7453715, 0xd6aae3fda4b2f3e8, 0xfcbec1ad7996a0b2, */ /* 0x4fcb68f516b589c9, 0x414b0eeb29c35e62, 0x038c6221829f4241, */ /* 0xf2c2c11f26e326b0, 0xed9ad0ce4d50a009, 0xc57105a57d435897, */ @@ -167,8 +167,8 @@ void TEST_CASE3(void) { /* 0x1d9da4f87df33b54, 0xe347aadb53bdc879, 0x7a39a7269cbae2a7, */ /* 0x422ed2952e246f26); */ /* scalar = -598189234597999223; */ - /* asm volatile("vmulh.vx v2, v4, %[A]" ::[A] "r"(scalar)); */ - /* VCMP_I64(12, v2, 0xfdd4850b300f6008, 0x01571f899f226d57, + /* asm volatile("vmulh.vx v8, v16, %[A]" ::[A] "r"(scalar)); */ + /* VCMP_I64(12, v8, 0xfdd4850b300f6008, 0x01571f899f226d57, * 0x001b0534decdc9a2, */ /* 0xfd6994f8de6e51aa, 0xfde1f73873e6758a, 0xffe28b043b9b8971, */ /* 0x006de7f819baba3d, 0x0098b57f65f599e1, 0x01e62040839e971b, */ @@ -179,41 +179,41 @@ void TEST_CASE3(void) { }; void TEST_CASE4(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x92, 0xce, 0xdd, 0x64, 0x60, 0x29, 0xa6, 0xd5, 0x07, 0x8c, 0x71, + VSET(16, e8, m8); + VLOAD_8(v16, 0x92, 0xce, 0xdd, 0x64, 0x60, 0x29, 0xa6, 0xd5, 0x07, 0x8c, 0x71, 0x94, 0x95, 0xf6, 0xd4, 0xbd); int64_t scalar = 5; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulh.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I8(13, v2, 0, 
0xff, 0, 0x01, 0, 0x00, 0, 0xff, 0, 0xfd, 0, 0xfd, 0, 0xff, + VCLEAR(v8); + asm volatile("vmulh.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v8, 0, 0xff, 0, 0x01, 0, 0x00, 0, 0xff, 0, 0xfd, 0, 0xfd, 0, 0xff, 0, 0xfe); - VSET(16, e16, m2); - VLOAD_16(v4, 0x3b9b, 0x7758, 0x030f, 0x9f60, 0x13e2, 0x8f0d, 0xfc9d, 0x3922, + VSET(16, e16, m8); + VLOAD_16(v16, 0x3b9b, 0x7758, 0x030f, 0x9f60, 0x13e2, 0x8f0d, 0xfc9d, 0x3922, 0x3a43, 0x58b5, 0xb9e9, 0xa4e8, 0x4bac, 0x5636, 0x9f4a, 0xbd52); scalar = -5383; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulh.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I16(14, v2, 0, 0xf632, 0, 0x07ef, 0, 0x0947, 0, 0xfb4e, 0, 0xf8b6, 0, + VCLEAR(v8); + asm volatile("vmulh.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v8, 0, 0xf632, 0, 0x07ef, 0, 0x0947, 0, 0xfb4e, 0, 0xf8b6, 0, 0x077b, 0, 0xf8eb, 0, 0x057a); - VSET(16, e32, m2); - VLOAD_32(v4, 0x7efca225, 0xdbc0a9ca, 0x0cf02cf8, 0xc19bdc84, 0x7fa3ca90, + VSET(16, e32, m8); + VLOAD_32(v16, 0x7efca225, 0xdbc0a9ca, 0x0cf02cf8, 0xc19bdc84, 0x7fa3ca90, 0x3d878c29, 0x15809928, 0x7b0b7421, 0x48b872f5, 0xafbfeab4, 0xe79dc9ba, 0xe60a8fc0, 0x1fd7e866, 0xed7df17c, 0x0684a7ee, 0xb2b01a61); scalar = 6474219; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulh.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I32(15, v2, 0, 0xfff20329, 0, 0xffe7ec74, 0, 0x0017be6b, 0, 0x002f7b6f, + VCLEAR(v8); + asm volatile("vmulh.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v8, 0, 0xfff20329, 0, 0xffe7ec74, 0, 0x0017be6b, 0, 0x002f7b6f, 0, 0xffe1082b, 0, 0xfff5fb91, 0, 0xfff8db9d, 0, 0xffe22a6f); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0x42e9b386e7453715, 0xd6aae3fda4b2f3e8, 0xfcbec1ad7996a0b2, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0x42e9b386e7453715, 0xd6aae3fda4b2f3e8, 0xfcbec1ad7996a0b2, */ /* 0x4fcb68f516b589c9, 0x414b0eeb29c35e62, 0x038c6221829f4241, */ /* 0xf2c2c11f26e326b0, 0xed9ad0ce4d50a009, 0xc57105a57d435897, */ @@ 
-222,9 +222,9 @@ void TEST_CASE4(void) { /* 0x422ed2952e246f26); */ /* scalar = -598189234597999223; */ /* VLOAD_8(v0, 0xAA, 0xAA); */ - /* VCLEAR(v2); */ - /* asm volatile("vmulh.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); */ - /* VCMP_I64(16, v2, 0, 0x01571f899f226d57, 0, 0xfd6994f8de6e51aa, 0, */ + /* VCLEAR(v8); */ + /* asm volatile("vmulh.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); */ + /* VCMP_I64(16, v8, 0, 0x01571f899f226d57, 0, 0xfd6994f8de6e51aa, 0, */ /* 0xffe28b043b9b8971, 0, 0x0098b57f65f599e1, 0, * 0x039b807e6f36fd81, 0, */ /* 0x0293356120e5cee9, 0, 0x00ee6bb505683322, 0, diff --git a/sw/riscvTests/isa/rv64uv/vmulhsu.c b/sw/riscvTests/isa/rv64uv/vmulhsu.c index 482b3e65..c87aaa8b 100644 --- a/sw/riscvTests/isa/rv64uv/vmulhsu.c +++ b/sw/riscvTests/isa/rv64uv/vmulhsu.c @@ -8,56 +8,56 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x53, 0x4f, 0xde, 0xea, 0x47, 0x3c, 0x41, 0xf8, 0xd6, 0xd0, 0x93, + VSET(16, e8, m8); + VLOAD_8(v16, 0x53, 0x4f, 0xde, 0xea, 0x47, 0x3c, 0x41, 0xf8, 0xd6, 0xd0, 0x93, 0x35, 0xfc, 0x70, 0x33, 0xe4); - VLOAD_8(v6, 0xaa, 0x24, 0xaa, 0xde, 0x92, 0x00, 0x7f, 0xe5, 0xb3, 0xf8, 0xa0, + VLOAD_8(v24, 0xaa, 0x24, 0xaa, 0xde, 0x92, 0x00, 0x7f, 0xe5, 0xb3, 0xf8, 0xa0, 0xa8, 0xbb, 0xc6, 0x65, 0x81); - asm volatile("vmulhsu.vv v2, v4, v6"); - VCMP_I8(1, v2, 0x37, 0x0b, 0xe9, 0xec, 0x28, 0x00, 0x20, 0xf8, 0xe2, 0xd1, + asm volatile("vmulhsu.vv v8, v16, v24"); + VCMP_I8(1, v8, 0x37, 0x0b, 0xe9, 0xec, 0x28, 0x00, 0x20, 0xf8, 0xe2, 0xd1, 0xbb, 0x22, 0xfd, 0x56, 0x14, 0xf1); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0f50, 0x47f9, 0xa4ca, 0xf94d, 0x720c, 0x444c, 0x3681, 0x96bd, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0f50, 0x47f9, 0xa4ca, 0xf94d, 0x720c, 0x444c, 0x3681, 0x96bd, 0x5d37, 0xd64e, 0xe792, 0xdb64, 0xfaa6, 0xafe6, 0xf4e8, 0xe5ea); - VLOAD_16(v6, 0x7784, 0xa42e, 0x499b, 0x0c01, 0x9d2b, 0x600d, 0x2bbd, 0xcb41, + VLOAD_16(v24, 0x7784, 0xa42e, 0x499b, 0x0c01, 0x9d2b, 0x600d, 0x2bbd, 0xcb41, 0xdda1, 0xb5d7, 
0xafbc, 0xc74f, 0xab45, 0x986f, 0xf0f2, 0xcf3c); - asm volatile("vmulhsu.vv v2, v4, v6"); - VCMP_I16(2, v2, 0x0726, 0x2e28, 0xe5c6, 0xffaf, 0x4604, 0x199f, 0x094f, + asm volatile("vmulhsu.vv v8, v16, v24"); + VCMP_I16(2, v8, 0x0726, 0x2e28, 0xe5c6, 0xffaf, 0x4604, 0x199f, 0x094f, 0xac6d, 0x50b3, 0xe262, 0xef3a, 0xe37f, 0xfc6b, 0xd04d, 0xf58f, 0xeae2); - VSET(16, e32, m2); - VLOAD_32(v4, 0xeded4bf3, 0xc9e27167, 0x4175509c, 0x80a3ae04, 0x9f1b2c07, + VSET(16, e32, m8); + VLOAD_32(v16, 0xeded4bf3, 0xc9e27167, 0x4175509c, 0x80a3ae04, 0x9f1b2c07, 0x87ea397b, 0x862e2800, 0x3cd09f37, 0x9a313d78, 0x596661ee, 0x31f99717, 0x64e65802, 0xbd567027, 0xf7c459be, 0x57b6d9cd, 0x94bc3eb4); - VLOAD_32(v6, 0xa147b233, 0x19880f3d, 0x8dd8815e, 0xbc318dca, 0x2c436b94, + VLOAD_32(v24, 0xa147b233, 0x19880f3d, 0x8dd8815e, 0xbc318dca, 0x2c436b94, 0x29ba4191, 0x637f89b7, 0xe39d7818, 0xf48ff2d6, 0xb1dc7c7e, 0xfa5da298, 0x5c1aae36, 0x83e04069, 0xecf36c08, 0x40d2e3a3, 0xe7468a97); - asm volatile("vmulhsu.vv v2, v4, v6"); - VCMP_I32(3, v2, 0xf49d2cff, 0xfa9a5a26, 0x2444f976, 0xa25f8c94, 0xef3f26f6, + asm volatile("vmulhsu.vv v8, v16, v24"); + VCMP_I32(3, v8, 0xf49d2cff, 0xfa9a5a26, 0x2444f976, 0xa25f8c94, 0xef3f26f6, 0xec6d24a0, 0xd0a728d5, 0x361265a6, 0x9ebdaf85, 0x3e1cc92b, 0x30e004f5, 0x244d4baf, 0xdda8d640, 0xf8612ea2, 0x1635f870, 0x9f184dfb); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x2b1f761d24dcff24, 0x1174fcea60fbf146, 0xaa5068c22e71489d, + VSET(16, e64, m8); + VLOAD_64(v16, 0x2b1f761d24dcff24, 0x1174fcea60fbf146, 0xaa5068c22e71489d, 0x422ad458a7cbf321, 0x01e0f752e5d8bb37, 0xe7762162abff4c4c, 0x36279dbbf009199d, 0x188dda33d835d9e4, 0xa0f5a67450e87d77, 0xb43066649033e7ac, 0xb47ff6241cc77c2c, 0xfce0bafc1d36b615, 0x045b90f3b63e0f7f, 0x514e5121be1f02e5, 0x06c9e97573723e47, 0x406879d908a80b41); - VLOAD_64(v6, 0xd50adee8d491db29, 0xc7d423514dd58616, 0x5b22f7a3971e17f9, + VLOAD_64(v24, 0xd50adee8d491db29, 0xc7d423514dd58616, 0x5b22f7a3971e17f9, 0xb9ad8b0339e659cd, 0x5af15755f3954b0f, 
0x6b2fb3e49bd48e69, 0x084244757fba5561, 0xf2d5b41ee89411fa, 0x8585111aaee16c07, 0xcd1a427644b0ad59, 0x2356738fd6b04f3a, 0x89d936a76f0a518a, 0x5f2df66443ff24b3, 0x6cbfcf273c43ae6b, 0xabb59d9f05a03eef, 0xb84832df19fc19b6); - asm volatile("vmulhsu.vv v2, v4, v6"); - VCMP_I64(4, v2, 0x23e3020d5d8e40d8, 0x0da067e42d62fa2a, 0xe17ee107c3fdd97f, + asm volatile("vmulhsu.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x23e3020d5d8e40d8, 0x0da067e42d62fa2a, 0xe17ee107c3fdd97f, 0x2ffdce53a7ef7aa4, 0x00aadc600f6180bd, 0xf5b9cd660e9f294b, 0x01bf419feafa3fe5, 0x174a979243e0945b, 0xce6e38c0508aba17, 0xc342fb3a620dde75, 0xf593ff8eafcca075, 0xfe519de4c807844e, @@ -67,99 +67,99 @@ void TEST_CASE1(void) { }; void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x53, 0x4f, 0xde, 0xea, 0x47, 0x3c, 0x41, 0xf8, 0xd6, 0xd0, 0x93, + VSET(16, e8, m8); + VLOAD_8(v16, 0x53, 0x4f, 0xde, 0xea, 0x47, 0x3c, 0x41, 0xf8, 0xd6, 0xd0, 0x93, 0x35, 0xfc, 0x70, 0x33, 0xe4); - VLOAD_8(v6, 0xaa, 0x24, 0xaa, 0xde, 0x92, 0x00, 0x7f, 0xe5, 0xb3, 0xf8, 0xa0, + VLOAD_8(v24, 0xaa, 0x24, 0xaa, 0xde, 0x92, 0x00, 0x7f, 0xe5, 0xb3, 0xf8, 0xa0, 0xa8, 0xbb, 0xc6, 0x65, 0x81); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhsu.vv v2, v4, v6, v0.t"); - VCMP_I8(5, v2, 0, 0x0b, 0, 0xec, 0, 0x00, 0, 0xf8, 0, 0xd1, 0, 0x22, 0, 0x56, + VCLEAR(v8); + asm volatile("vmulhsu.vv v8, v16, v24, v0.t"); + VCMP_I8(5, v8, 0, 0x0b, 0, 0xec, 0, 0x00, 0, 0xf8, 0, 0xd1, 0, 0x22, 0, 0x56, 0, 0xf1); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0f50, 0x47f9, 0xa4ca, 0xf94d, 0x720c, 0x444c, 0x3681, 0x96bd, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0f50, 0x47f9, 0xa4ca, 0xf94d, 0x720c, 0x444c, 0x3681, 0x96bd, 0x5d37, 0xd64e, 0xe792, 0xdb64, 0xfaa6, 0xafe6, 0xf4e8, 0xe5ea); - VLOAD_16(v6, 0x7784, 0xa42e, 0x499b, 0x0c01, 0x9d2b, 0x600d, 0x2bbd, 0xcb41, + VLOAD_16(v24, 0x7784, 0xa42e, 0x499b, 0x0c01, 0x9d2b, 0x600d, 0x2bbd, 0xcb41, 0xdda1, 0xb5d7, 0xafbc, 0xc74f, 0xab45, 0x986f, 0xf0f2, 0xcf3c); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm 
volatile("vmulhsu.vv v2, v4, v6, v0.t"); - VCMP_I16(6, v2, 0, 0x2e28, 0, 0xffaf, 0, 0x199f, 0, 0xac6d, 0, 0xe262, 0, + VCLEAR(v8); + asm volatile("vmulhsu.vv v8, v16, v24, v0.t"); + VCMP_I16(6, v8, 0, 0x2e28, 0, 0xffaf, 0, 0x199f, 0, 0xac6d, 0, 0xe262, 0, 0xe37f, 0, 0xd04d, 0, 0xeae2); - VSET(16, e32, m2); - VLOAD_32(v4, 0xeded4bf3, 0xc9e27167, 0x4175509c, 0x80a3ae04, 0x9f1b2c07, + VSET(16, e32, m8); + VLOAD_32(v16, 0xeded4bf3, 0xc9e27167, 0x4175509c, 0x80a3ae04, 0x9f1b2c07, 0x87ea397b, 0x862e2800, 0x3cd09f37, 0x9a313d78, 0x596661ee, 0x31f99717, 0x64e65802, 0xbd567027, 0xf7c459be, 0x57b6d9cd, 0x94bc3eb4); - VLOAD_32(v6, 0xa147b233, 0x19880f3d, 0x8dd8815e, 0xbc318dca, 0x2c436b94, + VLOAD_32(v24, 0xa147b233, 0x19880f3d, 0x8dd8815e, 0xbc318dca, 0x2c436b94, 0x29ba4191, 0x637f89b7, 0xe39d7818, 0xf48ff2d6, 0xb1dc7c7e, 0xfa5da298, 0x5c1aae36, 0x83e04069, 0xecf36c08, 0x40d2e3a3, 0xe7468a97); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhsu.vv v2, v4, v6, v0.t"); - VCMP_I32(7, v2, 0, 0xfa9a5a26, 0, 0xa25f8c94, 0, 0xec6d24a0, 0, 0x361265a6, 0, + VCLEAR(v8); + asm volatile("vmulhsu.vv v8, v16, v24, v0.t"); + VCMP_I32(7, v8, 0, 0xfa9a5a26, 0, 0xa25f8c94, 0, 0xec6d24a0, 0, 0x361265a6, 0, 0x3e1cc92b, 0, 0x244d4baf, 0, 0xf8612ea2, 0, 0x9f184dfb); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x2b1f761d24dcff24, 0x1174fcea60fbf146, 0xaa5068c22e71489d, + VSET(16, e64, m8); + VLOAD_64(v16, 0x2b1f761d24dcff24, 0x1174fcea60fbf146, 0xaa5068c22e71489d, 0x422ad458a7cbf321, 0x01e0f752e5d8bb37, 0xe7762162abff4c4c, 0x36279dbbf009199d, 0x188dda33d835d9e4, 0xa0f5a67450e87d77, 0xb43066649033e7ac, 0xb47ff6241cc77c2c, 0xfce0bafc1d36b615, 0x045b90f3b63e0f7f, 0x514e5121be1f02e5, 0x06c9e97573723e47, 0x406879d908a80b41); - VLOAD_64(v6, 0xd50adee8d491db29, 0xc7d423514dd58616, 0x5b22f7a3971e17f9, + VLOAD_64(v24, 0xd50adee8d491db29, 0xc7d423514dd58616, 0x5b22f7a3971e17f9, 0xb9ad8b0339e659cd, 0x5af15755f3954b0f, 0x6b2fb3e49bd48e69, 0x084244757fba5561, 0xf2d5b41ee89411fa, 
0x8585111aaee16c07, 0xcd1a427644b0ad59, 0x2356738fd6b04f3a, 0x89d936a76f0a518a, 0x5f2df66443ff24b3, 0x6cbfcf273c43ae6b, 0xabb59d9f05a03eef, 0xb84832df19fc19b6); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhsu.vv v2, v4, v6, v0.t"); - VCMP_I64(8, v2, 0, 0x0da067e42d62fa2a, 0, 0x2ffdce53a7ef7aa4, 0, + VCLEAR(v8); + asm volatile("vmulhsu.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x0da067e42d62fa2a, 0, 0x2ffdce53a7ef7aa4, 0, 0xf5b9cd660e9f294b, 0, 0x174a979243e0945b, 0, 0xc342fb3a620dde75, 0, 0xfe519de4c807844e, 0, 0x2289f5738e0e6d23, 0, 0x2e5d41c2cc9b604f); #endif }; void TEST_CASE3(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x21, 0x87, 0xa0, 0xa8, 0x6a, 0x6f, 0x6a, 0x6b, 0x74, 0x99, 0x37, + VSET(16, e8, m8); + VLOAD_8(v16, 0x21, 0x87, 0xa0, 0xa8, 0x6a, 0x6f, 0x6a, 0x6b, 0x74, 0x99, 0x37, 0xa4, 0xdc, 0x4f, 0xc3, 0x55); uint64_t scalar = 5; - asm volatile("vmulhsu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I8(9, v2, 0x00, 0xfd, 0xfe, 0xfe, 0x02, 0x02, 0x02, 0x02, 0x02, 0xfd, + asm volatile("vmulhsu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v8, 0x00, 0xfd, 0xfe, 0xfe, 0x02, 0x02, 0x02, 0x02, 0x02, 0xfd, 0x01, 0xfe, 0xff, 0x01, 0xfe, 0x01); - VSET(16, e16, m2); - VLOAD_16(v4, 0x62b8, 0xc1e3, 0xb151, 0x08ce, 0x06c4, 0x1d2f, 0x7448, 0xfcd5, + VSET(16, e16, m8); + VLOAD_16(v16, 0x62b8, 0xc1e3, 0xb151, 0x08ce, 0x06c4, 0x1d2f, 0x7448, 0xfcd5, 0x398c, 0xb933, 0x436d, 0x748f, 0x58d9, 0x1cd6, 0x86db, 0x20f2); scalar = 816; - asm volatile("vmulhsu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I16(10, v2, 0x013a, 0xff3a, 0xff05, 0x001c, 0x0015, 0x005d, 0x0172, + asm volatile("vmulhsu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v8, 0x013a, 0xff3a, 0xff05, 0x001c, 0x0015, 0x005d, 0x0172, 0xfff5, 0x00b7, 0xff1e, 0x00d6, 0x0173, 0x011b, 0x005b, 0xfe7d, 0x0069); - VSET(16, e32, m2); - VLOAD_32(v4, 0xa7cac3f7, 0xb3894e05, 0xbac8e70b, 0x05479577, 0x19d8bf63, + VSET(16, e32, m8); + VLOAD_32(v16, 0xa7cac3f7, 0xb3894e05, 0xbac8e70b, 0x05479577, 0x19d8bf63, 
0xb952c1ad, 0x9eaa74c0, 0x9e38d5c8, 0x51c77b3b, 0xa5f44521, 0x65042faa, 0x8e7e5345, 0x76ae481c, 0x0ab27b6f, 0xa388cf2b, 0x58218f7f); scalar = 7389998; - asm volatile("vmulhsu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I32(11, v2, 0xffd92575, 0xffde51c5, 0xffe1831f, 0x00025357, 0x000b6288, + asm volatile("vmulhsu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v8, 0xffd92575, 0xffde51c5, 0xffe1831f, 0x00025357, 0x000b6288, 0xffe0de52, 0xffd5205d, 0xffd4ee51, 0x0024059f, 0xffd85637, 0x002c7ed9, 0xffce00ba, 0x003446bb, 0x0004b63d, 0xffd7455b, 0x0026d1e0); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0x43c5f863d4be9b51, 0x70b017b4c5d0d11e, 0x9e008a07f48796fe, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0x43c5f863d4be9b51, 0x70b017b4c5d0d11e, 0x9e008a07f48796fe, */ /* 0x6f0fa9a63860308a, 0x07b5d372a7be167c, 0x234be9472899553e, */ /* 0x25655d82cb668037, 0x959d6233470780ee, 0xf3d683308326232a, */ @@ -167,8 +167,8 @@ void TEST_CASE3(void) { /* 0x8382e0c79caa1e6c, 0x0d1593d36c1dc00e, 0x9f8eb889cc8e98c6, */ /* 0x37411f40369680d2); */ /* scalar = 321156886679781445; */ - /* asm volatile("vmulhsu.vx v2, v4, %[A]" ::[A] "r"(scalar)); */ - /* VCMP_I64(12, v2, 0x012e0fe6705cf26d, 0x01f63e6c65840868, + /* asm volatile("vmulhsu.vx v8, v16, %[A]" ::[A] "r"(scalar)); */ + /* VCMP_I64(12, v8, 0x012e0fe6705cf26d, 0x01f63e6c65840868, * 0xfe4b3a837bcf749f, */ /* 0x01eefe6ad67c584e, 0x00225d3cec11ae29, 0x009d50942207fb0e, */ /* 0x00a6abfb9cc735df, 0xfe25d8c13270b026, 0xffc9cb59c445c91a, */ @@ -179,41 +179,41 @@ void TEST_CASE3(void) { }; void TEST_CASE4(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x21, 0x87, 0xa0, 0xa8, 0x6a, 0x6f, 0x6a, 0x6b, 0x74, 0x99, 0x37, + VSET(16, e8, m8); + VLOAD_8(v16, 0x21, 0x87, 0xa0, 0xa8, 0x6a, 0x6f, 0x6a, 0x6b, 0x74, 0x99, 0x37, 0xa4, 0xdc, 0x4f, 0xc3, 0x55); uint64_t scalar = 5; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhsu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I8(13, v2, 0, 0xfd, 0, 0xfe, 0, 0x02, 0, 
0x02, 0, 0xfd, 0, 0xfe, 0, 0x01, + VCLEAR(v8); + asm volatile("vmulhsu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v8, 0, 0xfd, 0, 0xfe, 0, 0x02, 0, 0x02, 0, 0xfd, 0, 0xfe, 0, 0x01, 0, 0x01); - VSET(16, e16, m2); - VLOAD_16(v4, 0x62b8, 0xc1e3, 0xb151, 0x08ce, 0x06c4, 0x1d2f, 0x7448, 0xfcd5, + VSET(16, e16, m8); + VLOAD_16(v16, 0x62b8, 0xc1e3, 0xb151, 0x08ce, 0x06c4, 0x1d2f, 0x7448, 0xfcd5, 0x398c, 0xb933, 0x436d, 0x748f, 0x58d9, 0x1cd6, 0x86db, 0x20f2); scalar = 816; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhsu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I16(14, v2, 0, 0xff3a, 0, 0x001c, 0, 0x005d, 0, 0xfff5, 0, 0xff1e, 0, + VCLEAR(v8); + asm volatile("vmulhsu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v8, 0, 0xff3a, 0, 0x001c, 0, 0x005d, 0, 0xfff5, 0, 0xff1e, 0, 0x0173, 0, 0x005b, 0, 0x0069); - VSET(16, e32, m2); - VLOAD_32(v4, 0xa7cac3f7, 0xb3894e05, 0xbac8e70b, 0x05479577, 0x19d8bf63, + VSET(16, e32, m8); + VLOAD_32(v16, 0xa7cac3f7, 0xb3894e05, 0xbac8e70b, 0x05479577, 0x19d8bf63, 0xb952c1ad, 0x9eaa74c0, 0x9e38d5c8, 0x51c77b3b, 0xa5f44521, 0x65042faa, 0x8e7e5345, 0x76ae481c, 0x0ab27b6f, 0xa388cf2b, 0x58218f7f); scalar = 7389998; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhsu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I32(15, v2, 0, 0xffde51c5, 0, 0x00025357, 0, 0xffe0de52, 0, 0xffd4ee51, + VCLEAR(v8); + asm volatile("vmulhsu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v8, 0, 0xffde51c5, 0, 0x00025357, 0, 0xffe0de52, 0, 0xffd4ee51, 0, 0xffd85637, 0, 0xffce00ba, 0, 0x0004b63d, 0, 0x0026d1e0); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0x43c5f863d4be9b51, 0x70b017b4c5d0d11e, 0x9e008a07f48796fe, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0x43c5f863d4be9b51, 0x70b017b4c5d0d11e, 0x9e008a07f48796fe, */ /* 0x6f0fa9a63860308a, 0x07b5d372a7be167c, 0x234be9472899553e, */ /* 0x25655d82cb668037, 0x959d6233470780ee, 0xf3d683308326232a, */ @@ -222,9 +222,9 @@ 
void TEST_CASE4(void) { /* 0x37411f40369680d2); */ /* scalar = 321156886679781445; */ /* VLOAD_8(v0, 0xAA, 0xAA); */ - /* VCLEAR(v2); */ - /* asm volatile("vmulhsu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); */ - /* VCMP_I64(16, v2, 0, 0x01f63e6c65840868, 0, 0x01eefe6ad67c584e, 0, */ + /* VCLEAR(v8); */ + /* asm volatile("vmulhsu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); */ + /* VCMP_I64(16, v8, 0, 0x01f63e6c65840868, 0, 0x01eefe6ad67c584e, 0, */ /* 0x009d50942207fb0e, 0, 0xfe25d8c13270b026, 0, * 0x02261e05ece3e474, 0, */ /* 0x004c5c6f8fd9129e, 0, 0x003a50e3baabab1e, 0, diff --git a/sw/riscvTests/isa/rv64uv/vmulhu.c b/sw/riscvTests/isa/rv64uv/vmulhu.c index 72864626..424bd372 100644 --- a/sw/riscvTests/isa/rv64uv/vmulhu.c +++ b/sw/riscvTests/isa/rv64uv/vmulhu.c @@ -8,56 +8,56 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x3b, 0xb2, 0xd1, 0x3e, 0x39, 0x2c, 0x08, 0xc5, 0xbf, 0x54, 0x6c, + VSET(16, e8, m8); + VLOAD_8(v16, 0x3b, 0xb2, 0xd1, 0x3e, 0x39, 0x2c, 0x08, 0xc5, 0xbf, 0x54, 0x6c, 0xde, 0x87, 0xcb, 0x7a, 0x83); - VLOAD_8(v6, 0x55, 0xde, 0xf4, 0x14, 0x3c, 0xed, 0x47, 0x1b, 0xca, 0x0b, 0xc4, + VLOAD_8(v24, 0x55, 0xde, 0xf4, 0x14, 0x3c, 0xed, 0x47, 0x1b, 0xca, 0x0b, 0xc4, 0xe3, 0xd8, 0x8f, 0xa0, 0x0d); - asm volatile("vmulhu.vv v2, v4, v6"); - VCMP_U8(1, v2, 0x13, 0x9a, 0xc7, 0x04, 0x0d, 0x28, 0x02, 0x14, 0x96, 0x03, + asm volatile("vmulhu.vv v8, v16, v24"); + VCMP_U8(1, v8, 0x13, 0x9a, 0xc7, 0x04, 0x0d, 0x28, 0x02, 0x14, 0x96, 0x03, 0x52, 0xc4, 0x71, 0x71, 0x4c, 0x06); - VSET(16, e16, m2); - VLOAD_16(v4, 0xe6e1, 0x02a1, 0x2911, 0xe3c3, 0xe141, 0x69e6, 0x4133, 0xf783, + VSET(16, e16, m8); + VLOAD_16(v16, 0xe6e1, 0x02a1, 0x2911, 0xe3c3, 0xe141, 0x69e6, 0x4133, 0xf783, 0x91ef, 0x1897, 0xf0bb, 0x0e07, 0xb8eb, 0x3f5a, 0x9f5d, 0xa626); - VLOAD_16(v6, 0x4fcb, 0x8a38, 0xbaa0, 0x8a97, 0xe409, 0x558e, 0x582b, 0x62b1, + VLOAD_16(v24, 0x4fcb, 0x8a38, 0xbaa0, 0x8a97, 0xe409, 0x558e, 0x582b, 0x62b1, 0xf7bb, 0x181f, 0x2b5a, 0xdf85, 
0x44f3, 0x27fe, 0x8412, 0xcda0); - asm volatile("vmulhu.vv v2, v4, v6"); - VCMP_U16(2, v2, 0x47f6, 0x016b, 0x1df0, 0x7b4d, 0xc8a5, 0x2364, 0x1674, + asm volatile("vmulhu.vv v8, v16, v24"); + VCMP_U16(2, v8, 0x47f6, 0x016b, 0x1df0, 0x7b4d, 0xc8a5, 0x2364, 0x1674, 0x5f6b, 0x8d38, 0x0251, 0x28c4, 0x0c3f, 0x31cd, 0x09e5, 0x5237, 0x8574); - VSET(16, e32, m2); - VLOAD_32(v4, 0xf129e694, 0x8dfc72a3, 0xc9911598, 0xd20083ec, 0xe7f36604, + VSET(16, e32, m8); + VLOAD_32(v16, 0xf129e694, 0x8dfc72a3, 0xc9911598, 0xd20083ec, 0xe7f36604, 0x1ab510aa, 0xc290b86c, 0xa7e9a02e, 0x5c3f3bb3, 0x70a3dfae, 0x16baad22, 0x21758cfb, 0x09033e60, 0x8b31075e, 0x6439b7bf, 0xead33cf0); - VLOAD_32(v6, 0x3f2ef56d, 0x12649032, 0x6c0a880b, 0x7cb2477a, 0x41525037, + VLOAD_32(v24, 0x3f2ef56d, 0x12649032, 0x6c0a880b, 0x7cb2477a, 0x41525037, 0x02a39cfa, 0xf7595181, 0x0c230035, 0x86cf9ea9, 0x0f66ddd3, 0x13351370, 0xbe489ce5, 0x4127f488, 0xe6b5e1b3, 0xc6918270, 0xccc8626a); - asm volatile("vmulhu.vv v2, v4, v6"); - VCMP_U32(3, v2, 0x3b858c79, 0x0a3386a5, 0x55117fe4, 0x664a7ee4, 0x3b2f618b, + asm volatile("vmulhu.vv v8, v16, v24"); + VCMP_U32(3, v8, 0x3b858c79, 0x0a3386a5, 0x55117fe4, 0x664a7ee4, 0x3b2f618b, 0x00467bcb, 0xbbfd8432, 0x07f5e895, 0x3093e98c, 0x06c6dd00, 0x01b49139, 0x18debc33, 0x024b3af0, 0x7d70f100, 0x4dbd9bdf, 0xbbd823dc); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x4aee1e4f9775ff4b, 0x045a804c3d3e7dc0, 0x1a2f38060efcd306, + VSET(16, e64, m8); + VLOAD_64(v16, 0x4aee1e4f9775ff4b, 0x045a804c3d3e7dc0, 0x1a2f38060efcd306, 0x34e09e5173ee6301, 0xd1f03c2e38769683, 0x1b1f454816d4ed10, 0xed4a4f231da4abb3, 0xc87b025e6da277dc, 0x8da43ddf6feb6aae, 0x7dcf10ced634db74, 0x736fd9583bc2de91, 0xa66de0036d350cbc, 0x40bf5ec7afca9ec2, 0x5bb552a7b134ba79, 0x6ae5d02d7c121603, 0x8a7621ad8d6f104a); - VLOAD_64(v6, 0x8f2c0088bea2739e, 0x4ed8c54dad60d3cb, 0x51e715e5cf56b2e6, + VLOAD_64(v24, 0x8f2c0088bea2739e, 0x4ed8c54dad60d3cb, 0x51e715e5cf56b2e6, 0xa1b1262536ea3c57, 0x67f334468e5cde4f, 0x8ae5618164bd63fd, 
0x2f8be93c1d7807c3, 0x0444a9f4ccff2a2c, 0x6cac5e35bf847d59, 0x1d92c5117b87a392, 0x124597d21d757a4e, 0x4ec5a9fb5b8a6591, 0xb5b4189dd6080734, 0xf75ddacea0effff6, 0x5c3cb19fbc1c7580, 0xff93a562f06d3641); - asm volatile("vmulhu.vv v2, v4, v6"); - VCMP_U64(4, v2, 0x29e7e403b1955330, 0x015742ce71e2c757, 0x08609392d9402e03, + asm volatile("vmulhu.vv v8, v16, v24"); + VCMP_U64(4, v8, 0x29e7e403b1955330, 0x015742ce71e2c757, 0x08609392d9402e03, 0x2165dabfb788d03d, 0x553f1a1e61409141, 0x0eb728a66479b5fb, 0x2c125410c5448322, 0x0357b1cf05241ad9, 0x3c20a893e10635bb, 0x0e8895d7f39e953c, 0x083d3ee38137c9b0, 0x3335fb506009220b, @@ -67,99 +67,99 @@ void TEST_CASE1(void) { }; void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x3b, 0xb2, 0xd1, 0x3e, 0x39, 0x2c, 0x08, 0xc5, 0xbf, 0x54, 0x6c, + VSET(16, e8, m8); + VLOAD_8(v16, 0x3b, 0xb2, 0xd1, 0x3e, 0x39, 0x2c, 0x08, 0xc5, 0xbf, 0x54, 0x6c, 0xde, 0x87, 0xcb, 0x7a, 0x83); - VLOAD_8(v6, 0x55, 0xde, 0xf4, 0x14, 0x3c, 0xed, 0x47, 0x1b, 0xca, 0x0b, 0xc4, + VLOAD_8(v24, 0x55, 0xde, 0xf4, 0x14, 0x3c, 0xed, 0x47, 0x1b, 0xca, 0x0b, 0xc4, 0xe3, 0xd8, 0x8f, 0xa0, 0x0d); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhu.vv v2, v4, v6, v0.t"); - VCMP_U8(5, v2, 0, 0x9a, 0, 0x04, 0, 0x28, 0, 0x14, 0, 0x03, 0, 0xc4, 0, 0x71, + VCLEAR(v8); + asm volatile("vmulhu.vv v8, v16, v24, v0.t"); + VCMP_U8(5, v8, 0, 0x9a, 0, 0x04, 0, 0x28, 0, 0x14, 0, 0x03, 0, 0xc4, 0, 0x71, 0, 0x06); - VSET(16, e16, m2); - VLOAD_16(v4, 0xe6e1, 0x02a1, 0x2911, 0xe3c3, 0xe141, 0x69e6, 0x4133, 0xf783, + VSET(16, e16, m8); + VLOAD_16(v16, 0xe6e1, 0x02a1, 0x2911, 0xe3c3, 0xe141, 0x69e6, 0x4133, 0xf783, 0x91ef, 0x1897, 0xf0bb, 0x0e07, 0xb8eb, 0x3f5a, 0x9f5d, 0xa626); - VLOAD_16(v6, 0x4fcb, 0x8a38, 0xbaa0, 0x8a97, 0xe409, 0x558e, 0x582b, 0x62b1, + VLOAD_16(v24, 0x4fcb, 0x8a38, 0xbaa0, 0x8a97, 0xe409, 0x558e, 0x582b, 0x62b1, 0xf7bb, 0x181f, 0x2b5a, 0xdf85, 0x44f3, 0x27fe, 0x8412, 0xcda0); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhu.vv v2, v4, v6, 
v0.t"); - VCMP_U16(6, v2, 0, 0x016b, 0, 0x7b4d, 0, 0x2364, 0, 0x5f6b, 0, 0x0251, 0, + VCLEAR(v8); + asm volatile("vmulhu.vv v8, v16, v24, v0.t"); + VCMP_U16(6, v8, 0, 0x016b, 0, 0x7b4d, 0, 0x2364, 0, 0x5f6b, 0, 0x0251, 0, 0x0c3f, 0, 0x09e5, 0, 0x8574); - VSET(16, e32, m2); - VLOAD_32(v4, 0xf129e694, 0x8dfc72a3, 0xc9911598, 0xd20083ec, 0xe7f36604, + VSET(16, e32, m8); + VLOAD_32(v16, 0xf129e694, 0x8dfc72a3, 0xc9911598, 0xd20083ec, 0xe7f36604, 0x1ab510aa, 0xc290b86c, 0xa7e9a02e, 0x5c3f3bb3, 0x70a3dfae, 0x16baad22, 0x21758cfb, 0x09033e60, 0x8b31075e, 0x6439b7bf, 0xead33cf0); - VLOAD_32(v6, 0x3f2ef56d, 0x12649032, 0x6c0a880b, 0x7cb2477a, 0x41525037, + VLOAD_32(v24, 0x3f2ef56d, 0x12649032, 0x6c0a880b, 0x7cb2477a, 0x41525037, 0x02a39cfa, 0xf7595181, 0x0c230035, 0x86cf9ea9, 0x0f66ddd3, 0x13351370, 0xbe489ce5, 0x4127f488, 0xe6b5e1b3, 0xc6918270, 0xccc8626a); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhu.vv v2, v4, v6, v0.t"); - VCMP_U32(7, v2, 0, 0x0a3386a5, 0, 0x664a7ee4, 0, 0x00467bcb, 0, 0x07f5e895, 0, + VCLEAR(v8); + asm volatile("vmulhu.vv v8, v16, v24, v0.t"); + VCMP_U32(7, v8, 0, 0x0a3386a5, 0, 0x664a7ee4, 0, 0x00467bcb, 0, 0x07f5e895, 0, 0x06c6dd00, 0, 0x18debc33, 0, 0x7d70f100, 0, 0xbbd823dc); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x4aee1e4f9775ff4b, 0x045a804c3d3e7dc0, 0x1a2f38060efcd306, + VSET(16, e64, m8); + VLOAD_64(v16, 0x4aee1e4f9775ff4b, 0x045a804c3d3e7dc0, 0x1a2f38060efcd306, 0x34e09e5173ee6301, 0xd1f03c2e38769683, 0x1b1f454816d4ed10, 0xed4a4f231da4abb3, 0xc87b025e6da277dc, 0x8da43ddf6feb6aae, 0x7dcf10ced634db74, 0x736fd9583bc2de91, 0xa66de0036d350cbc, 0x40bf5ec7afca9ec2, 0x5bb552a7b134ba79, 0x6ae5d02d7c121603, 0x8a7621ad8d6f104a); - VLOAD_64(v6, 0x8f2c0088bea2739e, 0x4ed8c54dad60d3cb, 0x51e715e5cf56b2e6, + VLOAD_64(v24, 0x8f2c0088bea2739e, 0x4ed8c54dad60d3cb, 0x51e715e5cf56b2e6, 0xa1b1262536ea3c57, 0x67f334468e5cde4f, 0x8ae5618164bd63fd, 0x2f8be93c1d7807c3, 0x0444a9f4ccff2a2c, 0x6cac5e35bf847d59, 0x1d92c5117b87a392, 
0x124597d21d757a4e, 0x4ec5a9fb5b8a6591, 0xb5b4189dd6080734, 0xf75ddacea0effff6, 0x5c3cb19fbc1c7580, 0xff93a562f06d3641); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhu.vv v2, v4, v6, v0.t"); - VCMP_U64(8, v2, 0, 0x015742ce71e2c757, 0, 0x2165dabfb788d03d, 0, + VCLEAR(v8); + asm volatile("vmulhu.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0, 0x015742ce71e2c757, 0, 0x2165dabfb788d03d, 0, 0x0eb728a66479b5fb, 0, 0x0357b1cf05241ad9, 0, 0x0e8895d7f39e953c, 0, 0x3335fb506009220b, 0, 0x589d920140d7dd8c, 0, 0x8a3b86d4dd8169cf); #endif }; void TEST_CASE3(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x5c, 0x3c, 0x86, 0x65, 0x41, 0x38, 0x20, 0x9e, 0x88, 0x28, 0x19, + VSET(16, e8, m8); + VLOAD_8(v16, 0x5c, 0x3c, 0x86, 0x65, 0x41, 0x38, 0x20, 0x9e, 0x88, 0x28, 0x19, 0xc2, 0x5f, 0xa3, 0x7c, 0xca); uint64_t scalar = 5; - asm volatile("vmulhu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U8(9, v2, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x00, 0x03, 0x02, 0x00, + asm volatile("vmulhu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v8, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x00, 0x03, 0x02, 0x00, 0x00, 0x03, 0x01, 0x03, 0x02, 0x03); - VSET(16, e16, m2); - VLOAD_16(v4, 0x4e7f, 0xfe41, 0x1346, 0x6c1a, 0x38ce, 0x5fa7, 0x5e39, 0xf7a2, + VSET(16, e16, m8); + VLOAD_16(v16, 0x4e7f, 0xfe41, 0x1346, 0x6c1a, 0x38ce, 0x5fa7, 0x5e39, 0xf7a2, 0x61aa, 0x0a3a, 0xfe0a, 0x30f1, 0x5852, 0xbb6b, 0x42f7, 0x58d9); scalar = 816; - asm volatile("vmulhu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U16(10, v2, 0x00fa, 0x032a, 0x003d, 0x0158, 0x00b5, 0x0130, 0x012c, + asm volatile("vmulhu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v8, 0x00fa, 0x032a, 0x003d, 0x0158, 0x00b5, 0x0130, 0x012c, 0x0315, 0x0137, 0x0020, 0x0329, 0x009c, 0x0119, 0x0255, 0x00d5, 0x011b); - VSET(16, e32, m2); - VLOAD_32(v4, 0x15c6221c, 0x0d704417, 0x3d90ffd1, 0x4e168273, 0xc3bd5e20, + VSET(16, e32, m8); + VLOAD_32(v16, 0x15c6221c, 0x0d704417, 0x3d90ffd1, 0x4e168273, 0xc3bd5e20, 0xd75f62df, 0x3002ed42, 0x74269b1d, 
0xc77bc0dd, 0x36f2552d, 0x71b5888c, 0x02eb291b, 0x790cb3b1, 0xa3cf03c4, 0x8f90730a, 0xf41b555a); scalar = 7389998; - asm volatile("vmulhu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U32(11, v2, 0x00099748, 0x0005eb5c, 0x001b1e60, 0x00226562, 0x00563815, + asm volatile("vmulhu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0x00099748, 0x0005eb5c, 0x001b1e60, 0x00226562, 0x00563815, 0x005eddef, 0x001525e2, 0x00332972, 0x0057de3b, 0x001833e9, 0x0032161d, 0x0001491b, 0x003551d9, 0x00482775, 0x003f3ca7, 0x006b8612); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0x96304201a90be11f, 0x11654d4226322e4b, 0xe16e5cf2c1183b63, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0x96304201a90be11f, 0x11654d4226322e4b, 0xe16e5cf2c1183b63, */ /* 0x447b5f4710764817, 0xb62589a3d309672c, 0x5ddec2e6716fd0d3, */ /* 0xf31034a096a6d0fa, 0x9cb4dca46ce577f7, 0x30cf2e2dc6773d82, */ @@ -167,8 +167,8 @@ void TEST_CASE3(void) { /* 0x0c0c54042a20ddc8, 0xf309bda968a3a583, 0x550697570a1e9645, */ /* 0x5beaf5933973231f); */ /* scalar = 321156886679781445; */ - /* asm volatile("vmulhu.vx v2, v4, %[A]" ::[A] "r"(scalar)); */ - /* VCMP_U64(12, v2, 0x029d61da2f470da8, 0x004d882170361dd2, + /* asm volatile("vmulhu.vx v8, v16, %[A]" ::[A] "r"(scalar)); */ + /* VCMP_U64(12, v8, 0x029d61da2f470da8, 0x004d882170361dd2, * 0x03ecbc09716942cd, */ /* 0x013138661b0ea1a1, 0x032bd162449d3f20, 0x01a25fd52874e6a2, */ /* 0x043b51fe85cf352c, 0x02ba6ebb77802a7c, 0x00d98a5bba81dc57, */ @@ -179,41 +179,41 @@ void TEST_CASE3(void) { }; void TEST_CASE4(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x5c, 0x3c, 0x86, 0x65, 0x41, 0x38, 0x20, 0x9e, 0x88, 0x28, 0x19, + VSET(16, e8, m8); + VLOAD_8(v16, 0x5c, 0x3c, 0x86, 0x65, 0x41, 0x38, 0x20, 0x9e, 0x88, 0x28, 0x19, 0xc2, 0x5f, 0xa3, 0x7c, 0xca); uint64_t scalar = 5; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(13, v2, 0, 0x01, 0, 0x01, 0, 0x01, 0, 0x03, 0, 0x00, 0, 0x03, 0, 0x03, + 
VCLEAR(v8); + asm volatile("vmulhu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v8, 0, 0x01, 0, 0x01, 0, 0x01, 0, 0x03, 0, 0x00, 0, 0x03, 0, 0x03, 0, 0x03); - VSET(16, e16, m2); - VLOAD_16(v4, 0x4e7f, 0xfe41, 0x1346, 0x6c1a, 0x38ce, 0x5fa7, 0x5e39, 0xf7a2, + VSET(16, e16, m8); + VLOAD_16(v16, 0x4e7f, 0xfe41, 0x1346, 0x6c1a, 0x38ce, 0x5fa7, 0x5e39, 0xf7a2, 0x61aa, 0x0a3a, 0xfe0a, 0x30f1, 0x5852, 0xbb6b, 0x42f7, 0x58d9); scalar = 816; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U16(14, v2, 0, 0x032a, 0, 0x0158, 0, 0x0130, 0, 0x0315, 0, 0x0020, 0, + VCLEAR(v8); + asm volatile("vmulhu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v8, 0, 0x032a, 0, 0x0158, 0, 0x0130, 0, 0x0315, 0, 0x0020, 0, 0x009c, 0, 0x0255, 0, 0x011b); - VSET(16, e32, m2); - VLOAD_32(v4, 0x15c6221c, 0x0d704417, 0x3d90ffd1, 0x4e168273, 0xc3bd5e20, + VSET(16, e32, m8); + VLOAD_32(v16, 0x15c6221c, 0x0d704417, 0x3d90ffd1, 0x4e168273, 0xc3bd5e20, 0xd75f62df, 0x3002ed42, 0x74269b1d, 0xc77bc0dd, 0x36f2552d, 0x71b5888c, 0x02eb291b, 0x790cb3b1, 0xa3cf03c4, 0x8f90730a, 0xf41b555a); scalar = 7389998; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vmulhu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(15, v2, 0, 0x0005eb5c, 0, 0x00226562, 0, 0x005eddef, 0, 0x00332972, + VCLEAR(v8); + asm volatile("vmulhu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0, 0x0005eb5c, 0, 0x00226562, 0, 0x005eddef, 0, 0x00332972, 0, 0x001833e9, 0, 0x0001491b, 0, 0x00482775, 0, 0x006b8612); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0x96304201a90be11f, 0x11654d4226322e4b, 0xe16e5cf2c1183b63, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0x96304201a90be11f, 0x11654d4226322e4b, 0xe16e5cf2c1183b63, */ /* 0x447b5f4710764817, 0xb62589a3d309672c, 0x5ddec2e6716fd0d3, */ /* 0xf31034a096a6d0fa, 0x9cb4dca46ce577f7, 0x30cf2e2dc6773d82, */ @@ -222,9 +222,9 @@ void TEST_CASE4(void) { /* 
0x5beaf5933973231f); */ /* scalar = 321156886679781445; */ /* VLOAD_8(v0, 0xAA, 0xAA); */ - /* VCLEAR(v2); */ - /* asm volatile("vmulhu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); */ - /* VCMP_U64(16, v2, 0, 0x004d882170361dd2, 0, 0x013138661b0ea1a1, 0, */ + /* VCLEAR(v8); */ + /* asm volatile("vmulhu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); */ + /* VCMP_U64(16, v8, 0, 0x004d882170361dd2, 0, 0x013138661b0ea1a1, 0, */ /* 0x01a25fd52874e6a2, 0, 0x02ba6ebb77802a7c, 0, * 0x01b10a47f99f8c44, 0, */ /* 0x010e20461059ad6b, 0, 0x043b352e6dc32a00, 0, diff --git a/sw/riscvTests/isa/rv64uv/vmv.c b/sw/riscvTests/isa/rv64uv/vmv.c index 63fe2337..094767c0 100644 --- a/sw/riscvTests/isa/rv64uv/vmv.c +++ b/sw/riscvTests/isa/rv64uv/vmv.c @@ -8,60 +8,60 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(16, e8, m2); - VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vmv.v.v v6, v2"); - VCMP_U8(1, v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e8, m8); + VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.v v16, v8"); + VCMP_U8(1, v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VSET(16, e16, m2); - VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_16(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); - asm volatile("vmv.v.v v6, v2"); - VCMP_U16(2, v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e16, m8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v24, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vmv.v.v v16, v8"); + VCMP_U16(2, v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VSET(16, e32, m2); - VLOAD_32(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_32(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); - asm volatile("vmv.v.v v6, v2"); - VCMP_U32(3, v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e32, m8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v24, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vmv.v.v v16, v8"); + VCMP_U32(3, v16, 1, 2, 3, 4,
5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_64(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); - asm volatile("vmv.v.v v6, v2"); - VCMP_U64(4, v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v24, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vmv.v.v v16, v8"); + VCMP_U64(4, v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #endif } void TEST_CASE2() { const uint32_t scalar = 0xdeadbeef; - VSET(16, e8, m2); - VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vmv.v.x v6, %[A]" ::[A] "r"(scalar)); - VCMP_U8(5, v6, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VSET(16, e8, m8); + VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v16, %[A]" ::[A] "r"(scalar)); + VCMP_U8(5, v16, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - VSET(16, e16, m2); - VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vmv.v.x v6, %[A]" ::[A] "r"(scalar)); - VCMP_U16(6, v6, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VSET(16, e16, m8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v16, %[A]" ::[A] "r"(scalar)); + VCMP_U16(6, v16, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - VSET(16, e32, m2); - VLOAD_32(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vmv.v.x v6, %[A]" ::[A] "r"(scalar)); - VCMP_U32(7, v6, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VSET(16, e32, m8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v16, %[A]" ::[A] "r"(scalar)); + VCMP_U32(7, v16, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vmv.v.x v6, %[A]" ::[A] "r"(scalar)); - VCMP_U64(8, v6, 0xffffffffdeadbeef, 0xffffffffdeadbeef, 0xffffffffdeadbeef, + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(8, v16, 0xffffffffdeadbeef, 0xffffffffdeadbeef, 0xffffffffdeadbeef, 0xffffffffdeadbeef, 0xffffffffdeadbeef, 0xffffffffdeadbeef, 0xffffffffdeadbeef, 0xffffffffdeadbeef, 0xffffffffdeadbeef, 0xffffffffdeadbeef, 0xffffffffdeadbeef, 0xffffffffdeadbeef, @@ -71,29 +71,29 @@ void TEST_CASE2() { } void TEST_CASE3() { - VSET(16, e8, m2); - VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vmv.v.i v6, -9"); - VCMP_U8(9, v6, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, + VSET(16, e8, m8); + VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v16, -9"); + VCMP_U8(9, v16, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9); - VSET(16, e16, m2); - VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vmv.v.i v6, -10"); - VCMP_U16(10, v6, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, + VSET(16, e16, m8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v16, -10"); + VCMP_U16(10, v16, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10); - VSET(16, e32, m2); - VLOAD_32(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vmv.v.i v6, -11"); - VCMP_U32(11, v6, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, + VSET(16, e32, m8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v16, -11"); + VCMP_U32(11, v16, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, 
-11); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vmv.v.i v6, -12"); - VCMP_U64(12, v6, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v16, -12"); + VCMP_U64(12, v16, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vnmsac.c b/sw/riscvTests/isa/rv64uv/vnmsac.c index d6e30e59..be9a4c41 100644 --- a/sw/riscvTests/isa/rv64uv/vnmsac.c +++ b/sw/riscvTests/isa/rv64uv/vnmsac.c @@ -8,70 +8,70 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(16, e8, m2); - VLOAD_8(v6, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, + VSET(16, e8, m8); + VLOAD_8(v24, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, 0x59, 0x2b, 0xe3, 0x33, 0xb9); - VLOAD_8(v4, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + VLOAD_8(v16, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, 0x18, 0xe6, 0x44, 0x57, 0xaf); - VLOAD_8(v2, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + VLOAD_8(v8, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, 0x4e, 0x2e, 0x7d, 0x13, 0x5a); - asm volatile("vnmsac.vv v2, v4, v6"); - VCMP_U8(1, v2, 0x4e, 0xb4, 0x9c, 0x04, 0x86, 0x53, 0xdb, 0x87, 0x81, 0xe2, + asm volatile("vnmsac.vv v8, v16, v24"); + VCMP_U8(1, v8, 0x4e, 0xb4, 0x9c, 0x04, 0x86, 0x53, 0xdb, 0x87, 0x81, 0xe2, 0x65, 0xf6, 0x8c, 0x31, 0xbe, 0xe3); - VSET(16, e16, m2); - VLOAD_16(v6, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + VSET(16, e16, m8); + VLOAD_16(v24, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); - VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + VLOAD_16(v16, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 
0xe1b0, 0x1769, 0x6759, 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); - VLOAD_16(v2, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + VLOAD_16(v8, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); - asm volatile("vnmsac.vv v2, v4, v6"); - VCMP_U16(2, v2, 0xd8a3, 0xdadf, 0x2ba2, 0x5c17, 0x5c48, 0x4091, 0x7106, + asm volatile("vnmsac.vv v8, v16, v24"); + VCMP_U16(2, v8, 0xd8a3, 0xdadf, 0x2ba2, 0x5c17, 0x5c48, 0x4091, 0x7106, 0x7c52, 0x8088, 0xca83, 0x937f, 0x4600, 0xaba7, 0x87f1, 0xc583, 0x5abc); - VSET(16, e32, m2); - VLOAD_32(v6, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + VSET(16, e32, m8); + VLOAD_32(v24, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, 0xdcbcb1d7); - VLOAD_32(v4, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + VLOAD_32(v16, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, 0x4224aa5e); - VLOAD_32(v2, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, + VLOAD_32(v8, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, 0x463438b4); - asm volatile("vnmsac.vv v2, v4, v6"); - VCMP_U32(3, v2, 0x0e7d6e68, 0x92cacfc8, 0x726ad6fd, 0x6ff2f953, 0xe1d4d1c6, + asm volatile("vnmsac.vv v8, v16, v24"); + VCMP_U32(3, v8, 0x0e7d6e68, 0x92cacfc8, 0x726ad6fd, 0x6ff2f953, 0xe1d4d1c6, 0x466feefe, 0xa6512191, 0xdf6d912b, 0x7c898c04, 0x0a7ec6a5, 0x2121fa29, 0x91713c70, 0x60b0bd0d, 0xb3a6341a, 0x82041c42, 0x7a9625c2); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + 
VSET(16, e64, m8); + VLOAD_64(v24, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 0xba0a5bf3b2b528d1, 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, 0xbe98c841d80bd077); - VLOAD_64(v4, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, 0xfd4b1328b8f7773a); - VLOAD_64(v2, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + VLOAD_64(v8, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, 0xf12edb410132b013, 0xc475df4b52276fe9, 0x069e283bf74ca195, 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, 0x30727b2d1bccd396); - asm volatile("vnmsac.vv v2, v4, v6"); - VCMP_U64(4, v2, 0x231a763b4759b083, 0x68b16397da83d642, 0x7081592414ce4cdc, + asm volatile("vnmsac.vv v8, v16, v24"); + VCMP_U64(4, v8, 0x231a763b4759b083, 0x68b16397da83d642, 0x7081592414ce4cdc, 0x084dc189ec3eea39, 0x72cb55f70cac6a8e, 0xc713321c491334ae, 0xa21580bb2ab1e821, 0x3d84da5e7dab4cd1, 0x735a758175effc15, 0x91e8df24708208d6, 0x5133f0cd25a5d6c6, 0x2f1a889653a2c559, @@ -81,74 +81,74 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(16, e8, m2); - VLOAD_8(v6, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, + VSET(16, e8, m8); + VLOAD_8(v24, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, 0x59, 0x2b, 0xe3, 0x33, 0xb9); - VLOAD_8(v4, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + VLOAD_8(v16, 0xc5, 0x4d, 
0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, 0x18, 0xe6, 0x44, 0x57, 0xaf); - VLOAD_8(v2, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + VLOAD_8(v8, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, 0x4e, 0x2e, 0x7d, 0x13, 0x5a); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsac.vv v2, v4, v6, v0.t"); - VCMP_U8(5, v2, 0x53, 0xb4, 0x2c, 0x04, 0x4a, 0x53, 0xa3, 0x87, 0x7e, 0xe2, + asm volatile("vnmsac.vv v8, v16, v24, v0.t"); + VCMP_U8(5, v8, 0x53, 0xb4, 0x2c, 0x04, 0x4a, 0x53, 0xa3, 0x87, 0x7e, 0xe2, 0x4c, 0xf6, 0x2e, 0x31, 0x13, 0xe3); - VSET(16, e16, m2); - VLOAD_16(v6, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + VSET(16, e16, m8); + VLOAD_16(v24, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); - VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + VLOAD_16(v16, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); - VLOAD_16(v2, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + VLOAD_16(v8, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsac.vv v2, v4, v6, v0.t"); - VCMP_U16(6, v2, 0xb917, 0xdadf, 0x0f0c, 0x5c17, 0xe0b6, 0x4091, 0x5c69, + asm volatile("vnmsac.vv v8, v16, v24, v0.t"); + VCMP_U16(6, v8, 0xb917, 0xdadf, 0x0f0c, 0x5c17, 0xe0b6, 0x4091, 0x5c69, 0x7c52, 0x3588, 0xca83, 0x65d9, 0x4600, 0xfbff, 0x87f1, 0x34a4, 0x5abc); - VSET(16, e32, m2); - VLOAD_32(v6, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + VSET(16, e32, m8); + VLOAD_32(v24, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, 0xdcbcb1d7); - VLOAD_32(v4, 0x885c8baa, 
0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + VLOAD_32(v16, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, 0x4224aa5e); - VLOAD_32(v2, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, + VLOAD_32(v8, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, 0x463438b4); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsac.vv v2, v4, v6, v0.t"); - VCMP_U32(7, v2, 0x3a582428, 0x92cacfc8, 0xb445799b, 0x6ff2f953, 0x51a7fe9e, + asm volatile("vnmsac.vv v8, v16, v24, v0.t"); + VCMP_U32(7, v8, 0x3a582428, 0x92cacfc8, 0xb445799b, 0x6ff2f953, 0x51a7fe9e, 0x466feefe, 0xfb0a701b, 0xdf6d912b, 0xd10c9064, 0x0a7ec6a5, 0xbb1779fd, 0x91713c70, 0xd04c0f6c, 0xb3a6341a, 0x90a09dc8, 0x7a9625c2); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + VSET(16, e64, m8); + VLOAD_64(v24, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 0xba0a5bf3b2b528d1, 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, 0xbe98c841d80bd077); - VLOAD_64(v4, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, 0xfd4b1328b8f7773a); - VLOAD_64(v2, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + VLOAD_64(v8, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, 0xf12edb410132b013, 
0xc475df4b52276fe9, 0x069e283bf74ca195, 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, 0x30727b2d1bccd396); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsac.vv v2, v4, v6, v0.t"); - VCMP_U64(8, v2, 0xc265b2d19ad92bbb, 0x68b16397da83d642, 0xe490f5981f64a313, + asm volatile("vnmsac.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0xc265b2d19ad92bbb, 0x68b16397da83d642, 0xe490f5981f64a313, 0x084dc189ec3eea39, 0xc475df4b52276fe9, 0xc713321c491334ae, 0x8dd5189f3a66f166, 0x3d84da5e7dab4cd1, 0x7b74d167bd1b22fd, 0x91e8df24708208d6, 0x0ab0c3f0f7ddeb66, 0x2f1a889653a2c559, @@ -158,62 +158,62 @@ void TEST_CASE2() { } void TEST_CASE3() { - VSET(16, e8, m2); + VSET(16, e8, m8); int64_t scalar = 5; - VLOAD_8(v6, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + VLOAD_8(v24, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, 0x90, 0xa2, 0x67, 0x3d, 0xf5); - VLOAD_8(v2, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + VLOAD_8(v8, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); - asm volatile("vnmsac.vx v2, %[A], v6" ::[A] "r"(scalar)); - VCMP_U8(9, v2, 0x24, 0x10, 0xdd, 0xab, 0x83, 0xf3, 0xf8, 0x13, 0x0d, 0xcf, + asm volatile("vnmsac.vx v8, %[A], v24" ::[A] "r"(scalar)); + VCMP_U8(9, v8, 0x24, 0x10, 0xdd, 0xab, 0x83, 0xf3, 0xf8, 0x13, 0x0d, 0xcf, 0x84, 0x4f, 0xb6, 0xda, 0xee, 0x0e); - VSET(16, e16, m2); + VSET(16, e16, m8); scalar = -5383; - VLOAD_16(v6, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + VLOAD_16(v24, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); - VLOAD_16(v2, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + VLOAD_16(v8, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 
0xae8d, 0x79a5, 0x519d); - asm volatile("vnmsac.vx v2, %[A], v6" ::[A] "r"(scalar)); - VCMP_U16(10, v2, 0x5e06, 0xd169, 0xecdc, 0xfb44, 0x7912, 0x3f75, 0x4c1d, + asm volatile("vnmsac.vx v8, %[A], v24" ::[A] "r"(scalar)); + VCMP_U16(10, v8, 0x5e06, 0xd169, 0xecdc, 0xfb44, 0x7912, 0x3f75, 0x4c1d, 0x86fd, 0x4cc0, 0xd0d7, 0x4c50, 0x5a64, 0xa3c7, 0x60c3, 0xe34f, 0x9a3c); - VSET(16, e32, m2); + VSET(16, e32, m8); scalar = 6474219; - VLOAD_32(v6, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + VLOAD_32(v24, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, 0x6a8bae19); - VLOAD_32(v2, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, + VLOAD_32(v8, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 0x7be439e4, 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, 0xfcce4b64); - asm volatile("vnmsac.vx v2, %[A], v6" ::[A] "r"(scalar)); - VCMP_U32(11, v2, 0x6b20ab9a, 0x9dfe886c, 0xf983030d, 0xddd6bece, 0xf2a7f2ad, + asm volatile("vnmsac.vx v8, %[A], v24" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0x6b20ab9a, 0x9dfe886c, 0xf983030d, 0xddd6bece, 0xf2a7f2ad, 0x2f0a66ed, 0x59240bcf, 0x915f2166, 0x8c4ace02, 0x802d8981, 0xf9fc8b37, 0xa3f70986, 0xced98739, 0xa4fbf240, 0x4249945d, 0xdd51d971); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ + /* VSET(16, e64, m8); */ /* scalar = -598189234597999223; */ - /* VLOAD_64(v6, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + /* VLOAD_64(v24, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, */ /* 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 0x49598be14c5bc1cd, */ /* 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, */ /* 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, */ /* 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, */ /* 0x6e8c6ae4d14bd1a5); */ - /* VLOAD_64(v2, 
0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + /* VLOAD_64(v8, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, */ /* 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, */ /* 0x358706b57ce6d6c7, 0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, */ /* 0x3c7e5a675c779870, 0x5d2ea63ac910e42e, 0xb3e832dbe2332203, */ /* 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, */ /* 0x5fbc2f647d6c1153); */ - /* asm volatile("vnmsac.vx v2, %[A], v6" ::[A] "r"(scalar)); */ - /* VCMP_U64(12, v2, 0x60dd7609c833e03a, 0x79abe3a30a816ca0, + /* asm volatile("vnmsac.vx v8, %[A], v24" ::[A] "r"(scalar)); */ + /* VCMP_U64(12, v8, 0x60dd7609c833e03a, 0x79abe3a30a816ca0, * 0x983ca4c97f204385, */ /* 0x59a9303f04932768, 0x59c29659b4d00149, 0x59a15b1bb66f16c2, */ /* 0x37b0445a8ebaa7d1, 0x65e0c3ab56fa1f0c, 0x94a740971b1d6eda, */ @@ -224,57 +224,57 @@ void TEST_CASE3() { } void TEST_CASE4() { - VSET(16, e8, m2); + VSET(16, e8, m8); int64_t scalar = 5; - VLOAD_8(v6, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + VLOAD_8(v24, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, 0x90, 0xa2, 0x67, 0x3d, 0xf5); - VLOAD_8(v2, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + VLOAD_8(v8, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsac.vx v2, %[A], v6, v0.t" ::[A] "r"(scalar)); - VCMP_U8(13, v2, 0xfa, 0x10, 0x2a, 0xab, 0xe7, 0xf3, 0x8c, 0x13, 0x40, 0xcf, + asm volatile("vnmsac.vx v8, %[A], v24, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v8, 0xfa, 0x10, 0x2a, 0xab, 0xe7, 0xf3, 0x8c, 0x13, 0x40, 0xcf, 0x50, 0x4f, 0xe0, 0xda, 0x1f, 0x0e); - VSET(16, e16, m2); + VSET(16, e16, m8); scalar = -5383; - VLOAD_16(v6, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + VLOAD_16(v24, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); - 
VLOAD_16(v2, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + VLOAD_16(v8, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 0xae8d, 0x79a5, 0x519d); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsac.vx v2, %[A], v6, v0.t" ::[A] "r"(scalar)); - VCMP_U16(14, v2, 0xe886, 0xd169, 0x1857, 0xfb44, 0x522e, 0x3f75, 0xa6c2, + asm volatile("vnmsac.vx v8, %[A], v24, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v8, 0xe886, 0xd169, 0x1857, 0xfb44, 0x522e, 0x3f75, 0xa6c2, 0x86fd, 0xd024, 0xd0d7, 0xdd99, 0x5a64, 0xf00a, 0x60c3, 0x79a5, 0x9a3c); - VSET(16, e32, m2); + VSET(16, e32, m8); scalar = 6474219; - VLOAD_32(v6, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + VLOAD_32(v24, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, 0x6a8bae19); - VLOAD_32(v2, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, + VLOAD_32(v8, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 0x7be439e4, 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, 0xfcce4b64); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsac.vx v2, %[A], v6, v0.t" ::[A] "r"(scalar)); - VCMP_U32(15, v2, 0xcbd8537e, 0x9dfe886c, 0x60cf8444, 0xddd6bece, 0x8f2a8694, + asm volatile("vnmsac.vx v8, %[A], v24, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0xcbd8537e, 0x9dfe886c, 0x60cf8444, 0xddd6bece, 0x8f2a8694, 0x2f0a66ed, 0x1ef799f5, 0x915f2166, 0x4bfd5a25, 0x802d8981, 0xed89a52a, 0xa3f70986, 0x872392b9, 0xa4fbf240, 0x865c7264, 0xdd51d971); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ + /* VSET(16, e64, m8); */ /* scalar = -598189234597999223; */ - /* VLOAD_64(v6, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + /* VLOAD_64(v24, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, */ /* 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 
0x49598be14c5bc1cd, */ /* 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, */ /* 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, */ /* 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, */ /* 0x6e8c6ae4d14bd1a5); */ - /* VLOAD_64(v2, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + /* VLOAD_64(v8, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, */ /* 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, */ /* 0x358706b57ce6d6c7, 0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, */ @@ -282,8 +282,8 @@ void TEST_CASE4() { /* 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, */ /* 0x5fbc2f647d6c1153); */ /* VLOAD_8(v0, 0xAA, 0xAA); */ - /* asm volatile("vnmsac.vx v2, %[A], v6, v0.t" ::[A] "r"(scalar)); */ - /* VCMP_U64(16, v2, 0x3b0c6a3a651beecc, 0x79abe3a30a816ca0, + /* asm volatile("vnmsac.vx v8, %[A], v24, v0.t" ::[A] "r"(scalar)); */ + /* VCMP_U64(16, v8, 0x3b0c6a3a651beecc, 0x79abe3a30a816ca0, * 0xda94340ac428ca78, */ /* 0x59a9303f04932768, 0x87df3c47c8113e43, 0x59a15b1bb66f16c2, */ /* 0x358706b57ce6d6c7, 0x65e0c3ab56fa1f0c, 0xe9ffed5b39f1ea1d, */ diff --git a/sw/riscvTests/isa/rv64uv/vnmsub.c b/sw/riscvTests/isa/rv64uv/vnmsub.c index c0dd0a1f..cd1cbc72 100644 --- a/sw/riscvTests/isa/rv64uv/vnmsub.c +++ b/sw/riscvTests/isa/rv64uv/vnmsub.c @@ -8,70 +8,70 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(16, e8, m2); - VLOAD_8(v2, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, + VSET(16, e8, m8); + VLOAD_8(v8, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, 0x59, 0x2b, 0xe3, 0x33, 0xb9); - VLOAD_8(v4, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + VLOAD_8(v16, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, 0x18, 0xe6, 0x44, 0x57, 0xaf); - VLOAD_8(v6, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + VLOAD_8(v24, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, 0x4e, 0x2e, 0x7d, 0x13, 
0x5a); - asm volatile("vnmsub.vv v2, v4, v6"); - VCMP_U8(1, v2, 0x4e, 0xb4, 0x9c, 0x04, 0x86, 0x53, 0xdb, 0x87, 0x81, 0xe2, + asm volatile("vnmsub.vv v8, v16, v24"); + VCMP_U8(1, v8, 0x4e, 0xb4, 0x9c, 0x04, 0x86, 0x53, 0xdb, 0x87, 0x81, 0xe2, 0x65, 0xf6, 0x8c, 0x31, 0xbe, 0xe3); - VSET(16, e16, m2); - VLOAD_16(v2, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + VSET(16, e16, m8); + VLOAD_16(v8, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); - VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + VLOAD_16(v16, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); - VLOAD_16(v6, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + VLOAD_16(v24, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); - asm volatile("vnmsub.vv v2, v4, v6"); - VCMP_U16(2, v2, 0xd8a3, 0xdadf, 0x2ba2, 0x5c17, 0x5c48, 0x4091, 0x7106, + asm volatile("vnmsub.vv v8, v16, v24"); + VCMP_U16(2, v8, 0xd8a3, 0xdadf, 0x2ba2, 0x5c17, 0x5c48, 0x4091, 0x7106, 0x7c52, 0x8088, 0xca83, 0x937f, 0x4600, 0xaba7, 0x87f1, 0xc583, 0x5abc); - VSET(16, e32, m2); - VLOAD_32(v2, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + VSET(16, e32, m8); + VLOAD_32(v8, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, 0xdcbcb1d7); - VLOAD_32(v4, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + VLOAD_32(v16, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, 0x4224aa5e); - VLOAD_32(v6, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, + VLOAD_32(v24, 
0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, 0x463438b4); - asm volatile("vnmsub.vv v2, v4, v6"); - VCMP_U32(3, v2, 0x0e7d6e68, 0x92cacfc8, 0x726ad6fd, 0x6ff2f953, 0xe1d4d1c6, + asm volatile("vnmsub.vv v8, v16, v24"); + VCMP_U32(3, v8, 0x0e7d6e68, 0x92cacfc8, 0x726ad6fd, 0x6ff2f953, 0xe1d4d1c6, 0x466feefe, 0xa6512191, 0xdf6d912b, 0x7c898c04, 0x0a7ec6a5, 0x2121fa29, 0x91713c70, 0x60b0bd0d, 0xb3a6341a, 0x82041c42, 0x7a9625c2); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + VSET(16, e64, m8); + VLOAD_64(v8, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 0xba0a5bf3b2b528d1, 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, 0xbe98c841d80bd077); - VLOAD_64(v4, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, 0xfd4b1328b8f7773a); - VLOAD_64(v6, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + VLOAD_64(v24, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, 0xf12edb410132b013, 0xc475df4b52276fe9, 0x069e283bf74ca195, 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, 0x30727b2d1bccd396); - asm volatile("vnmsub.vv v2, v4, v6"); - VCMP_U64(4, v2, 0x231a763b4759b083, 0x68b16397da83d642, 0x7081592414ce4cdc, + asm volatile("vnmsub.vv 
v8, v16, v24"); + VCMP_U64(4, v8, 0x231a763b4759b083, 0x68b16397da83d642, 0x7081592414ce4cdc, 0x084dc189ec3eea39, 0x72cb55f70cac6a8e, 0xc713321c491334ae, 0xa21580bb2ab1e821, 0x3d84da5e7dab4cd1, 0x735a758175effc15, 0x91e8df24708208d6, 0x5133f0cd25a5d6c6, 0x2f1a889653a2c559, @@ -81,74 +81,74 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(16, e8, m2); - VLOAD_8(v2, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, + VSET(16, e8, m8); + VLOAD_8(v8, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, 0x59, 0x2b, 0xe3, 0x33, 0xb9); - VLOAD_8(v4, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + VLOAD_8(v16, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, 0x18, 0xe6, 0x44, 0x57, 0xaf); - VLOAD_8(v6, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + VLOAD_8(v24, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, 0x4e, 0x2e, 0x7d, 0x13, 0x5a); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsub.vv v2, v4, v6, v0.t"); - VCMP_U8(5, v2, 0x41, 0xb4, 0xd0, 0x04, 0xc4, 0x53, 0x91, 0x87, 0x7b, 0xe2, + asm volatile("vnmsub.vv v8, v16, v24, v0.t"); + VCMP_U8(5, v8, 0x41, 0xb4, 0xd0, 0x04, 0xc4, 0x53, 0x91, 0x87, 0x7b, 0xe2, 0x85, 0xf6, 0x2b, 0x31, 0x33, 0xe3); - VSET(16, e16, m2); - VLOAD_16(v2, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + VSET(16, e16, m8); + VLOAD_16(v8, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); - VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + VLOAD_16(v16, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); - VLOAD_16(v6, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + VLOAD_16(v24, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); VLOAD_8(v0, 0xAA, 0xAA); 
- asm volatile("vnmsub.vv v2, v4, v6, v0.t"); - VCMP_U16(6, v2, 0x9904, 0xdadf, 0xa6cb, 0x5c17, 0x227e, 0x4091, 0x3eeb, + asm volatile("vnmsub.vv v8, v16, v24, v0.t"); + VCMP_U16(6, v8, 0x9904, 0xdadf, 0xa6cb, 0x5c17, 0x227e, 0x4091, 0x3eeb, 0x7c52, 0x14a1, 0xca83, 0x3376, 0x4600, 0x4fc8, 0x87f1, 0xccd7, 0x5abc); - VSET(16, e32, m2); - VLOAD_32(v2, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + VSET(16, e32, m8); + VLOAD_32(v8, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, 0xdcbcb1d7); - VLOAD_32(v4, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + VLOAD_32(v16, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, 0x4224aa5e); - VLOAD_32(v6, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, + VLOAD_32(v24, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, 0x463438b4); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsub.vv v2, v4, v6, v0.t"); - VCMP_U32(7, v2, 0xe6f4ff60, 0x92cacfc8, 0x30f2ea92, 0x6ff2f953, 0x815c1c28, + asm volatile("vnmsub.vv v8, v16, v24, v0.t"); + VCMP_U32(7, v8, 0xe6f4ff60, 0x92cacfc8, 0x30f2ea92, 0x6ff2f953, 0x815c1c28, 0x466feefe, 0xdb2cdc06, 0xdf6d912b, 0x214746ac, 0x0a7ec6a5, 0x35887ce9, 0x91713c70, 0x76adea2b, 0xb3a6341a, 0x6e2977fe, 0x7a9625c2); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + VSET(16, e64, m8); + VLOAD_64(v8, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 
0xba0a5bf3b2b528d1, 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, 0xbe98c841d80bd077); - VLOAD_64(v4, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, 0xfd4b1328b8f7773a); - VLOAD_64(v6, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + VLOAD_64(v24, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, 0xf12edb410132b013, 0xc475df4b52276fe9, 0x069e283bf74ca195, 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, 0x30727b2d1bccd396); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsub.vv v2, v4, v6, v0.t"); - VCMP_U64(8, v2, 0xb8d79a755b98580a, 0x68b16397da83d642, 0x670688aed7c97cdd, + asm volatile("vnmsub.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0xb8d79a755b98580a, 0x68b16397da83d642, 0x670688aed7c97cdd, 0x084dc189ec3eea39, 0x58993c2ae4a62e89, 0xc713321c491334ae, 0x7882d6539128d119, 0x3d84da5e7dab4cd1, 0x6da189493780c328, 0x91e8df24708208d6, 0xd5ac914ccbf735f0, 0x2f1a889653a2c559, @@ -158,62 +158,62 @@ void TEST_CASE2() { } void TEST_CASE3() { - VSET(16, e8, m2); + VSET(16, e8, m8); int64_t scalar = 5; - VLOAD_8(v2, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + VLOAD_8(v8, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, 0x90, 0xa2, 0x67, 0x3d, 0xf5); - VLOAD_8(v6, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + VLOAD_8(v24, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); - asm volatile("vnmsub.vx v2, %[A], v6" ::[A] "r"(scalar)); - VCMP_U8(9, v2, 0x24, 0x10, 0xdd, 0xab, 0x83, 0xf3, 
0xf8, 0x13, 0x0d, 0xcf, + asm volatile("vnmsub.vx v8, %[A], v24" ::[A] "r"(scalar)); + VCMP_U8(9, v8, 0x24, 0x10, 0xdd, 0xab, 0x83, 0xf3, 0xf8, 0x13, 0x0d, 0xcf, 0x84, 0x4f, 0xb6, 0xda, 0xee, 0x0e); - VSET(16, e16, m2); + VSET(16, e16, m8); scalar = -5383; - VLOAD_16(v2, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + VLOAD_16(v8, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); - VLOAD_16(v6, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + VLOAD_16(v24, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 0xae8d, 0x79a5, 0x519d); - asm volatile("vnmsub.vx v2, %[A], v6" ::[A] "r"(scalar)); - VCMP_U16(10, v2, 0x5e06, 0xd169, 0xecdc, 0xfb44, 0x7912, 0x3f75, 0x4c1d, + asm volatile("vnmsub.vx v8, %[A], v24" ::[A] "r"(scalar)); + VCMP_U16(10, v8, 0x5e06, 0xd169, 0xecdc, 0xfb44, 0x7912, 0x3f75, 0x4c1d, 0x86fd, 0x4cc0, 0xd0d7, 0x4c50, 0x5a64, 0xa3c7, 0x60c3, 0xe34f, 0x9a3c); - VSET(16, e32, m2); + VSET(16, e32, m8); scalar = 6474219; - VLOAD_32(v2, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + VLOAD_32(v8, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, 0x6a8bae19); - VLOAD_32(v6, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, + VLOAD_32(v24, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 0x7be439e4, 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, 0xfcce4b64); - asm volatile("vnmsub.vx v2, %[A], v6" ::[A] "r"(scalar)); - VCMP_U32(11, v2, 0x6b20ab9a, 0x9dfe886c, 0xf983030d, 0xddd6bece, 0xf2a7f2ad, + asm volatile("vnmsub.vx v8, %[A], v24" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0x6b20ab9a, 0x9dfe886c, 0xf983030d, 0xddd6bece, 0xf2a7f2ad, 0x2f0a66ed, 0x59240bcf, 0x915f2166, 
0x8c4ace02, 0x802d8981, 0xf9fc8b37, 0xa3f70986, 0xced98739, 0xa4fbf240, 0x4249945d, 0xdd51d971); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ + /* VSET(16, e64, m8); */ /* scalar = -598189234597999223; */ - /* VLOAD_64(v2, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + /* VLOAD_64(v8, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, */ /* 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 0x49598be14c5bc1cd, */ /* 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, */ /* 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, */ /* 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, */ /* 0x6e8c6ae4d14bd1a5); */ - /* VLOAD_64(v6, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + /* VLOAD_64(v24, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, */ /* 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, */ /* 0x358706b57ce6d6c7, 0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, */ /* 0x3c7e5a675c779870, 0x5d2ea63ac910e42e, 0xb3e832dbe2332203, */ /* 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, */ /* 0x5fbc2f647d6c1153); */ - /* asm volatile("vnmsub.vx v2, %[A], v6" ::[A] "r"(scalar)); */ - /* VCMP_U64(12, v2, 0x60dd7609c833e03a, 0x79abe3a30a816ca0, + /* asm volatile("vnmsub.vx v8, %[A], v24" ::[A] "r"(scalar)); */ + /* VCMP_U64(12, v8, 0x60dd7609c833e03a, 0x79abe3a30a816ca0, * 0x983ca4c97f204385, */ /* 0x59a9303f04932768, 0x59c29659b4d00149, 0x59a15b1bb66f16c2, */ /* 0x37b0445a8ebaa7d1, 0x65e0c3ab56fa1f0c, 0x94a740971b1d6eda, */ @@ -224,57 +224,57 @@ void TEST_CASE3() { } void TEST_CASE4() { - VSET(16, e8, m2); + VSET(16, e8, m8); int64_t scalar = 5; - VLOAD_8(v2, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + VLOAD_8(v8, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, 0x90, 0xa2, 0x67, 0x3d, 0xf5); - VLOAD_8(v6, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + VLOAD_8(v24, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, 
0x1f, 0xe0, 0xdd, 0x1f, 0xd7); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsub.vx v2, %[A], v6, v0.t" ::[A] "r"(scalar)); - VCMP_U8(13, v2, 0x5e, 0x10, 0xa9, 0xab, 0x14, 0xf3, 0x84, 0x13, 0xd7, 0xcf, + asm volatile("vnmsub.vx v8, %[A], v24, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v8, 0x5e, 0x10, 0xa9, 0xab, 0x14, 0xf3, 0x84, 0x13, 0xd7, 0xcf, 0x5c, 0x4f, 0xa2, 0xda, 0x3d, 0x0e); - VSET(16, e16, m2); + VSET(16, e16, m8); scalar = -5383; - VLOAD_16(v2, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + VLOAD_16(v8, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); - VLOAD_16(v6, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + VLOAD_16(v24, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 0xae8d, 0x79a5, 0x519d); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsub.vx v2, %[A], v6, v0.t" ::[A] "r"(scalar)); - VCMP_U16(14, v2, 0xfe80, 0xd169, 0x5313, 0xfb44, 0xecfc, 0x3f75, 0xcc0d, + asm volatile("vnmsub.vx v8, %[A], v24, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v8, 0xfe80, 0xd169, 0x5313, 0xfb44, 0xecfc, 0x3f75, 0xcc0d, 0x86fd, 0xf384, 0xd0d7, 0x9cd1, 0x5a64, 0xa41b, 0x60c3, 0x5886, 0x9a3c); - VSET(16, e32, m2); + VSET(16, e32, m8); scalar = 6474219; - VLOAD_32(v2, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + VLOAD_32(v8, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, 0x6a8bae19); - VLOAD_32(v6, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, + VLOAD_32(v24, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 0x7be439e4, 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, 0xfcce4b64); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vnmsub.vx v2, %[A], v6, v0.t" ::[A] "r"(scalar)); - 
VCMP_U32(15, v2, 0x48da7aac, 0x9dfe886c, 0xf49f26e5, 0xddd6bece, 0x40ca82f5, + asm volatile("vnmsub.vx v8, %[A], v24, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0x48da7aac, 0x9dfe886c, 0xf49f26e5, 0xddd6bece, 0x40ca82f5, 0x2f0a66ed, 0x38c88af2, 0x915f2166, 0x6f61c0a9, 0x802d8981, 0x31aba619, 0xa3f70986, 0xbc63c280, 0xa4fbf240, 0x9451b955, 0xdd51d971); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ + /* VSET(16, e64, m8); */ /* scalar = -598189234597999223; */ - /* VLOAD_64(v2, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + /* VLOAD_64(v8, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, */ /* 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 0x49598be14c5bc1cd, */ /* 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, */ /* 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, */ /* 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, */ /* 0x6e8c6ae4d14bd1a5); */ - /* VLOAD_64(v6, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + /* VLOAD_64(v24, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, */ /* 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, */ /* 0x358706b57ce6d6c7, 0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, */ @@ -282,8 +282,8 @@ void TEST_CASE4() { /* 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, */ /* 0x5fbc2f647d6c1153); */ /* VLOAD_8(v0, 0xAA, 0xAA); */ - /* asm volatile("vnmsub.vx v2, %[A], v6, v0.t" ::[A] "r"(scalar)); */ - /* VCMP_U64(16, v2, 0x93adc14539897782, 0x79abe3a30a816ca0, + /* asm volatile("vnmsub.vx v8, %[A], v24, v0.t" ::[A] "r"(scalar)); */ + /* VCMP_U64(16, v8, 0x93adc14539897782, 0x79abe3a30a816ca0, * 0x55e01165195d2d9b, */ /* 0x59a9303f04932768, 0x33a71ede19aec0aa, 0x59a15b1bb66f16c2, */ /* 0x1c27bde3f488bfc6, 0x65e0c3ab56fa1f0c, 0xd53289cca28a3b6b, */ diff --git a/sw/riscvTests/isa/rv64uv/vor.c b/sw/riscvTests/isa/rv64uv/vor.c index b1a5b12a..be95164e 100644 --- a/sw/riscvTests/isa/rv64uv/vor.c +++ b/sw/riscvTests/isa/rv64uv/vor.c @@ -8,48 +8,48 @@ 
#include "vector_macros.h" void TEST_CASE1() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - VLOAD_8(v6, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + VLOAD_8(v24, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0); - asm volatile("vor.vv v2, v4, v6"); - VCMP_U8(1, v2, 0xff, 0x03, 0xf0, 0xff, 0x03, 0xf0, 0xff, 0x03, 0xf0, 0xff, + asm volatile("vor.vv v8, v16, v24"); + VCMP_U8(1, v8, 0xff, 0x03, 0xf0, 0xff, 0x03, 0xf0, 0xff, 0x03, 0xf0, 0xff, 0x03, 0xf0); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + VLOAD_16(v24, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0); - asm volatile("vor.vv v2, v4, v6"); - VCMP_U16(2, v2, 0xffff, 0x0003, 0xf0f0, 0xffff, 0x0003, 0xf0f0, 0xffff, + asm volatile("vor.vv v8, v16, v24"); + VCMP_U16(2, v8, 0xffff, 0x0003, 0xf0f0, 0xffff, 0x0003, 0xf0f0, 0xffff, 0x0003, 0xf0f0, 0xffff, 0x0003, 0xf0f0); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - VLOAD_32(v6, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + VLOAD_32(v24, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0); - asm volatile("vor.vv v2, v4, v6"); - VCMP_U32(3, v2, 0xffffffff, 0x00000003, 0xf0f0f0f0, 0xffffffff, 0x00000003, + asm 
volatile("vor.vv v8, v16, v24"); + VCMP_U32(3, v8, 0xffffffff, 0x00000003, 0xf0f0f0f0, 0xffffffff, 0x00000003, 0xf0f0f0f0, 0xffffffff, 0x00000003, 0xf0f0f0f0, 0xffffffff, 0x00000003, 0xf0f0f0f0); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - VLOAD_64(v6, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); - asm volatile("vor.vv v2, v4, v6"); - VCMP_U64(4, v2, 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + asm volatile("vor.vv v8, v16, v24"); + VCMP_U64(4, v8, 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); @@ -57,63 +57,63 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - VLOAD_8(v6, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + VLOAD_8(v24, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vor.vv v2, v4, v6, 
v0.t"); - VCMP_U8(5, v2, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, + asm volatile("vor.vv v8, v16, v24, v0.t"); + VCMP_U8(5, v8, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + VLOAD_16(v24, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vor.vv v2, v4, v6, v0.t"); - VCMP_U16(6, v2, 0xffff, 0xbeef, 0xf0f0, 0xffff, 0xbeef, 0xf0f0, 0xffff, + asm volatile("vor.vv v8, v16, v24, v0.t"); + VCMP_U16(6, v8, 0xffff, 0xbeef, 0xf0f0, 0xffff, 0xbeef, 0xf0f0, 0xffff, 0xbeef, 0xf0f0, 0xffff, 0xbeef, 0xf0f0); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - VLOAD_32(v6, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + VLOAD_32(v24, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vor.vv v2, v4, v6, v0.t"); - VCMP_U32(7, v2, 0xffffffff, 0xdeadbeef, 
0xf0f0f0f0, 0xffffffff, 0xdeadbeef, + asm volatile("vor.vv v8, v16, v24, v0.t"); + VCMP_U32(7, v8, 0xffffffff, 0xdeadbeef, 0xf0f0f0f0, 0xffffffff, 0xdeadbeef, 0xf0f0f0f0, 0xffffffff, 0xdeadbeef, 0xf0f0f0f0, 0xffffffff, 0xdeadbeef, 0xf0f0f0f0); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - VLOAD_64(v6, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vor.vv v2, v4, v6, v0.t"); - VCMP_U64(8, v2, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, + asm volatile("vor.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0); @@ -123,37 +123,37 @@ void TEST_CASE2() { void TEST_CASE3() { const uint32_t scalar = 0x0ff00ff0; - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 
0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - asm volatile("vor.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U8(9, v2, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, + asm volatile("vor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v8, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - asm volatile("vor.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U16(10, v2, 0xffff, 0x0ff1, 0xfff0, 0xffff, 0x0ff1, 0xfff0, 0xffff, + asm volatile("vor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v8, 0xffff, 0x0ff1, 0xfff0, 0xffff, 0x0ff1, 0xfff0, 0xffff, 0x0ff1, 0xfff0, 0xffff, 0x0ff1, 0xfff0); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - asm volatile("vor.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U32(11, v2, 0xffffffff, 0x0ff00ff1, 0xfff0fff0, 0xffffffff, 0x0ff00ff1, + asm volatile("vor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0xffffffff, 0x0ff00ff1, 0xfff0fff0, 0xffffffff, 0x0ff00ff1, 0xfff0fff0, 0xffffffff, 0x0ff00ff1, 0xfff0fff0, 0xffffffff, 0x0ff00ff1, 0xfff0fff0); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - asm volatile("vor.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U64(12, v2, 0xffffffffffffffff, 
0x000000000ff00ff1, 0xf0f0f0f0fff0fff0, + asm volatile("vor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0xffffffffffffffff, 0x000000000ff00ff1, 0xf0f0f0f0fff0fff0, 0xffffffffffffffff, 0x000000000ff00ff1, 0xf0f0f0f0fff0fff0, 0xffffffffffffffff, 0x000000000ff00ff1, 0xf0f0f0f0fff0fff0, 0xffffffffffffffff, 0x000000000ff00ff1, 0xf0f0f0f0fff0fff0); @@ -163,52 +163,52 @@ void TEST_CASE3() { void TEST_CASE4() { const uint32_t scalar = 0x0ff00ff0; - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(13, v2, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, + asm volatile("vor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v8, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U16(14, v2, 0xffff, 0xbeef, 0xfff0, 0xffff, 0xbeef, 0xfff0, 0xffff, + asm volatile("vor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v8, 0xffff, 0xbeef, 0xfff0, 0xffff, 0xbeef, 0xfff0, 0xffff, 0xbeef, 0xfff0, 0xffff, 0xbeef, 0xfff0); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 
0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(15, v2, 0xffffffff, 0xdeadbeef, 0xfff0fff0, 0xffffffff, 0xdeadbeef, + asm volatile("vor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0xffffffff, 0xdeadbeef, 0xfff0fff0, 0xffffffff, 0xdeadbeef, 0xfff0fff0, 0xffffffff, 0xdeadbeef, 0xfff0fff0, 0xffffffff, 0xdeadbeef, 0xfff0fff0); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(16, v2, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0fff0fff0, + asm volatile("vor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0fff0fff0, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0fff0fff0, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0fff0fff0, 
0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0fff0fff0); @@ -216,37 +216,37 @@ void TEST_CASE4() { } void TEST_CASE5() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - asm volatile("vor.vi v2, v4, 15"); - VCMP_U8(17, v2, 0xff, 0x0f, 0xff, 0xff, 0x0f, 0xff, 0xff, 0x0f, 0xff, 0xff, + asm volatile("vor.vi v8, v16, 15"); + VCMP_U8(17, v8, 0xff, 0x0f, 0xff, 0xff, 0x0f, 0xff, 0xff, 0x0f, 0xff, 0xff, 0x0f, 0xff); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - asm volatile("vor.vi v2, v4, 15"); - VCMP_U16(18, v2, 0xffff, 0x000f, 0xf0ff, 0xffff, 0x000f, 0xf0ff, 0xffff, + asm volatile("vor.vi v8, v16, 15"); + VCMP_U16(18, v8, 0xffff, 0x000f, 0xf0ff, 0xffff, 0x000f, 0xf0ff, 0xffff, 0x000f, 0xf0ff, 0xffff, 0x000f, 0xf0ff); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - asm volatile("vor.vi v2, v4, 15"); - VCMP_U32(19, v2, 0xffffffff, 0x0000000f, 0xf0f0f0ff, 0xffffffff, 0x0000000f, + asm volatile("vor.vi v8, v16, 15"); + VCMP_U32(19, v8, 0xffffffff, 0x0000000f, 0xf0f0f0ff, 0xffffffff, 0x0000000f, 0xf0f0f0ff, 0xffffffff, 0x0000000f, 0xf0f0f0ff, 0xffffffff, 0x0000000f, 0xf0f0f0ff); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 
0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - asm volatile("vor.vi v2, v4, 15"); - VCMP_U64(20, v2, 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff, + asm volatile("vor.vi v8, v16, 15"); + VCMP_U64(20, v8, 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff, 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff, 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff, 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff); @@ -254,52 +254,52 @@ void TEST_CASE5() { } void TEST_CASE6() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vor.vi v2, v4, 15, v0.t"); - VCMP_U8(21, v2, 0xff, 0xef, 0xff, 0xff, 0xef, 0xff, 0xff, 0xef, 0xff, 0xff, + asm volatile("vor.vi v8, v16, 15, v0.t"); + VCMP_U8(21, v8, 0xff, 0xef, 0xff, 0xff, 0xef, 0xff, 0xff, 0xef, 0xff, 0xff, 0xef, 0xff); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vor.vi v2, v4, 15, v0.t"); - VCMP_U16(22, v2, 0xffff, 0xbeef, 0xf0ff, 0xffff, 0xbeef, 0xf0ff, 0xffff, + asm volatile("vor.vi v8, v16, 15, v0.t"); + VCMP_U16(22, v8, 0xffff, 0xbeef, 0xf0ff, 0xffff, 0xbeef, 0xf0ff, 0xffff, 0xbeef, 0xf0ff, 0xffff, 0xbeef, 0xf0ff); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 
0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vor.vi v2, v4, 15, v0.t"); - VCMP_U32(23, v2, 0xffffffff, 0xdeadbeef, 0xf0f0f0ff, 0xffffffff, 0xdeadbeef, + asm volatile("vor.vi v8, v16, 15, v0.t"); + VCMP_U32(23, v8, 0xffffffff, 0xdeadbeef, 0xf0f0f0ff, 0xffffffff, 0xdeadbeef, 0xf0f0f0ff, 0xffffffff, 0xdeadbeef, 0xf0f0f0ff, 0xffffffff, 0xdeadbeef, 0xf0f0f0ff); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vor.vi v2, v4, 15, v0.t"); - VCMP_U64(24, v2, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + asm volatile("vor.vi v8, v16, 15, v0.t"); + VCMP_U64(24, v8, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff); diff 
--git a/sw/riscvTests/isa/rv64uv/vredand.c b/sw/riscvTests/isa/rv64uv/vredand.c index 34b7fab4..a354b18d 100644 --- a/sw/riscvTests/isa/rv64uv/vredand.c +++ b/sw/riscvTests/isa/rv64uv/vredand.c @@ -8,81 +8,81 @@ // Naive test void TEST_CASE1(void) { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0); VLOAD_8(v8, 0xf0); - asm volatile("vredand.vs v12, v4, v8"); - VCMP_U8(1, v12, 0xf0); + asm volatile("vredand.vs v24, v16, v8"); + VCMP_U8(1, v24, 0xf0); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0301, 0xf1f0, 0xffff, 0x0101, 0xf7f0, 0xffff, 0x0701, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0301, 0xf1f0, 0xffff, 0x0101, 0xf7f0, 0xffff, 0x0701, 0xfff0, 0xffff, 0x0101, 0xf1f0); VLOAD_16(v8, 0xefff); - asm volatile("vredand.vs v12, v4, v8"); - VCMP_U16(2, v12, 0x0100); + asm volatile("vredand.vs v24, v16, v8"); + VCMP_U16(2, v24, 0x0100); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001, 0xf0f0f0f0); VLOAD_32(v8, 0x00f010f0); - asm volatile("vredand.vs v12, v4, v8"); - VCMP_U32(3, v12, 0x00001000); + asm volatile("vredand.vs v24, v16, v8"); + VCMP_U32(3, v24, 0x00001000); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0); VLOAD_64(v8, 0xfffffffffffffff7); - asm volatile("vredand.vs v12, v4, v8"); - VCMP_U64(4, v12, 0x1000000000000000); + asm volatile("vredand.vs v24, 
v16, v8"); + VCMP_U64(4, v24, 0x1000000000000000); #endif } // Masked naive test void TEST_CASE2(void) { - VSET(12, e8, m2); + VSET(12, e8, m8); VLOAD_8(v0, 0xf7, 0xff); - VLOAD_8(v4, 0xff, 0xf1, 0xff, 0x00, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, + VLOAD_8(v16, 0xff, 0xf1, 0xff, 0x00, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0); VLOAD_8(v8, 0xf0); - VLOAD_8(v12, 1); - asm volatile("vredand.vs v12, v4, v8, v0.t"); - VCMP_U8(5, v12, 0xf0); + VLOAD_8(v24, 1); + asm volatile("vredand.vs v24, v16, v8, v0.t"); + VCMP_U8(5, v24, 0xf0); - VSET(12, e16, m2); + VSET(12, e16, m8); VLOAD_8(v0, 0x00, 0x08); - VLOAD_16(v4, 0xffff, 0x0301, 0xf1f0, 0xffff, 0x0101, 0xf7f0, 0xffff, 0x9701, + VLOAD_16(v16, 0xffff, 0x0301, 0xf1f0, 0xffff, 0x0101, 0xf7f0, 0xffff, 0x9701, 0xfff0, 0xffff, 0x0101, 0xf1f0); VLOAD_16(v8, 0xefff); - VLOAD_16(v12, 1); - asm volatile("vredand.vs v12, v4, v8, v0.t"); - VCMP_U16(6, v12, 0xe1f0); + VLOAD_16(v24, 1); + asm volatile("vredand.vs v24, v16, v8, v0.t"); + VCMP_U16(6, v24, 0xe1f0); - VSET(12, e32, m2); + VSET(12, e32, m8); VLOAD_8(v0, 0xfe, 0xff); - VLOAD_32(v4, 0x00000000, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001, + VLOAD_32(v16, 0x00000000, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001, 0xf0f0f0f0); VLOAD_32(v8, 0x00f010f0); - VLOAD_32(v12, 1); - asm volatile("vredand.vs v12, v4, v8, v0.t"); - VCMP_U32(7, v12, 0x00001000); + VLOAD_32(v24, 1); + asm volatile("vredand.vs v24, v16, v8, v0.t"); + VCMP_U32(7, v24, 0x00001000); #if ELEN == 64 - VSET(12, e64, m2); + VSET(12, e64, m8); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_64(v4, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, + VLOAD_64(v16, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0); VLOAD_64(v8, 0xfffffffffffffff7); - 
VLOAD_64(v12, 1); - asm volatile("vredand.vs v12, v4, v8, v0.t"); - VCMP_U64(8, v12, 0x1000000000000000); + VLOAD_64(v24, 1); + asm volatile("vredand.vs v24, v16, v8, v0.t"); + VCMP_U64(8, v24, 0x1000000000000000); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vredmax.c b/sw/riscvTests/isa/rv64uv/vredmax.c index 94a7c60b..0b6953d6 100644 --- a/sw/riscvTests/isa/rv64uv/vredmax.c +++ b/sw/riscvTests/isa/rv64uv/vredmax.c @@ -8,67 +8,67 @@ // Naive test void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, -7, 8, 1, 9, 3, 4, 5, -6, 7, 8); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, -7, 8, 1, 9, 3, 4, 5, -6, 7, 8); VLOAD_8(v8, -1); - asm volatile("vredmax.vs v12, v4, v8"); - VCMP_U8(1, v12, 9); + asm volatile("vredmax.vs v24, v16, v8"); + VCMP_U8(1, v24, 9); - VSET(16, e16, m2); - VLOAD_16(v4, -1, 2, -3, 4, 5, 6, 7, 8, 1, 2, 3, -4, 5, 6, 7, 8); + VSET(16, e16, m8); + VLOAD_16(v16, -1, 2, -3, 4, 5, 6, 7, 8, 1, 2, 3, -4, 5, 6, 7, 8); VLOAD_16(v8, 9); - asm volatile("vredmax.vs v12, v4, v8"); - VCMP_U16(2, v12, 9); + asm volatile("vredmax.vs v24, v16, v8"); + VCMP_U16(2, v24, 9); - VSET(16, e32, m2); - VLOAD_32(v4, 9, 2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, -5, 6, 7, 8); + VSET(16, e32, m8); + VLOAD_32(v16, 9, 2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, -5, 6, 7, 8); VLOAD_32(v8, 1); - asm volatile("vredmax.vs v12, v4, v8"); - VCMP_U32(3, v12, 9); + asm volatile("vredmax.vs v24, v16, v8"); + VCMP_U32(3, v24, 9); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, -1, 2, 3, -4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, -8); + VSET(16, e64, m8); + VLOAD_64(v16, -1, 2, 3, -4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, -8); VLOAD_64(v8, -1); - asm volatile("vredmax.vs v12, v4, v8"); - VCMP_U64(4, v12, 9); + asm volatile("vredmax.vs v24, v16, v8"); + VCMP_U64(4, v24, 9); #endif } // Masked naive test void TEST_CASE2(void) { - VSET(16, e8, m2); + VSET(16, e8, m8); VLOAD_8(v0, 0x03, 0x00); - VLOAD_8(v4, -1, 2, 3, -4, 5, 6, 7, 9, 1, -2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v16, -1, 2, 3, -4, 
5, 6, 7, 9, 1, -2, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1); - VLOAD_8(v12, 1); - asm volatile("vredmax.vs v12, v4, v8, v0.t"); - VCMP_U8(5, v12, 2); + VLOAD_8(v24, 1); + asm volatile("vredmax.vs v24, v16, v8, v0.t"); + VCMP_U8(5, v24, 2); - VSET(16, e16, m2); + VSET(16, e16, m8); VLOAD_8(v0, 0x00, 0xc0); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, -7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, -7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 9); - VLOAD_16(v12, 1); - asm volatile("vredmax.vs v12, v4, v8, v0.t"); - VCMP_U16(6, v12, 9); + VLOAD_16(v24, 1); + asm volatile("vredmax.vs v24, v16, v8, v0.t"); + VCMP_U16(6, v24, 9); - VSET(16, e32, m2); + VSET(16, e32, m8); VLOAD_8(v0, 0x00, 0xc0); - VLOAD_32(v4, -1, 2, 3, 4, 5, 6, 7, -8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, -1, 2, 3, 4, 5, 6, 7, -8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, 1); - VLOAD_32(v12, 1); - asm volatile("vredmax.vs v12, v4, v8, v0.t"); - VCMP_U32(7, v12, 8); + VLOAD_32(v24, 1); + asm volatile("vredmax.vs v24, v16, v8, v0.t"); + VCMP_U32(7, v24, 8); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_64(v4, 1, -2, 3, 4, 5, 6, -7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, -2, 3, 4, 5, 6, -7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, 4); - VLOAD_64(v12, 1); - asm volatile("vredmax.vs v12, v4, v8, v0.t"); - VCMP_U64(8, v12, 8); + VLOAD_64(v24, 1); + asm volatile("vredmax.vs v24, v16, v8, v0.t"); + VCMP_U64(8, v24, 8); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vredmaxu.c b/sw/riscvTests/isa/rv64uv/vredmaxu.c index 8b545cb4..1e38a098 100644 --- a/sw/riscvTests/isa/rv64uv/vredmaxu.c +++ b/sw/riscvTests/isa/rv64uv/vredmaxu.c @@ -8,96 +8,96 @@ // Naive test void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 9, 3, 4, 5, 6, 7, 8); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 9, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1); - asm volatile("vredmaxu.vs v12, v4, v8"); - VCMP_U8(1, v12, 9); + asm volatile("vredmaxu.vs v24, v16, 
v8"); + VCMP_U8(1, v24, 9); - VSET(16, e16, m2); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e16, m8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 9); - asm volatile("vredmaxu.vs v12, v4, v8"); - VCMP_U16(2, v12, 9); + asm volatile("vredmaxu.vs v24, v16, v8"); + VCMP_U16(2, v24, 9); - VSET(16, e32, m2); - VLOAD_32(v4, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e32, m8); + VLOAD_32(v16, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, 1); - asm volatile("vredmaxu.vs v12, v4, v8"); - VCMP_U32(3, v12, 9); + asm volatile("vredmaxu.vs v24, v16, v8"); + VCMP_U32(3, v24, 9); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, 1); - asm volatile("vredmaxu.vs v12, v4, v8"); - VCMP_U64(4, v12, 9); + asm volatile("vredmaxu.vs v24, v16, v8"); + VCMP_U64(4, v24, 9); #endif } // Masked naive test void TEST_CASE2(void) { - VSET(16, e8, m2); + VSET(16, e8, m8); VLOAD_8(v0, 0x03, 0x00); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1); - VLOAD_8(v12, 1); - asm volatile("vredmaxu.vs v12, v4, v8, v0.t"); - VCMP_U8(5, v12, 2); + VLOAD_8(v24, 1); + asm volatile("vredmaxu.vs v24, v16, v8, v0.t"); + VCMP_U8(5, v24, 2); - VSET(16, e16, m2); + VSET(16, e16, m8); VLOAD_8(v0, 0x00, 0xc0); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 9); - VLOAD_16(v12, 1); - asm volatile("vredmaxu.vs v12, v4, v8, v0.t"); - VCMP_U16(6, v12, 9); + VLOAD_16(v24, 1); + asm volatile("vredmaxu.vs v24, v16, v8, v0.t"); + VCMP_U16(6, v24, 9); - VSET(16, e32, m2); + VSET(16, e32, m8); VLOAD_8(v0, 0x00, 0xc0); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 
3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, 1); - VLOAD_32(v12, 1); - asm volatile("vredmaxu.vs v12, v4, v8, v0.t"); - VCMP_U32(7, v12, 8); + VLOAD_32(v24, 1); + asm volatile("vredmaxu.vs v24, v16, v8, v0.t"); + VCMP_U32(7, v24, 8); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, 4); - VLOAD_64(v12, 1); - asm volatile("vredmaxu.vs v12, v4, v8, v0.t"); - VCMP_U64(8, v12, 8); + VLOAD_64(v24, 1); + asm volatile("vredmaxu.vs v24, v16, v8, v0.t"); + VCMP_U64(8, v24, 8); #endif } // Naive test with negative values void TEST_CASE3(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 9, -3, 4, 5, 6, 7, 8); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 9, -3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1); - asm volatile("vredmaxu.vs v12, v4, v8"); - VCMP_U8(9, v12, -3); + asm volatile("vredmaxu.vs v24, v16, v8"); + VCMP_U8(9, v24, -3); - VSET(16, e16, m2); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e16, m8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, -9); - asm volatile("vredmaxu.vs v12, v4, v8"); - VCMP_U16(10, v12, -9); + asm volatile("vredmaxu.vs v24, v16, v8"); + VCMP_U16(10, v24, -9); - VSET(16, e32, m2); - VLOAD_32(v4, 9, 2, 3, 4, -5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e32, m8); + VLOAD_32(v16, 9, 2, 3, 4, -5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, 1); - asm volatile("vredmaxu.vs v12, v4, v8"); - VCMP_U32(11, v12, -5); + asm volatile("vredmaxu.vs v24, v16, v8"); + VCMP_U32(11, v24, -5); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 1, 2, 3, -4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, -4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, -1); - asm volatile("vredmaxu.vs v12, v4, v8"); - VCMP_U64(12, v12, -1); + asm volatile("vredmaxu.vs 
v24, v16, v8"); + VCMP_U64(12, v24, -1); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vredmin.c b/sw/riscvTests/isa/rv64uv/vredmin.c index 67148a54..758cf2b0 100644 --- a/sw/riscvTests/isa/rv64uv/vredmin.c +++ b/sw/riscvTests/isa/rv64uv/vredmin.c @@ -8,67 +8,67 @@ // Naive test void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 0, 1, 9, 3, 4, 5, 6, 7, 8); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 0, 1, 9, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1); - asm volatile("vredmin.vs v12, v4, v8"); - VCMP_U8(1, v12, 0); + asm volatile("vredmin.vs v24, v16, v8"); + VCMP_U8(1, v24, 0); - VSET(16, e16, m2); - VLOAD_16(v4, 1, 2, -3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e16, m8); + VLOAD_16(v16, 1, 2, -3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 0); - asm volatile("vredmin.vs v12, v4, v8"); - VCMP_U16(2, v12, -3); + asm volatile("vredmin.vs v24, v16, v8"); + VCMP_U16(2, v24, -3); - VSET(16, e32, m2); - VLOAD_32(v4, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e32, m8); + VLOAD_32(v16, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, -1); - asm volatile("vredmin.vs v12, v4, v8"); - VCMP_U32(3, v12, -1); + asm volatile("vredmin.vs v24, v16, v8"); + VCMP_U32(3, v24, -1); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, -1, 2, 3, 4, 5, -6, 7, -9, -1, -2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + VLOAD_64(v16, -1, 2, 3, 4, 5, -6, 7, -9, -1, -2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, -1); - asm volatile("vredmin.vs v12, v4, v8"); - VCMP_U64(4, v12, -9); + asm volatile("vredmin.vs v24, v16, v8"); + VCMP_U64(4, v24, -9); #endif } // Masked naive test void TEST_CASE2(void) { - VSET(16, e8, m2); + VSET(16, e8, m8); VLOAD_8(v0, 0x03, 0x00); - VLOAD_8(v4, 1, -2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v16, 1, -2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1); - VLOAD_8(v12, 1); - asm volatile("vredmin.vs v12, v4, v8, v0.t"); - VCMP_U8(5, v12, -2); + VLOAD_8(v24, 1); + asm 
volatile("vredmin.vs v24, v16, v8, v0.t"); + VCMP_U8(5, v24, -2); - VSET(16, e16, m2); + VSET(16, e16, m8); VLOAD_8(v0, 0x00, 0xc0); - VLOAD_16(v4, -1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v16, -1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 3); - VLOAD_16(v12, 1); - asm volatile("vredmin.vs v12, v4, v8, v0.t"); - VCMP_U16(6, v12, 3); + VLOAD_16(v24, 1); + asm volatile("vredmin.vs v24, v16, v8, v0.t"); + VCMP_U16(6, v24, 3); - VSET(16, e32, m2); + VSET(16, e32, m8); VLOAD_8(v0, 0x00, 0xc0); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, 8); - VLOAD_32(v12, 1); - asm volatile("vredmin.vs v12, v4, v8, v0.t"); - VCMP_U32(7, v12, 7); + VLOAD_32(v24, 1); + asm volatile("vredmin.vs v24, v16, v8, v0.t"); + VCMP_U32(7, v24, 7); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, 4); - VLOAD_64(v12, 1); - asm volatile("vredmin.vs v12, v4, v8, v0.t"); - VCMP_U64(8, v12, 1); + VLOAD_64(v24, 1); + asm volatile("vredmin.vs v24, v16, v8, v0.t"); + VCMP_U64(8, v24, 1); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vredminu.c b/sw/riscvTests/isa/rv64uv/vredminu.c index 0fedc3e6..f33fb52e 100644 --- a/sw/riscvTests/isa/rv64uv/vredminu.c +++ b/sw/riscvTests/isa/rv64uv/vredminu.c @@ -8,67 +8,67 @@ // Naive test void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 0, 1, 9, 3, 4, 5, 6, 7, 8); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 0, 1, 9, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1); - asm volatile("vredminu.vs v12, v4, v8"); - VCMP_U8(1, v12, 0); + asm volatile("vredminu.vs v24, v16, v8"); + VCMP_U8(1, v24, 0); - VSET(16, e16, m2); - VLOAD_16(v4, 1, 2, -3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e16, m8); + VLOAD_16(v16, 1, 2, -3, 4, 5, 6, 7, 
8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 0); - asm volatile("vredminu.vs v12, v4, v8"); - VCMP_U16(2, v12, 0); + asm volatile("vredminu.vs v24, v16, v8"); + VCMP_U16(2, v24, 0); - VSET(16, e32, m2); - VLOAD_32(v4, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e32, m8); + VLOAD_32(v16, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, -1); - asm volatile("vredminu.vs v12, v4, v8"); - VCMP_U32(3, v12, 1); + asm volatile("vredminu.vs v24, v16, v8"); + VCMP_U32(3, v24, 1); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, -1, 2, 3, 4, 5, -6, 7, -9, -1, -2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + VLOAD_64(v16, -1, 2, 3, 4, 5, -6, 7, -9, -1, -2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, -1); - asm volatile("vredminu.vs v12, v4, v8"); - VCMP_U64(4, v12, 2); + asm volatile("vredminu.vs v24, v16, v8"); + VCMP_U64(4, v24, 2); #endif } // Masked naive test void TEST_CASE2(void) { - VSET(16, e8, m2); + VSET(16, e8, m8); VLOAD_8(v0, 0x03, 0x00); - VLOAD_8(v4, 1, -2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v16, 1, -2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1); - VLOAD_8(v12, 1); - asm volatile("vredminu.vs v12, v4, v8, v0.t"); - VCMP_U8(5, v12, 1); + VLOAD_8(v24, 1); + asm volatile("vredminu.vs v24, v16, v8, v0.t"); + VCMP_U8(5, v24, 1); - VSET(16, e16, m2); + VSET(16, e16, m8); VLOAD_8(v0, 0x00, 0xc0); - VLOAD_16(v4, -1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v16, -1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 3); - VLOAD_16(v12, 1); - asm volatile("vredminu.vs v12, v4, v8, v0.t"); - VCMP_U16(6, v12, 3); + VLOAD_16(v24, 1); + asm volatile("vredminu.vs v24, v16, v8, v0.t"); + VCMP_U16(6, v24, 3); - VSET(16, e32, m2); + VSET(16, e32, m8); VLOAD_8(v0, 0x00, 0xc0); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, 8); - VLOAD_32(v12, 1); - asm volatile("vredminu.vs v12, v4, v8, v0.t"); - VCMP_U32(7, v12, 7); + 
VLOAD_32(v24, 1); + asm volatile("vredminu.vs v24, v16, v8, v0.t"); + VCMP_U32(7, v24, 7); #if ELEN == 64 - VSET(16, e64, m2); + VSET(16, e64, m8); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, 4); - VLOAD_64(v12, 1); - asm volatile("vredminu.vs v12, v4, v8, v0.t"); - VCMP_U64(8, v12, 1); + VLOAD_64(v24, 1); + asm volatile("vredminu.vs v24, v16, v8, v0.t"); + VCMP_U64(8, v24, 1); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vredor.c b/sw/riscvTests/isa/rv64uv/vredor.c index 886b28fa..decc6719 100644 --- a/sw/riscvTests/isa/rv64uv/vredor.c +++ b/sw/riscvTests/isa/rv64uv/vredor.c @@ -8,81 +8,81 @@ // Naive test void TEST_CASE1(void) { - VSET(12, e8, m2); - VLOAD_8(v4, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00); VLOAD_8(v8, 0x10); - asm volatile("vredor.vs v12, v4, v8"); - VCMP_U8(1, v12, 0x11); + asm volatile("vredor.vs v24, v16, v8"); + VCMP_U8(1, v24, 0x11); - VSET(12, e16, m2); - VLOAD_16(v4, 0x0000, 0x0301, 0x0100, 0x0000, 0x0101, 0x0700, 0x0000, 0x0701, + VSET(12, e16, m8); + VLOAD_16(v16, 0x0000, 0x0301, 0x0100, 0x0000, 0x0101, 0x0700, 0x0000, 0x0701, 0x0000, 0x0000, 0x0101, 0x0100); VLOAD_16(v8, 0xe000); - asm volatile("vredor.vs v12, v4, v8"); - VCMP_U16(2, v12, 0xe701); + asm volatile("vredor.vs v24, v16, v8"); + VCMP_U16(2, v24, 0xe701); - VSET(12, e32, m2); - VLOAD_32(v4, 0x00000000, 0x10000001, 0x00000000, 0x00000000, 0x10000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0x00000000, 0x10000001, 0x00000000, 0x00000000, 0x10000001, 0x00000000, 0x00000000, 0x10000001, 0x00000000, 0x00000000, 0x10000001, 0x00000000); VLOAD_32(v8, 0x00001000); - asm volatile("vredor.vs v12, v4, v8"); - VCMP_U32(3, v12, 0x10001001); + asm volatile("vredor.vs v24, v16, v8"); + VCMP_U32(3, v24, 0x10001001); #if ELEN == 64 - VSET(12, 
e64, m2); - VLOAD_64(v4, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, + VSET(12, e64, m8); + VLOAD_64(v16, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000); VLOAD_64(v8, 0x0000000000000007); - asm volatile("vredor.vs v12, v4, v8"); - VCMP_U64(4, v12, 0x1000000000000007); + asm volatile("vredor.vs v24, v16, v8"); + VCMP_U64(4, v24, 0x1000000000000007); #endif } // Masked naive test void TEST_CASE2(void) { - VSET(12, e8, m2); + VSET(12, e8, m8); VLOAD_8(v0, 0x07, 0x00); - VLOAD_8(v4, 0x00, 0x01, 0x00, 0xff, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, + VLOAD_8(v16, 0x00, 0x01, 0x00, 0xff, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00); VLOAD_8(v8, 0x00); - VLOAD_8(v12, 1); - asm volatile("vredor.vs v12, v4, v8, v0.t"); - VCMP_U8(5, v12, 0x01); + VLOAD_8(v24, 1); + asm volatile("vredor.vs v24, v16, v8, v0.t"); + VCMP_U8(5, v24, 0x01); - VSET(12, e16, m2); + VSET(12, e16, m8); VLOAD_8(v0, 0x00, 0x08); - VLOAD_16(v4, 0x0f00, 0x0301, 0x0100, 0x0000, 0x0101, 0x0700, 0x0000, 0x9701, + VLOAD_16(v16, 0x0f00, 0x0301, 0x0100, 0x0000, 0x0101, 0x0700, 0x0000, 0x9701, 0x0000, 0x0000, 0x0101, 0x0100); VLOAD_16(v8, 0xe000); - VLOAD_16(v12, 1); - asm volatile("vredor.vs v12, v4, v8, v0.t"); - VCMP_U16(6, v12, 0xe100); + VLOAD_16(v24, 1); + asm volatile("vredor.vs v24, v16, v8, v0.t"); + VCMP_U16(6, v24, 0xe100); - VSET(12, e32, m2); + VSET(12, e32, m8); VLOAD_8(v0, 0x0e, 0x00); - VLOAD_32(v4, 0xf0000fff, 0x10000001, 0x00000000, 0x00000000, 0x10000001, + VLOAD_32(v16, 0xf0000fff, 0x10000001, 0x00000000, 0x00000000, 0x10000001, 0x00000000, 0x00000000, 0x10000001, 0x00000000, 0x00000000, 0x10000001, 0x00000000); VLOAD_32(v8, 0x00001000); - VLOAD_32(v12, 1); - asm volatile("vredor.vs v12, v4, v8, v0.t"); - VCMP_U32(7, v12, 0x10001001); + VLOAD_32(v24, 1); + asm volatile("vredor.vs v24, 
v16, v8, v0.t"); + VCMP_U32(7, v24, 0x10001001); #if ELEN == 64 - VSET(12, e64, m2); + VSET(12, e64, m8); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_64(v4, 0x0000000000000000, 0x1000000000000001, 0x0000f00000000000, + VLOAD_64(v16, 0x0000000000000000, 0x1000000000000001, 0x0000f00000000000, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000); VLOAD_64(v8, 0x0000000000000007); - VLOAD_64(v12, 1); - asm volatile("vredor.vs v12, v4, v8, v0.t"); - VCMP_U64(8, v12, 0x1000000000000007); + VLOAD_64(v24, 1); + asm volatile("vredor.vs v24, v16, v8, v0.t"); + VCMP_U64(8, v24, 0x1000000000000007); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vredsum.c b/sw/riscvTests/isa/rv64uv/vredsum.c index 8b4c9860..4d515878 100644 --- a/sw/riscvTests/isa/rv64uv/vredsum.c +++ b/sw/riscvTests/isa/rv64uv/vredsum.c @@ -9,29 +9,29 @@ // Naive test void TEST_CASE1(void) { VSET(16, e8, m4); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U8(1, v12, 73); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U8(1, v24, 73); VSET(16, e16, m4); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 1); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U16(2, v12, 73); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U16(2, v24, 73); VSET(16, e32, m4); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, 1); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U32(3, v12, 73); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U32(3, v24, 73); #if ELEN == 64 VSET(16, e64, m4); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 
8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, 1); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U64(4, v12, 73); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U64(4, v24, 73); #endif } @@ -39,109 +39,109 @@ void TEST_CASE1(void) { void TEST_CASE2(void) { VSET(16, e8, m4); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1); - VLOAD_8(v12, 1); - asm volatile("vredsum.vs v12, v4, v8, v0.t"); - VCMP_U8(5, v12, 37); + VLOAD_8(v24, 1); + asm volatile("vredsum.vs v24, v16, v8, v0.t"); + VCMP_U8(5, v24, 37); VSET(16, e16, m4); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 1); - VLOAD_16(v12, 1); - asm volatile("vredsum.vs v12, v4, v8, v0.t"); - VCMP_U16(6, v12, 37); + VLOAD_16(v24, 1); + asm volatile("vredsum.vs v24, v16, v8, v0.t"); + VCMP_U16(6, v24, 37); VSET(16, e32, m4); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, 1); - VLOAD_32(v12, 1); - asm volatile("vredsum.vs v12, v4, v8, v0.t"); - VCMP_U32(7, v12, 37); + VLOAD_32(v24, 1); + asm volatile("vredsum.vs v24, v16, v8, v0.t"); + VCMP_U32(7, v24, 37); #if ELEN == 64 VSET(16, e64, m4); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, 1); - VLOAD_64(v12, 1); - asm volatile("vredsum.vs v12, v4, v8, v0.t"); - VCMP_U64(8, v12, 37); + VLOAD_64(v24, 1); + asm volatile("vredsum.vs v24, v16, v8, v0.t"); + VCMP_U64(8, v24, 37); #endif } // Are we respecting the undisturbed tail policy? 
void TEST_CASE3(void) { VSET(16, e8, m4); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_8(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U8(9, v12, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U8(9, v24, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VSET(16, e16, m4); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U16(10, v12, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U16(10, v24, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VSET(16, e32, m4); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U32(11, v12, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U32(11, v24, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #if ELEN == 64 VSET(16, e64, m4); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_64(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); 
- asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U64(12, v12, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U64(12, v24, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #endif } // Odd number of elements, undisturbed policy void TEST_CASE4(void) { VSET(15, e8, m4); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_8(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U8(13, v12, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U8(13, v24, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VSET(1, e16, m4); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U16(14, v12, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U16(14, v24, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VSET(3, e32, m4); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U32(15, v12, 7, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U32(15, 
v24, 7, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #if ELEN == 64 VSET(7, e64, m4); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_64(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U64(16, v12, 29, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U64(16, v24, 29, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VSET(15, e64, m4); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_64(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vredsum.vs v12, v4, v8"); - VCMP_U64(17, v12, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U64(17, v24, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #endif } @@ -149,28 +149,28 @@ void TEST_CASE4(void) { void TEST_CASE5(void) { VSET(15, e8, m4); VLOAD_8(v0, 0x00, 0x40); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v8, 100, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_8(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vredsum.vs v12, v4, v8, v0.t"); - VCMP_U8(18, v12, 107, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8, v0.t"); + VCMP_U8(18, v24, 107, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VSET(1, e16, m4); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v16, 
1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vredsum.vs v12, v4, v8, v0.t"); - VCMP_U16(19, v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8, v0.t"); + VCMP_U16(19, v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #if ELEN == 64 VSET(3, e32, m4); VLOAD_8(v0, 0xaa, 0x55); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vredsum.vs v12, v4, v8, v0.t"); - VCMP_U32(20, v12, 3, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8, v0.t"); + VCMP_U32(20, v24, 3, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vredxor.c b/sw/riscvTests/isa/rv64uv/vredxor.c index 89b80e9b..514dd13d 100644 --- a/sw/riscvTests/isa/rv64uv/vredxor.c +++ b/sw/riscvTests/isa/rv64uv/vredxor.c @@ -8,31 +8,31 @@ // Naive test void TEST_CASE1(void) { - VSET(4, e8, m2); - VLOAD_8(v4, 0x00, 0x01, 0x01, 0x00); + VSET(4, e8, m8); + VLOAD_8(v16, 0x00, 0x01, 0x01, 0x00); VLOAD_8(v8, 0x11); - asm volatile("vredxor.vs v12, v4, v8"); - VCMP_U8(1, v12, 0x11); + asm volatile("vredxor.vs v24, v16, v8"); + VCMP_U8(1, v24, 0x11); - VSET(4, e16, m2); - VLOAD_16(v4, 0x8000, 0x0301, 0x0101, 0x0001); + VSET(4, e16, m8); + VLOAD_16(v16, 0x8000, 0x0301, 0x0101, 0x0001); VLOAD_16(v8, 0xe001); - asm volatile("vredxor.vs v12, v4, v8"); - VCMP_U16(2, v12, 0x6200); + asm volatile("vredxor.vs v24, v16, v8"); + VCMP_U16(2, v24, 0x6200); - VSET(4, e32, m2); - VLOAD_32(v4, 0x00000001, 0x10000001, 0x00000000, 
0x00000000); + VSET(4, e32, m8); + VLOAD_32(v16, 0x00000001, 0x10000001, 0x00000000, 0x00000000); VLOAD_32(v8, 0x00001000); - asm volatile("vredxor.vs v12, v4, v8"); - VCMP_U32(3, v12, 0x10001000); + asm volatile("vredxor.vs v24, v16, v8"); + VCMP_U32(3, v24, 0x10001000); #if ELEN == 64 - VSET(4, e64, m2); - VLOAD_64(v4, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, + VSET(4, e64, m8); + VLOAD_64(v16, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, 0x0000000000000000); VLOAD_64(v8, 0x0000000000000007); - asm volatile("vredxor.vs v12, v4, v8"); - VCMP_U64(4, v12, 0x1000000000000006); + asm volatile("vredxor.vs v24, v16, v8"); + VCMP_U64(4, v24, 0x1000000000000006); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vrem.c b/sw/riscvTests/isa/rv64uv/vrem.c index ac36af5f..dbba3fcc 100644 --- a/sw/riscvTests/isa/rv64uv/vrem.c +++ b/sw/riscvTests/isa/rv64uv/vrem.c @@ -8,56 +8,56 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x9b, 0x28, 0xec, 0x86, 0x26, 0x85, 0xf7, 0x33, 0x46, 0x37, 0x2c, + VSET(16, e8, m8); + VLOAD_8(v16, 0x9b, 0x28, 0xec, 0x86, 0x26, 0x85, 0xf7, 0x33, 0x46, 0x37, 0x2c, 0x0c, 0x8e, 0xae, 0xa1, 0x93); - VLOAD_8(v6, 0x84, 0x5e, 0x3b, 0xdf, 0x10, 0xfc, 0x05, 0xcf, 0x42, 0xbe, 0x23, + VLOAD_8(v24, 0x84, 0x5e, 0x3b, 0xdf, 0x10, 0xfc, 0x05, 0xcf, 0x42, 0xbe, 0x23, 0xdb, 0x37, 0x78, 0xe2, 0x85); - asm volatile("vrem.vv v2, v4, v6"); - VCMP_I8(1, v2, 0x9b, 0x28, 0xec, 0xe9, 0x06, 0xfd, 0xfc, 0x02, 0x04, 0x37, + asm volatile("vrem.vv v8, v16, v24"); + VCMP_I8(1, v8, 0x9b, 0x28, 0xec, 0xe9, 0x06, 0xfd, 0xfc, 0x02, 0x04, 0x37, 0x09, 0x0c, 0xfc, 0xae, 0xfb, 0x93); - VSET(16, e16, m2); - VLOAD_16(v4, 0xb58f, 0xa184, 0xdcf9, 0xd084, 0xbbc6, 0xcf0e, 0xbbd4, 0xa20c, + VSET(16, e16, m8); + VLOAD_16(v16, 0xb58f, 0xa184, 0xdcf9, 0xd084, 0xbbc6, 0xcf0e, 0xbbd4, 0xa20c, 0xe04c, 0xd954, 0xda74, 0xa394, 0x207a, 0x8975, 0xddd3, 0x897d); - VLOAD_16(v6, 0x4534, 0xafd7, 0xf703, 0x92c2, 0x97e3, 0xd85a, 0x1540, 
0x8c5c, + VLOAD_16(v24, 0x4534, 0xafd7, 0xf703, 0x92c2, 0x97e3, 0xd85a, 0x1540, 0x8c5c, 0x4a71, 0x43a7, 0xe65d, 0x2bdc, 0x497b, 0x6aa0, 0x6071, 0xf431); - asm volatile("vrem.vv v2, v4, v6"); - VCMP_I16(2, v2, 0xfac3, 0xf1ad, 0xf7f0, 0xd084, 0xbbc6, 0xf6b4, 0xfb94, + asm volatile("vrem.vv v8, v16, v24"); + VCMP_I16(2, v8, 0xfac3, 0xf1ad, 0xf7f0, 0xd084, 0xbbc6, 0xf6b4, 0xfb94, 0xa20c, 0xe04c, 0xd954, 0xf417, 0xfb4c, 0x207a, 0xf415, 0xddd3, 0xff93); - VSET(16, e32, m2); - VLOAD_32(v4, 0x620db972, 0x60b1f870, 0x7d1badcf, 0x90a85eb6, 0xca41954b, + VSET(16, e32, m8); + VLOAD_32(v16, 0x620db972, 0x60b1f870, 0x7d1badcf, 0x90a85eb6, 0xca41954b, 0x10dc3772, 0xf7749e82, 0x027ed4d3, 0xdcb6a562, 0xa979baf0, 0xb480c184, 0x979555c6, 0x3f894108, 0x803bd362, 0x9038beec, 0x22d7ca24); - VLOAD_32(v6, 0xb9b52c0c, 0x30b52d8c, 0x832f89ea, 0x95181d9c, 0x85a6a24f, + VLOAD_32(v24, 0xb9b52c0c, 0x30b52d8c, 0x832f89ea, 0x95181d9c, 0x85a6a24f, 0x2f2c64a7, 0xebe4120c, 0x83852646, 0xfb1857b5, 0x25400571, 0xab2d7393, 0xddb87ac8, 0x01149cdf, 0x62b2c8dc, 0xaed39563, 0x41ec046e); - asm volatile("vrem.vv v2, v4, v6"); - VCMP_I32(3, v2, 0x1bc2e57e, 0x2ffccae4, 0x004b37b9, 0xfb90411a, 0xca41954b, + asm volatile("vrem.vv v8, v16, v24"); + VCMP_I32(3, v8, 0x1bc2e57e, 0x2ffccae4, 0x004b37b9, 0xfb90411a, 0xca41954b, 0x10dc3772, 0xf7749e82, 0x027ed4d3, 0xff0c3f6f, 0xf3f9c5d2, 0xb480c184, 0xfe6be56e, 0x00ddb682, 0xe2ee9c3e, 0xe1652989, 0x22d7ca24); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x9fc0a4e82116b913, 0xbd1a679edd2667e1, 0x559913931b91caf2, + VSET(16, e64, m8); + VLOAD_64(v16, 0x9fc0a4e82116b913, 0xbd1a679edd2667e1, 0x559913931b91caf2, 0xecfe6fb53a8b043d, 0xd8a48a111d080e66, 0x7baccec6b5a29e3e, 0x8746dc00d1d7ff0b, 0x467babd497d4931b, 0x6f7f3e669faa900c, 0x36e81d34c3ee3445, 0x99bcc4a480c648c5, 0xc8ae527a2cc4d908, 0xce3b4c1da847fe6a, 0x3709710bc016c1fc, 0x81471426bbe09e45, 0x0f0389282729456f); - VLOAD_64(v6, 0xf2473f877dd9c3df, 0xd2471da7c8ff8466, 0x7e93451b38765d03, + VLOAD_64(v24, 
0xf2473f877dd9c3df, 0xd2471da7c8ff8466, 0x7e93451b38765d03, 0xf7e905f27777369f, 0x73cbef014fd0f311, 0x4c3e4fc36800b443, 0x4c283e06a5067444, 0xdc8295e57f30e905, 0x08207a363067024e, 0x42aba773f21efc47, 0x5f00e9093d50b50f, 0x5ff0dcd41bf799fa, 0xe8c1d1110518742a, 0x34fe1a3555bf07f0, 0xd1bce4800f79700f, 0xff00f7d87b2c7068); - asm volatile("vrem.vv v2, v4, v6"); - VCMP_I64(4, v2, 0xffcde833b0225dfa, 0xead349f71426e37b, 0x559913931b91caf2, + asm volatile("vrem.vv v8, v16, v24"); + VCMP_I64(4, v8, 0xffcde833b0225dfa, 0xead349f71426e37b, 0x559913931b91caf2, 0xfd2c63d04b9c96ff, 0xd8a48a111d080e66, 0x2f6e7f034da1e9fb, 0xd36f1a0776de734f, 0x22fe41ba17057c20, 0x05d909a62a6f7216, 0x36e81d34c3ee3445, 0xf8bdadadbe16fdd4, 0xc8ae527a2cc4d908, @@ -67,99 +67,99 @@ void TEST_CASE1(void) { }; void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x9b, 0x28, 0xec, 0x86, 0x26, 0x85, 0xf7, 0x33, 0x46, 0x37, 0x2c, + VSET(16, e8, m8); + VLOAD_8(v16, 0x9b, 0x28, 0xec, 0x86, 0x26, 0x85, 0xf7, 0x33, 0x46, 0x37, 0x2c, 0x0c, 0x8e, 0xae, 0xa1, 0x93); - VLOAD_8(v6, 0x84, 0x5e, 0x3b, 0xdf, 0x10, 0xfc, 0x05, 0xcf, 0x42, 0xbe, 0x23, + VLOAD_8(v24, 0x84, 0x5e, 0x3b, 0xdf, 0x10, 0xfc, 0x05, 0xcf, 0x42, 0xbe, 0x23, 0xdb, 0x37, 0x78, 0xe2, 0x85); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vrem.vv v2, v4, v6, v0.t"); - VCMP_I8(5, v2, 0, 0x28, 0, 0xe9, 0, 0xfd, 0, 0x02, 0, 0x37, 0, 0x0c, 0, 0xae, + VCLEAR(v8); + asm volatile("vrem.vv v8, v16, v24, v0.t"); + VCMP_I8(5, v8, 0, 0x28, 0, 0xe9, 0, 0xfd, 0, 0x02, 0, 0x37, 0, 0x0c, 0, 0xae, 0, 0x93); - VSET(16, e16, m2); - VLOAD_16(v4, 0xb58f, 0xa184, 0xdcf9, 0xd084, 0xbbc6, 0xcf0e, 0xbbd4, 0xa20c, + VSET(16, e16, m8); + VLOAD_16(v16, 0xb58f, 0xa184, 0xdcf9, 0xd084, 0xbbc6, 0xcf0e, 0xbbd4, 0xa20c, 0xe04c, 0xd954, 0xda74, 0xa394, 0x207a, 0x8975, 0xddd3, 0x897d); - VLOAD_16(v6, 0x4534, 0xafd7, 0xf703, 0x92c2, 0x97e3, 0xd85a, 0x1540, 0x8c5c, + VLOAD_16(v24, 0x4534, 0xafd7, 0xf703, 0x92c2, 0x97e3, 0xd85a, 0x1540, 0x8c5c, 0x4a71, 0x43a7, 0xe65d, 0x2bdc, 
0x497b, 0x6aa0, 0x6071, 0xf431); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vrem.vv v2, v4, v6, v0.t"); - VCMP_I16(6, v2, 0, 0xf1ad, 0, 0xd084, 0, 0xf6b4, 0, 0xa20c, 0, 0xd954, 0, + VCLEAR(v8); + asm volatile("vrem.vv v8, v16, v24, v0.t"); + VCMP_I16(6, v8, 0, 0xf1ad, 0, 0xd084, 0, 0xf6b4, 0, 0xa20c, 0, 0xd954, 0, 0xfb4c, 0, 0xf415, 0, 0xff93); - VSET(16, e32, m2); - VLOAD_32(v4, 0x620db972, 0x60b1f870, 0x7d1badcf, 0x90a85eb6, 0xca41954b, + VSET(16, e32, m8); + VLOAD_32(v16, 0x620db972, 0x60b1f870, 0x7d1badcf, 0x90a85eb6, 0xca41954b, 0x10dc3772, 0xf7749e82, 0x027ed4d3, 0xdcb6a562, 0xa979baf0, 0xb480c184, 0x979555c6, 0x3f894108, 0x803bd362, 0x9038beec, 0x22d7ca24); - VLOAD_32(v6, 0xb9b52c0c, 0x30b52d8c, 0x832f89ea, 0x95181d9c, 0x85a6a24f, + VLOAD_32(v24, 0xb9b52c0c, 0x30b52d8c, 0x832f89ea, 0x95181d9c, 0x85a6a24f, 0x2f2c64a7, 0xebe4120c, 0x83852646, 0xfb1857b5, 0x25400571, 0xab2d7393, 0xddb87ac8, 0x01149cdf, 0x62b2c8dc, 0xaed39563, 0x41ec046e); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vrem.vv v2, v4, v6, v0.t"); - VCMP_I32(7, v2, 0, 0x2ffccae4, 0, 0xfb90411a, 0, 0x10dc3772, 0, 0x027ed4d3, 0, + VCLEAR(v8); + asm volatile("vrem.vv v8, v16, v24, v0.t"); + VCMP_I32(7, v8, 0, 0x2ffccae4, 0, 0xfb90411a, 0, 0x10dc3772, 0, 0x027ed4d3, 0, 0xf3f9c5d2, 0, 0xfe6be56e, 0, 0xe2ee9c3e, 0, 0x22d7ca24); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x9fc0a4e82116b913, 0xbd1a679edd2667e1, 0x559913931b91caf2, + VSET(16, e64, m8); + VLOAD_64(v16, 0x9fc0a4e82116b913, 0xbd1a679edd2667e1, 0x559913931b91caf2, 0xecfe6fb53a8b043d, 0xd8a48a111d080e66, 0x7baccec6b5a29e3e, 0x8746dc00d1d7ff0b, 0x467babd497d4931b, 0x6f7f3e669faa900c, 0x36e81d34c3ee3445, 0x99bcc4a480c648c5, 0xc8ae527a2cc4d908, 0xce3b4c1da847fe6a, 0x3709710bc016c1fc, 0x81471426bbe09e45, 0x0f0389282729456f); - VLOAD_64(v6, 0xf2473f877dd9c3df, 0xd2471da7c8ff8466, 0x7e93451b38765d03, + VLOAD_64(v24, 0xf2473f877dd9c3df, 0xd2471da7c8ff8466, 0x7e93451b38765d03, 0xf7e905f27777369f, 0x73cbef014fd0f311, 
0x4c3e4fc36800b443, 0x4c283e06a5067444, 0xdc8295e57f30e905, 0x08207a363067024e, 0x42aba773f21efc47, 0x5f00e9093d50b50f, 0x5ff0dcd41bf799fa, 0xe8c1d1110518742a, 0x34fe1a3555bf07f0, 0xd1bce4800f79700f, 0xff00f7d87b2c7068); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vrem.vv v2, v4, v6, v0.t"); - VCMP_I64(8, v2, 0, 0xead349f71426e37b, 0, 0xfd2c63d04b9c96ff, 0, + VCLEAR(v8); + asm volatile("vrem.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0xead349f71426e37b, 0, 0xfd2c63d04b9c96ff, 0, 0x2f6e7f034da1e9fb, 0, 0x22fe41ba17057c20, 0, 0x36e81d34c3ee3445, 0, 0xc8ae527a2cc4d908, 0, 0x020b56d66a57ba0c, 0, 0x00120ed75ec3db87); #endif }; void TEST_CASE3(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x5b, 0x3b, 0xc4, 0x95, 0x41, 0x71, 0x9b, 0x67, 0x84, 0x2e, 0x0a, + VSET(16, e8, m8); + VLOAD_8(v16, 0x5b, 0x3b, 0xc4, 0x95, 0x41, 0x71, 0x9b, 0x67, 0x84, 0x2e, 0x0a, 0x2a, 0xb2, 0x57, 0xe5, 0x6c); int64_t scalar = 5; - asm volatile("vrem.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I8(9, v2, 0x01, 0x04, 0x00, 0xfe, 0x00, 0x03, 0xff, 0x03, 0xfc, 0x01, + asm volatile("vrem.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v8, 0x01, 0x04, 0x00, 0xfe, 0x00, 0x03, 0xff, 0x03, 0xfc, 0x01, 0x00, 0x02, 0xfd, 0x02, 0xfe, 0x03); - VSET(16, e16, m2); - VLOAD_16(v4, 0xc670, 0x8f3b, 0x200f, 0x52ea, 0xfdce, 0xcf06, 0x57f1, 0x1936, + VSET(16, e16, m8); + VLOAD_16(v16, 0xc670, 0x8f3b, 0x200f, 0x52ea, 0xfdce, 0xcf06, 0x57f1, 0x1936, 0xb6ec, 0x69e8, 0x0abf, 0x441e, 0xa420, 0x396c, 0xe7c9, 0xa464); scalar = -538; - asm volatile("vrem.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I16(10, v2, 0xff2e, 0xfe9d, 0x0089, 0x00f4, 0xffe8, 0xff5c, 0x01c7, + asm volatile("vrem.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v8, 0xff2e, 0xfe9d, 0x0089, 0x00f4, 0xffe8, 0xff5c, 0x01c7, 0x0218, 0xfe60, 0x00d4, 0x003d, 0x00de, 0xfe7e, 0x00ae, 0xfee7, 0xfec2); - VSET(16, e32, m2); - VLOAD_32(v4, 0xf937dbf9, 0x6d855b59, 0x3bd09126, 0xaed11886, 0x6eb6f4bd, + VSET(16, e32, m8); + VLOAD_32(v16, 0xf937dbf9, 
0x6d855b59, 0x3bd09126, 0xaed11886, 0x6eb6f4bd, 0x5c639253, 0xca0f2abf, 0x57fec97b, 0x39496099, 0x8bfcdd58, 0x0f19f6e2, 0x2070c8d4, 0x8c689324, 0x2eecd9d7, 0xe2907e94, 0xb6cc2d44); scalar = 649; - asm volatile("vrem.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I32(11, v2, 0xfffffee4, 0x00000116, 0x00000160, 0xffffffef, 0x00000217, + asm volatile("vrem.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v8, 0xfffffee4, 0x00000116, 0x00000160, 0xffffffef, 0x00000217, 0x00000275, 0xfffffea6, 0x000000a9, 0x000000e4, 0xfffffe09, 0x00000272, 0x0000023c, 0xffffff79, 0x000000ce, 0xffffffb3, 0xfffffe0e); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0x94236504e03e6525, 0x8d219d7afe5b2fb0, 0xc65a0b252860ab73, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0x94236504e03e6525, 0x8d219d7afe5b2fb0, 0xc65a0b252860ab73, */ /* 0x2ca68636bacbc0bb, 0x275575f3e3fea940, 0x8f546251aaad354a, */ /* 0xb1462969035e0fa7, 0x5c9cdc19273ce111, 0x25a8487741ee75db, */ @@ -167,8 +167,8 @@ void TEST_CASE3(void) { /* 0xb308a4fe0dcbb2f3, 0xf2fa735abc2db4d0, 0xc73c476461ac3f28, */ /* 0xb2830c2607bfffcc); */ /* scalar = -59223; */ - /* asm volatile("vrem.vx v2, v4, %[A]" ::[A] "r"(scalar)); */ - /* VCMP_I64(12, v2, 0xffffffffffff299e, 0xffffffffffff1f8a, + /* asm volatile("vrem.vx v8, v16, %[A]" ::[A] "r"(scalar)); */ + /* VCMP_I64(12, v8, 0xffffffffffff299e, 0xffffffffffff1f8a, * 0xffffffffffff57aa, */ /* 0x000000000000cc8c, 0x000000000000416e, 0xffffffffffffcecd, */ /* 0xffffffffffff7e24, 0x000000000000397b, 0x000000000000bb50, */ @@ -179,41 +179,41 @@ void TEST_CASE3(void) { }; void TEST_CASE4(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x5b, 0x3b, 0xc4, 0x95, 0x41, 0x71, 0x9b, 0x67, 0x84, 0x2e, 0x0a, + VSET(16, e8, m8); + VLOAD_8(v16, 0x5b, 0x3b, 0xc4, 0x95, 0x41, 0x71, 0x9b, 0x67, 0x84, 0x2e, 0x0a, 0x2a, 0xb2, 0x57, 0xe5, 0x6c); int64_t scalar = 5; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vrem.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I8(13, v2, 0, 0x04, 0, 
0xfe, 0, 0x03, 0, 0x03, 0, 0x01, 0, 0x02, 0, 0x02, + VCLEAR(v8); + asm volatile("vrem.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v8, 0, 0x04, 0, 0xfe, 0, 0x03, 0, 0x03, 0, 0x01, 0, 0x02, 0, 0x02, 0, 0x03); - VSET(16, e16, m2); - VLOAD_16(v4, 0xc670, 0x8f3b, 0x200f, 0x52ea, 0xfdce, 0xcf06, 0x57f1, 0x1936, + VSET(16, e16, m8); + VLOAD_16(v16, 0xc670, 0x8f3b, 0x200f, 0x52ea, 0xfdce, 0xcf06, 0x57f1, 0x1936, 0xb6ec, 0x69e8, 0x0abf, 0x441e, 0xa420, 0x396c, 0xe7c9, 0xa464); scalar = -538; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vrem.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I16(14, v2, 0, 0xfe9d, 0, 0x00f4, 0, 0xff5c, 0, 0x0218, 0, 0x00d4, 0, + VCLEAR(v8); + asm volatile("vrem.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v8, 0, 0xfe9d, 0, 0x00f4, 0, 0xff5c, 0, 0x0218, 0, 0x00d4, 0, 0x00de, 0, 0x00ae, 0, 0xfec2); - VSET(16, e32, m2); - VLOAD_32(v4, 0xf937dbf9, 0x6d855b59, 0x3bd09126, 0xaed11886, 0x6eb6f4bd, + VSET(16, e32, m8); + VLOAD_32(v16, 0xf937dbf9, 0x6d855b59, 0x3bd09126, 0xaed11886, 0x6eb6f4bd, 0x5c639253, 0xca0f2abf, 0x57fec97b, 0x39496099, 0x8bfcdd58, 0x0f19f6e2, 0x2070c8d4, 0x8c689324, 0x2eecd9d7, 0xe2907e94, 0xb6cc2d44); scalar = 649; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vrem.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I32(15, v2, 0, 0x00000116, 0, 0xffffffef, 0, 0x00000275, 0, 0x000000a9, + VCLEAR(v8); + asm volatile("vrem.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v8, 0, 0x00000116, 0, 0xffffffef, 0, 0x00000275, 0, 0x000000a9, 0, 0xfffffe09, 0, 0x0000023c, 0, 0x000000ce, 0, 0xfffffe0e); /* #if ELEN == 64 */ - /* VSET(16, e64, m2); */ - /* VLOAD_64(v4, 0x94236504e03e6525, 0x8d219d7afe5b2fb0, 0xc65a0b252860ab73, + /* VSET(16, e64, m8); */ + /* VLOAD_64(v16, 0x94236504e03e6525, 0x8d219d7afe5b2fb0, 0xc65a0b252860ab73, */ /* 0x2ca68636bacbc0bb, 0x275575f3e3fea940, 0x8f546251aaad354a, */ /* 0xb1462969035e0fa7, 0x5c9cdc19273ce111, 0x25a8487741ee75db, */ @@ -222,9 +222,9 @@ 
void TEST_CASE4(void) { /* 0xb2830c2607bfffcc); */ /* scalar = -59223; */ /* VLOAD_8(v0, 0xAA, 0xAA); */ - /* VCLEAR(v2); */ - /* asm volatile("vrem.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); */ - /* VCMP_I64(16, v2, 0, 0xffffffffffff1f8a, 0, 0x000000000000cc8c, 0, */ + /* VCLEAR(v8); */ + /* asm volatile("vrem.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); */ + /* VCMP_I64(16, v8, 0, 0xffffffffffff1f8a, 0, 0x000000000000cc8c, 0, */ /* 0xffffffffffffcecd, 0, 0x000000000000397b, 0, * 0x0000000000006b00, 0, */ /* 0xffffffffffff9a21, 0, 0xffffffffffffca84, 0, diff --git a/sw/riscvTests/isa/rv64uv/vremu.c b/sw/riscvTests/isa/rv64uv/vremu.c index 42080aa6..fc71fd64 100644 --- a/sw/riscvTests/isa/rv64uv/vremu.c +++ b/sw/riscvTests/isa/rv64uv/vremu.c @@ -8,56 +8,56 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x11, 0xd2, 0x6a, 0xcc, 0x14, 0xe4, 0x2c, 0x7f, 0xd2, 0x6b, 0x34, + VSET(16, e8, m8); + VLOAD_8(v16, 0x11, 0xd2, 0x6a, 0xcc, 0x14, 0xe4, 0x2c, 0x7f, 0xd2, 0x6b, 0x34, 0x5c, 0x75, 0xdd, 0x0c, 0x42); - VLOAD_8(v6, 0x77, 0xb2, 0xd1, 0x95, 0x6f, 0xbe, 0x0d, 0x5a, 0x93, 0x02, 0xaf, + VLOAD_8(v24, 0x77, 0xb2, 0xd1, 0x95, 0x6f, 0xbe, 0x0d, 0x5a, 0x93, 0x02, 0xaf, 0xfd, 0x94, 0xe0, 0xb7, 0xe6); - asm volatile("vremu.vv v2, v4, v6"); - VCMP_I8(1, v2, 0x11, 0x20, 0x6a, 0x37, 0x14, 0x26, 0x05, 0x25, 0x3f, 0x01, + asm volatile("vremu.vv v8, v16, v24"); + VCMP_I8(1, v8, 0x11, 0x20, 0x6a, 0x37, 0x14, 0x26, 0x05, 0x25, 0x3f, 0x01, 0x34, 0x5c, 0x75, 0xdd, 0x0c, 0x42); - VSET(16, e16, m2); - VLOAD_16(v4, 0xf77a, 0x54d7, 0xe527, 0xe28f, 0x53ed, 0x9301, 0xde4f, 0xcb17, + VSET(16, e16, m8); + VLOAD_16(v16, 0xf77a, 0x54d7, 0xe527, 0xe28f, 0x53ed, 0x9301, 0xde4f, 0xcb17, 0xae43, 0x9e4a, 0xa0c2, 0xdf31, 0xb66f, 0x286d, 0x1d15, 0x0480); - VLOAD_16(v6, 0x5bfa, 0x0571, 0x8a43, 0x6350, 0xb962, 0x71fc, 0x0b54, 0x1e8b, + VLOAD_16(v24, 0x5bfa, 0x0571, 0x8a43, 0x6350, 0xb962, 0x71fc, 0x0b54, 0x1e8b, 0x6c25, 0x9c0d, 0x5950, 0x1887, 0xbc18, 0x628e, 0x6561, 
0x407f); - asm volatile("vremu.vv v2, v4, v6"); - VCMP_I16(2, v2, 0x3f86, 0x0338, 0x5ae4, 0x1bef, 0x53ed, 0x2105, 0x0713, + asm volatile("vremu.vv v8, v16, v24"); + VCMP_I16(2, v8, 0x3f86, 0x0338, 0x5ae4, 0x1bef, 0x53ed, 0x2105, 0x0713, 0x13d5, 0x421e, 0x023d, 0x4772, 0x0272, 0xb66f, 0x286d, 0x1d15, 0x0480); - VSET(16, e32, m2); - VLOAD_32(v4, 0x647d8841, 0xf9e0aabf, 0xea4aa122, 0xd6178d3e, 0x64a7afe5, + VSET(16, e32, m8); + VLOAD_32(v16, 0x647d8841, 0xf9e0aabf, 0xea4aa122, 0xd6178d3e, 0x64a7afe5, 0xe0350cba, 0xc72768ec, 0x9f977a31, 0x5e1c2ac4, 0xcd44b950, 0x39dc32f4, 0x1dc82ea3, 0xd1cf125f, 0xc677269c, 0x6405ec5b, 0x653a05ee); - VLOAD_32(v6, 0x89828d99, 0x5c7c7db0, 0x2911efb6, 0x1f6982ff, 0x564e4bd4, + VLOAD_32(v24, 0x89828d99, 0x5c7c7db0, 0x2911efb6, 0x1f6982ff, 0x564e4bd4, 0xc4576bff, 0x8e998104, 0x4a23ba44, 0x994b4630, 0x017ee935, 0xa38c7dae, 0x893dfb15, 0x4969125f, 0x9a951d27, 0x09b6017f, 0x5a0a7906); - asm volatile("vremu.vv v2, v4, v6"); - VCMP_I32(3, v2, 0x647d8841, 0x40e7af5f, 0x1cf0f294, 0x199e7b44, 0x0e596411, + asm volatile("vremu.vv v8, v16, v24"); + VCMP_I32(3, v8, 0x647d8841, 0x40e7af5f, 0x1cf0f294, 0x199e7b44, 0x0e596411, 0x1bdda0bb, 0x388de7e8, 0x0b5005a9, 0x5e1c2ac4, 0x0059ebf3, 0x39dc32f4, 0x1dc82ea3, 0x3efceda1, 0x2be20975, 0x02e9dd65, 0x0b2f8ce8); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x09ab27501ccac4a6, 0x97eb5bf189b39a0e, 0x26f588069b0858c4, + VSET(16, e64, m8); + VLOAD_64(v16, 0x09ab27501ccac4a6, 0x97eb5bf189b39a0e, 0x26f588069b0858c4, 0x9a251c274a394df3, 0x54b3587602f8d9d2, 0xc3cc623deda95ca7, 0x118c4335397980bf, 0xc3e2d283cb39133d, 0x71837e24114813fc, 0x85a1f65867438a09, 0x80f01e0588afc9a0, 0x60e89a1e5a43d9f5, 0x93a87cf6308ad888, 0xca3976f49ac6a681, 0xcfc7c8f225b47766, 0xeaa4ce2cf507b527); - VLOAD_64(v6, 0x9fed81c550326301, 0x445bb7ac18d0eaa1, 0x040f8ff58f5adf72, + VLOAD_64(v24, 0x9fed81c550326301, 0x445bb7ac18d0eaa1, 0x040f8ff58f5adf72, 0xafc4ff6b8eb4d201, 0xfba36cabfc3fb4a0, 0x9c3ed271bf173d29, 0xe8b7e325c9ff594b, 
0x05169e56693600d7, 0x08e72c4bb62ad267, 0xbd9677ee996d5fa5, 0x900295e8502a9817, 0x39e0bfa9927679a8, 0xdd0ca7797d532524, 0x6f8f78c47ddee88a, 0x2f40f7661cca9eee, 0x8e4a3b2358129e92); - asm volatile("vremu.vv v2, v4, v6"); - VCMP_I64(4, v2, 0x09ab27501ccac4a6, 0x0f33ec995811c4cc, 0x0269786490d67dc2, + asm volatile("vremu.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x09ab27501ccac4a6, 0x0f33ec995811c4cc, 0x0269786490d67dc2, 0x9a251c274a394df3, 0x54b3587602f8d9d2, 0x278d8fcc2e921f7e, 0x118c4335397980bf, 0x028751b02d34f353, 0x06ad6a9787463728, 0x85a1f65867438a09, 0x80f01e0588afc9a0, 0x2707da74c7cd604d, @@ -67,107 +67,107 @@ void TEST_CASE1(void) { }; void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x11, 0xd2, 0x6a, 0xcc, 0x14, 0xe4, 0x2c, 0x7f, 0xd2, 0x6b, 0x34, + VSET(16, e8, m8); + VLOAD_8(v16, 0x11, 0xd2, 0x6a, 0xcc, 0x14, 0xe4, 0x2c, 0x7f, 0xd2, 0x6b, 0x34, 0x5c, 0x75, 0xdd, 0x0c, 0x42); - VLOAD_8(v6, 0x77, 0xb2, 0xd1, 0x95, 0x6f, 0xbe, 0x0d, 0x5a, 0x93, 0x02, 0xaf, + VLOAD_8(v24, 0x77, 0xb2, 0xd1, 0x95, 0x6f, 0xbe, 0x0d, 0x5a, 0x93, 0x02, 0xaf, 0xfd, 0x94, 0xe0, 0xb7, 0xe6); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vremu.vv v2, v4, v6, v0.t"); - VCMP_I8(5, v2, 0, 0x20, 0, 0x37, 0, 0x26, 0, 0x25, 0, 0x01, 0, 0x5c, 0, 0xdd, + VCLEAR(v8); + asm volatile("vremu.vv v8, v16, v24, v0.t"); + VCMP_I8(5, v8, 0, 0x20, 0, 0x37, 0, 0x26, 0, 0x25, 0, 0x01, 0, 0x5c, 0, 0xdd, 0, 0x42); - VSET(16, e16, m2); - VLOAD_16(v4, 0xf77a, 0x54d7, 0xe527, 0xe28f, 0x53ed, 0x9301, 0xde4f, 0xcb17, + VSET(16, e16, m8); + VLOAD_16(v16, 0xf77a, 0x54d7, 0xe527, 0xe28f, 0x53ed, 0x9301, 0xde4f, 0xcb17, 0xae43, 0x9e4a, 0xa0c2, 0xdf31, 0xb66f, 0x286d, 0x1d15, 0x0480); - VLOAD_16(v6, 0x5bfa, 0x0571, 0x8a43, 0x6350, 0xb962, 0x71fc, 0x0b54, 0x1e8b, + VLOAD_16(v24, 0x5bfa, 0x0571, 0x8a43, 0x6350, 0xb962, 0x71fc, 0x0b54, 0x1e8b, 0x6c25, 0x9c0d, 0x5950, 0x1887, 0xbc18, 0x628e, 0x6561, 0x407f); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vremu.vv v2, v4, v6, v0.t"); - VCMP_I16(6, 
v2, 0, 0x0338, 0, 0x1bef, 0, 0x2105, 0, 0x13d5, 0, 0x023d, 0, + VCLEAR(v8); + asm volatile("vremu.vv v8, v16, v24, v0.t"); + VCMP_I16(6, v8, 0, 0x0338, 0, 0x1bef, 0, 0x2105, 0, 0x13d5, 0, 0x023d, 0, 0x0272, 0, 0x286d, 0, 0x0480); - VSET(16, e32, m2); - VLOAD_32(v4, 0x647d8841, 0xf9e0aabf, 0xea4aa122, 0xd6178d3e, 0x64a7afe5, + VSET(16, e32, m8); + VLOAD_32(v16, 0x647d8841, 0xf9e0aabf, 0xea4aa122, 0xd6178d3e, 0x64a7afe5, 0xe0350cba, 0xc72768ec, 0x9f977a31, 0x5e1c2ac4, 0xcd44b950, 0x39dc32f4, 0x1dc82ea3, 0xd1cf125f, 0xc677269c, 0x6405ec5b, 0x653a05ee); - VLOAD_32(v6, 0x89828d99, 0x5c7c7db0, 0x2911efb6, 0x1f6982ff, 0x564e4bd4, + VLOAD_32(v24, 0x89828d99, 0x5c7c7db0, 0x2911efb6, 0x1f6982ff, 0x564e4bd4, 0xc4576bff, 0x8e998104, 0x4a23ba44, 0x994b4630, 0x017ee935, 0xa38c7dae, 0x893dfb15, 0x4969125f, 0x9a951d27, 0x09b6017f, 0x5a0a7906); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vremu.vv v2, v4, v6, v0.t"); - VCMP_I32(7, v2, 0, 0x40e7af5f, 0, 0x199e7b44, 0, 0x1bdda0bb, 0, 0x0b5005a9, 0, + VCLEAR(v8); + asm volatile("vremu.vv v8, v16, v24, v0.t"); + VCMP_I32(7, v8, 0, 0x40e7af5f, 0, 0x199e7b44, 0, 0x1bdda0bb, 0, 0x0b5005a9, 0, 0x0059ebf3, 0, 0x1dc82ea3, 0, 0x2be20975, 0, 0x0b2f8ce8); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x09ab27501ccac4a6, 0x97eb5bf189b39a0e, 0x26f588069b0858c4, + VSET(16, e64, m8); + VLOAD_64(v16, 0x09ab27501ccac4a6, 0x97eb5bf189b39a0e, 0x26f588069b0858c4, 0x9a251c274a394df3, 0x54b3587602f8d9d2, 0xc3cc623deda95ca7, 0x118c4335397980bf, 0xc3e2d283cb39133d, 0x71837e24114813fc, 0x85a1f65867438a09, 0x80f01e0588afc9a0, 0x60e89a1e5a43d9f5, 0x93a87cf6308ad888, 0xca3976f49ac6a681, 0xcfc7c8f225b47766, 0xeaa4ce2cf507b527); - VLOAD_64(v6, 0x9fed81c550326301, 0x445bb7ac18d0eaa1, 0x040f8ff58f5adf72, + VLOAD_64(v24, 0x9fed81c550326301, 0x445bb7ac18d0eaa1, 0x040f8ff58f5adf72, 0xafc4ff6b8eb4d201, 0xfba36cabfc3fb4a0, 0x9c3ed271bf173d29, 0xe8b7e325c9ff594b, 0x05169e56693600d7, 0x08e72c4bb62ad267, 0xbd9677ee996d5fa5, 0x900295e8502a9817, 
0x39e0bfa9927679a8, 0xdd0ca7797d532524, 0x6f8f78c47ddee88a, 0x2f40f7661cca9eee, 0x8e4a3b2358129e92); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vremu.vv v2, v4, v6, v0.t"); - VCMP_I64(8, v2, 0, 0x0f33ec995811c4cc, 0, 0x9a251c274a394df3, 0, + VCLEAR(v8); + asm volatile("vremu.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x0f33ec995811c4cc, 0, 0x9a251c274a394df3, 0, 0x278d8fcc2e921f7e, 0, 0x028751b02d34f353, 0, 0x85a1f65867438a09, 0, 0x2707da74c7cd604d, 0, 0x5aa9fe301ce7bdf7, 0, 0x5c5a93099cf51695); #endif }; void TEST_CASE3(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x98, 0x1a, 0xbe, 0x48, 0x7c, 0xd9, 0x5e, 0x58, 0x2e, 0x46, 0x0c, + VSET(16, e8, m8); + VLOAD_8(v16, 0x98, 0x1a, 0xbe, 0x48, 0x7c, 0xd9, 0x5e, 0x58, 0x2e, 0x46, 0x0c, 0x24, 0xc5, 0x2b, 0x37, 0xbe); uint64_t scalar = 5; - asm volatile("vremu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I8(9, v2, 0x02, 0x01, 0x00, 0x02, 0x04, 0x02, 0x04, 0x03, 0x01, 0x00, + asm volatile("vremu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v8, 0x02, 0x01, 0x00, 0x02, 0x04, 0x02, 0x04, 0x03, 0x01, 0x00, 0x02, 0x01, 0x02, 0x03, 0x00, 0x00); - VSET(16, e16, m2); - VLOAD_16(v4, 0xf11f, 0xb8cd, 0xb686, 0xc226, 0xc35a, 0xd724, 0x03f1, 0xcf10, + VSET(16, e16, m8); + VLOAD_16(v16, 0xf11f, 0xb8cd, 0xb686, 0xc226, 0xc35a, 0xd724, 0x03f1, 0xcf10, 0xbae0, 0x9f01, 0x1d0f, 0xf53c, 0x5461, 0x341e, 0x9ae7, 0x032b); scalar = 538; - asm volatile("vremu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I16(10, v2, 0x018b, 0x01f7, 0x01ca, 0x00ce, 0x0202, 0x00c8, 0x01d7, + asm volatile("vremu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v8, 0x018b, 0x01f7, 0x01ca, 0x00ce, 0x0202, 0x00c8, 0x01d7, 0x011c, 0x01f0, 0x0163, 0x01bd, 0x0174, 0x0051, 0x01ae, 0x017d, 0x0111); - VSET(16, e32, m2); - VLOAD_32(v4, 0x9c36da54, 0x1b1dea93, 0x80be8651, 0x03a23fcf, 0x26973d17, + VSET(16, e32, m8); + VLOAD_32(v16, 0x9c36da54, 0x1b1dea93, 0x80be8651, 0x03a23fcf, 0x26973d17, 0x521f01df, 0x09e8f77a, 0x5b231aa2, 0xd4bea1df, 0x529b4f34, 0x800a5d88, 
0xe7b02512, 0xf7954032, 0x48652b8c, 0x8b14b883, 0x121a9b8b); scalar = 649; - asm volatile("vremu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I32(11, v2, 0x00000039, 0x00000141, 0x0000020b, 0x0000015f, 0x0000008a, + asm volatile("vremu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v8, 0x00000039, 0x00000141, 0x0000020b, 0x0000015f, 0x0000008a, 0x00000199, 0x00000214, 0x0000006c, 0x0000025d, 0x000001a6, 0x000000d2, 0x00000168, 0x000001e6, 0x00000266, 0x00000188, 0x00000159); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x1882c5f4b911b949, 0x6ca37133428ed155, 0xbacb9408aa8251bf, + VSET(16, e64, m8); + VLOAD_64(v16, 0x1882c5f4b911b949, 0x6ca37133428ed155, 0xbacb9408aa8251bf, 0x62d79deed97681f5, 0x56258335e007492c, 0x2428afa90a14fa61, 0xd62824119c3084c6, 0xef97986ae9ea2da7, 0xfc28c84e37024f10, 0x1f475f820dec67e1, 0x9c180cfef468c050, 0x4be017933813e27e, 0xafd2b5edb83df693, 0xddd4766a628d4c30, 0xa1f4d0f48a6ac917, 0x827a07db9e6a8897); scalar = 9223; - asm volatile("vremu.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_I64(12, v2, 0x000000000000167d, 0x00000000000015f2, 0x00000000000019be, + asm volatile("vremu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0x000000000000167d, 0x00000000000015f2, 0x00000000000019be, 0x00000000000003fd, 0x00000000000010ce, 0x0000000000001863, 0x0000000000000750, 0x0000000000000062, 0x0000000000002237, 0x00000000000002bc, 0x0000000000000061, 0x0000000000001b82, @@ -177,41 +177,41 @@ void TEST_CASE3(void) { }; void TEST_CASE4(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x98, 0x1a, 0xbe, 0x48, 0x7c, 0xd9, 0x5e, 0x58, 0x2e, 0x46, 0x0c, + VSET(16, e8, m8); + VLOAD_8(v16, 0x98, 0x1a, 0xbe, 0x48, 0x7c, 0xd9, 0x5e, 0x58, 0x2e, 0x46, 0x0c, 0x24, 0xc5, 0x2b, 0x37, 0xbe); uint64_t scalar = 5; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vremu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I8(13, v2, 0, 0x01, 0, 0x02, 0, 0x02, 0, 0x03, 0, 0x00, 0, 0x01, 0, 0x03, + VCLEAR(v8); + asm volatile("vremu.vx v8, v16, %[A], v0.t" 
::[A] "r"(scalar)); + VCMP_I8(13, v8, 0, 0x01, 0, 0x02, 0, 0x02, 0, 0x03, 0, 0x00, 0, 0x01, 0, 0x03, 0, 0x00); - VSET(16, e16, m2); - VLOAD_16(v4, 0xf11f, 0xb8cd, 0xb686, 0xc226, 0xc35a, 0xd724, 0x03f1, 0xcf10, + VSET(16, e16, m8); + VLOAD_16(v16, 0xf11f, 0xb8cd, 0xb686, 0xc226, 0xc35a, 0xd724, 0x03f1, 0xcf10, 0xbae0, 0x9f01, 0x1d0f, 0xf53c, 0x5461, 0x341e, 0x9ae7, 0x032b); scalar = 538; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vremu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I16(14, v2, 0, 0x01f7, 0, 0x00ce, 0, 0x00c8, 0, 0x011c, 0, 0x0163, 0, + VCLEAR(v8); + asm volatile("vremu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v8, 0, 0x01f7, 0, 0x00ce, 0, 0x00c8, 0, 0x011c, 0, 0x0163, 0, 0x0174, 0, 0x01ae, 0, 0x0111); - VSET(16, e32, m2); - VLOAD_32(v4, 0x9c36da54, 0x1b1dea93, 0x80be8651, 0x03a23fcf, 0x26973d17, + VSET(16, e32, m8); + VLOAD_32(v16, 0x9c36da54, 0x1b1dea93, 0x80be8651, 0x03a23fcf, 0x26973d17, 0x521f01df, 0x09e8f77a, 0x5b231aa2, 0xd4bea1df, 0x529b4f34, 0x800a5d88, 0xe7b02512, 0xf7954032, 0x48652b8c, 0x8b14b883, 0x121a9b8b); scalar = 649; VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v2); - asm volatile("vremu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I32(15, v2, 0, 0x00000141, 0, 0x0000015f, 0, 0x00000199, 0, 0x0000006c, + VCLEAR(v8); + asm volatile("vremu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v8, 0, 0x00000141, 0, 0x0000015f, 0, 0x00000199, 0, 0x0000006c, 0, 0x000001a6, 0, 0x00000168, 0, 0x00000266, 0, 0x00000159); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x1882c5f4b911b949, 0x6ca37133428ed155, 0xbacb9408aa8251bf, + VSET(16, e64, m8); + VLOAD_64(v16, 0x1882c5f4b911b949, 0x6ca37133428ed155, 0xbacb9408aa8251bf, 0x62d79deed97681f5, 0x56258335e007492c, 0x2428afa90a14fa61, 0xd62824119c3084c6, 0xef97986ae9ea2da7, 0xfc28c84e37024f10, 0x1f475f820dec67e1, 0x9c180cfef468c050, 0x4be017933813e27e, @@ -219,9 +219,9 @@ void TEST_CASE4(void) { 0x827a07db9e6a8897); scalar = 9223; VLOAD_8(v0, 0xAA, 0xAA); - 
VCLEAR(v2); - asm volatile("vremu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_I64(16, v2, 0, 0x00000000000015f2, 0, 0x00000000000003fd, 0, + VCLEAR(v8); + asm volatile("vremu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x00000000000015f2, 0, 0x00000000000003fd, 0, 0x0000000000001863, 0, 0x0000000000000062, 0, 0x00000000000002bc, 0, 0x0000000000001b82, 0, 0x0000000000000fb7, 0, 0x0000000000000545); #endif diff --git a/sw/riscvTests/isa/rv64uv/vrsub.c b/sw/riscvTests/isa/rv64uv/vrsub.c index cdefddb3..25fd0e6e 100644 --- a/sw/riscvTests/isa/rv64uv/vrsub.c +++ b/sw/riscvTests/isa/rv64uv/vrsub.c @@ -8,91 +8,91 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vrsub.vi v6, v2, 10"); - VCMP_U8(1, v6, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + VSET(16, e8, m8); + VLOAD_8(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v24, v8, 10"); + VCMP_U8(1, v24, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, -25, -30); - VSET(16, e16, m2); - VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vrsub.vi v6, v2, 10"); - VCMP_U16(2, v6, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + VSET(16, e16, m8); + VLOAD_16(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v24, v8, 10"); + VCMP_U16(2, v24, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, -25, -30); - VSET(16, e32, m2); - VLOAD_32(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vrsub.vi v6, v2, 10"); - VCMP_U32(3, v6, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + VSET(16, e32, m8); + VLOAD_32(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v24, v8, 10"); + VCMP_U32(3, v24, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, 
-10, -15, -20, -25, -30); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vrsub.vi v6, v2, 10"); - VCMP_U64(4, v6, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v24, v8, 10"); + VCMP_U64(4, v24, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, -25, -30); #endif } void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e8, m8); + VLOAD_8(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0x33, 0x33); - VCLEAR(v6); - asm volatile("vrsub.vi v6, v2, 10, v0.t"); - VCMP_U8(5, v6, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); + VCLEAR(v24); + asm volatile("vrsub.vi v24, v8, 10, v0.t"); + VCMP_U8(5, v24, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); - VSET(16, e16, m2); - VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e16, m8); + VLOAD_16(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0x33, 0x33); - VCLEAR(v6); - asm volatile("vrsub.vi v6, v2, 10, v0.t"); - VCMP_U16(6, v6, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); + VCLEAR(v24); + asm volatile("vrsub.vi v24, v8, 10, v0.t"); + VCMP_U16(6, v24, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); - VSET(16, e32, m2); - VLOAD_32(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e32, m8); + VLOAD_32(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0x33, 0x33); - VCLEAR(v6); - asm volatile("vrsub.vi v6, v2, 10, v0.t"); - VCMP_U32(7, v6, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); + VCLEAR(v24); + asm volatile("vrsub.vi v24, v8, 10, v0.t"); + VCMP_U32(7, v24, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); #if 
ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0x33, 0x33); - VCLEAR(v6); - asm volatile("vrsub.vi v6, v2, 10, v0.t"); - VCMP_U64(8, v6, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); + VCLEAR(v24); + asm volatile("vrsub.vi v24, v8, 10, v0.t"); + VCMP_U64(8, v24, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); #endif } void TEST_CASE3(void) { const uint64_t scalar = 25; - VSET(16, e8, m2); - VLOAD_8(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vrsub.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U8(9, v6, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + VSET(16, e8, m8); + VLOAD_8(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v24, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, -15); - VSET(16, e16, m2); - VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vrsub.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U16(10, v6, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + VSET(16, e16, m8); + VLOAD_16(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v24, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, -15); - VSET(16, e32, m2); - VLOAD_32(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vrsub.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U32(11, v6, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + VSET(16, e32, m8); + VLOAD_32(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v24, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, -15); 
#if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vrsub.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U64(12, v6, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v24, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, -15); #endif } @@ -100,34 +100,34 @@ void TEST_CASE3(void) { void TEST_CASE4(void) { const uint64_t scalar = 25; - VSET(16, e8, m2); - VLOAD_8(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e8, m8); + VLOAD_8(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0x33, 0x33); - VCLEAR(v6); - asm volatile("vrsub.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(13, v6, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); + VCLEAR(v24); + asm volatile("vrsub.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v24, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); - VSET(16, e16, m2); - VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e16, m8); + VLOAD_16(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0x33, 0x33); - VCLEAR(v6); - asm volatile("vrsub.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U16(14, v6, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); + VCLEAR(v24); + asm volatile("vrsub.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v24, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); - VSET(16, e32, m2); - VLOAD_32(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e32, m8); + VLOAD_32(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0x33, 0x33); - VCLEAR(v6); - asm volatile("vrsub.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(15, v6, 20, 15, 0, 0, 0, 
-5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); + VCLEAR(v24); + asm volatile("vrsub.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v24, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0x33, 0x33); - VCLEAR(v6); - asm volatile("vrsub.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(16, v6, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); + VCLEAR(v24); + asm volatile("vrsub.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v24, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vslide1down.c b/sw/riscvTests/isa/rv64uv/vslide1down.c index ac37d7ae..91b5b38a 100644 --- a/sw/riscvTests/isa/rv64uv/vslide1down.c +++ b/sw/riscvTests/isa/rv64uv/vslide1down.c @@ -10,88 +10,88 @@ void TEST_CASE1() { uint64_t scalar = 99; - VSET(32, e8, m4); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + VSET(32, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e8, m4); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_8(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslide1down.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U8(1, v2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U8(1, v8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); - VSET(32, e16, m4); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 
+ VSET(32, e16, m8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e16, m4); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslide1down.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U16(2, v2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); + VSET(16, e16, m8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U16(2, v8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); - VSET(32, e32, m4); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + VSET(32, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e32, m4); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_32(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslide1down.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U32(3, v2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); + VSET(16, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U32(3, v8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); #if ELEN == 64 - VSET(32, e64, m4); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + VSET(32, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e64, m4); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16); - VLOAD_64(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslide1down.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U64(4, v2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(4, v8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); #endif } void TEST_CASE2() { uint64_t scalar = 99; - VSET(32, e8, m4); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + VSET(32, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e8, m4); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_8(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vslide1down.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(5, v2, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 99); + asm volatile("vslide1down.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(5, v8, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 99); - VSET(32, e16, m4); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + VSET(32, e16, m8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e16, m4); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e16, m8); + VLOAD_16(v16, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0x55, 0x55); - asm volatile("vslide1down.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U16(6, v2, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1); + asm volatile("vslide1down.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(6, v8, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1); - VSET(32, e32, m4); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + VSET(32, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e32, m4); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_32(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vslide1down.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(7, v2, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 99); + asm volatile("vslide1down.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(7, v8, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 99); #if ELEN == 64 - VSET(32, e64, m4); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + VSET(32, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e64, m4); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_64(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1); VLOAD_8(v0, 0x55, 0x55); - asm volatile("vslide1down.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(8, v2, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1); + asm volatile("vslide1down.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(8, v8, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vslide1up.c b/sw/riscvTests/isa/rv64uv/vslide1up.c index c9422b9b..ffe48221 100644 --- a/sw/riscvTests/isa/rv64uv/vslide1up.c +++ b/sw/riscvTests/isa/rv64uv/vslide1up.c @@ -10,64 +10,64 @@ void TEST_CASE1() { uint64_t scalar = 99; - VSET(16, e8, m2); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_8(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslide1up.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U8(1, v2, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U8(1, v8, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - VSET(16, e16, m2); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslide1up.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U16(2, v2, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + VSET(16, e16, m8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U16(2, v8, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - VSET(16, e32, m2); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_32(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1); - asm volatile("vslide1up.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U32(3, v2, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + VSET(16, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U32(3, v8, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_64(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslide1up.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U64(4, v2, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(4, v8, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #endif } void TEST_CASE2() { uint64_t scalar = 99; - VSET(16, e8, m2); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_8(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vslide1up.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(5, v2, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15); + asm volatile("vslide1up.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(5, v8, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15); - VSET(16, e16, m2); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e16, m8); + 
VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0x55, 0x55); - asm volatile("vslide1up.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U16(6, v2, 99, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1); + asm volatile("vslide1up.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(6, v8, 99, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1); - VSET(16, e32, m2); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_32(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vslide1up.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(7, v2, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15); + asm volatile("vslide1up.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(7, v8, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_64(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0x55, 0x55); - asm volatile("vslide1up.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(8, v2, 99, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1); + asm volatile("vslide1up.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(8, v8, 99, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vslidedown.c b/sw/riscvTests/isa/rv64uv/vslidedown.c index 832886be..092e8ea3 100644 --- a/sw/riscvTests/isa/rv64uv/vslidedown.c +++ 
b/sw/riscvTests/isa/rv64uv/vslidedown.c @@ -8,35 +8,35 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(32, e8, m4); + VSET(32, e8, m8); VLOAD_8(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e8, m4); + VSET(16, e8, m8); VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); asm volatile("vslidedown.vi v8, v0, 3"); VCMP_U8(1, v8, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19); - VSET(32, e16, m4); + VSET(32, e16, m8); VLOAD_16(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e16, m4); + VSET(16, e16, m8); VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); asm volatile("vslidedown.vi v8, v0, 4"); VCMP_U16(2, v8, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20); - VSET(32, e32, m4); + VSET(32, e32, m8); VLOAD_32(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e32, m4); + VSET(16, e32, m8); VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); asm volatile("vslidedown.vi v8, v0, 5"); VCMP_U32(3, v8, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); #if ELEN == 64 - VSET(32, e64, m4); + VSET(32, e64, m8); VLOAD_64(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e64, m4); + VSET(16, e64, m8); VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); asm volatile("vslidedown.vi v8, v0, 6"); VCMP_U64(4, v8, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22); @@ -44,36 +44,36 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(32, e8, m4); + VSET(32, e8, m8); VLOAD_8(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - 
VSET(16, e8, m4); + VSET(16, e8, m8); VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); asm volatile("vslidedown.vi v8, v0, 3, v0.t"); VCMP_U8(5, v8, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 17, -1, 19); - VSET(32, e16, m4); + VSET(32, e16, m8); VLOAD_16(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e16, m4); + VSET(16, e16, m8); VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); asm volatile("vslidedown.vi v8, v0, 4, v0.t"); VCMP_U16(6, v8, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1, 18, -1, 20); - VSET(32, e32, m4); + VSET(32, e32, m8); VLOAD_32(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e32, m4); + VSET(16, e32, m8); VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); asm volatile("vslidedown.vi v8, v0, 5, v0.t"); VCMP_U32(7, v8, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 17, -1, 19, -1, 21); #if ELEN == 64 - VSET(32, e64, m4); + VSET(32, e64, m8); VLOAD_64(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e64, m4); + VSET(16, e64, m8); VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); asm volatile("vslidedown.vi v8, v0, 6, v0.t"); VCMP_U64(8, v8, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1, 18, -1, 20, -1, @@ -84,35 +84,35 @@ void TEST_CASE2() { void TEST_CASE3() { uint64_t scalar = 3; - VSET(32, e8, m4); + VSET(32, e8, m8); VLOAD_8(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e8, m4); + VSET(16, e8, m8); VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); asm volatile("vslidedown.vx v8, v0, %[A]" ::[A] "r"(scalar)); VCMP_U8(9, v8, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19); - VSET(32, e16, m4); + VSET(32, e16, m8); VLOAD_16(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e16, m4); + VSET(16, e16, m8); VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); asm volatile("vslidedown.vx v8, v0, %[A]" ::[A] "r"(scalar)); VCMP_U16(10, v8, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19); - VSET(32, e32, m4); + VSET(32, e32, m8); VLOAD_32(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e32, m4); + VSET(16, e32, m8); VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); asm volatile("vslidedown.vx v8, v0, %[A]" ::[A] "r"(scalar)); VCMP_U32(11, v8, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19); #if ELEN == 64 - VSET(32, e64, m4); + VSET(32, e64, m8); VLOAD_64(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e64, m4); + VSET(16, e64, m8); VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); asm volatile("vslidedown.vx v8, v0, %[A]" ::[A] "r"(scalar)); VCMP_U64(12, v8, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19); @@ -122,38 +122,38 @@ void TEST_CASE3() { void TEST_CASE4() { uint64_t scalar = 3; - VSET(32, e8, m4); + VSET(32, e8, m8); VLOAD_8(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e8, m4); + VSET(16, e8, m8); VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); asm volatile("vslidedown.vx v8, v0, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U8(13, v8, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 17, -1, 19); - VSET(32, e16, m4); + VSET(32, e16, m8); VLOAD_16(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e16, m4); + VSET(16, e16, m8); VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); asm volatile("vslidedown.vx v8, v0, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U16(14, v8, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 17, -1, 19); - VSET(32, e32, m4); + VSET(32, e32, m8); VLOAD_32(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e32, m4); + VSET(16, e32, m8); VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); asm volatile("vslidedown.vx v8, v0, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U32(15, v8, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 17, -1, 19); #if ELEN == 64 - VSET(32, e64, m4); + VSET(32, e64, m8); VLOAD_64(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); - VSET(16, e64, m4); + VSET(16, e64, m8); VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); asm volatile("vslidedown.vx v8, v0, %[A], v0.t" ::[A] "r"(scalar)); diff --git a/sw/riscvTests/isa/rv64uv/vslideup.c b/sw/riscvTests/isa/rv64uv/vslideup.c index 9393b30b..e37dff71 100644 --- a/sw/riscvTests/isa/rv64uv/vslideup.c +++ b/sw/riscvTests/isa/rv64uv/vslideup.c @@ -8,123 +8,123 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(16, e8, m2); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_8(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslideup.vi v2, v4, 3"); - VCMP_U8(1, v2, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); - - VSET(16, e16, m2); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslideup.vi v2, 
v4, 4"); - VCMP_U16(2, v2, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); - - VSET(16, e32, m2); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_32(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslideup.vi v2, v4, 5"); - VCMP_U32(3, v2, -1, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v8, v16, 3"); + VCMP_U8(1, v8, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + + VSET(16, e16, m8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v8, v16, 4"); + VCMP_U16(2, v8, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); + + VSET(16, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v8, v16, 5"); + VCMP_U32(3, v8, -1, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_64(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslideup.vi v2, v4, 6"); - VCMP_U64(4, v2, -1, -1, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v8, v16, 6"); + VCMP_U64(4, v8, -1, -1, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); #endif } void TEST_CASE2() { - VSET(16, e8, m2); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_8(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vslideup.vi v2, v4, 3, v0.t"); - VCMP_U8(5, v2, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); + asm volatile("vslideup.vi v8, v16, 3, v0.t"); + VCMP_U8(5, v8, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); - VSET(16, e16, m2); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslideup.vi v2, v4, 4, v0.t"); - VCMP_U16(6, v2, -1, -1, -1, -1, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12); + VSET(16, e16, m8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v8, v16, 4, v0.t"); + VCMP_U16(6, v8, -1, -1, -1, -1, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12); - VSET(16, e32, m2); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_32(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslideup.vi v2, v4, 5, v0.t"); - VCMP_U32(7, v2, -1, -1, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11); + VSET(16, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v8, v16, 5, v0.t"); + VCMP_U32(7, v8, -1, -1, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_64(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslideup.vi v2, v4, 6, v0.t"); - VCMP_U64(8, v2, -1, -1, -1, -1, -1, -1, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10); + VSET(16, e64, 
m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v8, v16, 6, v0.t"); + VCMP_U64(8, v8, -1, -1, -1, -1, -1, -1, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10); #endif } void TEST_CASE3() { uint64_t scalar = 3; - VSET(16, e8, m2); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_8(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslideup.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U8(9, v2, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v8, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); - VSET(16, e16, m2); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslideup.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U16(10, v2, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + VSET(16, e16, m8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v8, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); - VSET(16, e32, m2); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_32(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslideup.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U32(11, v2, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + VSET(16, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v8, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_64(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - asm volatile("vslideup.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U64(12, v2, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); #endif } void TEST_CASE4() { uint64_t scalar = 3; - VSET(16, e8, m2); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_8(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e8, m8); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vslideup.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(13, v2, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); + asm volatile("vslideup.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v8, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); - VSET(16, e16, m2); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e16, m8); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vslideup.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U16(14, v2, -1, -1, -1, 
1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); + asm volatile("vslideup.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v8, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); - VSET(16, e32, m2); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_32(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vslideup.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(15, v2, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); + asm volatile("vslideup.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VLOAD_64(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vslideup.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(16, v2, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); + asm volatile("vslideup.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vsll.c b/sw/riscvTests/isa/rv64uv/vsll.c index d29efa13..3150b6e0 100644 --- a/sw/riscvTests/isa/rv64uv/vsll.c +++ b/sw/riscvTests/isa/rv64uv/vsll.c @@ -8,45 +8,45 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + VSET(16, e8, m8); + VLOAD_8(v16, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); - VLOAD_8(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm volatile("vsll.vv v8, v4, v6"); + VLOAD_8(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsll.vv v8, v16, v24"); VCMP_U8(1, v8, 0x01, 0x02, 0x04, 0x08, 0x80, 0x80, 0x80, 0x01, 0x01, 0x02, 0x04, 0x08, 0x80, 0x80, 0x80, 0x01); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001); - VLOAD_16(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm volatile("vsll.vv v8, v4, v6"); + VLOAD_16(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsll.vv v8, v16, v24"); VCMP_U16(2, v8, 0x0001, 0x0002, 0x0004, 0x0008, 0x0080, 0x8000, 0x8000, 0x0001, 0x0001, 0x0002, 0x0004, 0x0008, 0x0080, 0x8000, 0x8000, 0x0001); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001); - VLOAD_32(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm volatile("vsll.vv v8, v4, v6"); + VLOAD_32(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsll.vv v8, v16, v24"); VCMP_U32(3, v8, 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000080, 0x00008000, 0x80000000, 0x00000001, 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000080, 0x00008000, 0x80000000, 0x00000001); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 
0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001); - VLOAD_64(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm volatile("vsll.vv v8, v4, v6"); + VLOAD_64(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsll.vv v8, v16, v24"); VCMP_U64(4, v8, 0x000000000000001, 0x0000000000000002, 0x0000000000000004, 0x0000000000000008, 0x0000000000000080, 0x0000000000008000, 0x0000000080000000, 0x0000000100000000, 0x0000000000000001, @@ -57,53 +57,53 @@ void TEST_CASE1(void) { }; void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + VSET(16, e8, m8); + VLOAD_8(v16, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); - VLOAD_8(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsll.vv v8, v4, v6, v0.t"); + asm volatile("vsll.vv v8, v16, v24, v0.t"); VCMP_U8(5, v8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x80, 0x00, 0x01, 0x00, 0x02, 0x00, 0x08, 0x00, 0x80, 0x00, 0x01); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001); - VLOAD_16(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_16(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsll.vv v8, v4, v6, v0.t"); + asm volatile("vsll.vv v8, v16, v24, v0.t"); VCMP_U16(6, v8, 0x0000, 0x0002, 0x0000, 0x0008, 0x0000, 0x8000, 0x0000, 0x0001, 0x0000, 0x0002, 0x0000, 0x0008, 0x0000, 0x8000, 
0x0000, 0x0001); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001); - VLOAD_32(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_32(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsll.vv v8, v4, v6, v0.t"); + asm volatile("vsll.vv v8, v16, v24, v0.t"); VCMP_U32(7, v8, 0x00000000, 0x00000002, 0x00000000, 0x00000008, 0x00000000, 0x00008000, 0x00000000, 0x00000001, 0x00000000, 0x00000002, 0x00000000, 0x00000008, 0x00000000, 0x00008000, 0x00000000, 0x00000001); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001); - VLOAD_64(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_64(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsll.vv v8, v4, v6, v0.t"); + asm volatile("vsll.vv v8, v16, v24, v0.t"); VCMP_U64(8, v8, 0x000000000000000, 0x0000000000000002, 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, 0x0000000000008000, 0x0000000000000000, 0x0000000100000000, 0x0000000000000000, @@ -116,41 +116,41 @@ void TEST_CASE2(void) { void TEST_CASE3(void) { const uint64_t scalar = 2; - VSET(16, e8, m2); - VLOAD_8(v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, + VSET(16, e8, m8); + VLOAD_8(v16, 0x01, 0x02, 
0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); - asm volatile("vsll.vx v8, v4, %[A]" ::[A] "r"(scalar)); + asm volatile("vsll.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U8(9, v8, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF); - asm volatile("vsll.vx v8, v4, %[A]" ::[A] "r"(scalar)); + asm volatile("vsll.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U16(10, v8, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0x03FC); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0x000000FF); - asm volatile("vsll.vx v8, v4, %[A]" ::[A] "r"(scalar)); + asm volatile("vsll.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U32(11, v8, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, 0x000003FC); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE, 0x00000000000000FF); - asm volatile("vsll.vx v8, v4, 
%[A]" ::[A] "r"(scalar)); + asm volatile("vsll.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U64(12, v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, @@ -163,41 +163,41 @@ void TEST_CASE3(void) { void TEST_CASE4(void) { const uint64_t scalar = 2; - VSET(16, e8, m2); - VLOAD_8(v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, + VSET(16, e8, m8); + VLOAD_8(v16, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); VCLEAR(v8); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vsll.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsll.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U8(13, v8, 0x00, 0x08, 0x00, 0x10, 0x00, 0x18, 0x00, 0x20, 0x00, 0xE4, 0x00, 0xEC, 0x00, 0xF4, 0x00, 0xFC); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF); VCLEAR(v8); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vsll.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsll.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U16(14, v8, 0x0000, 0x0008, 0x0000, 0x0010, 0x0000, 0x0018, 0x0000, 0x0020, 0x0000, 0xFFE4, 0x0000, 0xFFEC, 0x0000, 0xFFF4, 0x0000, 0x03FC); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0x000000FF); VCLEAR(v8); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vsll.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsll.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U32(15, v8, 0x00000000, 
0x00000008, 0x00000000, 0x00000010, 0x00000000, 0x00000018, 0x00000000, 0x00000020, 0x00000000, 0xFFFFFFE4, 0x00000000, 0xFFFFFFEC, 0x00000000, 0xFFFFFFF4, 0x00000000, 0x000003FC); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, @@ -205,7 +205,7 @@ void TEST_CASE4(void) { 0x00000000000000FF); VCLEAR(v8); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vsll.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsll.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U64(16, v8, 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, 0x0000000000000010, 0x0000000000000000, 0x0000000000000018, 0x0000000000000000, 0x0000000000000020, 0x0000000000000000, @@ -216,41 +216,41 @@ void TEST_CASE4(void) { }; void TEST_CASE5(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, + VSET(16, e8, m8); + VLOAD_8(v16, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); - asm volatile("vsll.vi v8, v4, 2"); + asm volatile("vsll.vi v8, v16, 2"); VCMP_U8(17, v8, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF); - asm volatile("vsll.vi v8, v4, 2"); + asm volatile("vsll.vi v8, v16, 2"); VCMP_U16(18, v8, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0x03FC); - VSET(16, e32, m2); - 
VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0x000000FF); - asm volatile("vsll.vi v8, v4, 2"); + asm volatile("vsll.vi v8, v16, 2"); VCMP_U32(19, v8, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, 0x000003FC); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE, 0x00000000000000FF); - asm volatile("vsll.vi v8, v4, 2"); + asm volatile("vsll.vi v8, v16, 2"); VCMP_U64(20, v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, @@ -261,41 +261,41 @@ void TEST_CASE5(void) { }; void TEST_CASE6(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, + VSET(16, e8, m8); + VLOAD_8(v16, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); VCLEAR(v8); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vsll.vi v8, v4, 2, v0.t"); + asm volatile("vsll.vi v8, v16, 2, v0.t"); VCMP_U8(21, v8, 0x00, 0x08, 0x00, 0x10, 0x00, 0x18, 0x00, 0x20, 0x00, 0xE4, 0x00, 0xEC, 0x00, 0xF4, 0x00, 0xFC); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + VSET(16, e16, m8); + 
VLOAD_16(v16, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF); VCLEAR(v8); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vsll.vi v8, v4, 2, v0.t"); + asm volatile("vsll.vi v8, v16, 2, v0.t"); VCMP_U16(22, v8, 0x0000, 0x0008, 0x0000, 0x0010, 0x0000, 0x0018, 0x0000, 0x0020, 0x0000, 0xFFE4, 0x0000, 0xFFEC, 0x0000, 0xFFF4, 0x0000, 0x03FC); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0x000000FF); VCLEAR(v8); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vsll.vi v8, v4, 2, v0.t"); + asm volatile("vsll.vi v8, v16, 2, v0.t"); VCMP_U32(23, v8, 0x00000000, 0x00000008, 0x00000000, 0x00000010, 0x00000000, 0x00000018, 0x00000000, 0x00000020, 0x00000000, 0xFFFFFFE4, 0x00000000, 0xFFFFFFEC, 0x00000000, 0xFFFFFFF4, 0x00000000, 0x000003FC); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, @@ -303,7 +303,7 @@ void TEST_CASE6(void) { 0x00000000000000FF); VCLEAR(v8); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vsll.vi v8, v4, 2, v0.t"); + asm volatile("vsll.vi v8, v16, 2, v0.t"); VCMP_U64(24, v8, 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, 0x0000000000000010, 0x0000000000000000, 0x0000000000000018, 0x0000000000000000, 0x0000000000000020, 0x0000000000000000, diff --git a/sw/riscvTests/isa/rv64uv/vsra.c b/sw/riscvTests/isa/rv64uv/vsra.c index fe5a091f..4901555f 100644 --- a/sw/riscvTests/isa/rv64uv/vsra.c +++ 
b/sw/riscvTests/isa/rv64uv/vsra.c @@ -8,45 +8,45 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + VSET(16, e8, m8); + VLOAD_8(v16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - VLOAD_8(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm volatile("vsra.vv v8, v4, v6"); + VLOAD_8(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v8, v16, v24"); VCMP_U8(1, v8, 0x80, 0xC0, 0xE0, 0xF0, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0xC0, 0xE0, 0xF0, 0xFF, 0xFF, 0xFF, 0x80); - VSET(16, e16, m2); - VLOAD_16(v4, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + VSET(16, e16, m8); + VLOAD_16(v16, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); - VLOAD_16(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm volatile("vsra.vv v8, v4, v6"); + VLOAD_16(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v8, v16, v24"); VCMP_U16(2, v8, 0x8000, 0xC000, 0xE000, 0xF000, 0xFF00, 0xFFFF, 0xFFFF, 0x8000, 0x8000, 0xC000, 0xE000, 0xF000, 0xFF00, 0xFFFF, 0xFFFF, 0x8000); - VSET(16, e32, m2); - VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + VSET(16, e32, m8); + VLOAD_32(v16, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); - VLOAD_32(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm volatile("vsra.vv v8, v4, v6"); + VLOAD_32(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v8, v16, v24"); VCMP_U32(3, v8, 0x80000000, 0xC0000000, 0xE0000000, 0xF0000000, 0xFF000000, 0xFFFF0000, 0xFFFFFFFF, 0x80000000, 0x80000000, 0xC0000000, 0xE0000000, 0xF0000000, 
0xFF000000, 0xFFFF0000, 0xFFFFFFFF, 0x80000000); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + VSET(16, e64, m8); + VLOAD_64(v16, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000); - VLOAD_64(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm volatile("vsra.vv v8, v4, v6"); + VLOAD_64(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v8, v16, v24"); VCMP_U64(4, v8, 0x8000000000000000, 0xC000000000000000, 0xE000000000000000, 0xF000000000000000, 0xFF00000000000000, 0xFFFF000000000000, 0xFFFFFFFF00000000, 0xFFFFFFFF80000000, 0x8000000000000000, @@ -57,53 +57,53 @@ void TEST_CASE1(void) { }; void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + VSET(16, e8, m8); + VLOAD_8(v16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - VLOAD_8(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vv v8, v4, v6, v0.t"); + asm volatile("vsra.vv v8, v16, v24, v0.t"); VCMP_U8(5, v8, 0x00, 0xC0, 0x00, 0xF0, 0x00, 0xFF, 0x00, 0x80, 0x00, 0xC0, 0x00, 0xF0, 0x00, 0xFF, 0x00, 0x80); - VSET(16, e16, m2); - VLOAD_16(v4, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + VSET(16, e16, m8); + VLOAD_16(v16, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); - VLOAD_16(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_16(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 
2, 3, 7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vv v8, v4, v6, v0.t"); + asm volatile("vsra.vv v8, v16, v24, v0.t"); VCMP_U16(6, v8, 0x0000, 0xC000, 0x0000, 0xF000, 0x0000, 0xFFFF, 0x0000, 0x8000, 0x0000, 0xC000, 0x0000, 0xF000, 0x0000, 0xFFFF, 0x0000, 0x8000); - VSET(16, e32, m2); - VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + VSET(16, e32, m8); + VLOAD_32(v16, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); - VLOAD_32(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_32(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vv v8, v4, v6, v0.t"); + asm volatile("vsra.vv v8, v16, v24, v0.t"); VCMP_U32(7, v8, 0x00000000, 0xC0000000, 0x00000000, 0xF0000000, 0x00000000, 0xFFFF0000, 0x00000000, 0x80000000, 0x00000000, 0xC0000000, 0x00000000, 0xF0000000, 0x00000000, 0xFFFF0000, 0x00000000, 0x80000000); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + VSET(16, e64, m8); + VLOAD_64(v16, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000); - VLOAD_64(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_64(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vv v8, v4, v6, v0.t"); + asm volatile("vsra.vv v8, v16, v24, v0.t"); VCMP_U64(8, v8, 0x0000000000000000, 0xC000000000000000, 0x0000000000000000, 0xF000000000000000, 0x0000000000000000, 0xFFFF000000000000, 0x0000000000000000, 
0xFFFFFFFF80000000, 0x0000000000000000, @@ -116,41 +116,41 @@ void TEST_CASE2(void) { void TEST_CASE3(void) { const uint64_t scalar = 2; - VSET(16, e8, m2); - VLOAD_8(v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + VSET(16, e8, m8); + VLOAD_8(v16, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); - asm volatile("vsra.vx v8, v4, %[A]" ::[A] "r"(scalar)); + asm volatile("vsra.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U8(9, v8, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); - asm volatile("vsra.vx v8, v4, %[A]" ::[A] "r"(scalar)); + asm volatile("vsra.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U16(10, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, 0xFFFFFFFC); - asm volatile("vsra.vx v8, v4, %[A]" ::[A] "r"(scalar)); + asm volatile("vsra.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U32(11, v8, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, 
0x0000000000000010, 0x0000000000000014, 0x0000000000000018, 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, 0xFFFFFFFFFFFFFFFC); - asm volatile("vsra.vx v8, v4, %[A]" ::[A] "r"(scalar)); + asm volatile("vsra.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U64(12, v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, @@ -163,41 +163,41 @@ void TEST_CASE3(void) { void TEST_CASE4(void) { const uint64_t scalar = 2; - VSET(16, e8, m2); - VLOAD_8(v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + VSET(16, e8, m8); + VLOAD_8(v16, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsra.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U8(13, v8, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsra.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U16(14, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, 0xFFFF); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, 0x00000018, 0x0000001C, 0x00000020, 
0xFFFFFFE0, 0xFFFFFFE4, 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, 0xFFFFFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsra.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U32(15, v8, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, 0xFFFFFFFF); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, @@ -205,7 +205,7 @@ void TEST_CASE4(void) { 0xFFFFFFFFFFFFFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsra.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U64(16, v8, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000, 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, @@ -216,41 +216,41 @@ void TEST_CASE4(void) { }; void TEST_CASE5(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + VSET(16, e8, m8); + VLOAD_8(v16, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); - asm volatile("vsra.vi v8, v4, 2"); + asm volatile("vsra.vi v8, v16, 2"); VCMP_U8(17, v8, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0xFFE0, 0xFFE4, 
0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); - asm volatile("vsra.vi v8, v4, 2"); + asm volatile("vsra.vi v8, v16, 2"); VCMP_U16(18, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, 0xFFFFFFFC); - asm volatile("vsra.vi v8, v4, 2"); + asm volatile("vsra.vi v8, v16, 2"); VCMP_U32(19, v8, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, 0xFFFFFFFFFFFFFFFC); - asm volatile("vsra.vi v8, v4, 2"); + asm volatile("vsra.vi v8, v16, 2"); VCMP_U64(20, v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, @@ -261,41 +261,41 @@ void TEST_CASE5(void) { }; void TEST_CASE6(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + VSET(16, e8, m8); + VLOAD_8(v16, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vi v8, v4, 2, 
v0.t"); + asm volatile("vsra.vi v8, v16, 2, v0.t"); VCMP_U8(21, v8, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vi v8, v4, 2, v0.t"); + asm volatile("vsra.vi v8, v16, 2, v0.t"); VCMP_U16(22, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, 0xFFFF); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, 0xFFFFFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vi v8, v4, 2, v0.t"); + asm volatile("vsra.vi v8, v16, 2, v0.t"); VCMP_U32(23, v8, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, 0xFFFFFFFF); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, @@ -303,7 +303,7 @@ void TEST_CASE6(void) { 0xFFFFFFFFFFFFFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsra.vi v8, v4, 2, v0.t"); + asm volatile("vsra.vi v8, v16, 2, v0.t"); VCMP_U64(24, v8, 0x0000000000000000, 0x0000000000000002, 
0x0000000000000000, 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, diff --git a/sw/riscvTests/isa/rv64uv/vsrl.c b/sw/riscvTests/isa/rv64uv/vsrl.c index 12654982..8beafca1 100644 --- a/sw/riscvTests/isa/rv64uv/vsrl.c +++ b/sw/riscvTests/isa/rv64uv/vsrl.c @@ -8,45 +8,45 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + VSET(16, e8, m8); + VLOAD_8(v16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - VLOAD_8(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm volatile("vsrl.vv v8, v4, v6"); + VLOAD_8(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v8, v16, v24"); VCMP_U8(1, v8, 0x80, 0x40, 0x20, 0x10, 0x01, 0x01, 0x01, 0x80, 0x80, 0x40, 0x20, 0x10, 0x01, 0x01, 0x01, 0x80); - VSET(16, e16, m2); - VLOAD_16(v4, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + VSET(16, e16, m8); + VLOAD_16(v16, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); - VLOAD_16(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm volatile("vsrl.vv v8, v4, v6"); + VLOAD_16(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v8, v16, v24"); VCMP_U16(2, v8, 0x8000, 0x4000, 0x2000, 0x1000, 0x0100, 0x0001, 0x0001, 0x8000, 0x8000, 0x4000, 0x2000, 0x1000, 0x0100, 0x0001, 0x0001, 0x8000); - VSET(16, e32, m2); - VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + VSET(16, e32, m8); + VLOAD_32(v16, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); - VLOAD_32(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm 
volatile("vsrl.vv v8, v4, v6"); + VLOAD_32(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v8, v16, v24"); VCMP_U32(3, v8, 0x80000000, 0x40000000, 0x20000000, 0x10000000, 0x01000000, 0x00010000, 0x00000001, 0x80000000, 0x80000000, 0x40000000, 0x20000000, 0x10000000, 0x01000000, 0x00010000, 0x00000001, 0x80000000); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + VSET(16, e64, m8); + VLOAD_64(v16, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000); - VLOAD_64(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); - asm volatile("vsrl.vv v8, v4, v6"); + VLOAD_64(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v8, v16, v24"); VCMP_U64(4, v8, 0x8000000000000000, 0x4000000000000000, 0x2000000000000000, 0x1000000000000000, 0x0100000000000000, 0x0001000000000000, 0x0000000100000000, 0x0000000080000000, 0x8000000000000000, @@ -57,53 +57,53 @@ void TEST_CASE1(void) { }; void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + VSET(16, e8, m8); + VLOAD_8(v16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - VLOAD_8(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vv v8, v4, v6, v0.t"); + asm volatile("vsrl.vv v8, v16, v24, v0.t"); VCMP_U8(5, v8, 0x00, 0x40, 0x00, 0x10, 0x00, 0x01, 0x00, 0x80, 0x00, 0x40, 0x00, 0x10, 0x00, 0x01, 0x00, 0x80); - VSET(16, e16, m2); - VLOAD_16(v4, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
0x8000, 0x8000, 0x8000, + VSET(16, e16, m8); + VLOAD_16(v16, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); - VLOAD_16(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_16(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vv v8, v4, v6, v0.t"); + asm volatile("vsrl.vv v8, v16, v24, v0.t"); VCMP_U16(6, v8, 0x0000, 0x4000, 0x0000, 0x1000, 0x0000, 0x0001, 0x0000, 0x8000, 0x0000, 0x4000, 0x0000, 0x1000, 0x0000, 0x0001, 0x0000, 0x8000); - VSET(16, e32, m2); - VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + VSET(16, e32, m8); + VLOAD_32(v16, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); - VLOAD_32(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_32(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vv v8, v4, v6, v0.t"); + asm volatile("vsrl.vv v8, v16, v24, v0.t"); VCMP_U32(7, v8, 0x00000000, 0x40000000, 0x00000000, 0x10000000, 0x00000000, 0x00010000, 0x00000000, 0x80000000, 0x00000000, 0x40000000, 0x00000000, 0x10000000, 0x00000000, 0x00010000, 0x00000000, 0x80000000); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + VSET(16, e64, m8); + VLOAD_64(v16, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000); - VLOAD_64(v6, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_64(v24, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 
7, 15, 31, 32); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vv v8, v4, v6, v0.t"); + asm volatile("vsrl.vv v8, v16, v24, v0.t"); VCMP_U64(8, v8, 0x0000000000000000, 0x4000000000000000, 0x0000000000000000, 0x1000000000000000, 0x0000000000000000, 0x0001000000000000, 0x0000000000000000, 0x0000000080000000, 0x0000000000000000, @@ -116,41 +116,41 @@ void TEST_CASE2(void) { void TEST_CASE3(void) { const uint64_t scalar = 2; - VSET(16, e8, m2); - VLOAD_8(v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + VSET(16, e8, m8); + VLOAD_8(v16, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); - asm volatile("vsrl.vx v8, v4, %[A]" ::[A] "r"(scalar)); + asm volatile("vsrl.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U8(9, v8, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); - asm volatile("vsrl.vx v8, v4, %[A]" ::[A] "r"(scalar)); + asm volatile("vsrl.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U16(10, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x3FF8, 0x3FF9, 0x3FFA, 0x3FFB, 0x3FFC, 0x3FFD, 0x3FFE, 0x3FFF); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, 0xFFFFFFFC); - asm volatile("vsrl.vx v8, v4, %[A]" ::[A] "r"(scalar)); + asm volatile("vsrl.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U32(11, v8, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, 0x00000008, 0x3FFFFFF8, 
0x3FFFFFF9, 0x3FFFFFFA, 0x3FFFFFFB, 0x3FFFFFFC, 0x3FFFFFFD, 0x3FFFFFFE, 0x3FFFFFFF); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, 0xFFFFFFFFFFFFFFFC); - asm volatile("vsrl.vx v8, v4, %[A]" ::[A] "r"(scalar)); + asm volatile("vsrl.vx v8, v16, %[A]" ::[A] "r"(scalar)); VCMP_U64(12, v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, 0x0000000000000007, 0x0000000000000008, 0x3FFFFFFFFFFFFFF8, @@ -163,41 +163,41 @@ void TEST_CASE3(void) { void TEST_CASE4(void) { const uint64_t scalar = 2; - VSET(16, e8, m2); - VLOAD_8(v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + VSET(16, e8, m8); + VLOAD_8(v16, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsrl.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U8(13, v8, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0x39, 0x00, 0x3B, 0x00, 0x3D, 0x00, 0x3F); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsrl.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U16(14, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, 0x0008, 0x0000, 
0x3FF9, 0x0000, 0x3FFB, 0x0000, 0x3FFD, 0x0000, 0x3FFF); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, 0xFFFFFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsrl.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U32(15, v8, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0x3FFFFFF9, 0x00000000, 0x3FFFFFFB, 0x00000000, 0x3FFFFFFD, 0x00000000, 0x3FFFFFFF); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, @@ -205,7 +205,7 @@ void TEST_CASE4(void) { 0xFFFFFFFFFFFFFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + asm volatile("vsrl.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); VCMP_U64(16, v8, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000, 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, @@ -216,41 +216,41 @@ void TEST_CASE4(void) { }; void TEST_CASE5(void) { - VSET(16, e8, m2); - VLOAD_8(v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + VSET(16, e8, m8); + VLOAD_8(v16, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); - asm volatile("vsrl.vi v8, v4, 2"); + asm volatile("vsrl.vi v8, v16, 2"); VCMP_U8(17, v8, 0x01, 0x02, 0x03, 0x04, 
0x05, 0x06, 0x07, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); - asm volatile("vsrl.vi v8, v4, 2"); + asm volatile("vsrl.vi v8, v16, 2"); VCMP_U16(18, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x3FF8, 0x3FF9, 0x3FFA, 0x3FFB, 0x3FFC, 0x3FFD, 0x3FFE, 0x3FFF); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, 0xFFFFFFFC); - asm volatile("vsrl.vi v8, v4, 2"); + asm volatile("vsrl.vi v8, v16, 2"); VCMP_U32(19, v8, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, 0x00000008, 0x3FFFFFF8, 0x3FFFFFF9, 0x3FFFFFFA, 0x3FFFFFFB, 0x3FFFFFFC, 0x3FFFFFFD, 0x3FFFFFFE, 0x3FFFFFFF); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, 0xFFFFFFFFFFFFFFFC); - asm volatile("vsrl.vi v8, v4, 2"); + asm volatile("vsrl.vi v8, v16, 2"); VCMP_U64(20, v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, 0x0000000000000007, 0x0000000000000008, 0x3FFFFFFFFFFFFFF8, @@ -261,41 +261,41 @@ void TEST_CASE5(void) { }; void TEST_CASE6(void) { - VSET(16, e8, 
m2); - VLOAD_8(v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + VSET(16, e8, m8); + VLOAD_8(v16, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vi v8, v4, 2, v0.t"); + asm volatile("vsrl.vi v8, v16, 2, v0.t"); VCMP_U8(21, v8, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0x39, 0x00, 0x3B, 0x00, 0x3D, 0x00, 0x3F); - VSET(16, e16, m2); - VLOAD_16(v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + VSET(16, e16, m8); + VLOAD_16(v16, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vi v8, v4, 2, v0.t"); + asm volatile("vsrl.vi v8, v16, 2, v0.t"); VCMP_U16(22, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, 0x0008, 0x0000, 0x3FF9, 0x0000, 0x3FFB, 0x0000, 0x3FFD, 0x0000, 0x3FFF); - VSET(16, e32, m2); - VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + VSET(16, e32, m8); + VLOAD_32(v16, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, 0xFFFFFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vi v8, v4, 2, v0.t"); + asm volatile("vsrl.vi v8, v16, 2, v0.t"); VCMP_U32(23, v8, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0x3FFFFFF9, 0x00000000, 0x3FFFFFFB, 0x00000000, 0x3FFFFFFD, 0x00000000, 0x3FFFFFFF); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v4, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, 0xFFFFFFFFFFFFFFE4, 
0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, @@ -303,7 +303,7 @@ void TEST_CASE6(void) { 0xFFFFFFFFFFFFFFFC); VLOAD_8(v0, 0xAA, 0xAA); VCLEAR(v8); - asm volatile("vsrl.vi v8, v4, 2, v0.t"); + asm volatile("vsrl.vi v8, v16, 2, v0.t"); VCMP_U64(24, v8, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000, 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, diff --git a/sw/riscvTests/isa/rv64uv/vsub.c b/sw/riscvTests/isa/rv64uv/vsub.c index 32a74668..177910fd 100644 --- a/sw/riscvTests/isa/rv64uv/vsub.c +++ b/sw/riscvTests/isa/rv64uv/vsub.c @@ -8,126 +8,126 @@ #include "vector_macros.h" void TEST_CASE1(void) { - VSET(16, e8, m2); - VLOAD_8(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vsub.vv v6, v2, v4"); - VCMP_U8(1, v6, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); - - VSET(16, e16, m2); - VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vsub.vv v6, v2, v4"); - VCMP_U16(2, v6, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); - - VSET(16, e32, m2); - VLOAD_32(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vsub.vv v6, v2, v4"); - VCMP_U32(3, v6, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); + VSET(16, e8, m8); + VLOAD_8(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v24, v8, v16"); + VCMP_U8(1, v24, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); + + VSET(16, e16, m8); + VLOAD_16(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v24, 
v8, v16"); + VCMP_U16(2, v24, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); + + VSET(16, e32, m8); + VLOAD_32(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v24, v8, v16"); + VCMP_U32(3, v24, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - asm volatile("vsub.vv v6, v2, v4"); - VCMP_U64(4, v6, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v24, v8, v16"); + VCMP_U64(4, v24, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); #endif } void TEST_CASE2(void) { - VSET(16, e8, m2); - VLOAD_8(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e8, m8); + VLOAD_8(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vsub.vv v6, v2, v4, v0.t"); - VCMP_U8(5, v6, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); + VCLEAR(v24); + asm volatile("vsub.vv v24, v8, v16, v0.t"); + VCMP_U8(5, v24, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); - VSET(16, e16, m2); - VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e16, m8); + VLOAD_16(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vsub.vv v6, v2, 
v4, v0.t"); - VCMP_U16(6, v6, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); + VCLEAR(v24); + asm volatile("vsub.vv v24, v8, v16, v0.t"); + VCMP_U16(6, v24, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); - VSET(16, e32, m2); - VLOAD_32(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e32, m8); + VLOAD_32(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vsub.vv v6, v2, v4, v0.t"); - VCMP_U32(7, v6, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); + VCLEAR(v24); + asm volatile("vsub.vv v24, v8, v16, v0.t"); + VCMP_U32(7, v24, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vsub.vv v6, v2, v4, v0.t"); - VCMP_U64(8, v6, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); + VCLEAR(v24); + asm volatile("vsub.vv v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); #endif } void TEST_CASE3(void) { const uint64_t scalar = 5; - VSET(16, e8, m2); - VLOAD_8(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vsub.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U8(9, v6, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); + VSET(16, e8, m8); + VLOAD_8(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v24, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); 
- VSET(16, e16, m2); - VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vsub.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U16(10, v6, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); + VSET(16, e16, m8); + VLOAD_16(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v24, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); - VSET(16, e32, m2); - VLOAD_32(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vsub.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U32(11, v6, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); + VSET(16, e32, m8); + VLOAD_32(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v24, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); - asm volatile("vsub.vx v6, v2, %[A]" ::[A] "r"(scalar)); - VCMP_U64(12, v6, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v24, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v24, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); #endif } void TEST_CASE4(void) { const uint64_t scalar = 5; - VSET(16, e8, m2); - VLOAD_8(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e8, m8); + VLOAD_8(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vsub.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(13, v6, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); + VCLEAR(v24); + asm volatile("vsub.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v24, 0, 5, 0, 15, 0, 25, 0, 35, 
0, 5, 0, 15, 0, 25, 0, 35); - VSET(16, e16, m2); - VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e16, m8); + VLOAD_16(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vsub.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U16(14, v6, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); + VCLEAR(v24); + asm volatile("vsub.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v24, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); - VSET(16, e32, m2); - VLOAD_32(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e32, m8); + VLOAD_32(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vsub.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(15, v6, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); + VCLEAR(v24); + asm volatile("vsub.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v24, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); VLOAD_8(v0, 0xAA, 0xAA); - VCLEAR(v6); - asm volatile("vsub.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(16, v6, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); + VCLEAR(v24); + asm volatile("vsub.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v24, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); #endif } diff --git a/sw/riscvTests/isa/rv64uv/vwmacc.c b/sw/riscvTests/isa/rv64uv/vwmacc.c index 3517d1a6..d1118af8 100644 --- a/sw/riscvTests/isa/rv64uv/vwmacc.c +++ b/sw/riscvTests/isa/rv64uv/vwmacc.c @@ -8,46 +8,46 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x460f, 0x1c3e, 0xa322, 0xa7de, 0xd343, 0xa068, 0xf7a8, 
0x3a62, + VSET(16, e16, m4); + VLOAD_16(v16, 0x460f, 0x1c3e, 0xa322, 0xa7de, 0xd343, 0xa068, 0xf7a8, 0x3a62, 0x3f7f, 0x0ae0, 0x0e38, 0x57fe, 0xdc97, 0x61e5, 0xe3f4, 0xb1bd); - VSET(16, e8, m2); + VSET(16, e8, m4); VLOAD_8(v4, 0x19, 0x87, 0x46, 0xf5, 0x3d, 0x66, 0xd7, 0xcf, 0x9f, 0x73, 0x35, 0x92, 0xb4, 0xc4, 0xdb, 0x1a); VLOAD_8(v8, 0xd0, 0x62, 0xb7, 0xd9, 0x39, 0xdf, 0x3e, 0x3d, 0xa2, 0xbb, 0xf1, 0xba, 0xe2, 0xd7, 0x51, 0x5d); - asm volatile("vwmacc.vv v6, v4, v8"); - VSET(16, e16, m2); - VCMP_I16(1, v6, 0x415f, 0xedec, 0x8f2c, 0xa98b, 0xe0d8, 0x9342, 0xedba, + asm volatile("vwmacc.vv v16, v4, v8"); + VSET(16, e16, m4); + VCMP_I16(1, v16, 0x415f, 0xedec, 0x8f2c, 0xa98b, 0xe0d8, 0x9342, 0xedba, 0x2eb5, 0x631d, 0xebe1, 0x0b1d, 0x7612, 0xe57f, 0x6b81, 0xd83f, 0xbb2f); - VSET(16, e32, m2); - VLOAD_32(v6, 0x1d5e4130, 0x9a736c84, 0xe2c407c1, 0x62baf7c8, 0xc157159f, + VSET(16, e32, m4); + VLOAD_32(v16, 0x1d5e4130, 0x9a736c84, 0xe2c407c1, 0x62baf7c8, 0xc157159f, 0x6cea275d, 0x0c385a3e, 0xf8f640d1, 0x484e89df, 0xb7720e91, 0x17a7a4cf, 0x9cba6dac, 0x177e67d2, 0x491950da, 0x5b48691f, 0x03289e10); - VSET(16, e16, m2); + VSET(16, e16, m4); VLOAD_16(v4, 0x6930, 0x239f, 0x2214, 0x555e, 0x9868, 0x02e7, 0x784f, 0x8c32, 0xe8d1, 0xe941, 0xaaaf, 0x4833, 0xc773, 0x6156, 0xdad9, 0x02a5); VLOAD_16(v8, 0xe798, 0x1fe5, 0xca4f, 0xb93c, 0xafe4, 0x5641, 0x4848, 0x82a3, 0x6065, 0x1385, 0x5a53, 0x3318, 0xd488, 0xb1cf, 0x5142, 0x0277); - asm volatile("vwmacc.vv v6, v4, v8"); - VSET(16, e32, m2); - VCMP_I32(2, v6, 0x135705b0, 0x9ee38abf, 0xdb9e53ed, 0x4b21e7d0, 0xe1c1ea3f, + asm volatile("vwmacc.vv v16, v4, v8"); + VSET(16, e32, m4); + VCMP_I32(2, v16, 0x135705b0, 0x9ee38abf, 0xdb9e53ed, 0x4b21e7d0, 0xe1c1ea3f, 0x6de47e04, 0x2e306876, 0x31abe8a7, 0x3f93c454, 0xb5b61056, 0xf98d818c, 0xab235b74, 0x211898ea, 0x2b5e7b64, 0x4f7d7e11, 0x032f22c3); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0x37abc1433be408eb, 0xb3af312be2d38e09, 0x3a99dc46913b03d2, + VSET(16, e64, m4); + VLOAD_64(v16, 
0x37abc1433be408eb, 0xb3af312be2d38e09, 0x3a99dc46913b03d2, 0xb2cca27c11815d4d, 0x456749124aaf479a, 0xc11d5ef0eaa5ee72, 0x1e6a624541e03978, 0x36ce0e391abb8a91, 0x552a61c1f7116723, 0x621ae1e17b7074c2, 0x4c3f1888b5df72b9, 0xde3961024df8c2cf, 0x37cd59f214853904, 0xe76372440eb37d3d, 0x0f0ff8cee2000142, 0x061e905b827b9818); - VSET(16, e32, m2); + VSET(16, e32, m4); VLOAD_32(v4, 0xb5c0475b, 0xda0c4af7, 0xa939123e, 0xb7261aa3, 0x510b75c1, 0x7d5e66d9, 0x3b263bb7, 0xc35c07a0, 0x03b0bb28, 0xba423d88, 0xb4ddeabb, 0x97b1e0ce, 0x01d07d01, 0x16174f78, 0x40c6b24f, @@ -56,9 +56,9 @@ void TEST_CASE1() { 0x5fc79836, 0x6597295d, 0x737b18f1, 0x8cb86656, 0x044f320e, 0x2a881643, 0x2e1a8f59, 0xfdc331d1, 0xca03d155, 0x0a51ebfe, 0xcac2c353); - asm volatile("vwmacc.vv v6, v4, v8"); - VSET(16, e64, m2); - VCMP_I64(3, v6, 0x27987c3defb2dc09, 0xc2652748b5903b7c, 0x5fb1b6348769c35c, + asm volatile("vwmacc.vv v16, v4, v8"); + VSET(16, e64, m4); + VCMP_I64(3, v16, 0x27987c3defb2dc09, 0xc2652748b5903b7c, 0x5fb1b6348769c35c, 0xc1e76e2cf6217c56, 0x4cc871cf26ba35d4, 0xf0052607e34f7838, 0x35e364f04a4539f3, 0x1b733cf52ef5b831, 0x5380f57403c23693, 0x60ee57a5b80c6232, 0x3fc390677f77f3aa, 0xcb708510404efc6d, @@ -68,48 +68,48 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x460f, 0x1c3e, 0xa322, 0xa7de, 0xd343, 0xa068, 0xf7a8, 0x3a62, + VSET(16, e16, m4); + VLOAD_16(v16, 0x460f, 0x1c3e, 0xa322, 0xa7de, 0xd343, 0xa068, 0xf7a8, 0x3a62, 0x3f7f, 0x0ae0, 0x0e38, 0x57fe, 0xdc97, 0x61e5, 0xe3f4, 0xb1bd); - VSET(16, e8, m2); + VSET(16, e8, m4); VLOAD_8(v4, 0x19, 0x87, 0x46, 0xf5, 0x3d, 0x66, 0xd7, 0xcf, 0x9f, 0x73, 0x35, 0x92, 0xb4, 0xc4, 0xdb, 0x1a); VLOAD_8(v8, 0xd0, 0x62, 0xb7, 0xd9, 0x39, 0xdf, 0x3e, 0x3d, 0xa2, 0xbb, 0xf1, 0xba, 0xe2, 0xd7, 0x51, 0x5d); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmacc.vv v6, v4, v8, v0.t"); - VSET(16, e16, m2); - VCMP_I16(4, v6, 0x460f, 0xedec, 0xa322, 0xa98b, 0xd343, 0x9342, 0xf7a8, + asm volatile("vwmacc.vv v16, v4, v8, v0.t"); + VSET(16, 
e16, m4); + VCMP_I16(4, v16, 0x460f, 0xedec, 0xa322, 0xa98b, 0xd343, 0x9342, 0xf7a8, 0x2eb5, 0x3f7f, 0xebe1, 0x0e38, 0x7612, 0xdc97, 0x6b81, 0xe3f4, 0xbb2f); - VSET(16, e32, m2); - VLOAD_32(v6, 0x1d5e4130, 0x9a736c84, 0xe2c407c1, 0x62baf7c8, 0xc157159f, + VSET(16, e32, m4); + VLOAD_32(v16, 0x1d5e4130, 0x9a736c84, 0xe2c407c1, 0x62baf7c8, 0xc157159f, 0x6cea275d, 0x0c385a3e, 0xf8f640d1, 0x484e89df, 0xb7720e91, 0x17a7a4cf, 0x9cba6dac, 0x177e67d2, 0x491950da, 0x5b48691f, 0x03289e10); - VSET(16, e16, m2); + VSET(16, e16, m4); VLOAD_16(v4, 0x6930, 0x239f, 0x2214, 0x555e, 0x9868, 0x02e7, 0x784f, 0x8c32, 0xe8d1, 0xe941, 0xaaaf, 0x4833, 0xc773, 0x6156, 0xdad9, 0x02a5); VLOAD_16(v8, 0xe798, 0x1fe5, 0xca4f, 0xb93c, 0xafe4, 0x5641, 0x4848, 0x82a3, 0x6065, 0x1385, 0x5a53, 0x3318, 0xd488, 0xb1cf, 0x5142, 0x0277); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmacc.vv v6, v4, v8, v0.t"); - VSET(16, e32, m2); - VCMP_I32(5, v6, 0x1d5e4130, 0x9ee38abf, 0xe2c407c1, 0x4b21e7d0, 0xc157159f, + asm volatile("vwmacc.vv v16, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_I32(5, v16, 0x1d5e4130, 0x9ee38abf, 0xe2c407c1, 0x4b21e7d0, 0xc157159f, 0x6de47e04, 0x0c385a3e, 0x31abe8a7, 0x484e89df, 0xb5b61056, 0x17a7a4cf, 0xab235b74, 0x177e67d2, 0x2b5e7b64, 0x5b48691f, 0x032f22c3); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0x37abc1433be408eb, 0xb3af312be2d38e09, 0x3a99dc46913b03d2, + VSET(16, e64, m4); + VLOAD_64(v16, 0x37abc1433be408eb, 0xb3af312be2d38e09, 0x3a99dc46913b03d2, 0xb2cca27c11815d4d, 0x456749124aaf479a, 0xc11d5ef0eaa5ee72, 0x1e6a624541e03978, 0x36ce0e391abb8a91, 0x552a61c1f7116723, 0x621ae1e17b7074c2, 0x4c3f1888b5df72b9, 0xde3961024df8c2cf, 0x37cd59f214853904, 0xe76372440eb37d3d, 0x0f0ff8cee2000142, 0x061e905b827b9818); - VSET(16, e32, m2); + VSET(16, e32, m4); VLOAD_32(v4, 0xb5c0475b, 0xda0c4af7, 0xa939123e, 0xb7261aa3, 0x510b75c1, 0x7d5e66d9, 0x3b263bb7, 0xc35c07a0, 0x03b0bb28, 0xba423d88, 0xb4ddeabb, 0x97b1e0ce, 0x01d07d01, 0x16174f78, 0x40c6b24f, @@ -119,9 +119,9 @@ void 
TEST_CASE2() { 0x2a881643, 0x2e1a8f59, 0xfdc331d1, 0xca03d155, 0x0a51ebfe, 0xcac2c353); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmacc.vv v6, v4, v8, v0.t"); - VSET(16, e64, m2); - VCMP_I64(6, v6, 0x37abc1433be408eb, 0xc2652748b5903b7c, 0x3a99dc46913b03d2, + asm volatile("vwmacc.vv v16, v4, v8, v0.t"); + VSET(16, e64, m4); + VCMP_I64(6, v16, 0x37abc1433be408eb, 0xc2652748b5903b7c, 0x3a99dc46913b03d2, 0xc1e76e2cf6217c56, 0x456749124aaf479a, 0xf0052607e34f7838, 0x1e6a624541e03978, 0x1b733cf52ef5b831, 0x552a61c1f7116723, 0x60ee57a5b80c6232, 0x4c3f1888b5df72b9, 0xcb708510404efc6d, @@ -131,52 +131,52 @@ void TEST_CASE2() { } void TEST_CASE3() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x17db, 0x9069, 0x8e1f, 0x3584, 0xbb3d, 0x39b2, 0x82cf, 0x015b, + VSET(16, e16, m4); + VLOAD_16(v16, 0x17db, 0x9069, 0x8e1f, 0x3584, 0xbb3d, 0x39b2, 0x82cf, 0x015b, 0xd556, 0xd603, 0x85d1, 0x66a6, 0x4e3e, 0xb965, 0xaa7b, 0x9d27); - VSET(16, e8, m2); + VSET(16, e8, m4); int64_t scalar = 5; VLOAD_8(v8, 0x50, 0x56, 0x94, 0x1e, 0x09, 0x8f, 0xe1, 0x9e, 0x86, 0x97, 0x71, 0x5e, 0x55, 0x09, 0xdd, 0x23); - asm volatile("vwmacc.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e16, m2); - VCMP_I16(7, v6, 0x196b, 0x9217, 0x8c03, 0x361a, 0xbb6a, 0x377d, 0x8234, + asm volatile("vwmacc.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e16, m4); + VCMP_I16(7, v16, 0x196b, 0x9217, 0x8c03, 0x361a, 0xbb6a, 0x377d, 0x8234, 0xff71, 0xd2f4, 0xd3f6, 0x8806, 0x687c, 0x4fe7, 0xb992, 0xa9cc, 0x9dd6); - VSET(16, e32, m2); - VLOAD_32(v6, 0xc9b9ade8, 0xfc9c14a8, 0xe1ace4f7, 0x43ea8b48, 0x3ab3025c, + VSET(16, e32, m4); + VLOAD_32(v16, 0xc9b9ade8, 0xfc9c14a8, 0xe1ace4f7, 0x43ea8b48, 0x3ab3025c, 0xe545695b, 0x538304ce, 0xf430c148, 0xd126fac1, 0xbf51d251, 0x85ebc0a4, 0x2167faaf, 0x0a2e18cc, 0x0ae19395, 0x03cc9899, 0x05524f83); - VSET(16, e16, m2); + VSET(16, e16, m4); scalar = -5383; VLOAD_16(v8, 0x4324, 0xd762, 0xc34b, 0x6f67, 0x5134, 0x4d9d, 0xfa05, 0xacb7, 0xb7d2, 0xb079, 0x5bb2, 0x7949, 0x51df, 0xbadd, 0xee81, 0x3b49); - 
asm volatile("vwmacc.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e32, m2); - VCMP_I32(8, v6, 0xc435e3ec, 0xfff226fa, 0xe6a966ea, 0x3ac40c77, 0x340785f0, + asm volatile("vwmacc.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(8, v16, 0xc435e3ec, 0xfff226fa, 0xe6a966ea, 0x3ac40c77, 0x340785f0, 0xdee56910, 0x5400c5ab, 0xfb080547, 0xd714ba03, 0xc5da1202, 0x7e63a4c6, 0x1771acb0, 0x037490b3, 0x108f568a, 0x053c7e12, 0x0073b384); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0xc3afd90f697a742a, 0x585e39767c2959ab, 0xfd5f5c31e16d95ba, + VSET(16, e64, m4); + VLOAD_64(v16, 0xc3afd90f697a742a, 0x585e39767c2959ab, 0xfd5f5c31e16d95ba, 0x2c39235d58ff74a1, 0x4a793d202092aeac, 0x6d31f07b7bdfb6ea, 0x902b8e28be41b10d, 0x89114b9383c4b511, 0x1f9a7e912f5a51f0, 0x5494b9380432890c, 0xfd260f5f1fc1eb45, 0x80381e728c1baa95, 0xa6be6d48744a823b, 0xd37b8ae766a82bf8, 0x7992c128f1c1f6ab, 0xbeca06f79871e7e8); - VSET(16, e32, m2); + VSET(16, e32, m4); scalar = 6474219; VLOAD_32(v8, 0x56545434, 0x99cd1438, 0xa1d42f8a, 0x3500b207, 0x642cd563, 0x7405746d, 0xe92c3246, 0xdab496dc, 0xcbe26107, 0x6bb989c7, 0xc8542e0c, 0x5849a179, 0x04aac7de, 0x7b5ce579, 0x0ce6e7ea, 0x77402b10); - asm volatile("vwmacc.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e64, m2); - VCMP_I64(9, v6, 0xc3d1296f1ae893e6, 0x5836c95c6dbae113, 0xfd3b05253b5c9368, + asm volatile("vwmacc.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e64, m4); + VCMP_I64(9, v16, 0xc3d1296f1ae893e6, 0x5836c95c6dbae113, 0xfd3b05253b5c9368, 0x2c4d976fb318600e, 0x4a9fe54cf92b4b8d, 0x6d5eb614d7052bf9, 0x9022bf12bc18cd4f, 0x8902e74ad235ed05, 0x1f86621f3b05e25d, 0x54be4b3652df41b9, 0xfd1093afbdc79c49, 0x805a304537cce5a8, @@ -186,55 +186,55 @@ void TEST_CASE3() { } void TEST_CASE4() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x17db, 0x9069, 0x8e1f, 0x3584, 0xbb3d, 0x39b2, 0x82cf, 0x015b, + VSET(16, e16, m4); + VLOAD_16(v16, 0x17db, 0x9069, 0x8e1f, 0x3584, 0xbb3d, 0x39b2, 0x82cf, 0x015b, 0xd556, 0xd603, 0x85d1, 0x66a6, 0x4e3e, 0xb965, 
0xaa7b, 0x9d27); - VSET(16, e8, m2); + VSET(16, e8, m4); int64_t scalar = 5; VLOAD_8(v8, 0x50, 0x56, 0x94, 0x1e, 0x09, 0x8f, 0xe1, 0x9e, 0x86, 0x97, 0x71, 0x5e, 0x55, 0x09, 0xdd, 0x23); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmacc.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e16, m2); - VCMP_I16(10, v6, 0x17db, 0x9217, 0x8e1f, 0x361a, 0xbb3d, 0x377d, 0x82cf, + asm volatile("vwmacc.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m4); + VCMP_I16(10, v16, 0x17db, 0x9217, 0x8e1f, 0x361a, 0xbb3d, 0x377d, 0x82cf, 0xff71, 0xd556, 0xd3f6, 0x85d1, 0x687c, 0x4e3e, 0xb992, 0xaa7b, 0x9dd6); - VSET(16, e32, m2); - VLOAD_32(v6, 0xc9b9ade8, 0xfc9c14a8, 0xe1ace4f7, 0x43ea8b48, 0x3ab3025c, + VSET(16, e32, m4); + VLOAD_32(v16, 0xc9b9ade8, 0xfc9c14a8, 0xe1ace4f7, 0x43ea8b48, 0x3ab3025c, 0xe545695b, 0x538304ce, 0xf430c148, 0xd126fac1, 0xbf51d251, 0x85ebc0a4, 0x2167faaf, 0x0a2e18cc, 0x0ae19395, 0x03cc9899, 0x05524f83); - VSET(16, e16, m2); + VSET(16, e16, m4); scalar = -5383; VLOAD_16(v8, 0x4324, 0xd762, 0xc34b, 0x6f67, 0x5134, 0x4d9d, 0xfa05, 0xacb7, 0xb7d2, 0xb079, 0x5bb2, 0x7949, 0x51df, 0xbadd, 0xee81, 0x3b49); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmacc.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e32, m2); - VCMP_I32(11, v6, 0xc9b9ade8, 0xfff226fa, 0xe1ace4f7, 0x3ac40c77, 0x3ab3025c, + asm volatile("vwmacc.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(11, v16, 0xc9b9ade8, 0xfff226fa, 0xe1ace4f7, 0x3ac40c77, 0x3ab3025c, 0xdee56910, 0x538304ce, 0xfb080547, 0xd126fac1, 0xc5da1202, 0x85ebc0a4, 0x1771acb0, 0x0a2e18cc, 0x108f568a, 0x03cc9899, 0x0073b384); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0xc3afd90f697a742a, 0x585e39767c2959ab, 0xfd5f5c31e16d95ba, + VSET(16, e64, m4); + VLOAD_64(v16, 0xc3afd90f697a742a, 0x585e39767c2959ab, 0xfd5f5c31e16d95ba, 0x2c39235d58ff74a1, 0x4a793d202092aeac, 0x6d31f07b7bdfb6ea, 0x902b8e28be41b10d, 0x89114b9383c4b511, 0x1f9a7e912f5a51f0, 0x5494b9380432890c, 0xfd260f5f1fc1eb45, 
0x80381e728c1baa95, 0xa6be6d48744a823b, 0xd37b8ae766a82bf8, 0x7992c128f1c1f6ab, 0xbeca06f79871e7e8); - VSET(16, e32, m2); + VSET(16, e32, m4); scalar = 6474219; VLOAD_32(v8, 0x56545434, 0x99cd1438, 0xa1d42f8a, 0x3500b207, 0x642cd563, 0x7405746d, 0xe92c3246, 0xdab496dc, 0xcbe26107, 0x6bb989c7, 0xc8542e0c, 0x5849a179, 0x04aac7de, 0x7b5ce579, 0x0ce6e7ea, 0x77402b10); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmacc.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e64, m2); - VCMP_I64(12, v6, 0xc3afd90f697a742a, 0x5836c95c6dbae113, 0xfd5f5c31e16d95ba, + asm volatile("vwmacc.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m4); + VCMP_I64(12, v16, 0xc3afd90f697a742a, 0x5836c95c6dbae113, 0xfd5f5c31e16d95ba, 0x2c4d976fb318600e, 0x4a793d202092aeac, 0x6d5eb614d7052bf9, 0x902b8e28be41b10d, 0x8902e74ad235ed05, 0x1f9a7e912f5a51f0, 0x54be4b3652df41b9, 0xfd260f5f1fc1eb45, 0x805a304537cce5a8, diff --git a/sw/riscvTests/isa/rv64uv/vwmaccsu.c b/sw/riscvTests/isa/rv64uv/vwmaccsu.c index d97b76a4..0db0d68d 100644 --- a/sw/riscvTests/isa/rv64uv/vwmaccsu.c +++ b/sw/riscvTests/isa/rv64uv/vwmaccsu.c @@ -8,46 +8,46 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x30dc, 0x7235, 0xd5f6, 0xa008, 0x6e79, 0xa159, 0xa05c, 0x5914, + VSET(16, e16, m4); + VLOAD_16(v16, 0x30dc, 0x7235, 0xd5f6, 0xa008, 0x6e79, 0xa159, 0xa05c, 0x5914, 0xd06f, 0x69c5, 0x9475, 0x5625, 0xa5bd, 0x7be7, 0x823c, 0x5fb2); - VSET(16, e8, m2); + VSET(16, e8, m4); VLOAD_8(v4, 0xb6, 0xbb, 0xb6, 0x57, 0xf9, 0x7c, 0xbf, 0x62, 0x1a, 0xeb, 0xa4, 0x34, 0xde, 0x96, 0x80, 0xe6); VLOAD_8(v8, 0x26, 0xea, 0xe8, 0x85, 0x2e, 0xf1, 0x46, 0x8f, 0x68, 0x29, 0xbb, 0x9b, 0xec, 0x5c, 0x8e, 0x77); - asm volatile("vwmaccsu.vv v6, v4, v8"); - VSET(16, e16, m2); - VCMP_I16(1, v6, 0x25e0, 0x3323, 0x92e6, 0xcd3b, 0x6d37, 0x1615, 0x8e96, + asm volatile("vwmaccsu.vv v16, v4, v8"); + VSET(16, e16, m4); + VCMP_I16(1, v16, 0x25e0, 0x3323, 0x92e6, 0xcd3b, 0x6d37, 0x1615, 0x8e96, 0x8fd2, 0xdaff, 0x6668, 
0x5141, 0x75a1, 0x8665, 0x55cf, 0x3b3c, 0x539c); - VSET(16, e32, m2); - VLOAD_32(v6, 0xdbc5b23d, 0x86bd7dad, 0xb744b5c2, 0xc32f4a47, 0x237edfc4, + VSET(16, e32, m4); + VLOAD_32(v16, 0xdbc5b23d, 0x86bd7dad, 0xb744b5c2, 0xc32f4a47, 0x237edfc4, 0x5d6e851a, 0xbd3110cd, 0x18c61b57, 0x7ade2943, 0x7e4f5ed6, 0x90e5ba77, 0xce45b744, 0x82d1976e, 0xa88bb4e1, 0x989fbb9a, 0xab29da17); - VSET(16, e16, m2); + VSET(16, e16, m4); VLOAD_16(v4, 0x23fb, 0xcee7, 0xa704, 0xc00f, 0xed9f, 0x2cf0, 0x4b53, 0xc0ba, 0x775b, 0x557c, 0x57b7, 0xbb06, 0xf9ba, 0x178f, 0xec73, 0x8240); VLOAD_16(v8, 0xad9d, 0x104d, 0xdc56, 0x96af, 0x8c68, 0x1d25, 0x2d70, 0x467a, 0xc27c, 0x96e2, 0x1c85, 0xe8b6, 0xf7e0, 0xd069, 0x0bca, 0x4f36); - asm volatile("vwmaccsu.vv v6, v4, v8"); - VSET(16, e32, m2); - VCMP_I32(2, v6, 0xf42c622c, 0x839d2928, 0x6aae411a, 0x9d8c5e88, 0x196a5c5c, + asm volatile("vwmaccsu.vv v16, v4, v8"); + VSET(16, e32, m4); + VCMP_I32(2, v16, 0xf42c622c, 0x839d2928, 0x6aae411a, 0x9d8c5e88, 0x196a5c5c, 0x628c33ca, 0xca8f9c1d, 0x075acffb, 0xd58aef57, 0xb0b17e4e, 0x9aab508a, 0x8f921d88, 0x7cbe902e, 0xbbb98e88, 0x97b93f58, 0x84411397); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0xf8e162af4fefb46a, 0x8e859cff3b076a9d, 0xa7279ec622e749eb, + VSET(16, e64, m4); + VLOAD_64(v16, 0xf8e162af4fefb46a, 0x8e859cff3b076a9d, 0xa7279ec622e749eb, 0x67bbdace6d6bf1a9, 0xf2090d8d3b00e5b8, 0x9259e92430c5a337, 0x7cc51e4cc8fd46c6, 0xe5c6946a8e9787fd, 0x0d36e747a75534cc, 0x9c1a70c0989504f9, 0xa7b0f15e7b51c000, 0x4566f8ffa299d104, 0xf385b581a4c1c25b, 0xb067f1a7621f9cdd, 0x54ffc96dc442d7b5, 0x3fc18a6aa65ab8d5); - VSET(16, e32, m2); + VSET(16, e32, m4); VLOAD_32(v4, 0x189138d0, 0xe2f3f48f, 0x58448029, 0x44298d07, 0x6f6b15cf, 0x13e9cf30, 0x23b6edb8, 0xd532420a, 0xdab302ee, 0xa5e6854e, 0x538f91b0, 0xc5d4db0e, 0xbc6d31b3, 0x754d418c, 0x96198b07, @@ -56,9 +56,9 @@ void TEST_CASE1() { 0x13db3502, 0x64efb3f9, 0x55c57a21, 0x31cd5a79, 0x5c0b4048, 0x899cfb88, 0xfab9de9d, 0x6fa41232, 0x9462cda3, 0x0f8de6ea, 0x8064029f); - asm 
volatile("vwmaccsu.vv v6, v4, v8"); - VSET(16, e64, m2); - VCMP_I64(3, v6, 0x00d3fd4343a241fa, 0x7bbc44d6fa22c6ff, 0xe7a7ead9df60a04d, + asm volatile("vwmaccsu.vv v16, v4, v8"); + VSET(16, e64, m4); + VCMP_I64(3, v16, 0x00d3fd4343a241fa, 0x7bbc44d6fa22c6ff, 0xe7a7ead9df60a04d, 0x7ddc4c9a72efd193, 0x2d44ed6d874572c1, 0x93e550a88e8e3197, 0x8ada040c3cea26be, 0xd76f3f99213ccf47, 0x05f53f01db8f434a, 0x7bb552f0a51802e9, 0xd49c03ec9aaeb580, 0x0c6e9b2885384c9a, @@ -68,48 +68,48 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x30dc, 0x7235, 0xd5f6, 0xa008, 0x6e79, 0xa159, 0xa05c, 0x5914, + VSET(16, e16, m4); + VLOAD_16(v16, 0x30dc, 0x7235, 0xd5f6, 0xa008, 0x6e79, 0xa159, 0xa05c, 0x5914, 0xd06f, 0x69c5, 0x9475, 0x5625, 0xa5bd, 0x7be7, 0x823c, 0x5fb2); - VSET(16, e8, m2); + VSET(16, e8, m4); VLOAD_8(v4, 0xb6, 0xbb, 0xb6, 0x57, 0xf9, 0x7c, 0xbf, 0x62, 0x1a, 0xeb, 0xa4, 0x34, 0xde, 0x96, 0x80, 0xe6); VLOAD_8(v8, 0x26, 0xea, 0xe8, 0x85, 0x2e, 0xf1, 0x46, 0x8f, 0x68, 0x29, 0xbb, 0x9b, 0xec, 0x5c, 0x8e, 0x77); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccsu.vv v6, v4, v8, v0.t"); - VSET(16, e16, m2); - VCMP_I16(4, v6, 0x30dc, 0x3323, 0xd5f6, 0xcd3b, 0x6e79, 0x1615, 0xa05c, + asm volatile("vwmaccsu.vv v16, v4, v8, v0.t"); + VSET(16, e16, m4); + VCMP_I16(4, v16, 0x30dc, 0x3323, 0xd5f6, 0xcd3b, 0x6e79, 0x1615, 0xa05c, 0x8fd2, 0xd06f, 0x6668, 0x9475, 0x75a1, 0xa5bd, 0x55cf, 0x823c, 0x539c); - VSET(16, e32, m2); - VLOAD_32(v6, 0xdbc5b23d, 0x86bd7dad, 0xb744b5c2, 0xc32f4a47, 0x237edfc4, + VSET(16, e32, m4); + VLOAD_32(v16, 0xdbc5b23d, 0x86bd7dad, 0xb744b5c2, 0xc32f4a47, 0x237edfc4, 0x5d6e851a, 0xbd3110cd, 0x18c61b57, 0x7ade2943, 0x7e4f5ed6, 0x90e5ba77, 0xce45b744, 0x82d1976e, 0xa88bb4e1, 0x989fbb9a, 0xab29da17); - VSET(16, e16, m2); + VSET(16, e16, m4); VLOAD_16(v4, 0x23fb, 0xcee7, 0xa704, 0xc00f, 0xed9f, 0x2cf0, 0x4b53, 0xc0ba, 0x775b, 0x557c, 0x57b7, 0xbb06, 0xf9ba, 0x178f, 0xec73, 0x8240); VLOAD_16(v8, 0xad9d, 0x104d, 0xdc56, 0x96af, 0x8c68, 
0x1d25, 0x2d70, 0x467a, 0xc27c, 0x96e2, 0x1c85, 0xe8b6, 0xf7e0, 0xd069, 0x0bca, 0x4f36); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccsu.vv v6, v4, v8, v0.t"); - VSET(16, e32, m2); - VCMP_I32(5, v6, 0xdbc5b23d, 0x839d2928, 0xb744b5c2, 0x9d8c5e88, 0x237edfc4, + asm volatile("vwmaccsu.vv v16, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_I32(5, v16, 0xdbc5b23d, 0x839d2928, 0xb744b5c2, 0x9d8c5e88, 0x237edfc4, 0x628c33ca, 0xbd3110cd, 0x075acffb, 0x7ade2943, 0xb0b17e4e, 0x90e5ba77, 0x8f921d88, 0x82d1976e, 0xbbb98e88, 0x989fbb9a, 0x84411397); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0xf8e162af4fefb46a, 0x8e859cff3b076a9d, 0xa7279ec622e749eb, + VSET(16, e64, m4); + VLOAD_64(v16, 0xf8e162af4fefb46a, 0x8e859cff3b076a9d, 0xa7279ec622e749eb, 0x67bbdace6d6bf1a9, 0xf2090d8d3b00e5b8, 0x9259e92430c5a337, 0x7cc51e4cc8fd46c6, 0xe5c6946a8e9787fd, 0x0d36e747a75534cc, 0x9c1a70c0989504f9, 0xa7b0f15e7b51c000, 0x4566f8ffa299d104, 0xf385b581a4c1c25b, 0xb067f1a7621f9cdd, 0x54ffc96dc442d7b5, 0x3fc18a6aa65ab8d5); - VSET(16, e32, m2); + VSET(16, e32, m4); VLOAD_32(v4, 0x189138d0, 0xe2f3f48f, 0x58448029, 0x44298d07, 0x6f6b15cf, 0x13e9cf30, 0x23b6edb8, 0xd532420a, 0xdab302ee, 0xa5e6854e, 0x538f91b0, 0xc5d4db0e, 0xbc6d31b3, 0x754d418c, 0x96198b07, @@ -119,9 +119,9 @@ void TEST_CASE2() { 0x899cfb88, 0xfab9de9d, 0x6fa41232, 0x9462cda3, 0x0f8de6ea, 0x8064029f); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccsu.vv v6, v4, v8, v0.t"); - VSET(16, e64, m2); - VCMP_I64(6, v6, 0xf8e162af4fefb46a, 0x7bbc44d6fa22c6ff, 0xa7279ec622e749eb, + asm volatile("vwmaccsu.vv v16, v4, v8, v0.t"); + VSET(16, e64, m4); + VCMP_I64(6, v16, 0xf8e162af4fefb46a, 0x7bbc44d6fa22c6ff, 0xa7279ec622e749eb, 0x7ddc4c9a72efd193, 0xf2090d8d3b00e5b8, 0x93e550a88e8e3197, 0x7cc51e4cc8fd46c6, 0xd76f3f99213ccf47, 0x0d36e747a75534cc, 0x7bb552f0a51802e9, 0xa7b0f15e7b51c000, 0x0c6e9b2885384c9a, @@ -131,52 +131,52 @@ void TEST_CASE2() { } void TEST_CASE3() { - VSET(16, e16, m2); - VLOAD_16(v6, 0xadd2, 0x2112, 0xbbc6, 0xd113, 
0xc6f7, 0xbd07, 0xfd9a, 0x0c0e, + VSET(16, e16, m4); + VLOAD_16(v16, 0xadd2, 0x2112, 0xbbc6, 0xd113, 0xc6f7, 0xbd07, 0xfd9a, 0x0c0e, 0xe110, 0xe81b, 0xb432, 0x5c2c, 0x4da9, 0x8c48, 0x6f94, 0x6250); - VSET(16, e8, m2); + VSET(16, e8, m4); int64_t scalar = 5; VLOAD_8(v8, 0xfe, 0xd1, 0xc2, 0x3b, 0x79, 0x2f, 0xf5, 0xe8, 0x7f, 0x4b, 0x64, 0x57, 0x2b, 0x4f, 0x4e, 0xda); - asm volatile("vwmaccsu.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e16, m2); - VCMP_I16(7, v6, 0xb2c8, 0x2527, 0xbf90, 0xd23a, 0xc954, 0xbdf2, 0x0263, + asm volatile("vwmaccsu.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e16, m4); + VCMP_I16(7, v16, 0xb2c8, 0x2527, 0xbf90, 0xd23a, 0xc954, 0xbdf2, 0x0263, 0x1096, 0xe38b, 0xe992, 0xb626, 0x5ddf, 0x4e80, 0x8dd3, 0x711a, 0x6692); - VSET(16, e32, m2); - VLOAD_32(v6, 0x39d3ea89, 0x451d8e1a, 0x83edb2d7, 0xc1919ab3, 0x243c3d4d, + VSET(16, e32, m4); + VLOAD_32(v16, 0x39d3ea89, 0x451d8e1a, 0x83edb2d7, 0xc1919ab3, 0x243c3d4d, 0xd4745be8, 0x50a58cbe, 0x53b75e9f, 0x2a648b62, 0xd74ce1cf, 0xa2c6a2e7, 0xc30eadb0, 0x7a908fb9, 0xd4455b56, 0x48109ee2, 0x2f5b537a); - VSET(16, e16, m2); + VSET(16, e16, m4); scalar = -5383; VLOAD_16(v8, 0x29f8, 0x6958, 0x6635, 0x03a0, 0x07bc, 0x4881, 0x7d4e, 0x37e3, 0x8370, 0x405f, 0x1f0d, 0x1252, 0xacf1, 0x06ee, 0x790d, 0x73af); - asm volatile("vwmaccsu.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e32, m2); - VCMP_I32(8, v6, 0x36616cc1, 0x3c7674b2, 0x7b888e64, 0xc1456153, 0x23999b29, + asm volatile("vwmaccsu.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(8, v16, 0x36616cc1, 0x3c7674b2, 0x7b888e64, 0xc1456153, 0x23999b29, 0xce7fcb61, 0x465ab99c, 0x4f20386a, 0x1f98c352, 0xd2035436, 0xa039b88c, 0xc18d7372, 0x6c5c1022, 0xd3b3a4d4, 0x3e1f3e87, 0x25daceb1); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0xd860771ff910e8a1, 0xd8de9ddf3be66e90, 0xe55e25348ff4c406, + VSET(16, e64, m4); + VLOAD_64(v16, 0xd860771ff910e8a1, 0xd8de9ddf3be66e90, 0xe55e25348ff4c406, 0x6ee24d9ebeda1c54, 0x78437fc8299017d1, 
0x46a2833ed69dec1d, 0x0331761dcc2485b7, 0x99c00b7ecbecb5bf, 0xd68d230a95510605, 0x0e82f981980d47c8, 0x7bb0e1dd5f273626, 0x044cc7c24be55121, 0x341b063e01c35796, 0xb77a96fdf1826215, 0xdcbd3fe115470433, 0xc2797417b552325b); - VSET(16, e32, m2); + VSET(16, e32, m4); scalar = 6474219; VLOAD_32(v8, 0x53046c2d, 0x3b0c65ed, 0x6565f981, 0xaa4c1d70, 0x0a18c71e, 0xbc91ff46, 0xa52c32d1, 0x73cca3fc, 0xb2a7e5d2, 0x1939af0a, 0xe4fdb1f5, 0x783f5c5d, 0x3514c875, 0xce346d04, 0x68047428, 0x72ca548f); - asm volatile("vwmaccsu.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e64, m2); - VCMP_I64(9, v6, 0xd880804c10498af0, 0xd8f567313109141f, 0xe585463838d91671, + asm volatile("vwmaccsu.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e64, m4); + VCMP_I64(9, v16, 0xd880804c10498af0, 0xd8f567313109141f, 0xe585463838d91671, 0x6f2405141ab61224, 0x7847653b1c216e5b, 0x46eb47de4add375f, 0x03713350939f4492, 0x99ecbb2b8c001a13, 0xd6d2143a6346dfcb, 0x0e8cb57c0c43cff6, 0x7c093f98e48cef0d, 0x047b2edbc92e1f80, @@ -186,55 +186,55 @@ void TEST_CASE3() { } void TEST_CASE4() { - VSET(16, e16, m2); - VLOAD_16(v6, 0xadd2, 0x2112, 0xbbc6, 0xd113, 0xc6f7, 0xbd07, 0xfd9a, 0x0c0e, + VSET(16, e16, m4); + VLOAD_16(v16, 0xadd2, 0x2112, 0xbbc6, 0xd113, 0xc6f7, 0xbd07, 0xfd9a, 0x0c0e, 0xe110, 0xe81b, 0xb432, 0x5c2c, 0x4da9, 0x8c48, 0x6f94, 0x6250); - VSET(16, e8, m2); + VSET(16, e8, m4); int64_t scalar = 5; VLOAD_8(v8, 0xfe, 0xd1, 0xc2, 0x3b, 0x79, 0x2f, 0xf5, 0xe8, 0x7f, 0x4b, 0x64, 0x57, 0x2b, 0x4f, 0x4e, 0xda); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccsu.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e16, m2); - VCMP_I16(10, v6, 0xadd2, 0x2527, 0xbbc6, 0xd23a, 0xc6f7, 0xbdf2, 0xfd9a, + asm volatile("vwmaccsu.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m4); + VCMP_I16(10, v16, 0xadd2, 0x2527, 0xbbc6, 0xd23a, 0xc6f7, 0xbdf2, 0xfd9a, 0x1096, 0xe110, 0xe992, 0xb432, 0x5ddf, 0x4da9, 0x8dd3, 0x6f94, 0x6692); - VSET(16, e32, m2); - VLOAD_32(v6, 0x39d3ea89, 0x451d8e1a, 0x83edb2d7, 
0xc1919ab3, 0x243c3d4d, + VSET(16, e32, m4); + VLOAD_32(v16, 0x39d3ea89, 0x451d8e1a, 0x83edb2d7, 0xc1919ab3, 0x243c3d4d, 0xd4745be8, 0x50a58cbe, 0x53b75e9f, 0x2a648b62, 0xd74ce1cf, 0xa2c6a2e7, 0xc30eadb0, 0x7a908fb9, 0xd4455b56, 0x48109ee2, 0x2f5b537a); - VSET(16, e16, m2); + VSET(16, e16, m4); scalar = -5383; VLOAD_16(v8, 0x29f8, 0x6958, 0x6635, 0x03a0, 0x07bc, 0x4881, 0x7d4e, 0x37e3, 0x8370, 0x405f, 0x1f0d, 0x1252, 0xacf1, 0x06ee, 0x790d, 0x73af); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccsu.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e32, m2); - VCMP_I32(11, v6, 0x39d3ea89, 0x3c7674b2, 0x83edb2d7, 0xc1456153, 0x243c3d4d, + asm volatile("vwmaccsu.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(11, v16, 0x39d3ea89, 0x3c7674b2, 0x83edb2d7, 0xc1456153, 0x243c3d4d, 0xce7fcb61, 0x50a58cbe, 0x4f20386a, 0x2a648b62, 0xd2035436, 0xa2c6a2e7, 0xc18d7372, 0x7a908fb9, 0xd3b3a4d4, 0x48109ee2, 0x25daceb1); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0xd860771ff910e8a1, 0xd8de9ddf3be66e90, 0xe55e25348ff4c406, + VSET(16, e64, m4); + VLOAD_64(v16, 0xd860771ff910e8a1, 0xd8de9ddf3be66e90, 0xe55e25348ff4c406, 0x6ee24d9ebeda1c54, 0x78437fc8299017d1, 0x46a2833ed69dec1d, 0x0331761dcc2485b7, 0x99c00b7ecbecb5bf, 0xd68d230a95510605, 0x0e82f981980d47c8, 0x7bb0e1dd5f273626, 0x044cc7c24be55121, 0x341b063e01c35796, 0xb77a96fdf1826215, 0xdcbd3fe115470433, 0xc2797417b552325b); - VSET(16, e32, m2); + VSET(16, e32, m4); scalar = 6474219; VLOAD_32(v8, 0x53046c2d, 0x3b0c65ed, 0x6565f981, 0xaa4c1d70, 0x0a18c71e, 0xbc91ff46, 0xa52c32d1, 0x73cca3fc, 0xb2a7e5d2, 0x1939af0a, 0xe4fdb1f5, 0x783f5c5d, 0x3514c875, 0xce346d04, 0x68047428, 0x72ca548f); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccsu.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e64, m2); - VCMP_I64(12, v6, 0xd860771ff910e8a1, 0xd8f567313109141f, 0xe55e25348ff4c406, + asm volatile("vwmaccsu.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m4); + VCMP_I64(12, v16, 
0xd860771ff910e8a1, 0xd8f567313109141f, 0xe55e25348ff4c406, 0x6f2405141ab61224, 0x78437fc8299017d1, 0x46eb47de4add375f, 0x0331761dcc2485b7, 0x99ecbb2b8c001a13, 0xd68d230a95510605, 0x0e8cb57c0c43cff6, 0x7bb0e1dd5f273626, 0x047b2edbc92e1f80, diff --git a/sw/riscvTests/isa/rv64uv/vwmaccu.c b/sw/riscvTests/isa/rv64uv/vwmaccu.c index 2bba6cae..82be8acb 100644 --- a/sw/riscvTests/isa/rv64uv/vwmaccu.c +++ b/sw/riscvTests/isa/rv64uv/vwmaccu.c @@ -8,46 +8,46 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x519d, 0x7122, 0x672c, 0x4d97, 0x436e, 0x3f1f, 0x423d, 0x44e8, + VSET(16, e16, m4); + VLOAD_16(v16, 0x519d, 0x7122, 0x672c, 0x4d97, 0x436e, 0x3f1f, 0x423d, 0x44e8, 0x3d7b, 0x5570, 0x1e90, 0x79f4, 0x456b, 0x0283, 0x02b5, 0x6865); - VSET(16, e8, m2); + VSET(16, e8, m4); VLOAD_8(v4, 0xce, 0x96, 0x33, 0x88, 0xf8, 0x3f, 0x0c, 0xde, 0x1e, 0x9d, 0x5a, 0x75, 0x73, 0x43, 0xd9, 0x43); VLOAD_8(v8, 0x51, 0x88, 0x16, 0xf6, 0x57, 0xab, 0xd8, 0x26, 0x2e, 0x35, 0x94, 0xd1, 0xf0, 0xb9, 0x09, 0x8a); - asm volatile("vwmaccu.vv v6, v4, v8"); - VSET(16, e16, m2); - VCMP_U16(1, v6, 0x92cb, 0xc0d2, 0x6b8e, 0xd047, 0x97b6, 0x6934, 0x4c5d, + asm volatile("vwmaccu.vv v16, v4, v8"); + VSET(16, e16, m4); + VCMP_U16(1, v16, 0x92cb, 0xc0d2, 0x6b8e, 0xd047, 0x97b6, 0x6934, 0x4c5d, 0x65dc, 0x42df, 0x75f1, 0x5298, 0xd979, 0xb13b, 0x32ee, 0x0a56, 0x8c83); - VSET(16, e32, m2); - VLOAD_32(v6, 0x1f50b763, 0x6d1a7f46, 0x17b8b2b5, 0x6b69c966, 0x25d945cb, + VSET(16, e32, m4); + VLOAD_32(v16, 0x1f50b763, 0x6d1a7f46, 0x17b8b2b5, 0x6b69c966, 0x25d945cb, 0x3e6c375b, 0x314db8d3, 0x35ade27d, 0x74fa2d58, 0x735f513d, 0x3cad4e4d, 0x628eb81a, 0x1c48c2f9, 0x14f08921, 0x77de05bf, 0x528c354b); - VSET(16, e16, m2); + VSET(16, e16, m4); VLOAD_16(v4, 0x4ed5, 0xcf74, 0x3442, 0x280f, 0x795e, 0x3007, 0xdf3e, 0xb348, 0x3865, 0xcb59, 0x1291, 0xa04b, 0xc5bd, 0x957f, 0xefe4, 0xe75d); VLOAD_16(v8, 0x7d39, 0xddd8, 0x17d7, 0x0574, 0x251a, 0x4ce4, 0x4817, 0x9de1, 0xd773, 0xdcc8, 0xeb92, 0x8fa8, 
0x9382, 0x4369, 0xb1c7, 0x9185); - asm volatile("vwmaccu.vv v6, v4, v8"); - VSET(16, e32, m2); - VCMP_U32(2, v6, 0x45e045d0, 0x20e0ad26, 0x1c968423, 0x6c443b32, 0x37702f57, + asm volatile("vwmaccu.vv v16, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v16, 0x45e045d0, 0x20e0ad26, 0x1c968423, 0x6c443b32, 0x37702f57, 0x4cd91197, 0x702b3765, 0xa43e9cc5, 0xa47055b7, 0x22beaac5, 0x4dc2ffff, 0xbc81ce52, 0x8e38b3f3, 0x3c4e1738, 0x1e7523fb, 0xd610159c); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0x5118504f9237ea08, 0x6b71d4ee88073dde, 0x4420559f61e6927c, + VSET(16, e64, m4); + VLOAD_64(v16, 0x5118504f9237ea08, 0x6b71d4ee88073dde, 0x4420559f61e6927c, 0x24eea54c6530475c, 0x289211cb16ebbbc9, 0x1a0b2b7644ecd474, 0x159c16af3e71f736, 0x12dda0d2ca288012, 0x790fab107c1346b6, 0x589cc8132c869645, 0x323623bba87568ce, 0x5ce2e94d5e335c5d, 0x6e6b3c19c0d78ca0, 0x0502bed40a0600bc, 0x5ff6f4b3610e009c, 0x40d6eb0605052915); - VSET(16, e32, m2); + VSET(16, e32, m4); VLOAD_32(v4, 0xd1247b78, 0xfd5d326b, 0x7fe40cf5, 0xfd802d90, 0x9ec23b7e, 0x67219fe8, 0x9dc7f026, 0x257d8b7f, 0x782bc512, 0x42fa808b, 0x48d3273d, 0x7ca0371d, 0x06409254, 0xb77ce3ba, 0x28aac174, @@ -56,9 +56,9 @@ void TEST_CASE1() { 0xeec0354b, 0x8b4595bf, 0x9200fb5c, 0x0d627fcf, 0xdf0a8280, 0x4b5733be, 0x4f3bd496, 0x10f5d788, 0x3499c99d, 0xdeee29dd, 0x7e8643a4); - asm volatile("vwmaccu.vv v6, v4, v8"); - VSET(16, e64, m2); - VCMP_U64(3, v6, 0xdcfc2b3ca5c34640, 0x52260af0834ff780, 0x5dd0d00a7e576638, + asm volatile("vwmaccu.vv v16, v4, v8"); + VSET(16, e64, m4); + VCMP_U64(3, v16, 0xdcfc2b3ca5c34640, 0x52260af0834ff780, 0x5dd0d00a7e576638, 0x1f3e3a8021c0226c, 0x37a86e0412f255b1, 0x7a39dcd00fe1b56c, 0x6b728942b1a24190, 0x283f5f30c90c26b6, 0x7f5820d2b91f8e44, 0x92f7bdf9f97b71c5, 0x47a4ceb4fab8af14, 0x83757ab9b866ab5b, @@ -68,48 +68,48 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x519d, 0x7122, 0x672c, 0x4d97, 0x436e, 0x3f1f, 0x423d, 0x44e8, + VSET(16, e16, m4); + VLOAD_16(v16, 0x519d, 0x7122, 
0x672c, 0x4d97, 0x436e, 0x3f1f, 0x423d, 0x44e8, 0x3d7b, 0x5570, 0x1e90, 0x79f4, 0x456b, 0x0283, 0x02b5, 0x6865); - VSET(16, e8, m2); + VSET(16, e8, m4); VLOAD_8(v4, 0xce, 0x96, 0x33, 0x88, 0xf8, 0x3f, 0x0c, 0xde, 0x1e, 0x9d, 0x5a, 0x75, 0x73, 0x43, 0xd9, 0x43); VLOAD_8(v8, 0x51, 0x88, 0x16, 0xf6, 0x57, 0xab, 0xd8, 0x26, 0x2e, 0x35, 0x94, 0xd1, 0xf0, 0xb9, 0x09, 0x8a); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccu.vv v6, v4, v8, v0.t"); - VSET(16, e16, m2); - VCMP_U16(4, v6, 0x519d, 0xc0d2, 0x672c, 0xd047, 0x436e, 0x6934, 0x423d, + asm volatile("vwmaccu.vv v16, v4, v8, v0.t"); + VSET(16, e16, m4); + VCMP_U16(4, v16, 0x519d, 0xc0d2, 0x672c, 0xd047, 0x436e, 0x6934, 0x423d, 0x65dc, 0x3d7b, 0x75f1, 0x1e90, 0xd979, 0x456b, 0x32ee, 0x02b5, 0x8c83); - VSET(16, e32, m2); - VLOAD_32(v6, 0x1f50b763, 0x6d1a7f46, 0x17b8b2b5, 0x6b69c966, 0x25d945cb, + VSET(16, e32, m4); + VLOAD_32(v16, 0x1f50b763, 0x6d1a7f46, 0x17b8b2b5, 0x6b69c966, 0x25d945cb, 0x3e6c375b, 0x314db8d3, 0x35ade27d, 0x74fa2d58, 0x735f513d, 0x3cad4e4d, 0x628eb81a, 0x1c48c2f9, 0x14f08921, 0x77de05bf, 0x528c354b); - VSET(16, e16, m2); + VSET(16, e16, m4); VLOAD_16(v4, 0x4ed5, 0xcf74, 0x3442, 0x280f, 0x795e, 0x3007, 0xdf3e, 0xb348, 0x3865, 0xcb59, 0x1291, 0xa04b, 0xc5bd, 0x957f, 0xefe4, 0xe75d); VLOAD_16(v8, 0x7d39, 0xddd8, 0x17d7, 0x0574, 0x251a, 0x4ce4, 0x4817, 0x9de1, 0xd773, 0xdcc8, 0xeb92, 0x8fa8, 0x9382, 0x4369, 0xb1c7, 0x9185); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccu.vv v6, v4, v8, v0.t"); - VSET(16, e32, m2); - VCMP_U32(5, v6, 0x1f50b763, 0x20e0ad26, 0x17b8b2b5, 0x6c443b32, 0x25d945cb, + asm volatile("vwmaccu.vv v16, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v16, 0x1f50b763, 0x20e0ad26, 0x17b8b2b5, 0x6c443b32, 0x25d945cb, 0x4cd91197, 0x314db8d3, 0xa43e9cc5, 0x74fa2d58, 0x22beaac5, 0x3cad4e4d, 0xbc81ce52, 0x1c48c2f9, 0x3c4e1738, 0x77de05bf, 0xd610159c); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0x5118504f9237ea08, 0x6b71d4ee88073dde, 0x4420559f61e6927c, + VSET(16, e64, m4); + 
VLOAD_64(v16, 0x5118504f9237ea08, 0x6b71d4ee88073dde, 0x4420559f61e6927c, 0x24eea54c6530475c, 0x289211cb16ebbbc9, 0x1a0b2b7644ecd474, 0x159c16af3e71f736, 0x12dda0d2ca288012, 0x790fab107c1346b6, 0x589cc8132c869645, 0x323623bba87568ce, 0x5ce2e94d5e335c5d, 0x6e6b3c19c0d78ca0, 0x0502bed40a0600bc, 0x5ff6f4b3610e009c, 0x40d6eb0605052915); - VSET(16, e32, m2); + VSET(16, e32, m4); VLOAD_32(v4, 0xd1247b78, 0xfd5d326b, 0x7fe40cf5, 0xfd802d90, 0x9ec23b7e, 0x67219fe8, 0x9dc7f026, 0x257d8b7f, 0x782bc512, 0x42fa808b, 0x48d3273d, 0x7ca0371d, 0x06409254, 0xb77ce3ba, 0x28aac174, @@ -119,9 +119,9 @@ void TEST_CASE2() { 0x4b5733be, 0x4f3bd496, 0x10f5d788, 0x3499c99d, 0xdeee29dd, 0x7e8643a4); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccu.vv v6, v4, v8, v0.t"); - VSET(16, e64, m2); - VCMP_U64(6, v6, 0x5118504f9237ea08, 0x52260af0834ff780, 0x4420559f61e6927c, + asm volatile("vwmaccu.vv v16, v4, v8, v0.t"); + VSET(16, e64, m4); + VCMP_U64(6, v16, 0x5118504f9237ea08, 0x52260af0834ff780, 0x4420559f61e6927c, 0x1f3e3a8021c0226c, 0x289211cb16ebbbc9, 0x7a39dcd00fe1b56c, 0x159c16af3e71f736, 0x283f5f30c90c26b6, 0x790fab107c1346b6, 0x92f7bdf9f97b71c5, 0x323623bba87568ce, 0x83757ab9b866ab5b, @@ -131,52 +131,52 @@ void TEST_CASE2() { } void TEST_CASE3() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x6f50, 0x0299, 0x3578, 0x0e45, 0x752b, 0x60c7, 0x7d0c, 0x0943, + VSET(16, e16, m4); + VLOAD_16(v16, 0x6f50, 0x0299, 0x3578, 0x0e45, 0x752b, 0x60c7, 0x7d0c, 0x0943, 0x3f2d, 0x47bc, 0x4468, 0x616d, 0x5844, 0x3a7d, 0x32fe, 0x7813); - VSET(16, e8, m2); + VSET(16, e8, m4); uint64_t scalar = 5; VLOAD_8(v8, 0x01, 0xd6, 0x1e, 0x57, 0xcc, 0x31, 0x29, 0x06, 0x5a, 0xab, 0x1e, 0x0a, 0x97, 0x6f, 0xe0, 0xfc); - asm volatile("vwmaccu.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e16, m2); - VCMP_U16(7, v6, 0x6f55, 0x06c7, 0x360e, 0x0ff8, 0x7927, 0x61bc, 0x7dd9, + asm volatile("vwmaccu.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e16, m4); + VCMP_U16(7, v16, 0x6f55, 0x06c7, 0x360e, 0x0ff8, 0x7927, 0x61bc, 0x7dd9, 
0x0961, 0x40ef, 0x4b13, 0x44fe, 0x619f, 0x5b37, 0x3ca8, 0x375e, 0x7cff); - VSET(16, e32, m2); - VLOAD_32(v6, 0x681721c9, 0x5c07924a, 0x5147143e, 0x14da5785, 0x30a43e20, + VSET(16, e32, m4); + VLOAD_32(v16, 0x681721c9, 0x5c07924a, 0x5147143e, 0x14da5785, 0x30a43e20, 0x3498177e, 0x551df71d, 0x29656468, 0x12550807, 0x7dc95cbd, 0x2167ff36, 0x194b0d6c, 0x79119a1d, 0x6d77fab6, 0x3e32c755, 0x6e479bf4); - VSET(16, e16, m2); + VSET(16, e16, m4); scalar = 5383; VLOAD_16(v8, 0x9752, 0x45a4, 0xfde9, 0xa659, 0x957b, 0x1a3f, 0x2212, 0x5d43, 0xdc08, 0x1fb8, 0x5e15, 0x08da, 0x0468, 0x4458, 0xe1e2, 0x4ef7); - asm volatile("vwmaccu.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e32, m2); - VCMP_U32(8, v6, 0x7484ff07, 0x61bfedc6, 0x6622229d, 0x228430f4, 0x3ceb6b7d, + asm volatile("vwmaccu.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v16, 0x7484ff07, 0x61bfedc6, 0x6622229d, 0x228430f4, 0x3ceb6b7d, 0x36bffa37, 0x57ea5f9b, 0x310e703d, 0x2467b43f, 0x806452c5, 0x29224ac9, 0x1a052d62, 0x796e40f5, 0x7315111e, 0x50c07e83, 0x74c407b5); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0x27a0a02f7e9757d4, 0x7ff7bb4d394926a0, 0x09d25e3173571efd, + VSET(16, e64, m4); + VLOAD_64(v16, 0x27a0a02f7e9757d4, 0x7ff7bb4d394926a0, 0x09d25e3173571efd, 0x11661c8ece6711ac, 0x1e5ffff32ed851dd, 0x0698334d63d206a9, 0x79598c88fd85995f, 0x2fa78b4b7d90a222, 0x7d65cbfdfc7f2e1d, 0x6c0101ef46924df6, 0x59ff3d4e018b50f4, 0x2c8ec8409f219401, 0x20b183b4bb89c200, 0x28bee831261ca372, 0x5b9d142326bcef0a, 0x1c2ad051e4e7281e); - VSET(16, e32, m2); + VSET(16, e32, m4); scalar = 6474219; VLOAD_32(v8, 0x307dc235, 0x92187481, 0xa69319d1, 0x682b9abe, 0x8bdce4be, 0x95ec65ce, 0x46915d6f, 0xd59243e6, 0x1d0943e5, 0x3ae27787, 0x33c32e03, 0x8be66da2, 0x0fc78147, 0x2ce8d421, 0x9c9bc2fb, 0x10c8c9f7); - asm volatile("vwmaccu.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e64, m2); - VCMP_U64(9, v6, 0x27b3569317ce3b7b, 0x80301be52297620b, 0x0a12a5e3bda6ead8, + asm volatile("vwmaccu.vx v16, %[A], v8" ::[A] 
"r"(scalar)); + VSET(16, e64, m4); + VCMP_U64(9, v16, 0x27b3569317ce3b7b, 0x80301be52297620b, 0x0a12a5e3bda6ead8, 0x118e4f69e7a94c16, 0x1e95f8d392f37a47, 0x06d20e0c989e38c3, 0x7974c7d7abd08544, 0x2ff9f5bd60308c44, 0x7d710070e7f94e54, 0x6c17bb12e5fd05e3, 0x5a1336d961fce8b5, 0x2cc4c4ceefc069b7, @@ -186,55 +186,55 @@ void TEST_CASE3() { } void TEST_CASE4() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x6f50, 0x0299, 0x3578, 0x0e45, 0x752b, 0x60c7, 0x7d0c, 0x0943, + VSET(16, e16, m4); + VLOAD_16(v16, 0x6f50, 0x0299, 0x3578, 0x0e45, 0x752b, 0x60c7, 0x7d0c, 0x0943, 0x3f2d, 0x47bc, 0x4468, 0x616d, 0x5844, 0x3a7d, 0x32fe, 0x7813); - VSET(16, e8, m2); + VSET(16, e8, m4); uint64_t scalar = 5; VLOAD_8(v8, 0x01, 0xd6, 0x1e, 0x57, 0xcc, 0x31, 0x29, 0x06, 0x5a, 0xab, 0x1e, 0x0a, 0x97, 0x6f, 0xe0, 0xfc); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccu.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e16, m2); - VCMP_U16(10, v6, 0x6f50, 0x06c7, 0x3578, 0x0ff8, 0x752b, 0x61bc, 0x7d0c, + asm volatile("vwmaccu.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m4); + VCMP_U16(10, v16, 0x6f50, 0x06c7, 0x3578, 0x0ff8, 0x752b, 0x61bc, 0x7d0c, 0x0961, 0x3f2d, 0x4b13, 0x4468, 0x619f, 0x5844, 0x3ca8, 0x32fe, 0x7cff); - VSET(16, e32, m2); - VLOAD_32(v6, 0x681721c9, 0x5c07924a, 0x5147143e, 0x14da5785, 0x30a43e20, + VSET(16, e32, m4); + VLOAD_32(v16, 0x681721c9, 0x5c07924a, 0x5147143e, 0x14da5785, 0x30a43e20, 0x3498177e, 0x551df71d, 0x29656468, 0x12550807, 0x7dc95cbd, 0x2167ff36, 0x194b0d6c, 0x79119a1d, 0x6d77fab6, 0x3e32c755, 0x6e479bf4); - VSET(16, e16, m2); + VSET(16, e16, m4); scalar = 5383; VLOAD_16(v8, 0x9752, 0x45a4, 0xfde9, 0xa659, 0x957b, 0x1a3f, 0x2212, 0x5d43, 0xdc08, 0x1fb8, 0x5e15, 0x08da, 0x0468, 0x4458, 0xe1e2, 0x4ef7); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccu.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e32, m2); - VCMP_U32(11, v6, 0x681721c9, 0x61bfedc6, 0x5147143e, 0x228430f4, 0x30a43e20, + asm volatile("vwmaccu.vx v16, %[A], v8, v0.t" ::[A] 
"r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, v16, 0x681721c9, 0x61bfedc6, 0x5147143e, 0x228430f4, 0x30a43e20, 0x36bffa37, 0x551df71d, 0x310e703d, 0x12550807, 0x806452c5, 0x2167ff36, 0x1a052d62, 0x79119a1d, 0x7315111e, 0x3e32c755, 0x74c407b5); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0x27a0a02f7e9757d4, 0x7ff7bb4d394926a0, 0x09d25e3173571efd, + VSET(16, e64, m4); + VLOAD_64(v16, 0x27a0a02f7e9757d4, 0x7ff7bb4d394926a0, 0x09d25e3173571efd, 0x11661c8ece6711ac, 0x1e5ffff32ed851dd, 0x0698334d63d206a9, 0x79598c88fd85995f, 0x2fa78b4b7d90a222, 0x7d65cbfdfc7f2e1d, 0x6c0101ef46924df6, 0x59ff3d4e018b50f4, 0x2c8ec8409f219401, 0x20b183b4bb89c200, 0x28bee831261ca372, 0x5b9d142326bcef0a, 0x1c2ad051e4e7281e); - VSET(16, e32, m2); + VSET(16, e32, m4); scalar = 6474219; VLOAD_32(v8, 0x307dc235, 0x92187481, 0xa69319d1, 0x682b9abe, 0x8bdce4be, 0x95ec65ce, 0x46915d6f, 0xd59243e6, 0x1d0943e5, 0x3ae27787, 0x33c32e03, 0x8be66da2, 0x0fc78147, 0x2ce8d421, 0x9c9bc2fb, 0x10c8c9f7); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccu.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e64, m2); - VCMP_U64(12, v6, 0x27a0a02f7e9757d4, 0x80301be52297620b, 0x09d25e3173571efd, + asm volatile("vwmaccu.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m4); + VCMP_U64(12, v16, 0x27a0a02f7e9757d4, 0x80301be52297620b, 0x09d25e3173571efd, 0x118e4f69e7a94c16, 0x1e5ffff32ed851dd, 0x06d20e0c989e38c3, 0x79598c88fd85995f, 0x2ff9f5bd60308c44, 0x7d65cbfdfc7f2e1d, 0x6c17bb12e5fd05e3, 0x59ff3d4e018b50f4, 0x2cc4c4ceefc069b7, diff --git a/sw/riscvTests/isa/rv64uv/vwmaccus.c b/sw/riscvTests/isa/rv64uv/vwmaccus.c index 44953427..3c0ff032 100644 --- a/sw/riscvTests/isa/rv64uv/vwmaccus.c +++ b/sw/riscvTests/isa/rv64uv/vwmaccus.c @@ -8,52 +8,52 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x4c8e, 0xd449, 0xe266, 0xb6d1, 0xf28a, 0x1655, 0x3111, 0x4bde, + VSET(16, e16, m4); + VLOAD_16(v16, 0x4c8e, 0xd449, 0xe266, 0xb6d1, 0xf28a, 0x1655, 0x3111, 0x4bde, 
0x8787, 0x2ce4, 0x1083, 0xaa0c, 0x9fdf, 0x3e42, 0x98e7, 0xe33b); - VSET(16, e8, m2); + VSET(16, e8, m4); uint64_t scalar = 5; VLOAD_8(v8, 0x83, 0xfe, 0xa2, 0xc3, 0xa6, 0x18, 0xd9, 0x4c, 0x6e, 0xeb, 0x43, 0xb7, 0xec, 0x48, 0xb7, 0xe5); - asm volatile("vwmaccus.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e16, m2); - VCMP_I16(1, v6, 0x4a1d, 0xd43f, 0xe090, 0xb5a0, 0xf0c8, 0x16cd, 0x304e, + asm volatile("vwmaccus.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e16, m4); + VCMP_I16(1, v16, 0x4a1d, 0xd43f, 0xe090, 0xb5a0, 0xf0c8, 0x16cd, 0x304e, 0x4d5a, 0x89ad, 0x2c7b, 0x11d2, 0xa89f, 0x9f7b, 0x3faa, 0x977a, 0xe2b4); - VSET(16, e32, m2); - VLOAD_32(v6, 0xe318cc7a, 0x489815a8, 0x6e6fc053, 0x8d746807, 0xbc3e6244, + VSET(16, e32, m4); + VLOAD_32(v16, 0xe318cc7a, 0x489815a8, 0x6e6fc053, 0x8d746807, 0xbc3e6244, 0xcdfeb4fe, 0x22d24149, 0x26962240, 0x5ef85b7e, 0x2f61a9e8, 0x373dc202, 0x1567a6b5, 0x763c5239, 0x60dd0502, 0xab178102, 0x753e0a11); - VSET(16, e16, m2); + VSET(16, e16, m4); scalar = 5383; VLOAD_16(v8, 0xce02, 0x6935, 0xc803, 0x75bc, 0x80b7, 0x19d2, 0x3b7c, 0xc269, 0xb639, 0x66f1, 0x678b, 0xc83e, 0x5a5c, 0x389e, 0x9e46, 0xfae9); - asm volatile("vwmaccus.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e32, m2); - VCMP_I32(2, v6, 0xdefd9888, 0x513c4f1b, 0x69d67768, 0x97200c2b, 0xb1c9ea45, + asm volatile("vwmaccus.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(2, v16, 0xdefd9888, 0x513c4f1b, 0x69d67768, 0x97200c2b, 0xb1c9ea45, 0xd01da3bc, 0x27b50dad, 0x2187101f, 0x58e9040d, 0x37d63f7f, 0x3fbefdcf, 0x10d33667, 0x7da856bd, 0x65838754, 0xa31092ec, 0x74d30370); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0xb6a5b1c3c6d69abb, 0x3c6a647eb0d79a41, 0xf0c3eb8821045259, + VSET(16, e64, m4); + VLOAD_64(v16, 0xb6a5b1c3c6d69abb, 0x3c6a647eb0d79a41, 0xf0c3eb8821045259, 0x91d74be946352cae, 0x524c6db6c58f9da6, 0x39185a920f7787e8, 0x4080fbf0fdcc64ec, 0x9ed1fb83f53270fc, 0xff0661a19269f0c0, 0x47d26c599193fe0b, 0xd8cc0342dc3104ce, 0xc51f802bc93381cd, 
0xe7d6522aa1c51245, 0x6fa0a9d3f57bc667, 0xd140731478a147a8, 0x5d716379591922f4); - VSET(16, e32, m2); + VSET(16, e32, m4); scalar = 6474219; VLOAD_32(v8, 0x09377def, 0x99ee4d30, 0x8b8452d1, 0xc9e17667, 0x2254aa29, 0xb56ca9f5, 0xa1276371, 0x32ac1413, 0x59ff6af3, 0x6b61bf57, 0xc0eb37b3, 0x26f06be7, 0x0e9b21b2, 0x22898a93, 0xe3646841, 0xdd301fdc); - asm volatile("vwmaccus.vx v6, %[A], v8" ::[A] "r"(scalar)); - VSET(16, e64, m2); - VCMP_I64(3, v6, 0xb6a940470425dc20, 0x3c430136a5932551, 0xf096f84fcc847134, + asm volatile("vwmaccus.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e64, m4); + VCMP_I64(3, v16, 0xb6a940470425dc20, 0x3c430136a5932551, 0xf096f84fcc847134, 0x91c26988f191bc3b, 0x5259ad33e6940249, 0x38fb9358fb72e8cf, 0x405c6235eb4a66a7, 0x9ee5895b4431c96d, 0xff291c64abdfe8d1, 0x47fbdc7722d7f1e8, 0xd8b3ab8cf55eb11f, 0xc52e86eb9b0cedda, @@ -63,55 +63,55 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(16, e16, m2); - VLOAD_16(v6, 0x4c8e, 0xd449, 0xe266, 0xb6d1, 0xf28a, 0x1655, 0x3111, 0x4bde, + VSET(16, e16, m4); + VLOAD_16(v16, 0x4c8e, 0xd449, 0xe266, 0xb6d1, 0xf28a, 0x1655, 0x3111, 0x4bde, 0x8787, 0x2ce4, 0x1083, 0xaa0c, 0x9fdf, 0x3e42, 0x98e7, 0xe33b); - VSET(16, e8, m2); + VSET(16, e8, m4); uint64_t scalar = 5; VLOAD_8(v8, 0x83, 0xfe, 0xa2, 0xc3, 0xa6, 0x18, 0xd9, 0x4c, 0x6e, 0xeb, 0x43, 0xb7, 0xec, 0x48, 0xb7, 0xe5); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccus.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e16, m2); - VCMP_I16(4, v6, 0x4c8e, 0xd43f, 0xe266, 0xb5a0, 0xf28a, 0x16cd, 0x3111, + asm volatile("vwmaccus.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m4); + VCMP_I16(4, v16, 0x4c8e, 0xd43f, 0xe266, 0xb5a0, 0xf28a, 0x16cd, 0x3111, 0x4d5a, 0x8787, 0x2c7b, 0x1083, 0xa89f, 0x9fdf, 0x3faa, 0x98e7, 0xe2b4); - VSET(16, e32, m2); - VLOAD_32(v6, 0xe318cc7a, 0x489815a8, 0x6e6fc053, 0x8d746807, 0xbc3e6244, + VSET(16, e32, m4); + VLOAD_32(v16, 0xe318cc7a, 0x489815a8, 0x6e6fc053, 0x8d746807, 0xbc3e6244, 0xcdfeb4fe, 0x22d24149, 
0x26962240, 0x5ef85b7e, 0x2f61a9e8, 0x373dc202, 0x1567a6b5, 0x763c5239, 0x60dd0502, 0xab178102, 0x753e0a11); - VSET(16, e16, m2); + VSET(16, e16, m4); scalar = 5383; VLOAD_16(v8, 0xce02, 0x6935, 0xc803, 0x75bc, 0x80b7, 0x19d2, 0x3b7c, 0xc269, 0xb639, 0x66f1, 0x678b, 0xc83e, 0x5a5c, 0x389e, 0x9e46, 0xfae9); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccus.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e32, m2); - VCMP_I32(5, v6, 0xe318cc7a, 0x513c4f1b, 0x6e6fc053, 0x97200c2b, 0xbc3e6244, + asm volatile("vwmaccus.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(5, v16, 0xe318cc7a, 0x513c4f1b, 0x6e6fc053, 0x97200c2b, 0xbc3e6244, 0xd01da3bc, 0x22d24149, 0x2187101f, 0x5ef85b7e, 0x37d63f7f, 0x373dc202, 0x10d33667, 0x763c5239, 0x65838754, 0xab178102, 0x74d30370); #if ELEN == 64 - VSET(16, e64, m2); - VLOAD_64(v6, 0xb6a5b1c3c6d69abb, 0x3c6a647eb0d79a41, 0xf0c3eb8821045259, + VSET(16, e64, m4); + VLOAD_64(v16, 0xb6a5b1c3c6d69abb, 0x3c6a647eb0d79a41, 0xf0c3eb8821045259, 0x91d74be946352cae, 0x524c6db6c58f9da6, 0x39185a920f7787e8, 0x4080fbf0fdcc64ec, 0x9ed1fb83f53270fc, 0xff0661a19269f0c0, 0x47d26c599193fe0b, 0xd8cc0342dc3104ce, 0xc51f802bc93381cd, 0xe7d6522aa1c51245, 0x6fa0a9d3f57bc667, 0xd140731478a147a8, 0x5d716379591922f4); - VSET(16, e32, m2); + VSET(16, e32, m4); scalar = 6474219; VLOAD_32(v8, 0x09377def, 0x99ee4d30, 0x8b8452d1, 0xc9e17667, 0x2254aa29, 0xb56ca9f5, 0xa1276371, 0x32ac1413, 0x59ff6af3, 0x6b61bf57, 0xc0eb37b3, 0x26f06be7, 0x0e9b21b2, 0x22898a93, 0xe3646841, 0xdd301fdc); VLOAD_8(v0, 0xAA, 0xAA); - asm volatile("vwmaccus.vx v6, %[A], v8, v0.t" ::[A] "r"(scalar)); - VSET(16, e64, m2); - VCMP_I64(6, v6, 0xb6a5b1c3c6d69abb, 0x3c430136a5932551, 0xf0c3eb8821045259, + asm volatile("vwmaccus.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m4); + VCMP_I64(6, v16, 0xb6a5b1c3c6d69abb, 0x3c430136a5932551, 0xf0c3eb8821045259, 0x91c26988f191bc3b, 0x524c6db6c58f9da6, 0x38fb9358fb72e8cf, 0x4080fbf0fdcc64ec, 0x9ee5895b4431c96d, 
0xff0661a19269f0c0, 0x47fbdc7722d7f1e8, 0xd8cc0342dc3104ce, 0xc52e86eb9b0cedda, diff --git a/sw/riscvTests/isa/rv64uv/vxor.c b/sw/riscvTests/isa/rv64uv/vxor.c index 82f02dac..6e4bb684 100644 --- a/sw/riscvTests/isa/rv64uv/vxor.c +++ b/sw/riscvTests/isa/rv64uv/vxor.c @@ -8,48 +8,48 @@ #include "vector_macros.h" void TEST_CASE1() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - VLOAD_8(v6, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + VLOAD_8(v24, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0); - asm volatile("vxor.vv v2, v4, v6"); - VCMP_U8(1, v2, 0x0f, 0x02, 0x00, 0x0f, 0x02, 0x00, 0x0f, 0x02, 0x00, 0x0f, + asm volatile("vxor.vv v8, v16, v24"); + VCMP_U8(1, v8, 0x0f, 0x02, 0x00, 0x0f, 0x02, 0x00, 0x0f, 0x02, 0x00, 0x0f, 0x02, 0x00); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + VLOAD_16(v24, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0); - asm volatile("vxor.vv v2, v4, v6"); - VCMP_U16(2, v2, 0x00ff, 0x0002, 0x0000, 0x00ff, 0x0002, 0x0000, 0x00ff, + asm volatile("vxor.vv v8, v16, v24"); + VCMP_U16(2, v8, 0x00ff, 0x0002, 0x0000, 0x00ff, 0x0002, 0x0000, 0x00ff, 0x0002, 0x0000, 0x00ff, 0x0002, 0x0000); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - VLOAD_32(v6, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 
+ VLOAD_32(v24, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0); - asm volatile("vxor.vv v2, v4, v6"); - VCMP_U32(3, v2, 0x0000ffff, 0x00000002, 0x00000000, 0x0000ffff, 0x00000002, + asm volatile("vxor.vv v8, v16, v24"); + VCMP_U32(3, v8, 0x0000ffff, 0x00000002, 0x00000000, 0x0000ffff, 0x00000002, 0x00000000, 0x0000ffff, 0x00000002, 0x00000000, 0x0000ffff, 0x00000002, 0x00000000); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - VLOAD_64(v6, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); - asm volatile("vxor.vv v2, v4, v6"); - VCMP_U64(4, v2, 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000, + asm volatile("vxor.vv v8, v16, v24"); + VCMP_U64(4, v8, 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000, 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000, 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000, 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000); @@ -57,63 +57,63 @@ void TEST_CASE1() { } void TEST_CASE2() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - VLOAD_8(v6, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + VLOAD_8(v24, 0xf0, 0x03, 0xf0, 
0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vxor.vv v2, v4, v6, v0.t"); - VCMP_U8(5, v2, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, + asm volatile("vxor.vv v8, v16, v24, v0.t"); + VCMP_U8(5, v8, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + VLOAD_16(v24, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vxor.vv v2, v4, v6, v0.t"); - VCMP_U16(6, v2, 0x00ff, 0xbeef, 0x0000, 0x00ff, 0xbeef, 0x0000, 0x00ff, + asm volatile("vxor.vv v8, v16, v24, v0.t"); + VCMP_U16(6, v8, 0x00ff, 0xbeef, 0x0000, 0x00ff, 0xbeef, 0x0000, 0x00ff, 0xbeef, 0x0000, 0x00ff, 0xbeef, 0x0000); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - VLOAD_32(v6, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + VLOAD_32(v24, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_32(v2, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vxor.vv v2, v4, v6, v0.t"); - VCMP_U32(7, v2, 0x0000ffff, 0xdeadbeef, 0x00000000, 0x0000ffff, 0xdeadbeef, + asm volatile("vxor.vv v8, v16, v24, v0.t"); + VCMP_U32(7, v8, 0x0000ffff, 0xdeadbeef, 0x00000000, 0x0000ffff, 0xdeadbeef, 0x00000000, 0x0000ffff, 0xdeadbeef, 0x00000000, 0x0000ffff, 0xdeadbeef, 0x00000000); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - VLOAD_64(v6, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vxor.vv v2, v4, v6, v0.t"); - VCMP_U64(8, v2, 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000, + asm volatile("vxor.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000, 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000, 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000, 
0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000); @@ -123,37 +123,37 @@ void TEST_CASE2() { void TEST_CASE3() { const uint32_t scalar = 0x0ff00ff0; - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - asm volatile("vxor.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U8(9, v2, 0x0f, 0xf1, 0x00, 0x0f, 0xf1, 0x00, 0x0f, 0xf1, 0x00, 0x0f, + asm volatile("vxor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v8, 0x0f, 0xf1, 0x00, 0x0f, 0xf1, 0x00, 0x0f, 0xf1, 0x00, 0x0f, 0xf1, 0x00); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - asm volatile("vxor.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U16(10, v2, 0xf00f, 0x0ff1, 0xff00, 0xf00f, 0x0ff1, 0xff00, 0xf00f, + asm volatile("vxor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v8, 0xf00f, 0x0ff1, 0xff00, 0xf00f, 0x0ff1, 0xff00, 0xf00f, 0x0ff1, 0xff00, 0xf00f, 0x0ff1, 0xff00); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - asm volatile("vxor.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U32(11, v2, 0xf00ff00f, 0x0ff00ff1, 0xff00ff00, 0xf00ff00f, 0x0ff00ff1, + asm volatile("vxor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0xf00ff00f, 0x0ff00ff1, 0xff00ff00, 0xf00ff00f, 0x0ff00ff1, 0xff00ff00, 0xf00ff00f, 0x0ff00ff1, 0xff00ff00, 0xf00ff00f, 0x0ff00ff1, 0xff00ff00); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 
0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - asm volatile("vxor.vx v2, v4, %[A]" ::[A] "r"(scalar)); - VCMP_U64(12, v2, 0xfffffffff00ff00f, 0x000000000ff00ff1, 0xf0f0f0f0ff00ff00, + asm volatile("vxor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0xfffffffff00ff00f, 0x000000000ff00ff1, 0xf0f0f0f0ff00ff00, 0xfffffffff00ff00f, 0x000000000ff00ff1, 0xf0f0f0f0ff00ff00, 0xfffffffff00ff00f, 0x000000000ff00ff1, 0xf0f0f0f0ff00ff00, 0xfffffffff00ff00f, 0x000000000ff00ff1, 0xf0f0f0f0ff00ff00); @@ -163,52 +163,52 @@ void TEST_CASE3() { void TEST_CASE4() { const uint32_t scalar = 0x0ff00ff0; - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vxor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U8(13, v2, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, + asm volatile("vxor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v8, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm volatile("vxor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - 
VCMP_U16(14, v2, 0xf00f, 0xbeef, 0xff00, 0xf00f, 0xbeef, 0xff00, 0xf00f, + asm volatile("vxor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v8, 0xf00f, 0xbeef, 0xff00, 0xf00f, 0xbeef, 0xff00, 0xf00f, 0xbeef, 0xff00, 0xf00f, 0xbeef, 0xff00); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vxor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U32(15, v2, 0xf00ff00f, 0xdeadbeef, 0xff00ff00, 0xf00ff00f, 0xdeadbeef, + asm volatile("vxor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0xf00ff00f, 0xdeadbeef, 0xff00ff00, 0xf00ff00f, 0xdeadbeef, 0xff00ff00, 0xf00ff00f, 0xdeadbeef, 0xff00ff00, 0xf00ff00f, 0xdeadbeef, 0xff00ff00); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vxor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(16, v2, 
0xfffffffff00ff00f, 0xdeadbeefdeadbeef, 0xf0f0f0f0ff00ff00, + asm volatile("vxor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0xfffffffff00ff00f, 0xdeadbeefdeadbeef, 0xf0f0f0f0ff00ff00, 0xfffffffff00ff00f, 0xdeadbeefdeadbeef, 0xf0f0f0f0ff00ff00, 0xfffffffff00ff00f, 0xdeadbeefdeadbeef, 0xf0f0f0f0ff00ff00, 0xfffffffff00ff00f, 0xdeadbeefdeadbeef, 0xf0f0f0f0ff00ff00); @@ -216,37 +216,37 @@ void TEST_CASE4() { } void TEST_CASE5() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); - asm volatile("vxor.vi v2, v4, 15"); - VCMP_U8(17, v2, 0xf0, 0x0e, 0xff, 0xf0, 0x0e, 0xff, 0xf0, 0x0e, 0xff, 0xf0, + asm volatile("vxor.vi v8, v16, 15"); + VCMP_U8(17, v8, 0xf0, 0x0e, 0xff, 0xf0, 0x0e, 0xff, 0xf0, 0x0e, 0xff, 0xf0, 0x0e, 0xff); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); - asm volatile("vxor.vi v2, v4, 15"); - VCMP_U16(18, v2, 0xfff0, 0x000e, 0xf0ff, 0xfff0, 0x000e, 0xf0ff, 0xfff0, + asm volatile("vxor.vi v8, v16, 15"); + VCMP_U16(18, v8, 0xfff0, 0x000e, 0xf0ff, 0xfff0, 0x000e, 0xf0ff, 0xfff0, 0x000e, 0xf0ff, 0xfff0, 0x000e, 0xf0ff); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); - asm volatile("vxor.vi v2, v4, 15"); - VCMP_U32(19, v2, 0xfffffff0, 0x0000000e, 0xf0f0f0ff, 0xfffffff0, 0x0000000e, + asm volatile("vxor.vi v8, v16, 15"); + VCMP_U32(19, v8, 0xfffffff0, 0x0000000e, 0xf0f0f0ff, 0xfffffff0, 0x0000000e, 0xf0f0f0ff, 0xfffffff0, 0x0000000e, 0xf0f0f0ff, 0xfffffff0, 0x0000000e, 
0xf0f0f0ff); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); - asm volatile("vxor.vi v2, v4, 15"); - VCMP_U64(20, v2, 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff, + asm volatile("vxor.vi v8, v16, 15"); + VCMP_U64(20, v8, 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff, 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff, 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff, 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff); @@ -254,52 +254,52 @@ void TEST_CASE5() { } void TEST_CASE6() { - VSET(12, e8, m2); - VLOAD_8(v4, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + VSET(12, e8, m8); + VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_8(v2, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + VLOAD_8(v8, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); - asm volatile("vxor.vi v2, v4, 15, v0.t"); - VCMP_U8(21, v2, 0xf0, 0xef, 0xff, 0xf0, 0xef, 0xff, 0xf0, 0xef, 0xff, 0xf0, + asm volatile("vxor.vi v8, v16, 15, v0.t"); + VCMP_U8(21, v8, 0xf0, 0xef, 0xff, 0xf0, 0xef, 0xff, 0xf0, 0xef, 0xff, 0xf0, 0xef, 0xff); - VSET(12, e16, m2); - VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + VSET(12, e16, m8); + VLOAD_16(v16, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + VLOAD_16(v8, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); - asm 
volatile("vxor.vi v2, v4, 15, v0.t"); - VCMP_U16(22, v2, 0xfff0, 0xbeef, 0xf0ff, 0xfff0, 0xbeef, 0xf0ff, 0xfff0, + asm volatile("vxor.vi v8, v16, 15, v0.t"); + VCMP_U16(22, v8, 0xfff0, 0xbeef, 0xf0ff, 0xfff0, 0xbeef, 0xf0ff, 0xfff0, 0xbeef, 0xf0ff, 0xfff0, 0xbeef, 0xf0ff); - VSET(12, e32, m2); - VLOAD_32(v4, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + VSET(12, e32, m8); + VLOAD_32(v16, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_32(v2, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + VLOAD_32(v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); - asm volatile("vxor.vi v2, v4, 15, v0.t"); - VCMP_U32(23, v2, 0xfffffff0, 0xdeadbeef, 0xf0f0f0ff, 0xfffffff0, 0xdeadbeef, + asm volatile("vxor.vi v8, v16, 15, v0.t"); + VCMP_U32(23, v8, 0xfffffff0, 0xdeadbeef, 0xf0f0f0ff, 0xfffffff0, 0xdeadbeef, 0xf0f0f0ff, 0xfffffff0, 0xdeadbeef, 0xf0f0f0ff, 0xfffffff0, 0xdeadbeef, 0xf0f0f0ff); #if ELEN == 64 - VSET(12, e64, m2); - VLOAD_64(v4, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); VLOAD_8(v0, 0x6D, 0x0B); - VLOAD_64(v2, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); - asm volatile("vxor.vi v2, v4, 15, v0.t"); - VCMP_U64(40, v2, 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 
0xf0f0f0f0f0f0f0ff, + asm volatile("vxor.vi v8, v16, 15, v0.t"); + VCMP_U64(40, v8, 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff); From 4db44832766e1064015997a5cedf5d1ae23fdaa9 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 4 Dec 2023 19:12:41 -0800 Subject: [PATCH 07/17] sw: Define number of FPUs --- sw/spatzBenchmarks/CMakeLists.txt | 10 +++++++--- sw/spatzBenchmarks/dp-faxpy/main.c | 2 +- sw/spatzBenchmarks/dp-fconv2d/main.c | 2 +- sw/spatzBenchmarks/dp-fdotp/main.c | 2 +- sw/spatzBenchmarks/dp-fft/main.c | 2 +- sw/spatzBenchmarks/dp-fmatmul/main.c | 2 +- sw/spatzBenchmarks/hp-fmatmul/main.c | 2 +- sw/spatzBenchmarks/sdotp-bp-fmatmul/main.c | 2 +- sw/spatzBenchmarks/sdotp-hp-fmatmul/main.c | 2 +- sw/spatzBenchmarks/sp-fft/main.c | 2 +- sw/spatzBenchmarks/sp-fmatmul/main.c | 2 +- sw/spatzBenchmarks/widening-bp-fmatmul/main.c | 2 +- sw/spatzBenchmarks/widening-hp-fmatmul/main.c | 2 +- 13 files changed, 19 insertions(+), 15 deletions(-) diff --git a/sw/spatzBenchmarks/CMakeLists.txt b/sw/spatzBenchmarks/CMakeLists.txt index 474b1919..dd96cb0d 100644 --- a/sw/spatzBenchmarks/CMakeLists.txt +++ b/sw/spatzBenchmarks/CMakeLists.txt @@ -4,6 +4,9 @@ cmake_minimum_required(VERSION 3.13) +# Defines +set(SNRT_NFPU_PER_CORE "0" CACHE STRING "Number of FPUs per Spatz") + # Allow spatzBenchmarks to be built as a standalone library. 
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../cmake) @@ -21,26 +24,27 @@ include_directories(${SNRUNTIME_INCLUDE_DIRS}) add_compile_options(-O3 -g -ffunction-sections) + # Macro to regenerate the golden values and compile a module macro(add_spatz_test_oneParam name file param1) set(target_name ${name}_M${param1}) add_snitch_test(${target_name} ${file}) target_link_libraries(test-${SNITCH_TEST_PREFIX}${target_name} benchmark ${SNITCH_RUNTIME}) - target_compile_definitions(test-${SNITCH_TEST_PREFIX}${target_name} PUBLIC DATAHEADER="data/data_${param1}.h") + target_compile_definitions(test-${SNITCH_TEST_PREFIX}${target_name} PUBLIC DATAHEADER="data/data_${param1}.h" SNRT_NFPU_PER_CORE=${SNRT_NFPU_PER_CORE}) endmacro() macro(add_spatz_test_twoParam name file param1 param2) set(target_name ${name}_M${param1}_N${param2}) add_snitch_test(${target_name} ${file}) target_link_libraries(test-${SNITCH_TEST_PREFIX}${target_name} benchmark ${SNITCH_RUNTIME}) - target_compile_definitions(test-${SNITCH_TEST_PREFIX}${target_name} PUBLIC DATAHEADER="data/data_${param1}_${param2}.h") + target_compile_definitions(test-${SNITCH_TEST_PREFIX}${target_name} PUBLIC DATAHEADER="data/data_${param1}_${param2}.h" SNRT_NFPU_PER_CORE=${SNRT_NFPU_PER_CORE}) endmacro() macro(add_spatz_test_threeParam name file param1 param2 param3) set(target_name ${name}_M${param1}_N${param2}_K${param3}) add_snitch_test(${target_name} ${file}) target_link_libraries(test-${SNITCH_TEST_PREFIX}${target_name} benchmark ${SNITCH_RUNTIME}) - target_compile_definitions(test-${SNITCH_TEST_PREFIX}${target_name} PUBLIC DATAHEADER="data/data_${param1}_${param2}_${param3}.h") + target_compile_definitions(test-${SNITCH_TEST_PREFIX}${target_name} PUBLIC DATAHEADER="data/data_${param1}_${param2}_${param3}.h" SNRT_NFPU_PER_CORE=${SNRT_NFPU_PER_CORE}) endmacro() # Benchmark library diff --git a/sw/spatzBenchmarks/dp-faxpy/main.c 
b/sw/spatzBenchmarks/dp-faxpy/main.c index ca40ec1a..00ea189d 100644 --- a/sw/spatzBenchmarks/dp-faxpy/main.c +++ b/sw/spatzBenchmarks/dp-faxpy/main.c @@ -98,7 +98,7 @@ int main() { // Check and display results if (cid == 0) { long unsigned int performance = 1000 * 2 * dim / timer; - long unsigned int utilization = performance / (2 * num_cores * 4); + long unsigned int utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE); printf("\n----- (%d) axpy -----\n", dim); printf("The execution took %u cycles.\n", timer); diff --git a/sw/spatzBenchmarks/dp-fconv2d/main.c b/sw/spatzBenchmarks/dp-fconv2d/main.c index e0ec3049..c054eeab 100644 --- a/sw/spatzBenchmarks/dp-fconv2d/main.c +++ b/sw/spatzBenchmarks/dp-fconv2d/main.c @@ -122,7 +122,7 @@ int main() { // Check and display results if (cid == 0) { long unsigned int performance = 1000 * 2 * f * f * r * c / timer; - long unsigned int utilization = performance / (2 * num_cores * 4); + long unsigned int utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE); printf("\n----- (%dx%d) dp fconv2d -----\n", r, c); printf("The execution took %u cycles.\n", timer); diff --git a/sw/spatzBenchmarks/dp-fdotp/main.c b/sw/spatzBenchmarks/dp-fdotp/main.c index ede8776e..aab0e11f 100644 --- a/sw/spatzBenchmarks/dp-fdotp/main.c +++ b/sw/spatzBenchmarks/dp-fdotp/main.c @@ -108,7 +108,7 @@ int main() { // Check and display results if (cid == 0) { long unsigned int performance = 1000 * 2 * dotp_l.M / timer; - long unsigned int utilization = performance / (2 * num_cores * 4); + long unsigned int utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE); printf("\n----- (%d) dp fdotp -----\n", dotp_l.M); printf("The execution took %u cycles.\n", timer); diff --git a/sw/spatzBenchmarks/dp-fft/main.c b/sw/spatzBenchmarks/dp-fft/main.c index 6bf64908..f4a93bb3 100644 --- a/sw/spatzBenchmarks/dp-fft/main.c +++ b/sw/spatzBenchmarks/dp-fft/main.c @@ -115,7 +115,7 @@ int main() { if (cid == 0) { long unsigned int 
performance = 1000 * 10 * NFFT * log2_nfft * 6 / 5 / timer; - long unsigned int utilization = performance / (2 * num_cores * 4); + long unsigned int utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE); printf("\n----- fft on %d samples -----\n", NFFT); printf("The execution took %u cycles.\n", timer); diff --git a/sw/spatzBenchmarks/dp-fmatmul/main.c b/sw/spatzBenchmarks/dp-fmatmul/main.c index bd012ce4..98fae43d 100644 --- a/sw/spatzBenchmarks/dp-fmatmul/main.c +++ b/sw/spatzBenchmarks/dp-fmatmul/main.c @@ -131,7 +131,7 @@ int main() { if (cid == 0) { long unsigned int performance = 1000 * 2 * gemm_l.M * gemm_l.N * gemm_l.K / timer; - long unsigned int utilization = performance / (2 * num_cores * 4); + long unsigned int utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE); printf("\n----- (%dx%d) dp fmatmul -----\n", gemm_l.M, gemm_l.N); printf("The execution took %u cycles.\n", timer); diff --git a/sw/spatzBenchmarks/hp-fmatmul/main.c b/sw/spatzBenchmarks/hp-fmatmul/main.c index 60d3e8c9..891b931a 100644 --- a/sw/spatzBenchmarks/hp-fmatmul/main.c +++ b/sw/spatzBenchmarks/hp-fmatmul/main.c @@ -147,7 +147,7 @@ int main() { if (cid == 0) { long unsigned int performance = 1000 * 2 * gemm_l.M * gemm_l.N * gemm_l.K / timer; - long unsigned int utilization = performance / (2 * num_cores * 16); + long unsigned int utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE * 4); printf("\n----- (%dx%d) hp fmatmul -----\n", gemm_l.M, gemm_l.N); printf("The execution took %u cycles.\n", timer); diff --git a/sw/spatzBenchmarks/sdotp-bp-fmatmul/main.c b/sw/spatzBenchmarks/sdotp-bp-fmatmul/main.c index e92a7c12..fda65614 100644 --- a/sw/spatzBenchmarks/sdotp-bp-fmatmul/main.c +++ b/sw/spatzBenchmarks/sdotp-bp-fmatmul/main.c @@ -127,7 +127,7 @@ int main() { if (cid == 0) { long unsigned int performance = 1000 * 2 * gemm_l.M * gemm_l.N * gemm_l.K / timer; - long unsigned int utilization = performance / (2 * num_cores * 32); + long unsigned int 
utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE * 8); printf("\n----- (%dx%d) sdotp bp fmatmul -----\n", gemm_l.M, gemm_l.N); printf("The execution took %u cycles.\n", timer); diff --git a/sw/spatzBenchmarks/sdotp-hp-fmatmul/main.c b/sw/spatzBenchmarks/sdotp-hp-fmatmul/main.c index d7794785..210210b6 100644 --- a/sw/spatzBenchmarks/sdotp-hp-fmatmul/main.c +++ b/sw/spatzBenchmarks/sdotp-hp-fmatmul/main.c @@ -151,7 +151,7 @@ int main() { if (cid == 0) { long unsigned int performance = 1000 * 2 * gemm_l.M * gemm_l.N * gemm_l.K / timer; - long unsigned int utilization = performance / (2 * num_cores * 16); + long unsigned int utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE * 4); printf("\n----- (%dx%d) sdotp hp fmatmul -----\n", gemm_l.M, gemm_l.N); printf("The execution took %u cycles.\n", timer); diff --git a/sw/spatzBenchmarks/sp-fft/main.c b/sw/spatzBenchmarks/sp-fft/main.c index eaf5fc2b..8f340aed 100644 --- a/sw/spatzBenchmarks/sp-fft/main.c +++ b/sw/spatzBenchmarks/sp-fft/main.c @@ -114,7 +114,7 @@ int main() { if (cid == 0) { long unsigned int performance = 1000 * 10 * NFFT * log2_nfft * 6 / 5 / timer; - long unsigned int utilization = performance / (2 * num_cores * 8); + long unsigned int utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE * 2); printf("\n----- fft on %d samples -----\n", NFFT); printf("The execution took %u cycles.\n", timer); diff --git a/sw/spatzBenchmarks/sp-fmatmul/main.c b/sw/spatzBenchmarks/sp-fmatmul/main.c index de215721..a172685c 100644 --- a/sw/spatzBenchmarks/sp-fmatmul/main.c +++ b/sw/spatzBenchmarks/sp-fmatmul/main.c @@ -131,7 +131,7 @@ int main() { if (cid == 0) { long unsigned int performance = 1000 * 2 * gemm_l.M * gemm_l.N * gemm_l.K / timer; - long unsigned int utilization = performance / (2 * num_cores * 8); + long unsigned int utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE * 2); printf("\n----- (%dx%d) sp fmatmul -----\n", gemm_l.M, gemm_l.N); printf("The 
execution took %u cycles.\n", timer); diff --git a/sw/spatzBenchmarks/widening-bp-fmatmul/main.c b/sw/spatzBenchmarks/widening-bp-fmatmul/main.c index f61dcf3a..554a5ede 100644 --- a/sw/spatzBenchmarks/widening-bp-fmatmul/main.c +++ b/sw/spatzBenchmarks/widening-bp-fmatmul/main.c @@ -115,7 +115,7 @@ int main() { if (cid == 0) { long unsigned int performance = 1000 * 2 * gemm_l.M * gemm_l.N * gemm_l.K / timer; - long unsigned int utilization = performance / (2 * num_cores * 16); + long unsigned int utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE * 4); printf("\n----- (%dx%d) widening bp fmatmul -----\n", gemm_l.M, gemm_l.N); printf("The execution took %u cycles.\n", timer); diff --git a/sw/spatzBenchmarks/widening-hp-fmatmul/main.c b/sw/spatzBenchmarks/widening-hp-fmatmul/main.c index 5397a63d..3298f910 100644 --- a/sw/spatzBenchmarks/widening-hp-fmatmul/main.c +++ b/sw/spatzBenchmarks/widening-hp-fmatmul/main.c @@ -139,7 +139,7 @@ int main() { if (cid == 0) { long unsigned int performance = 1000 * 2 * gemm_l.M * gemm_l.N * gemm_l.K / timer; - long unsigned int utilization = performance / (2 * num_cores * 8); + long unsigned int utilization = performance / (2 * num_cores * SNRT_NFPU_PER_CORE * 2); printf("\n----- (%dx%d) widening hp fmatmul -----\n", gemm_l.M, gemm_l.N); printf("The execution took %u cycles.\n", timer); From 611e8116bb6c4ee33815cbde5c2a739cf2f4648c Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 4 Dec 2023 19:12:55 -0800 Subject: [PATCH 08/17] sw: Clean up the runtime --- sw/snRuntime/CMakeLists.txt | 15 +- .../src/platforms/shared/start_snitch.S | 21 --- sw/snRuntime/src/{start.S => start.S.in} | 139 +++++++----------- 3 files changed, 67 insertions(+), 108 deletions(-) rename sw/snRuntime/src/{start.S => start.S.in} (76%) diff --git a/sw/snRuntime/CMakeLists.txt b/sw/snRuntime/CMakeLists.txt index 6940d2da..167e8355 100644 --- a/sw/snRuntime/CMakeLists.txt +++ b/sw/snRuntime/CMakeLists.txt @@ -30,7 +30,13 @@ 
set(PLATFORM_SOURCE_FOLDER "src/platforms/standalone" CACHE STRING "Path to the # Default memory regions set(MEM_DRAM_ORIGIN "0x80000000" CACHE STRING "Base address of external memory") set(MEM_DRAM_SIZE "0x80000000" CACHE STRING "Size of external memory") +set(SNRT_BASE_HARTID "0" CACHE STRING "Base hart id of this cluster") +set(SNRT_CLUSTER_CORE_NUM "0" CACHE STRING "Number of cores in this cluster") +set(SNRT_TCDM_START_ADDR "0" CACHE STRING "Start address of the TCDM region") +set(SNRT_TCDM_SIZE "0" CACHE STRING "Length of the TCDM region") +set(SNRT_CLUSTER_OFFSET "0" CACHE STRING "Address offset of this cluster's TCDM region") configure_file(${CMAKE_CURRENT_SOURCE_DIR}/link/common.ld.in common.ld @ONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/start.S.in start.S @ONLY) set(LINKER_SCRIPT ${CMAKE_CURRENT_BINARY_DIR}/common.ld CACHE PATH "") # provide linker script @@ -40,6 +46,7 @@ message(STATUS "Using common linker script: ${LINKER_SCRIPT}") # OpenMP set(OMPSTATIC_NUMTHREADS "0" CACHE STRING "If set to a non-zero value the OpenMP runtime is optimized to the number of cores") + if(RUNTIME_TRACE) # Enable runtime tracing add_compile_definitions(__SNRT_USE_TRACE) @@ -92,20 +99,20 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) # Build all runtimes # Generic Runtime (requiring bootloader/OS) - add_snitch_library(snRuntime src/start.S ${sources}) + add_snitch_library(snRuntime ${CMAKE_CURRENT_BINARY_DIR}/start.S ${sources}) # Bare Runtimes (with startup code) - add_snitch_library(snRuntime-cluster src/platforms/shared/start.c ${standalone_snitch_sources} ${sources}) + add_snitch_library(snRuntime-cluster ${CMAKE_CURRENT_BINARY_DIR}/start.S src/platforms/shared/start.c ${standalone_snitch_sources} ${sources}) else() # snRuntime is added externally, only build required runtime if(SNITCH_RUNTIME STREQUAL "snRuntime") # Generic Runtime (requiring bootloader/OS) - add_snitch_library(snRuntime src/platforms/shared/start.c src/start.S ${sources}) + 
add_snitch_library(snRuntime ${CMAKE_CURRENT_BINARY_DIR}/start.S src/platforms/shared/start.c ${sources}) # Bare Runtimes (with startup code) elseif(SNITCH_RUNTIME STREQUAL "snRuntime-cluster") - add_snitch_library(snRuntime-cluster src/platforms/shared/start.c ${standalone_snitch_sources} ${sources}) + add_snitch_library(snRuntime-cluster ${CMAKE_CURRENT_BINARY_DIR}/start.S src/platforms/shared/start.c ${standalone_snitch_sources} ${sources}) else() message(FATAL_ERROR "Requested runtime not implemented: ${SNITCH_RUNTIME}") endif() diff --git a/sw/snRuntime/src/platforms/shared/start_snitch.S b/sw/snRuntime/src/platforms/shared/start_snitch.S index d48ab060..09244300 100644 --- a/sw/snRuntime/src/platforms/shared/start_snitch.S +++ b/sw/snRuntime/src/platforms/shared/start_snitch.S @@ -2,30 +2,9 @@ # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 -#include "../../start.S" - # Function to determine the initial cluster layout to setup the initial stack. # See `hw/system/snitch_cluster/test/tb_lib.hh` for details of the bootdata # layout. -# -# Expecting: -# - a0: mhartid -# - a1: device tree ptr ("bootdata" in `tb_lib.hh`) -_snrt_init_core_info: - .globl _snrt_init_core_info - mv a4, a1 - lw a1, 8(a4) # load the number of cores per cluster - lw t0, 16(a4) # load cluster's hartid offset - lw a2, 24(a4) # start address of the TCDM - lw t1, 32(a4) # size of TCDM address space - lw t2, 40(a4) # offset between cluster TCDMs - sub a0, a0, t0 - div t3, a0, a1 # calculate cluster index - mul t4, t3, t2 # calculate cluster TCDM offset - add a2, a2, t4 # start address of current cluster TCDM - add a3, a2, t1 # end address of current cluster TCDM - remu a0, a0, a1 # calculate cluster-local core id - ret # Must leave a0 untouched. 
_snrt_cluster_barrier: diff --git a/sw/snRuntime/src/start.S b/sw/snRuntime/src/start.S.in similarity index 76% rename from sw/snRuntime/src/start.S rename to sw/snRuntime/src/start.S.in index 1574452e..5d9d14ee 100644 --- a/sw/snRuntime/src/start.S +++ b/sw/snRuntime/src/start.S.in @@ -5,7 +5,6 @@ # This file defines the `_start` symbol and initializes the global pointer. This # is common among all bare runtimes. - # Expected from bootloader: # - a0: mhartid # - a1: device tree ptr @@ -13,51 +12,12 @@ _start: .globl _start - # Initialize global pointer -snrt.crt0.init_global_pointer: - .option push - .option norelax -1: auipc gp, %pcrel_hi(__global_pointer$) - addi gp, gp, %pcrel_lo(1b) - .option pop - -snrt.crt0.init_core_info: - # Information passed into the call: - # - a0: mhartid (from bootloader) - # - a1: device tree ptr (from bootloader) - call _snrt_init_core_info - # Information needed after the call: - # - a0: cluster-local core id - # - a1: cluster-local core count - # - a2: TCDM start ptr - # - a3: TCDM end ptr - # - a4: device tree ptr - - # Clear the bss segment. -snrt.crt0.init_bss: - # Takes device tree ptr as argument and returns the hart base ID - mv s0, a0 # store a0 in s0 since we dont have a stack yet - mv a0, a4 - call _snrt_get_base_hartid - mv t0, a0 - mv a0, s0 - # only core 0. t0: return value base hartid - csrr t1, mhartid - sub t0, t1, t0 - bnez t0, 2f - la t0, _edata - la t1, _end - bge t0, t1, 2f -1: sw zero, 0(t0) - addi t0, t0, 4 - blt t0, t1, 1b -2: - -snrt.crt0.init_registers: - # Clear FP registers +snrt.crt0.init_fp_registers: + # Check if core has FP registers otherwise skip csrr t0, misa andi t0, t0, (1 << 3) | (1 << 5) # D/F - single/double precision float extension beqz t0, 3f + # Clear FP registers fcvt.d.w f0, zero fcvt.d.w f1, zero fcvt.d.w f2, zero @@ -92,18 +52,47 @@ snrt.crt0.init_registers: fcvt.d.w f31, zero 3: - # Read hartid and number of cores in the cluster. 
-snrt.crt0.init_stack: +snrt.crt0.init_global_pointer: + # Initialize global pointer + .option push + .option norelax +1: auipc gp, %pcrel_hi(__global_pointer$) + addi gp, gp, %pcrel_lo(1b) + .option pop + +snrt.crt0.init_core_info: + # Initialize information needed by the following routines - # Set 8 bytes aside for the main return value and clear that to zero. - addi a3, a3, -8 - sw zero, 0(a3) + # Bootdata + mv a4, a1 + + # Calculate cluster idx + csrr a0, mhartid + li t0, @SNRT_BASE_HARTID@ + sub a0, a0, t0 + li a1, @SNRT_CLUSTER_CORE_NUM@ + div t0, a0, a1 + + # Calculate cluster-local core ID + remu a0, a0, a1 + + # Calculate cluster's TCDM start address + li a2, @SNRT_TCDM_START_ADDR@ + li t1, @SNRT_CLUSTER_OFFSET@ + mul t0, t1, t0 + add a2, a2, t0 + + # Calculate cluster's TCDM end address + li t0, @SNRT_TCDM_SIZE@ + add a3, a2, t0 # Set space aside for the root team structure. lw t0, _snrt_team_size sub a3, a3, t0 mv a5, a3 + # Read hartid and number of cores in the cluster. +snrt.crt0.init_stack: # Compute the stack top for this hart, and the cluster-wide stack bottom. lw t2, snrt_stack_size # load stack size (overridable by binary) @@ -121,37 +110,26 @@ snrt.crt0.init_stack: # Allocate thread-local storage at the top of the stack and set tp # to the base of the tls snrt.crt0.init_tls: - la t0, __tdata_start - la t1, __tdata_end - la t2, __tbss_start - la t3, __tbss_end - add sp, sp, t0 # subtract sp to make room for TLS and align - sub sp, sp, t1 - add sp, sp, t2 - sub sp, sp, t3 - andi sp, sp, ~0x7 - mv tp, sp # palce tp at start of TLS - andi sp, sp, ~0x7 # align stack to 8B + la t0, __tdata_end + la t1, __tdata_start + sub t0, t0, t1 + sub sp, sp, t0 + la t0, __tbss_end + la t1, __tbss_start + sub t0, t0, t1 + sub sp, sp, t0 - # Copy __tdata_start to __tdata_end. 
- mv t4, tp - bge t0, t1, 2f -1: lw t5, 0(t0) - sw t5, 0(t4) - addi t0, t0, 4 - addi t4, t4, 4 - blt t0, t1, 1b -2: + # Place tp at top of TLS + mv tp, sp - # Clear from _tdata_end to _tbss_end. - la t0, __tbss_start - la t1, __tbss_end - bge t0, t1, 2f -1: sw zero, 0(t4) - addi t0, t0, 4 - addi t4, t4, 4 - blt t0, t2, 1b -2: + # Align stack to 8B + addi sp, sp, -4 + andi sp, sp, ~0x7 + + # Prepare interrupts +snrt.crt0.init_interrupt: + la t0, __snrt_crt0_interrupt_handler + csrw mtvec, t0 # Initialize the root team. snrt.crt0.init_team: @@ -169,11 +147,6 @@ snrt.crt0.init_team: lw a4, 16(sp) addi sp, sp, 20 - # Prepare interrupts -snrt.crt0.init_interrupt: - la t0, __snrt_crt0_interrupt_handler - csrw mtvec, t0 - # Synchronize cores. snrt.crt0.pre_barrier: call _snrt_cluster_barrier @@ -258,7 +231,7 @@ __snrt_crt0_interrupt_handler: fsd f30, 8(sp) fsd f31, 0(sp) # state is stored, can now handle the interrupt -1: +1: call __snrt_isr # restore fp context csrr t0, misa From 88739de817c3a4a6fcb7def4b300fcb6575e8175 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 4 Dec 2023 19:17:05 -0800 Subject: [PATCH 09/17] cfg: Propagate defines to the compilation flow --- hw/system/spatz_cluster/Makefile | 18 +- hw/system/spatz_cluster/cfg/carfield.hjson | 156 +++++++++--------- .../cfg/spatz_cluster.default.hjson | 150 ++++++++--------- 3 files changed, 166 insertions(+), 158 deletions(-) diff --git a/hw/system/spatz_cluster/Makefile b/hw/system/spatz_cluster/Makefile index d9f5bd8f..ed35ac6d 100644 --- a/hw/system/spatz_cluster/Makefile +++ b/hw/system/spatz_cluster/Makefile @@ -11,7 +11,15 @@ MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) MKFILE_DIR := $(dir $(MKFILE_PATH)) # Configuration file -SPATZ_CLUSTER_CFG ?= $(SPATZ_CLUSTER_DIR)/cfg/spatz_cluster.default.hjson +SPATZ_CLUSTER_CFG ?= $(SPATZ_CLUSTER_DIR)/cfg/spatz_cluster.default.hjson +SPATZ_CLUSTER_CFG_DEFINES += -DMEM_DRAM_ORIGIN=$(shell python3 -c "import jstyleson; f = 
open('$(SPATZ_CLUSTER_CFG)'); print(jstyleson.load(f)['dram']['address'])") +SPATZ_CLUSTER_CFG_DEFINES += -DMEM_DRAM_SIZE=$(shell python3 -c "import jstyleson; f = open('$(SPATZ_CLUSTER_CFG)'); print(jstyleson.load(f)['dram']['length'])") +SPATZ_CLUSTER_CFG_DEFINES += -DSNRT_BASE_HARTID=$(shell python3 -c "import jstyleson; f = open('$(SPATZ_CLUSTER_CFG)'); print(jstyleson.load(f)['cluster']['cluster_base_hartid'])") +SPATZ_CLUSTER_CFG_DEFINES += -DSNRT_CLUSTER_CORE_NUM=$(shell python3 -c "import jstyleson; f = open('$(SPATZ_CLUSTER_CFG)'); print(len(jstyleson.load(f)['cluster']['cores']))") +SPATZ_CLUSTER_CFG_DEFINES += -DSNRT_TCDM_START_ADDR=$(shell python3 -c "import jstyleson; f = open('$(SPATZ_CLUSTER_CFG)'); print(jstyleson.load(f)['cluster']['cluster_base_addr'])") +SPATZ_CLUSTER_CFG_DEFINES += -DSNRT_CLUSTER_OFFSET=$(shell python3 -c "import jstyleson; f = open('$(SPATZ_CLUSTER_CFG)'); print(jstyleson.load(f)['cluster']['cluster_base_offset'])") +SPATZ_CLUSTER_CFG_DEFINES += -DSNRT_TCDM_SIZE=$(shell python3 -c "import jstyleson; f = open('$(SPATZ_CLUSTER_CFG)'); print(jstyleson.load(f)['cluster']['tcdm']['size'] * 1024)") +SPATZ_CLUSTER_CFG_DEFINES += -DSNRT_NFPU_PER_CORE=$(shell python3 -c "import jstyleson; f = open('$(SPATZ_CLUSTER_CFG)'); print(jstyleson.load(f)['cluster']['n_fpu'])") # Include Makefrag include $(ROOT)/util/Makefrag @@ -167,13 +175,13 @@ lint/tmp/files: ${BENDER} ## Build SW into sw/build with the LLVM toolchain sw: clean.sw mkdir -p sw/build - cd sw/build && ${CMAKE} -DLLVM_PATH=${LLVM_INSTALL_DIR} -DGCC_PATH=${GCC_INSTALL_DIR} -DPYTHON=${PYTHON} .. && make + cd sw/build && ${CMAKE} -DLLVM_PATH=${LLVM_INSTALL_DIR} -DGCC_PATH=${GCC_INSTALL_DIR} -DPYTHON=${PYTHON} ${SPATZ_CLUSTER_CFG_DEFINES} .. 
&& make # VSIM ## Build SW into sw/build with the LLVM toolchain (including tests) for Questasim simulator sw.vsim: clean.sw bin/spatz_cluster.vsim mkdir -p sw/build - cd sw/build && ${CMAKE} -DLLVM_PATH=${LLVM_INSTALL_DIR} -DGCC_PATH=${GCC_INSTALL_DIR} -DPYTHON=${PYTHON} -DSNITCH_SIMULATOR=../../../../../hw/system/spatz_cluster/bin/spatz_cluster.vsim -DBUILD_TESTS=ON .. && make -j8 + cd sw/build && ${CMAKE} -DLLVM_PATH=${LLVM_INSTALL_DIR} -DGCC_PATH=${GCC_INSTALL_DIR} -DPYTHON=${PYTHON} -DSNITCH_SIMULATOR=../../../../../hw/system/spatz_cluster/bin/spatz_cluster.vsim -DBUILD_TESTS=ON ${SPATZ_CLUSTER_CFG_DEFINES} .. && make -j8 ## Build SW and run all tests with Questasim simulator sw.test.vsim: sw.vsim @@ -183,7 +191,7 @@ sw.test.vsim: sw.vsim ## Build SW into sw/build with the LLVM toolchain (including tests) for VCS simulator sw.vcs: clean.sw bin/spatz_cluster.vcs mkdir -p sw/build - cd sw/build && ${CMAKE} -DLLVM_PATH=${LLVM_INSTALL_DIR} -DGCC_PATH=${GCC_INSTALL_DIR} -DPYTHON=${PYTHON} -DSNITCH_SIMULATOR=../../../../../hw/system/spatz_cluster/bin/spatz_cluster.vcs -DBUILD_TESTS=ON .. && make -j8 + cd sw/build && ${CMAKE} -DLLVM_PATH=${LLVM_INSTALL_DIR} -DGCC_PATH=${GCC_INSTALL_DIR} -DPYTHON=${PYTHON} -DSNITCH_SIMULATOR=../../../../../hw/system/spatz_cluster/bin/spatz_cluster.vcs -DBUILD_TESTS=ON ${SPATZ_CLUSTER_CFG_DEFINES} .. && make -j8 ## Build SW and run all tests with VCS simulator sw.test.vcs: sw.vcs @@ -193,7 +201,7 @@ sw.test.vcs: sw.vcs ## Build SW into sw/build with the LLVM toolchain (including tests) for Verilator simulator sw.vlt: clean.sw bin/spatz_cluster.vlt mkdir -p sw/build - cd sw/build && ${CMAKE} -DLLVM_PATH=${LLVM_INSTALL_DIR} -DGCC_PATH=${GCC_INSTALL_DIR} -DPYTHON=${PYTHON} -DSNITCH_SIMULATOR=../../../../../hw/system/spatz_cluster/bin/spatz_cluster.vlt -DBUILD_TESTS=ON .. 
&& make -j8 + cd sw/build && ${CMAKE} -DLLVM_PATH=${LLVM_INSTALL_DIR} -DGCC_PATH=${GCC_INSTALL_DIR} -DPYTHON=${PYTHON} -DSNITCH_SIMULATOR=../../../../../hw/system/spatz_cluster/bin/spatz_cluster.vlt -DBUILD_TESTS=ON ${SPATZ_CLUSTER_CFG_DEFINES} .. && make -j8 ## Build SW and run all tests with Verilator simulator sw.test.vlt: sw.vlt diff --git a/hw/system/spatz_cluster/cfg/carfield.hjson b/hw/system/spatz_cluster/cfg/carfield.hjson index 8faca6dc..c25c9c9d 100644 --- a/hw/system/spatz_cluster/cfg/carfield.hjson +++ b/hw/system/spatz_cluster/cfg/carfield.hjson @@ -4,96 +4,96 @@ // Cluster configuration for a simple system. { - cluster: { - boot_addr: 4096, // 0x1000 - cluster_base_addr: 1358954496, // 0x51000000 - cluster_base_offset: 0, // 0x0 - cluster_base_hartid: 16, - addr_width: 48, - data_width: 64, - id_width_in: 6, - id_width_out: 2, - user_width: 10, - cluster_default_axi_user: 7, - axi_cdc_enable: true, - sw_rst_enable: true, - axi_isolate_enable: true, - tcdm: { - size: 128, - banks: 16, + "cluster": { + "boot_addr": 4096, // 0x1000 + "cluster_base_addr": 1358954496, // 0x51000000 + "cluster_base_offset": 0, // 0x0 + "cluster_base_hartid": 16, + "addr_width": 48, + "data_width": 64, + "id_width_in": 6, + "id_width_out": 2, + "user_width": 10, + "cluster_default_axi_user": 7, + "axi_cdc_enable": true, + "sw_rst_enable": true, + "axi_isolate_enable": true, + "tcdm": { + "size": 128, + "banks": 16 }, - cluster_periph_size: 64, // kB - dma_data_width: 64, - dma_axi_req_fifo_depth: 3, - dma_req_fifo_depth: 3, + "cluster_periph_size": 64, // kB + "dma_data_width": 64, + "dma_axi_req_fifo_depth": 3, + "dma_req_fifo_depth": 3, // Spatz parameters - vlen: 512, - n_fpu: 4, - n_ipu: 1, - spatz_fpu: true, + "vlen": 512, + "n_fpu": 4, + "n_ipu": 1, + "spatz_fpu": true, // Timing parameters - timing: { - lat_comp_fp32: 2, - lat_comp_fp64: 4, - lat_comp_fp16: 1, - lat_comp_fp16_alt: 1, - lat_comp_fp8: 0, - lat_comp_fp8_alt: 0, - lat_noncomp: 1, - lat_conv: 2, - 
lat_sdotp: 4, - fpu_pipe_config: "BEFORE" - xbar_latency: "CUT_ALL_PORTS", + "timing": { + "lat_comp_fp32": 2, + "lat_comp_fp64": 4, + "lat_comp_fp16": 1, + "lat_comp_fp16_alt": 1, + "lat_comp_fp8": 0, + "lat_comp_fp8_alt": 0, + "lat_noncomp": 1, + "lat_conv": 2, + "lat_sdotp": 4, + "fpu_pipe_config": "BEFORE", + "xbar_latency": "CUT_ALL_PORTS", - register_tcdm_cuts: true, - register_core_req: true, - register_core_rsp: true, - register_offload_rsp: true + "register_tcdm_cuts": true, + "register_core_req": true, + "register_core_rsp": true, + "register_offload_rsp": true }, - cores: [ - { $ref: "#/dma_core_template" }, - { $ref: "#/compute_core_template" }, + "cores": [ + // Compute core + { + "isa": "rv32imafd", + "xf16": true, + "xf8": true, + "xfdotp": true, + "xdma": false, + "num_int_outstanding_loads": 1, + "num_int_outstanding_mem": 4, + "num_spatz_outstanding_loads": 4, + "num_dtlb_entries": 1, + "num_itlb_entries": 1 + }, + + // DMA core + { + "isa": "rv32imafd", + "xdma": true, + "xf16": true, + "xf8": true, + "xfdotp": true, + "num_int_outstanding_loads": 1, + "num_int_outstanding_mem": 4, + "num_spatz_outstanding_loads": 4, + "num_dtlb_entries": 1, + "num_itlb_entries": 1 + } ], - icache: { - size: 4, // total instruction cache size in kByte - sets: 2, // number of ways - cacheline: 128 // word size in bits + "icache": { + "size": 4, // total instruction cache size in kByte + "sets": 2, // number of ways + "cacheline": 128 // word size in bits } - } + }, - dram: { + "dram": { // 0x8000_0000 - address: 2147483648, + "address": 2147483648, // 0x8000_0000 - length: 2147483648 - }, - peripherals: { + "length": 2147483648 }, - // Templates. 
+ "peripherals": { - compute_core_template: { - isa: "rv32imafd", - xf16: true, - xf8: true, - xfdotp: true, - xdma: false, - num_int_outstanding_loads: 1, - num_int_outstanding_mem: 4, - num_spatz_outstanding_loads: 4, - num_dtlb_entries: 1, - num_itlb_entries: 1 - }, - dma_core_template: { - isa: "rv32imafd", - xdma: true - xf16: true, - xf8: true, - xfdotp: true, - num_int_outstanding_loads: 1, - num_int_outstanding_mem: 4, - num_spatz_outstanding_loads: 4, - num_dtlb_entries: 1, - num_itlb_entries: 1 } } diff --git a/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson b/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson index c333c577..77e4e36e 100644 --- a/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson +++ b/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson @@ -4,93 +4,93 @@ // Cluster configuration for a simple system. { - cluster: { - boot_addr: 4096, // 0x1000 - cluster_base_addr: 1048576, // 0x100000 - cluster_base_offset: 0, // 0x0 - cluster_base_hartid: 0, - addr_width: 32, - data_width: 64, - id_width_in: 2, - id_width_out: 4, - user_width: 2, - cluster_default_axi_user: 1, - axi_cdc_enable: false, - tcdm: { - size: 128, - banks: 16, + "cluster": { + "boot_addr": 4096, // 0x1000 + "cluster_base_addr": 1048576, // 0x100000 + "cluster_base_offset": 0, // 0x0 + "cluster_base_hartid": 0, + "addr_width": 32, + "data_width": 64, + "id_width_in": 2, + "id_width_out": 4, + "user_width": 2, + "cluster_default_axi_user": 1, + "axi_cdc_enable": false, + "tcdm": { + "size": 128, + "banks": 16 }, - cluster_periph_size: 64, // kB - dma_data_width: 512, - dma_axi_req_fifo_depth: 3, - dma_req_fifo_depth: 3, + "cluster_periph_size": 64, // kB + "dma_data_width": 512, + "dma_axi_req_fifo_depth": 3, + "dma_req_fifo_depth": 3, // Spatz parameters - vlen: 512, - n_fpu: 4, - n_ipu: 1, - spatz_fpu: true, + "vlen": 512, + "n_fpu": 4, + "n_ipu": 1, + "spatz_fpu": true, // Timing parameters - timing: { - lat_comp_fp32: 1, - lat_comp_fp64: 2, - 
lat_comp_fp16: 0, - lat_comp_fp16_alt: 0, - lat_comp_fp8: 0, - lat_comp_fp8_alt: 0, - lat_noncomp: 1, - lat_conv: 2, - lat_sdotp: 2, - fpu_pipe_config: "BEFORE" - xbar_latency: "CUT_ALL_PORTS", + "timing": { + "lat_comp_fp32": 1, + "lat_comp_fp64": 2, + "lat_comp_fp16": 0, + "lat_comp_fp16_alt": 0, + "lat_comp_fp8": 0, + "lat_comp_fp8_alt": 0, + "lat_noncomp": 1, + "lat_conv": 2, + "lat_sdotp": 2, + "fpu_pipe_config": "BEFORE", + "xbar_latency": "CUT_ALL_PORTS", - register_core_req: true, - register_core_rsp: true, - register_offload_rsp: true + "register_core_req": true, + "register_core_rsp": true, + "register_offload_rsp": true }, - cores: [ - { $ref: "#/dma_core_template" }, - { $ref: "#/compute_core_template" }, + "cores": [ + // Compute core + { + "isa": "rv32imafd", + "xf16": true, + "xf8": true, + "xfdotp": true, + "xdma": false, + "num_int_outstanding_loads": 1, + "num_int_outstanding_mem": 4, + "num_spatz_outstanding_loads": 4, + "num_dtlb_entries": 1, + "num_itlb_entries": 1 + }, + + // DMA core + { + "isa": "rv32imafd", + "xdma": true, + "xf16": true, + "xf8": true, + "xfdotp": true, + "num_int_outstanding_loads": 1, + "num_int_outstanding_mem": 4, + "num_spatz_outstanding_loads": 4, + "num_dtlb_entries": 1, + "num_itlb_entries": 1 + } ], - icache: { - size: 4, // total instruction cache size in kByte - sets: 2, // number of ways - cacheline: 256 // word size in bits + "icache": { + "size": 4, // total instruction cache size in kByte + "sets": 2, // number of ways + "cacheline": 256 // word size in bits } - } + }, - dram: { + "dram": { // 0x8000_0000 - address: 2147483648, + "address": 2147483648, // 0x8000_0000 - length: 2147483648 - }, - peripherals: { + "length": 2147483648 }, - // Templates. 
+ "peripherals": { - compute_core_template: { - isa: "rv32imafd", - xf16: true, - xf8: true, - xfdotp: true, - xdma: false, - num_int_outstanding_loads: 1, - num_int_outstanding_mem: 4, - num_spatz_outstanding_loads: 4, - num_dtlb_entries: 1, - num_itlb_entries: 1 - }, - dma_core_template: { - isa: "rv32imafd", - xdma: true - xf16: true, - xf8: true, - xfdotp: true, - num_int_outstanding_loads: 1, - num_int_outstanding_mem: 4, - num_spatz_outstanding_loads: 4, - num_dtlb_entries: 1, - num_itlb_entries: 1 } } From 921484b5daabd4edd2a2273a42563645c9f4e2d4 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 4 Dec 2023 19:17:17 -0800 Subject: [PATCH 10/17] spatz: Fix mismatch in the VRF --- hw/ip/spatz/src/vregfile.sv | 2 -- 1 file changed, 2 deletions(-) diff --git a/hw/ip/spatz/src/vregfile.sv b/hw/ip/spatz/src/vregfile.sv index 559777b3..87d066c4 100644 --- a/hw/ip/spatz/src/vregfile.sv +++ b/hw/ip/spatz/src/vregfile.sv @@ -99,11 +99,9 @@ module vregfile import spatz_pkg::*; #( ); always_latch begin -`ifndef SYNTHESIS if (!rst_ni) mem[vreg][b] <= '0; else -`endif if (clk_latch) mem[vreg][b] <= wdata_q[b*8 +: 8]; end From 04849833fe1257f4f7b2ceb88a53ac97f28116a0 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 4 Dec 2023 19:22:21 -0800 Subject: [PATCH 11/17] hw: Clean up the bootrom --- hw/system/spatz_cluster/test/bootrom.S | 34 ++++++- hw/system/spatz_cluster/test/bootrom.bin | Bin 148 -> 268 bytes hw/system/spatz_cluster/test/bootrom.dump | 109 ++++++++++++++-------- hw/system/spatz_cluster/test/bootrom.elf | Bin 5184 -> 5304 bytes 4 files changed, 101 insertions(+), 42 deletions(-) diff --git a/hw/system/spatz_cluster/test/bootrom.S b/hw/system/spatz_cluster/test/bootrom.S index bf608fe3..6b9885e9 100644 --- a/hw/system/spatz_cluster/test/bootrom.S +++ b/hw/system/spatz_cluster/test/bootrom.S @@ -9,9 +9,41 @@ .global BOOTDATA _start: + // Clear integer registers + mv x1, x0 + mv x2, x0 + mv x3, x0 + mv x4, x0 + mv x5, x0 + mv x6, x0 + mv x7, 
x0 + mv x8, x0 + mv x9, x0 + mv x10, x0 + mv x11, x0 + mv x12, x0 + mv x13, x0 + mv x14, x0 + mv x15, x0 + mv x16, x0 + mv x17, x0 + mv x18, x0 + mv x19, x0 + mv x20, x0 + mv x21, x0 + mv x22, x0 + mv x23, x0 + mv x24, x0 + mv x25, x0 + mv x26, x0 + mv x27, x0 + mv x28, x0 + mv x29, x0 + mv x30, x0 + mv x31, x0 + la t1, exception csrw mtvec, t1 - csrr a0, mhartid la a1, BOOTDATA // Activate MEIP diff --git a/hw/system/spatz_cluster/test/bootrom.bin b/hw/system/spatz_cluster/test/bootrom.bin index 30047bccdbbee597681560616608f08c9dee388e..09e3c51ed977df41adbcb28a0594719c4562e97e 100755 GIT binary patch literal 268 zcmZY4yA8rH6h`5jIOhGZ^cSp=jsaKzi6JN{vOpv&6S!c2^fa_|48jU*0LKwDVCnOb zaxFPdW~Ra{K_!J8g(?bp3XLc7: - 1000: 00000317 auipc t1,0x0 - 1004: 04430313 addi t1,t1,68 # 1044 - 1008: 30531073 csrw mtvec,t1 - 100c: f1402573 csrr a0,mhartid - 1010: 00000597 auipc a1,0x0 - 1014: 04058593 addi a1,a1,64 # 1050 - 1018: 00001337 lui t1,0x1 - 101c: 80030313 addi t1,t1,-2048 # 800 <_start-0x800> - 1020: 30431073 csrw 0x304,t1 - 1024: 10500073 wfi - 1028: 0185a383 lw t2,24(a1) - 102c: 0205ae03 lw t3,32(a1) - 1030: 01c383b3 add t2,t2,t3 - 1034: 05838393 addi t2,t2,88 - 1038: 00038393 mv t2,t2 - 103c: 0003a383 lw t2,0(t2) - 1040: 00038067 jr t2 + 1000: 00000093 li ra,0 + 1004: 00000113 li sp,0 + 1008: 00000193 li gp,0 + 100c: 00000213 li tp,0 + 1010: 00000293 li t0,0 + 1014: 00000313 li t1,0 + 1018: 00000393 li t2,0 + 101c: 00000413 li s0,0 + 1020: 00000493 li s1,0 + 1024: 00000513 li a0,0 + 1028: 00000593 li a1,0 + 102c: 00000613 li a2,0 + 1030: 00000693 li a3,0 + 1034: 00000713 li a4,0 + 1038: 00000793 li a5,0 + 103c: 00000813 li a6,0 + 1040: 00000893 li a7,0 + 1044: 00000913 li s2,0 + 1048: 00000993 li s3,0 + 104c: 00000a13 li s4,0 + 1050: 00000a93 li s5,0 + 1054: 00000b13 li s6,0 + 1058: 00000b93 li s7,0 + 105c: 00000c13 li s8,0 + 1060: 00000c93 li s9,0 + 1064: 00000d13 li s10,0 + 1068: 00000d93 li s11,0 + 106c: 00000e13 li t3,0 + 1070: 00000e93 li t4,0 + 1074: 00000f13 li 
t5,0 + 1078: 00000f93 li t6,0 + 107c: 00000317 auipc t1,0x0 + 1080: 04030313 addi t1,t1,64 # 10bc + 1084: 30531073 csrw mtvec,t1 + 1088: 00000597 auipc a1,0x0 + 108c: 04058593 addi a1,a1,64 # 10c8 + 1090: 00001337 lui t1,0x1 + 1094: 80030313 addi t1,t1,-2048 # 800 <_start-0x800> + 1098: 30431073 csrw 0x304,t1 + 109c: 10500073 wfi + 10a0: 0185a383 lw t2,24(a1) + 10a4: 0205ae03 lw t3,32(a1) + 10a8: 01c383b3 add t2,t2,t3 + 10ac: 05838393 addi t2,t2,88 + 10b0: 00038393 mv t2,t2 + 10b4: 0003a383 lw t2,0(t2) + 10b8: 00038067 jr t2 -00001044 : - 1044: 10500073 wfi - 1048: ffdff06f j 1044 +000010bc : + 10bc: 10500073 wfi + 10c0: ffdff06f j 10bc Disassembly of section .rodata: -00001050 : - 1050: 1000 addi s0,sp,32 - 1052: 0000 unimp - 1054: 0000 unimp - 1056: 0000 unimp - 1058: 0001 nop +000010c8 : + 10c8: 1000 addi s0,sp,32 + 10ca: 0000 unimp + 10cc: 0000 unimp + 10ce: 0000 unimp + 10d0: 0002 0x2 ... - 106a: 0010 addi a2,sp,0 - 106c: 0000 unimp - 106e: 0000 unimp - 1070: 0000 unimp - 1072: 0002 0x2 + 10e2: 0010 addi a2,sp,0 + 10e4: 0000 unimp + 10e6: 0000 unimp + 10e8: 0000 unimp + 10ea: 0002 0x2 ... - 1080: 0000 unimp - 1082: 2000 fld fs0,0(s0) - 1084: 0000 unimp - 1086: 0000 unimp - 1088: 0000 unimp - 108a: 2004 fld fs1,0(s0) - 108c: 0000 unimp + 10f8: 0000 unimp + 10fa: 8000 0x8000 + ... + 1104: 0001 nop ... Disassembly of section .boot_section: -00001090 : - 1090: 1044 addi s1,sp,36 +00001108 : + 1108: 10bc addi a5,sp,104 ... 
Disassembly of section .Pulp_Chip.Info: @@ -71,7 +98,7 @@ Disassembly of section .Pulp_Chip.Info: 10: 495f 666e 006f 0x6f666e495f 16: 70696863 bltu s2,t1,726 <_start-0x8da> 1a: 6e3d lui t3,0xf - 1c: 20656e6f jal t3,56222 + 1c: 20656e6f jal t3,56222 20: 3d757063 bleu s7,a0,3e0 <_start-0xc20> 24: 6f6e flw ft10,216(sp) 26: 656e flw fa0,216(sp) diff --git a/hw/system/spatz_cluster/test/bootrom.elf b/hw/system/spatz_cluster/test/bootrom.elf index 887443af217f8f3404d1d9659b0b7d8b5bb005a3..dfa5d03305eeae97b6abc19ebfead869d70fd4f2 100755 GIT binary patch delta 349 zcmY+;u}T9$5P;#?yX3s+ne5&~3|bfxh}wyrjkQ<>>?{*V6$CN8B33pU=SfT<7AY(& z?iqXwTO0jPlfs3cj~$k~dE0y4e{4s6opw4&=0r00eEQP3HW5*-Mj9b$jHDGLts-fH zq$^08B54gtSCMoLNi!r}N76cyHjp-BsqwS8a^=qs%=;4;J%v_VSmFPY zdEx(N@#1wF>@XbX^Tk-i$u=(---ZiU*k|Y_qsj1cJg}|&%e~u%?o`W<6n3QDBn`iB i2il9WVE1qyYzC8HAKG(w=9;^$?_8w6+y%>Z%JT!m%`o=> delta 192 zcmdm?c|c==0@DqliAqXrQy3T+rZ8;u+Qu&+&dk6d%wU>;+@ zmS+VS5-@qAurk{OAPcDWmarPr6oJW1B5F*J1SV?% YsXqdfgGAIAc_vqis57=tUMM0C06sw~ZU6uP From cb18b5f538ac821a0044d851fdfe6d80149beb9b Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 4 Dec 2023 19:24:33 -0800 Subject: [PATCH 12/17] hw: Flatten FPU count configuration --- hw/system/spatz_cluster/src/spatz_cluster_wrapper.sv.tpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/system/spatz_cluster/src/spatz_cluster_wrapper.sv.tpl b/hw/system/spatz_cluster/src/spatz_cluster_wrapper.sv.tpl index b7ab6460..fc403a30 100644 --- a/hw/system/spatz_cluster/src/spatz_cluster_wrapper.sv.tpl +++ b/hw/system/spatz_cluster/src/spatz_cluster_wrapper.sv.tpl @@ -311,8 +311,8 @@ module ${cfg['name']}_wrapper localparam int unsigned NumIntOutstandingLoads [NumCores] = '{${core_cfg('num_int_outstanding_loads')}}; localparam int unsigned NumIntOutstandingMem [NumCores] = '{${core_cfg('num_int_outstanding_mem')}}; localparam int unsigned NumSpatzOutstandingLoads [NumCores] = '{${core_cfg('num_spatz_outstanding_loads')}}; - localparam int unsigned 
NumSpatzFPUs [NumCores] = '{${cfg['n_fpu']}}; - localparam int unsigned NumSpatzIPUs [NumCores] = '{${cfg['n_ipu']}}; + localparam int unsigned NumSpatzFPUs [NumCores] = '{default: ${cfg['n_fpu']}}; + localparam int unsigned NumSpatzIPUs [NumCores] = '{default: ${cfg['n_ipu']}}; spatz_cluster_pkg::spatz_axi_iwc_out_req_t axi_from_cluster_iwc_req; spatz_cluster_pkg::spatz_axi_iwc_out_resp_t axi_from_cluster_iwc_resp; From 2df66993a45cc4c290a19381da5d2e08c817ab06 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 4 Dec 2023 19:31:42 -0800 Subject: [PATCH 13/17] ci: Update python dependencies --- .gitlab/.gitlab-ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitlab/.gitlab-ci.yml b/.gitlab/.gitlab-ci.yml index 5e7985f8..9b111c7c 100644 --- a/.gitlab/.gitlab-ci.yml +++ b/.gitlab/.gitlab-ci.yml @@ -107,6 +107,7 @@ snRuntime-test-vsim: - export PYTHON3=$(which python3.6) - ${PYTHON3} -m pip install --user jsonref - ${PYTHON3} -m pip install --user jsonschema + - ${PYTHON3} -m pip install --user jstyleson - ${PYTHON3} -m pip install --user dataclasses - ${PYTHON3} -m pip install --user mako # Compile the binaries @@ -134,6 +135,7 @@ snRuntime-test-vcs: - export PYTHON3=$(which python3.6) - ${PYTHON3} -m pip install --user jsonref - ${PYTHON3} -m pip install --user jsonschema + - ${PYTHON3} -m pip install --user jstyleson - ${PYTHON3} -m pip install --user dataclasses - ${PYTHON3} -m pip install --user mako # Compile the binaries @@ -162,6 +164,7 @@ snRuntime-test-vlt: - export PYTHON3=$(which python3.6) - ${PYTHON3} -m pip install --user jsonref - ${PYTHON3} -m pip install --user jsonschema + - ${PYTHON3} -m pip install --user jstyleson - ${PYTHON3} -m pip install --user dataclasses - ${PYTHON3} -m pip install --user mako # Compile the binaries @@ -191,6 +194,7 @@ riscvTests-test-vlt: - export PYTHON3=$(which python3.6) - ${PYTHON3} -m pip install --user jsonref - ${PYTHON3} -m pip install --user jsonschema + - ${PYTHON3} -m pip install --user 
jstyleson - ${PYTHON3} -m pip install --user dataclasses - ${PYTHON3} -m pip install --user mako # Compile the binaries @@ -220,6 +224,7 @@ spatzBenchmarks-test-vlt: - export PYTHON3=$(which python3.6) - ${PYTHON3} -m pip install --user jsonref - ${PYTHON3} -m pip install --user jsonschema + - ${PYTHON3} -m pip install --user jstyleson - ${PYTHON3} -m pip install --user dataclasses - ${PYTHON3} -m pip install --user mako # Compile the binaries From 8c78b41c3e85ab20eb1ec8565cf6c7a7b7abffea Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 4 Dec 2023 20:09:06 -0800 Subject: [PATCH 14/17] sw: Initialize TLS --- sw/snRuntime/src/start.S.in | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sw/snRuntime/src/start.S.in b/sw/snRuntime/src/start.S.in index 5d9d14ee..ddd58ec4 100644 --- a/sw/snRuntime/src/start.S.in +++ b/sw/snRuntime/src/start.S.in @@ -126,6 +126,30 @@ snrt.crt0.init_tls: addi sp, sp, -4 andi sp, sp, ~0x7 + # Copy __tdata_start to __tdata_end. + la t0, __tdata_start + la t1, __tdata_end + la t2, __tbss_start + la t3, __tbss_end + mv t4, tp + bge t0, t1, 2f +1: lw t5, 0(t0) + sw t5, 0(t4) + addi t0, t0, 4 + addi t4, t4, 4 + blt t0, t1, 1b +2: + + # Clear from __tbss_start to __tbss_end.
+ la t0, __tbss_start + la t1, __tbss_end + bge t0, t1, 2f +1: sw zero, 0(t4) + addi t0, t0, 4 + addi t4, t4, 4 + blt t0, t1, 1b +2: + # Prepare interrupts snrt.crt0.init_interrupt: la t0, __snrt_crt0_interrupt_handler From 80d6813a4faa285a6888bd7c315a98587434bdd0 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 4 Dec 2023 20:24:48 -0800 Subject: [PATCH 15/17] bender: Install bender with cargo --- Makefile | 10 +++++----- util/Makefrag | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 5dc34c54..f89a3047 100644 --- a/Makefile +++ b/Makefile @@ -107,18 +107,18 @@ tc-riscv-isa-sim: sw/toolchain/riscv-isa-sim sw/toolchain/dtc bender: check-bender check-bender: - @if [ -x $(BENDER_INSTALL_DIR)/bender ]; then \ + @if [ -x $(BENDER_INSTALL_DIR)/bin/bender ]; then \ req="bender $(BENDER_VERSION)"; \ - current="$$($(BENDER_INSTALL_DIR)/bender --version)"; \ + current="$$($(BENDER_INSTALL_DIR)/bin/bender --version)"; \ if [ "$$(printf '%s\n' "$${req}" "$${current}" | sort -V | head -n1)" != "$${req}" ]; then \ rm -rf $(BENDER_INSTALL_DIR); \ fi \ fi - @$(MAKE) -C $(ROOT_DIR) $(BENDER_INSTALL_DIR)/bender + @$(MAKE) -C $(ROOT_DIR) $(BENDER_INSTALL_DIR)/bin/bender -$(BENDER_INSTALL_DIR)/bender: +$(BENDER_INSTALL_DIR)/bin/bender: mkdir -p $(BENDER_INSTALL_DIR) && cd $(BENDER_INSTALL_DIR) && \ - curl --proto '=https' --tlsv1.2 https://pulp-platform.github.io/bender/init -sSf | sh -s -- $(BENDER_VERSION) + cargo install bender --version $(BENDER_VERSION) --root $(BENDER_INSTALL_DIR) --locked ############### # Verilator # diff --git a/util/Makefrag b/util/Makefrag index 974e0007..cfb51dc9 100644 --- a/util/Makefrag +++ b/util/Makefrag @@ -19,7 +19,7 @@ BENDER_INSTALL_DIR ?= ${INSTALL_DIR}/bender VERILATOR_INSTALL_DIR ?= ${INSTALL_DIR}/verilator # Support for local override -BENDER ?= ${BENDER_INSTALL_DIR}/bender +BENDER ?= ${BENDER_INSTALL_DIR}/bin/bender DASM ?= ${SPIKE_INSTALL_DIR}/bin/spike-dasm VLT ?=
${VERILATOR_INSTALL_DIR}/bin/verilator_bin CMAKE ?= cmake-3.18.1 From f0754d5c82a2ba3d3448254b1f8335eb69fb8d2b Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 4 Dec 2023 20:25:56 -0800 Subject: [PATCH 16/17] memora: Makefile changes do not trigger recompilation --- Memora.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Memora.yml b/Memora.yml index 8f52ee35..e8dd1c87 100644 --- a/Memora.yml +++ b/Memora.yml @@ -7,21 +7,18 @@ cache_root_dir: /usr/scratch2/larain1/matheusd/gitlabci/buildcache/spatz artifacts: tc-riscv-gcc: inputs: - - Makefile - sw/toolchain/riscv-gnu-toolchain.version outputs: - install/riscv-gcc tc-llvm: inputs: - - Makefile - sw/toolchain/llvm-project.version outputs: - install/llvm riscv-isa-sim: inputs: - - Makefile - sw/toolchain/riscv-isa-sim.version - sw/toolchain/riscv-opcodes.version outputs: @@ -29,7 +26,6 @@ artifacts: verilator: inputs: - - Makefile - sw/toolchain/verilator.version outputs: - install/verilator From a2092eddf1846d0c768e3cbabd7a0b15b7817608 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Tue, 5 Dec 2023 08:13:47 +0100 Subject: [PATCH 17/17] cfg: Switch DMA and compute cores --- hw/system/spatz_cluster/cfg/carfield.hjson | 8 ++++---- hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hw/system/spatz_cluster/cfg/carfield.hjson b/hw/system/spatz_cluster/cfg/carfield.hjson index c25c9c9d..1ba2e62b 100644 --- a/hw/system/spatz_cluster/cfg/carfield.hjson +++ b/hw/system/spatz_cluster/cfg/carfield.hjson @@ -51,13 +51,13 @@ "register_offload_rsp": true }, "cores": [ - // Compute core + // DMA core { "isa": "rv32imafd", + "xdma": true, "xf16": true, "xf8": true, "xfdotp": true, - "xdma": false, "num_int_outstanding_loads": 1, "num_int_outstanding_mem": 4, "num_spatz_outstanding_loads": 4, @@ -65,13 +65,13 @@ "num_itlb_entries": 1 }, - // DMA core + // Compute core { "isa": "rv32imafd", - "xdma": true, "xf16": true, "xf8": 
true, "xfdotp": true, + "xdma": false, "num_int_outstanding_loads": 1, "num_int_outstanding_mem": 4, "num_spatz_outstanding_loads": 4, diff --git a/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson b/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson index 77e4e36e..6c820d37 100644 --- a/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson +++ b/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson @@ -48,13 +48,13 @@ "register_offload_rsp": true }, "cores": [ - // Compute core + // DMA core { "isa": "rv32imafd", + "xdma": true, "xf16": true, "xf8": true, "xfdotp": true, - "xdma": false, "num_int_outstanding_loads": 1, "num_int_outstanding_mem": 4, "num_spatz_outstanding_loads": 4, @@ -62,13 +62,13 @@ "num_itlb_entries": 1 }, - // DMA core + // Compute core { "isa": "rv32imafd", - "xdma": true, "xf16": true, "xf8": true, "xfdotp": true, + "xdma": false, "num_int_outstanding_loads": 1, "num_int_outstanding_mem": 4, "num_spatz_outstanding_loads": 4,