From 8a5898dce43de95321561194d39feb1cb0f07e50 Mon Sep 17 00:00:00 2001 From: Zbigniew Chamski <107464696+zchamski@users.noreply.github.com> Date: Fri, 9 Dec 2022 11:07:12 +0100 Subject: [PATCH] Vendorize CVA6 core submodules (common_cells, FPU with related sub-modules) (#1007) --- Bender.yml | 106 +- Flist.ariane | 76 +- Makefile | 36 +- core/Flist.cv32a60x | 64 +- core/Flist.cv32a6_ima_sv32_fpga | 60 +- core/Flist.cv32a6_imac_sv0 | 60 +- core/Flist.cv32a6_imac_sv32 | 60 +- core/Flist.cv32a6_imafc_sv32 | 60 +- core/Flist.cv64a6_imafdc_sv39 | 64 +- corev_apu/fpga/scripts/run.tcl | 14 +- corev_apu/tb/tb_cva6_icache/tb.list | 8 +- corev_apu/tb/tb_wb_dcache/tb.list | 20 +- corev_apu/tb/tb_wt_axi_dcache/tb.list | 20 +- corev_apu/tb/tb_wt_dcache/tb.list | 12 +- pd/synth/cva6_synth.tcl | 2 +- util/README.md | 5 + util/vendor.py | 782 ++++ vendor/pulp-platform/common_cells/.gitignore | 14 + .../pulp-platform/common_cells/CHANGELOG.md | 342 ++ vendor/pulp-platform/common_cells/LICENSE | 176 + vendor/pulp-platform/common_cells/README.md | 181 + .../include/common_cells/assertions.svh | 201 + .../include/common_cells/registers.svh | 221 ++ .../common_cells/src/addr_decode.sv | 161 + .../common_cells/src/binary_to_gray.sv | 22 + .../common_cells/src/cb_filter.sv | 246 ++ .../common_cells/src/cb_filter_pkg.sv | 26 + .../common_cells/src/cc_onehot.sv | 50 + .../common_cells/src/cdc_2phase.sv | 175 + .../common_cells/src/cdc_fifo_2phase.sv | 134 + .../common_cells/src/cdc_fifo_gray.sv | 269 ++ .../common_cells/src/cf_math_pkg.sv | 61 + .../pulp-platform/common_cells/src/clk_div.sv | 42 + .../pulp-platform/common_cells/src/counter.sv | 43 + .../common_cells/src/delta_counter.sv | 74 + .../src/deprecated/clock_divider.sv | 191 + .../src/deprecated/clock_divider_counter.sv | 211 + .../common_cells/src/deprecated/fifo_v1.sv | 57 + .../common_cells/src/deprecated/fifo_v2.sv | 79 + .../src/deprecated/find_first_one.sv | 83 + .../src/deprecated/generic_LFSR_8bit.sv | 64 + 
.../src/deprecated/generic_fifo.sv | 274 ++ .../src/deprecated/generic_fifo_adv.sv | 264 ++ .../src/deprecated/prioarbiter.sv | 89 + .../common_cells/src/deprecated/pulp_sync.sv | 36 + .../src/deprecated/pulp_sync_wedge.sv | 55 + .../common_cells/src/deprecated/rrarbiter.sv | 61 + .../common_cells/src/deprecated/sram.sv | 46 + .../common_cells/src/ecc_decode.sv | 128 + .../common_cells/src/ecc_encode.sv | 78 + .../pulp-platform/common_cells/src/ecc_pkg.sv | 31 + .../common_cells/src/edge_detect.sv | 32 + .../common_cells/src/edge_propagator.sv | 50 + .../common_cells/src/edge_propagator_rx.sv | 31 + .../common_cells/src/edge_propagator_tx.sv | 40 + .../common_cells/src/exp_backoff.sv | 98 + .../common_cells/src/fall_through_register.sv | 58 + .../pulp-platform/common_cells/src/fifo_v3.sv | 154 + .../common_cells/src/gray_to_binary.sv | 23 + .../common_cells/src/id_queue.sv | 419 ++ .../src/isochronous_4phase_handshake.sv | 81 + .../src/isochronous_spill_register.sv | 111 + vendor/pulp-platform/common_cells/src/lfsr.sv | 315 ++ .../common_cells/src/lfsr_16bit.sv | 68 + .../common_cells/src/lfsr_8bit.sv | 61 + vendor/pulp-platform/common_cells/src/lzc.sv | 112 + .../common_cells/src/max_counter.sv | 77 + .../common_cells/src/mv_filter.sv | 55 + .../common_cells/src/onehot_to_bin.sv | 38 + .../common_cells/src/plru_tree.sv | 120 + .../common_cells/src/popcount.sv | 60 + .../common_cells/src/rr_arb_tree.sv | 348 ++ .../pulp-platform/common_cells/src/rstgen.sv | 30 + .../common_cells/src/rstgen_bypass.sv | 57 + .../common_cells/src/serial_deglitch.sv | 50 + .../common_cells/src/shift_reg.sv | 53 + .../common_cells/src/spill_register.sv | 46 + .../src/spill_register_flushable.sv | 105 + .../common_cells/src/stream_arbiter.sv | 49 + .../src/stream_arbiter_flushable.sv | 82 + .../common_cells/src/stream_delay.sv | 132 + .../common_cells/src/stream_demux.sv | 36 + .../common_cells/src/stream_fifo.sv | 66 + .../common_cells/src/stream_filter.sv | 26 + 
.../common_cells/src/stream_fork.sv | 133 + .../common_cells/src/stream_fork_dynamic.sv | 95 + .../common_cells/src/stream_intf.sv | 49 + .../common_cells/src/stream_join.sv | 43 + .../common_cells/src/stream_mux.sv | 46 + .../common_cells/src/stream_omega_net.sv | 301 ++ .../common_cells/src/stream_register.sv | 57 + .../common_cells/src/stream_to_mem.sv | 134 + .../common_cells/src/stream_xbar.sv | 198 + .../common_cells/src/sub_per_hash.sv | 173 + vendor/pulp-platform/common_cells/src/sync.sv | 35 + .../common_cells/src/sync_wedge.sv | 56 + .../pulp-platform/common_cells/src/unread.sv | 21 + vendor/pulp-platform/fpnew/.gitignore | 3 + vendor/pulp-platform/fpnew/LICENSE | 176 + vendor/pulp-platform/fpnew/README.md | 147 + .../fpnew/src/common_cells/.gitignore | 7 + .../fpnew/src/common_cells/CHANGELOG.md | 210 + .../fpnew/src/common_cells/LICENSE | 176 + .../fpnew/src/common_cells/README.md | 117 + .../include/common_cells/registers.svh | 224 ++ .../fpnew/src/common_cells/src/cdc_2phase.sv | 175 + .../src/common_cells/src/cdc_fifo_2phase.sv | 134 + .../src/common_cells/src/cdc_fifo_gray.sv | 158 + .../fpnew/src/common_cells/src/cf_math_pkg.sv | 49 + .../fpnew/src/common_cells/src/clk_div.sv | 42 + .../fpnew/src/common_cells/src/counter.sv | 55 + .../src/deprecated/clock_divider.sv | 191 + .../src/deprecated/clock_divider_counter.sv | 211 + .../common_cells/src/deprecated/fifo_v1.sv | 57 + .../common_cells/src/deprecated/fifo_v2.sv | 79 + .../src/deprecated/find_first_one.sv | 83 + .../src/deprecated/generic_LFSR_8bit.sv | 64 + .../src/deprecated/generic_fifo.sv | 274 ++ .../src/deprecated/generic_fifo_adv.sv | 264 ++ .../src/deprecated/prioarbiter.sv | 89 + .../common_cells/src/deprecated/pulp_sync.sv | 36 + .../src/deprecated/pulp_sync_wedge.sv | 55 + .../common_cells/src/deprecated/rrarbiter.sv | 61 + .../fpnew/src/common_cells/src/edge_detect.sv | 32 + .../src/common_cells/src/edge_propagator.sv | 50 + .../common_cells/src/edge_propagator_rx.sv | 31 + 
.../common_cells/src/edge_propagator_tx.sv | 40 + .../fpnew/src/common_cells/src/exp_backoff.sv | 94 + .../common_cells/src/fall_through_register.sv | 58 + .../fpnew/src/common_cells/src/fifo_v3.sv | 153 + .../fpnew/src/common_cells/src/graycode.sv | 33 + .../fpnew/src/common_cells/src/id_queue.sv | 268 ++ .../fpnew/src/common_cells/src/lfsr.sv | 310 ++ .../fpnew/src/common_cells/src/lfsr_16bit.sv | 67 + .../fpnew/src/common_cells/src/lfsr_8bit.sv | 68 + .../fpnew/src/common_cells/src/lzc.sv | 93 + .../fpnew/src/common_cells/src/mv_filter.sv | 55 + .../src/common_cells/src/onehot_to_bin.sv | 39 + .../fpnew/src/common_cells/src/plru_tree.sv | 120 + .../fpnew/src/common_cells/src/popcount.sv | 57 + .../fpnew/src/common_cells/src/rr_arb_tree.sv | 244 ++ .../fpnew/src/common_cells/src/rstgen.sv | 30 + .../src/common_cells/src/rstgen_bypass.sv | 54 + .../src/common_cells/src/serial_deglitch.sv | 50 + .../fpnew/src/common_cells/src/shift_reg.sv | 53 + .../src/common_cells/src/spill_register.sv | 89 + .../fpnew/src/common_cells/src/sram.sv | 46 + .../src/common_cells/src/stream_arbiter.sv | 49 + .../src/stream_arbiter_flushable.sv | 80 + .../src/common_cells/src/stream_delay.sv | 132 + .../src/common_cells/src/stream_demux.sv | 37 + .../src/common_cells/src/stream_filter.sv | 26 + .../fpnew/src/common_cells/src/stream_fork.sv | 133 + .../fpnew/src/common_cells/src/stream_mux.sv | 46 + .../src/common_cells/src/stream_register.sv | 57 + .../fpnew/src/common_cells/src/sync.sv | 34 + .../fpnew/src/common_cells/src/sync_wedge.sv | 56 + .../fpnew/src/common_cells/src/unread.sv | 21 + .../fpnew/src/fpnew_cast_multi.sv | 760 ++++ .../fpnew/src/fpnew_classifier.sv | 72 + .../fpnew/src/fpnew_divsqrt_multi.sv | 340 ++ vendor/pulp-platform/fpnew/src/fpnew_fma.sv | 673 ++++ .../fpnew/src/fpnew_fma_multi.sv | 820 ++++ .../pulp-platform/fpnew/src/fpnew_noncomp.sv | 404 ++ .../fpnew/src/fpnew_opgroup_block.sv | 230 ++ .../fpnew/src/fpnew_opgroup_fmt_slice.sv | 276 ++ 
.../fpnew/src/fpnew_opgroup_multifmt_slice.sv | 414 ++ vendor/pulp-platform/fpnew/src/fpnew_pkg.sv | 484 +++ .../pulp-platform/fpnew/src/fpnew_rounding.sv | 72 + vendor/pulp-platform/fpnew/src/fpnew_top.sv | 172 + .../fpnew/src/fpu_div_sqrt_mvp/LICENSE | 176 + .../fpnew/src/fpu_div_sqrt_mvp/hdl/.gitignore | 2 + .../src/fpu_div_sqrt_mvp/hdl/control_mvp.sv | 3413 +++++++++++++++++ .../fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv | 83 + .../hdl/div_sqrt_mvp_wrapper.sv | 232 ++ .../fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv | 180 + .../hdl/iteration_div_sqrt_mvp.sv | 61 + .../fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv | 484 +++ .../src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv | 104 + .../fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv | 425 ++ vendor/pulp-platform_common_cells.lock.hjson | 14 + .../pulp-platform_common_cells.vendor.hjson | 40 + .../pulp-platform_common_cells_fpu.lock.hjson | 14 + ...ulp-platform_common_cells_fpu.vendor.hjson | 41 + vendor/pulp-platform_fpnew.lock.hjson | 14 + vendor/pulp-platform_fpnew.vendor.hjson | 34 + .../pulp-platform_fpu_div_sqrt_mvp.lock.hjson | 14 + ...ulp-platform_fpu_div_sqrt_mvp.vendor.hjson | 32 + 188 files changed, 25749 insertions(+), 331 deletions(-) create mode 100644 util/README.md create mode 100644 util/vendor.py create mode 100644 vendor/pulp-platform/common_cells/.gitignore create mode 100644 vendor/pulp-platform/common_cells/CHANGELOG.md create mode 100644 vendor/pulp-platform/common_cells/LICENSE create mode 100644 vendor/pulp-platform/common_cells/README.md create mode 100644 vendor/pulp-platform/common_cells/include/common_cells/assertions.svh create mode 100644 vendor/pulp-platform/common_cells/include/common_cells/registers.svh create mode 100644 vendor/pulp-platform/common_cells/src/addr_decode.sv create mode 100644 vendor/pulp-platform/common_cells/src/binary_to_gray.sv create mode 100644 vendor/pulp-platform/common_cells/src/cb_filter.sv create mode 100644 vendor/pulp-platform/common_cells/src/cb_filter_pkg.sv create mode 
100644 vendor/pulp-platform/common_cells/src/cc_onehot.sv create mode 100644 vendor/pulp-platform/common_cells/src/cdc_2phase.sv create mode 100644 vendor/pulp-platform/common_cells/src/cdc_fifo_2phase.sv create mode 100644 vendor/pulp-platform/common_cells/src/cdc_fifo_gray.sv create mode 100644 vendor/pulp-platform/common_cells/src/cf_math_pkg.sv create mode 100644 vendor/pulp-platform/common_cells/src/clk_div.sv create mode 100644 vendor/pulp-platform/common_cells/src/counter.sv create mode 100644 vendor/pulp-platform/common_cells/src/delta_counter.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/clock_divider.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/clock_divider_counter.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/find_first_one.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/generic_LFSR_8bit.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/generic_fifo.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/generic_fifo_adv.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/prioarbiter.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/pulp_sync.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/pulp_sync_wedge.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/rrarbiter.sv create mode 100644 vendor/pulp-platform/common_cells/src/deprecated/sram.sv create mode 100644 vendor/pulp-platform/common_cells/src/ecc_decode.sv create mode 100644 vendor/pulp-platform/common_cells/src/ecc_encode.sv create mode 100644 vendor/pulp-platform/common_cells/src/ecc_pkg.sv create mode 100644 vendor/pulp-platform/common_cells/src/edge_detect.sv create mode 100644 
vendor/pulp-platform/common_cells/src/edge_propagator.sv create mode 100644 vendor/pulp-platform/common_cells/src/edge_propagator_rx.sv create mode 100644 vendor/pulp-platform/common_cells/src/edge_propagator_tx.sv create mode 100644 vendor/pulp-platform/common_cells/src/exp_backoff.sv create mode 100644 vendor/pulp-platform/common_cells/src/fall_through_register.sv create mode 100644 vendor/pulp-platform/common_cells/src/fifo_v3.sv create mode 100644 vendor/pulp-platform/common_cells/src/gray_to_binary.sv create mode 100644 vendor/pulp-platform/common_cells/src/id_queue.sv create mode 100644 vendor/pulp-platform/common_cells/src/isochronous_4phase_handshake.sv create mode 100644 vendor/pulp-platform/common_cells/src/isochronous_spill_register.sv create mode 100644 vendor/pulp-platform/common_cells/src/lfsr.sv create mode 100644 vendor/pulp-platform/common_cells/src/lfsr_16bit.sv create mode 100644 vendor/pulp-platform/common_cells/src/lfsr_8bit.sv create mode 100644 vendor/pulp-platform/common_cells/src/lzc.sv create mode 100644 vendor/pulp-platform/common_cells/src/max_counter.sv create mode 100644 vendor/pulp-platform/common_cells/src/mv_filter.sv create mode 100644 vendor/pulp-platform/common_cells/src/onehot_to_bin.sv create mode 100644 vendor/pulp-platform/common_cells/src/plru_tree.sv create mode 100644 vendor/pulp-platform/common_cells/src/popcount.sv create mode 100644 vendor/pulp-platform/common_cells/src/rr_arb_tree.sv create mode 100644 vendor/pulp-platform/common_cells/src/rstgen.sv create mode 100644 vendor/pulp-platform/common_cells/src/rstgen_bypass.sv create mode 100644 vendor/pulp-platform/common_cells/src/serial_deglitch.sv create mode 100644 vendor/pulp-platform/common_cells/src/shift_reg.sv create mode 100644 vendor/pulp-platform/common_cells/src/spill_register.sv create mode 100644 vendor/pulp-platform/common_cells/src/spill_register_flushable.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_arbiter.sv create mode 100644 
vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_delay.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_demux.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_fifo.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_filter.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_fork.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_fork_dynamic.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_intf.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_join.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_mux.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_omega_net.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_register.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_to_mem.sv create mode 100644 vendor/pulp-platform/common_cells/src/stream_xbar.sv create mode 100644 vendor/pulp-platform/common_cells/src/sub_per_hash.sv create mode 100644 vendor/pulp-platform/common_cells/src/sync.sv create mode 100644 vendor/pulp-platform/common_cells/src/sync_wedge.sv create mode 100644 vendor/pulp-platform/common_cells/src/unread.sv create mode 100644 vendor/pulp-platform/fpnew/.gitignore create mode 100644 vendor/pulp-platform/fpnew/LICENSE create mode 100644 vendor/pulp-platform/fpnew/README.md create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/.gitignore create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/CHANGELOG.md create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/LICENSE create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/README.md create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/include/common_cells/registers.svh create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/cdc_2phase.sv create mode 100644 
vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_2phase.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_gray.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/cf_math_pkg.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/clk_div.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/counter.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider_counter.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v1.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v2.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/find_first_one.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_LFSR_8bit.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo_adv.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/prioarbiter.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync_wedge.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/rrarbiter.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/edge_detect.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_rx.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_tx.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/exp_backoff.sv create mode 100644 
vendor/pulp-platform/fpnew/src/common_cells/src/fall_through_register.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/fifo_v3.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/graycode.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/id_queue.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/lfsr.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_16bit.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_8bit.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/lzc.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/mv_filter.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/onehot_to_bin.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/plru_tree.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/popcount.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/rr_arb_tree.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/rstgen.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/rstgen_bypass.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/serial_deglitch.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/shift_reg.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/spill_register.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/sram.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter_flushable.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/stream_delay.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/stream_demux.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/stream_filter.sv create mode 100644 
vendor/pulp-platform/fpnew/src/common_cells/src/stream_fork.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/stream_mux.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/stream_register.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/sync.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/sync_wedge.sv create mode 100644 vendor/pulp-platform/fpnew/src/common_cells/src/unread.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_classifier.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_fma.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_pkg.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_rounding.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpnew_top.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/LICENSE create mode 100644 vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/.gitignore create mode 100644 vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv create mode 100644 
vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv create mode 100644 vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv create mode 100644 vendor/pulp-platform_common_cells.lock.hjson create mode 100644 vendor/pulp-platform_common_cells.vendor.hjson create mode 100644 vendor/pulp-platform_common_cells_fpu.lock.hjson create mode 100644 vendor/pulp-platform_common_cells_fpu.vendor.hjson create mode 100644 vendor/pulp-platform_fpnew.lock.hjson create mode 100644 vendor/pulp-platform_fpnew.vendor.hjson create mode 100644 vendor/pulp-platform_fpu_div_sqrt_mvp.lock.hjson create mode 100644 vendor/pulp-platform_fpu_div_sqrt_mvp.vendor.hjson diff --git a/Bender.yml b/Bender.yml index f61dfb1970..a3e30c312e 100644 --- a/Bender.yml +++ b/Bender.yml @@ -8,7 +8,7 @@ package: # WT_DCACHE export_include_dirs: - - common/submodules/common_cells/include/ + - vendor/pulp-platform/common_cells/include/ - corev_apu/axi/include/ sources: @@ -28,8 +28,8 @@ sources: - corev_apu/tb/ariane_axi_soc_pkg.sv - core/include/ariane_axi_pkg.sv - core/include/std_cache_pkg.sv - - core/fpu/src/fpnew_pkg.sv - - core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + - vendor/pulp-platform/fpnew/src/fpnew_pkg.sv + - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv # Stand-alone source files - core/ariane.sv - core/serdiv.sv @@ -68,24 +68,24 @@ sources: - core/issue_read_operands.sv - core/pmp/src/pmp_entry.sv - core/pmp/src/pmp.sv - - core/fpu/src/fpnew_fma.sv - - core/fpu/src/fpnew_opgroup_fmt_slice.sv - - core/fpu/src/fpnew_divsqrt_multi.sv - - core/fpu/src/fpnew_fma_multi.sv - - core/fpu/src/fpnew_opgroup_multifmt_slice.sv - - core/fpu/src/fpnew_classifier.sv - - core/fpu/src/fpnew_noncomp.sv - - core/fpu/src/fpnew_cast_multi.sv - - core/fpu/src/fpnew_opgroup_block.sv - - core/fpu/src/fpnew_rounding.sv - - core/fpu/src/fpnew_top.sv - - 
core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv - - core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv - - core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv - - core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv - - core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv - - core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv - - core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv + - vendor/pulp-platform/fpnew/src/fpnew_fma.sv + - vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv + - vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv + - vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv + - vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv + - vendor/pulp-platform/fpnew/src/fpnew_classifier.sv + - vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv + - vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv + - vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv + - vendor/pulp-platform/fpnew/src/fpnew_rounding.sv + - vendor/pulp-platform/fpnew/src/fpnew_top.sv + - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv + - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv + - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv + - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv + - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv + - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv + - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv - core/frontend/frontend.sv - core/frontend/instr_scan.sv - core/frontend/instr_queue.sv @@ -143,16 +143,16 @@ sources: - corev_apu/riscv-dbg/debug_rom/debug_rom.sv - corev_apu/register_interface/src/apb_to_reg.sv - corev_apu/axi/src/axi_multicut.sv - - common/submodules/common_cells/src/cf_math_pkg.sv - - common/submodules/common_cells/src/deprecated/generic_fifo.sv - - common/submodules/common_cells/src/deprecated/pulp_sync.sv - - 
common/submodules/common_cells/src/deprecated/find_first_one.sv - - common/submodules/common_cells/src/rstgen_bypass.sv - - common/submodules/common_cells/src/rstgen.sv - - common/submodules/common_cells/src/stream_mux.sv - - common/submodules/common_cells/src/stream_demux.sv - - common/submodules/common_cells/src/stream_arbiter.sv - - common/submodules/common_cells/src/stream_arbiter_flushable.sv + - vendor/pulp-platform/common_cells/src/cf_math_pkg.sv + - vendor/pulp-platform/common_cells/src/deprecated/generic_fifo.sv + - vendor/pulp-platform/common_cells/src/deprecated/pulp_sync.sv + - vendor/pulp-platform/common_cells/src/deprecated/find_first_one.sv + - vendor/pulp-platform/common_cells/src/rstgen_bypass.sv + - vendor/pulp-platform/common_cells/src/rstgen.sv + - vendor/pulp-platform/common_cells/src/stream_mux.sv + - vendor/pulp-platform/common_cells/src/stream_demux.sv + - vendor/pulp-platform/common_cells/src/stream_arbiter.sv + - vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv - corev_apu/axi/src/axi_cut.sv - corev_apu/axi/src/axi_join.sv - corev_apu/axi/src/axi_delayer.sv @@ -164,28 +164,28 @@ sources: - corev_apu/axi/src/axi_demux.sv - corev_apu/axi/src/axi_xbar.sv - common/local/techlib/fpga/rtl/SyncSpRamBeNx64.sv - - common/submodules/common_cells/src/sync.sv - - common/submodules/common_cells/src/popcount.sv - - common/submodules/common_cells/src/unread.sv - - common/submodules/common_cells/src/cdc_2phase.sv - - common/submodules/common_cells/src/spill_register_flushable.sv - - common/submodules/common_cells/src/spill_register.sv - - common/submodules/common_cells/src/edge_detect.sv - - common/submodules/common_cells/src/fifo_v3.sv - - common/submodules/common_cells/src/deprecated/fifo_v2.sv - - common/submodules/common_cells/src/deprecated/fifo_v1.sv - - common/submodules/common_cells/src/lzc.sv - - common/submodules/common_cells/src/rr_arb_tree.sv - - common/submodules/common_cells/src/deprecated/rrarbiter.sv - - 
common/submodules/common_cells/src/stream_delay.sv - - common/submodules/common_cells/src/lfsr.sv - - common/submodules/common_cells/src/lfsr_8bit.sv - - common/submodules/common_cells/src/lfsr_16bit.sv - - common/submodules/common_cells/src/counter.sv - - common/submodules/common_cells/src/shift_reg.sv - - common/submodules/common_cells/src/exp_backoff.sv - - common/submodules/common_cells/src/addr_decode.sv - - common/submodules/common_cells/src/stream_register.sv + - vendor/pulp-platform/common_cells/src/sync.sv + - vendor/pulp-platform/common_cells/src/popcount.sv + - vendor/pulp-platform/common_cells/src/unread.sv + - vendor/pulp-platform/common_cells/src/cdc_2phase.sv + - vendor/pulp-platform/common_cells/src/spill_register_flushable.sv + - vendor/pulp-platform/common_cells/src/spill_register.sv + - vendor/pulp-platform/common_cells/src/edge_detect.sv + - vendor/pulp-platform/common_cells/src/fifo_v3.sv + - vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv + - vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv + - vendor/pulp-platform/common_cells/src/lzc.sv + - vendor/pulp-platform/common_cells/src/rr_arb_tree.sv + - vendor/pulp-platform/common_cells/src/deprecated/rrarbiter.sv + - vendor/pulp-platform/common_cells/src/stream_delay.sv + - vendor/pulp-platform/common_cells/src/lfsr.sv + - vendor/pulp-platform/common_cells/src/lfsr_8bit.sv + - vendor/pulp-platform/common_cells/src/lfsr_16bit.sv + - vendor/pulp-platform/common_cells/src/counter.sv + - vendor/pulp-platform/common_cells/src/shift_reg.sv + - vendor/pulp-platform/common_cells/src/exp_backoff.sv + - vendor/pulp-platform/common_cells/src/addr_decode.sv + - vendor/pulp-platform/common_cells/src/stream_register.sv - corev_apu/src/tech_cells_generic/src/cluster_clock_inverter.sv - corev_apu/src/tech_cells_generic/src/pulp_clock_mux2.sv - target: not(cv32a6) diff --git a/Flist.ariane b/Flist.ariane index f7573966f4..3085215bc0 100644 --- a/Flist.ariane +++ b/Flist.ariane @@ -15,7 +15,7 
@@ // Author: Michael Schaffner , ETH Zurich // Date: 15.08.2018 // Description: File list for OpenPiton flow -+incdir+common/submodules/common_cells/include/ ++incdir+vendor/pulp-platform/common_cells/include/ +incdir+common/local/util/ +incdir+corev_apu/register_interface/include/ @@ -28,27 +28,27 @@ corev_apu/axi/src/axi_pkg.sv core/include/ariane_axi_pkg.sv core/include/wt_cache_pkg.sv core/include/axi_intf.sv -core/fpu/src/fpnew_pkg.sv +vendor/pulp-platform/fpnew/src/fpnew_pkg.sv core/include/cvxif_pkg.sv -common/submodules/common_cells/src/cf_math_pkg.sv +vendor/pulp-platform/common_cells/src/cf_math_pkg.sv core/include/instr_tracer_pkg.sv core/cvxif_example/include/cvxif_instr_pkg.sv corev_apu/rv_plic/rtl/rv_plic_reg_pkg.sv common/local/util/sram.sv -common/submodules/common_cells/src/deprecated/rrarbiter.sv -common/submodules/common_cells/src/deprecated/fifo_v1.sv -common/submodules/common_cells/src/deprecated/fifo_v2.sv -common/submodules/common_cells/src/fifo_v3.sv -common/submodules/common_cells/src/shift_reg.sv -common/submodules/common_cells/src/lfsr_8bit.sv -common/submodules/common_cells/src/lfsr.sv -common/submodules/common_cells/src/lzc.sv -common/submodules/common_cells/src/exp_backoff.sv -common/submodules/common_cells/src/rr_arb_tree.sv -common/submodules/common_cells/src/rstgen_bypass.sv -common/submodules/common_cells/src/cdc_2phase.sv -common/submodules/common_cells/src/unread.sv -common/submodules/common_cells/src/popcount.sv +vendor/pulp-platform/common_cells/src/deprecated/rrarbiter.sv +vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv +vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv +vendor/pulp-platform/common_cells/src/fifo_v3.sv +vendor/pulp-platform/common_cells/src/shift_reg.sv +vendor/pulp-platform/common_cells/src/lfsr_8bit.sv +vendor/pulp-platform/common_cells/src/lfsr.sv +vendor/pulp-platform/common_cells/src/lzc.sv +vendor/pulp-platform/common_cells/src/exp_backoff.sv 
+vendor/pulp-platform/common_cells/src/rr_arb_tree.sv +vendor/pulp-platform/common_cells/src/rstgen_bypass.sv +vendor/pulp-platform/common_cells/src/cdc_2phase.sv +vendor/pulp-platform/common_cells/src/unread.sv +vendor/pulp-platform/common_cells/src/popcount.sv corev_apu/axi_mem_if/src/axi2mem.sv corev_apu/src/tech_cells_generic/src/deprecated/cluster_clk_cells.sv corev_apu/src/tech_cells_generic/src/deprecated/pulp_clk_cells.sv @@ -132,31 +132,31 @@ corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv corev_apu/register_interface/src/apb_to_reg.sv corev_apu/register_interface/src/reg_intf.sv -core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv -core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv -core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv -core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv -core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv -core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv -core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv -core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv -core/fpu/src/fpnew_cast_multi.sv -core/fpu/src/fpnew_classifier.sv -core/fpu/src/fpnew_divsqrt_multi.sv -core/fpu/src/fpnew_fma_multi.sv -core/fpu/src/fpnew_fma.sv -core/fpu/src/fpnew_noncomp.sv -core/fpu/src/fpnew_opgroup_block.sv -core/fpu/src/fpnew_opgroup_fmt_slice.sv -core/fpu/src/fpnew_opgroup_multifmt_slice.sv -core/fpu/src/fpnew_rounding.sv -core/fpu/src/fpnew_top.sv +vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv +vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv +vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv +vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv +vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv +vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv +vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv 
+vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv +vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv +vendor/pulp-platform/fpnew/src/fpnew_classifier.sv +vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv +vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv +vendor/pulp-platform/fpnew/src/fpnew_fma.sv +vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv +vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv +vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv +vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv +vendor/pulp-platform/fpnew/src/fpnew_rounding.sv +vendor/pulp-platform/fpnew/src/fpnew_top.sv core/pmp/src/pmp.sv core/pmp/src/pmp_entry.sv common/local/util/instr_tracer.sv common/local/util/instr_tracer_if.sv core/cvxif_example/cvxif_example_coprocessor.sv core/cvxif_example/instr_decoder.sv -common/submodules/common_cells/src/counter.sv -common/submodules/common_cells/src/delta_counter.sv +vendor/pulp-platform/common_cells/src/counter.sv +vendor/pulp-platform/common_cells/src/delta_counter.sv core/cvxif_fu.sv diff --git a/Makefile b/Makefile index 570fe74af0..9536db0e29 100644 --- a/Makefile +++ b/Makefile @@ -169,13 +169,13 @@ src := core/axi_adapter.sv corev_apu/riscv-dbg/debug_rom/debug_rom.sv \ corev_apu/register_interface/src/apb_to_reg.sv \ corev_apu/axi/src/axi_multicut.sv \ - common/submodules/common_cells/src/rstgen_bypass.sv \ - common/submodules/common_cells/src/rstgen.sv \ - common/submodules/common_cells/src/stream_mux.sv \ - common/submodules/common_cells/src/stream_demux.sv \ - common/submodules/common_cells/src/exp_backoff.sv \ - common/submodules/common_cells/src/addr_decode.sv \ - common/submodules/common_cells/src/stream_register.sv \ + vendor/pulp-platform/common_cells/src/rstgen_bypass.sv \ + vendor/pulp-platform/common_cells/src/rstgen.sv \ + vendor/pulp-platform/common_cells/src/stream_mux.sv \ + vendor/pulp-platform/common_cells/src/stream_demux.sv \ + 
vendor/pulp-platform/common_cells/src/exp_backoff.sv \ + vendor/pulp-platform/common_cells/src/addr_decode.sv \ + vendor/pulp-platform/common_cells/src/stream_register.sv \ corev_apu/axi/src/axi_cut.sv \ corev_apu/axi/src/axi_join.sv \ corev_apu/axi/src/axi_delayer.sv \ @@ -186,15 +186,15 @@ src := core/axi_adapter.sv corev_apu/axi/src/axi_mux.sv \ corev_apu/axi/src/axi_demux.sv \ corev_apu/axi/src/axi_xbar.sv \ - common/submodules/common_cells/src/cdc_2phase.sv \ - common/submodules/common_cells/src/spill_register_flushable.sv \ - common/submodules/common_cells/src/spill_register.sv \ - common/submodules/common_cells/src/stream_arbiter.sv \ - common/submodules/common_cells/src/stream_arbiter_flushable.sv \ - common/submodules/common_cells/src/deprecated/fifo_v1.sv \ - common/submodules/common_cells/src/deprecated/fifo_v2.sv \ - common/submodules/common_cells/src/stream_delay.sv \ - common/submodules/common_cells/src/lfsr_16bit.sv \ + vendor/pulp-platform/common_cells/src/cdc_2phase.sv \ + vendor/pulp-platform/common_cells/src/spill_register_flushable.sv \ + vendor/pulp-platform/common_cells/src/spill_register.sv \ + vendor/pulp-platform/common_cells/src/stream_arbiter.sv \ + vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv \ + vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv \ + vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv \ + vendor/pulp-platform/common_cells/src/stream_delay.sv \ + vendor/pulp-platform/common_cells/src/lfsr_16bit.sv \ corev_apu/src/tech_cells_generic/src/deprecated/cluster_clk_cells.sv \ corev_apu/src/tech_cells_generic/src/deprecated/pulp_clk_cells.sv \ corev_apu/src/tech_cells_generic/src/rtl/tc_clk.sv \ @@ -244,7 +244,7 @@ riscv-fp-tests := $(shell xargs printf '\n%s' < $(riscv-fp-tests-list riscv-benchmarks := $(shell xargs printf '\n%s' < $(riscv-benchmarks-list) | cut -b 1-) # Search here for include files (e.g.: non-standalone components) -incdir := common/submodules/common_cells/include/ 
corev_apu/axi/include/ corev_apu/register_interface/include/ +incdir := vendor/pulp-platform/common_cells/include/ corev_apu/axi/include/ corev_apu/register_interface/include/ # Compile and sim flags compile_flag += +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive +define+$(defines) @@ -293,7 +293,7 @@ vcs_build: $(dpi-library)/ariane_dpi.so vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog +define+$(defines) -f ../core/Flist.$(target) &&\ vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog +define+$(defines) $(filter %.sv,$(ariane_pkg)) +incdir+core/include/+$(VCS_HOME)/etc/uvm-1.2/dpi &&\ vhdlan $(if $(VERDI), -kdb,) -full64 -nc $(filter %.vhd,$(uart_src)) &&\ - vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog -assert svaext +define+$(defines) $(filter %.sv,$(src)) +incdir+../common/submodules/common_cells/include/+../corev_apu/axi/include/+../corev_apu/register_interface/include/ &&\ + vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog -assert svaext +define+$(defines) $(filter %.sv,$(src)) +incdir+../vendor/pulp-platform/common_cells/include/+../corev_apu/axi/include/+../corev_apu/register_interface/include/ &&\ vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog -ntb_opts uvm-1.2 &&\ vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog -ntb_opts uvm-1.2 $(tbs) +define+$(defines) +incdir+../corev_apu/axi/include/ &&\ vcs $(if $(VERDI), -kdb -debug_access+all -lca,) -full64 -timescale=1ns/1ns -ntb_opts uvm-1.2 work.ariane_tb diff --git a/core/Flist.cv32a60x b/core/Flist.cv32a60x index e8490a057c..f88c51db33 100644 --- a/core/Flist.cv32a60x +++ b/core/Flist.cv32a60x @@ -27,8 +27,8 @@ +define+WT_DCACHE -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/ -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/ +incdir+${CVA6_REPO_DIR}/common/local/util/ 
${CVA6_REPO_DIR}/core/include/cv32a60x_config_pkg.sv @@ -58,40 +58,40 @@ ${CVA6_REPO_DIR}/core/cvxif_example/cvxif_example_coprocessor.sv ${CVA6_REPO_DIR}/core/cvxif_example/instr_decoder.sv // Common Cells -${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv // Common Cells for example coprocessor -${CVA6_REPO_DIR}/common/submodules/common_cells/src/counter.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/delta_counter.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/counter.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/delta_counter.sv // Floating point unit -${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv 
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv 
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv // Top-level source files (not necessarily instantiated at the top of the cva6). ${CVA6_REPO_DIR}/core/ariane.sv diff --git a/core/Flist.cv32a6_ima_sv32_fpga b/core/Flist.cv32a6_ima_sv32_fpga index 310cc070df..dbaa746c05 100644 --- a/core/Flist.cv32a6_ima_sv32_fpga +++ b/core/Flist.cv32a6_ima_sv32_fpga @@ -27,8 +27,8 @@ +define+WT_DCACHE -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/ -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/ +incdir+${CVA6_REPO_DIR}/common/local/util/ ${CVA6_REPO_DIR}/core/include/cv32a6_imac_sv0_config_pkg.sv @@ -55,36 +55,36 @@ ${CVA6_REPO_DIR}/core/include/cvxif_pkg.sv ${CVA6_REPO_DIR}/core/cvxif_fu.sv // Common Cells -${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv 
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv // Floating point unit -${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv 
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv // Top-level source files (not necessarily instantiated at the top of the cva6). 
${CVA6_REPO_DIR}/core/ariane.sv diff --git a/core/Flist.cv32a6_imac_sv0 b/core/Flist.cv32a6_imac_sv0 index 310cc070df..dbaa746c05 100644 --- a/core/Flist.cv32a6_imac_sv0 +++ b/core/Flist.cv32a6_imac_sv0 @@ -27,8 +27,8 @@ +define+WT_DCACHE -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/ -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/ +incdir+${CVA6_REPO_DIR}/common/local/util/ ${CVA6_REPO_DIR}/core/include/cv32a6_imac_sv0_config_pkg.sv @@ -55,36 +55,36 @@ ${CVA6_REPO_DIR}/core/include/cvxif_pkg.sv ${CVA6_REPO_DIR}/core/cvxif_fu.sv // Common Cells -${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv // Floating point unit -${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv 
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv 
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv // Top-level source files (not necessarily instantiated at the top of the cva6). ${CVA6_REPO_DIR}/core/ariane.sv diff --git a/core/Flist.cv32a6_imac_sv32 b/core/Flist.cv32a6_imac_sv32 index 310cc070df..dbaa746c05 100644 --- a/core/Flist.cv32a6_imac_sv32 +++ b/core/Flist.cv32a6_imac_sv32 @@ -27,8 +27,8 @@ +define+WT_DCACHE -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/ -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/ +incdir+${CVA6_REPO_DIR}/common/local/util/ ${CVA6_REPO_DIR}/core/include/cv32a6_imac_sv0_config_pkg.sv @@ -55,36 +55,36 @@ ${CVA6_REPO_DIR}/core/include/cvxif_pkg.sv ${CVA6_REPO_DIR}/core/cvxif_fu.sv // Common Cells -${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv 
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv // Floating point unit -${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv 
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv // Top-level source files (not necessarily instantiated at the top of the cva6). 
${CVA6_REPO_DIR}/core/ariane.sv diff --git a/core/Flist.cv32a6_imafc_sv32 b/core/Flist.cv32a6_imafc_sv32 index 310cc070df..dbaa746c05 100644 --- a/core/Flist.cv32a6_imafc_sv32 +++ b/core/Flist.cv32a6_imafc_sv32 @@ -27,8 +27,8 @@ +define+WT_DCACHE -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/ -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/ +incdir+${CVA6_REPO_DIR}/common/local/util/ ${CVA6_REPO_DIR}/core/include/cv32a6_imac_sv0_config_pkg.sv @@ -55,36 +55,36 @@ ${CVA6_REPO_DIR}/core/include/cvxif_pkg.sv ${CVA6_REPO_DIR}/core/cvxif_fu.sv // Common Cells -${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv // Floating point unit -${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv 
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv 
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv // Top-level source files (not necessarily instantiated at the top of the cva6). ${CVA6_REPO_DIR}/core/ariane.sv diff --git a/core/Flist.cv64a6_imafdc_sv39 b/core/Flist.cv64a6_imafdc_sv39 index 72a038bc14..1fe09b4934 100644 --- a/core/Flist.cv64a6_imafdc_sv39 +++ b/core/Flist.cv64a6_imafdc_sv39 @@ -27,8 +27,8 @@ +define+WT_DCACHE -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/ -+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/ ++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/ +incdir+${CVA6_REPO_DIR}/common/local/util/ ${CVA6_REPO_DIR}/core/include/cv64a6_imafdc_sv39_config_pkg.sv @@ -58,40 +58,40 @@ ${CVA6_REPO_DIR}/core/cvxif_example/cvxif_example_coprocessor.sv ${CVA6_REPO_DIR}/core/cvxif_example/instr_decoder.sv // Common Cells -${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv 
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv // Common Cells for example coprocessor -${CVA6_REPO_DIR}/common/submodules/common_cells/src/counter.sv -${CVA6_REPO_DIR}/common/submodules/common_cells/src/delta_counter.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/counter.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/delta_counter.sv // Floating point unit -${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv -${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv 
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv +${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv // Top-level source files (not necessarily instantiated at the top of the cva6). 
${CVA6_REPO_DIR}/core/ariane.sv diff --git a/corev_apu/fpga/scripts/run.tcl b/corev_apu/fpga/scripts/run.tcl index 6d32d95d6a..f075219831 100644 --- a/corev_apu/fpga/scripts/run.tcl +++ b/corev_apu/fpga/scripts/run.tcl @@ -38,24 +38,24 @@ read_ip { \ } # read_ip xilinx/xlnx_protocol_checker/ip/xlnx_protocol_checker.xci -set_property include_dirs { "src/axi_sd_bridge/include" "../../common/submodules/common_cells/include" "../axi/include" "../register_interface/include"} [current_fileset] +set_property include_dirs { "src/axi_sd_bridge/include" "../../vendor/pulp-platform/common_cells/include" "../axi/include" "../register_interface/include"} [current_fileset] source scripts/add_sources.tcl set_property top ${project}_xilinx [current_fileset] if {$::env(BOARD) eq "genesys2"} { - read_verilog -sv {src/genesysii.svh ../../common/submodules/common_cells/include/common_cells/registers.svh} + read_verilog -sv {src/genesysii.svh ../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh} set file "src/genesysii.svh" - set registers "../../common/submodules/common_cells/include/common_cells/registers.svh" + set registers "../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh" } elseif {$::env(BOARD) eq "kc705"} { - read_verilog -sv {src/kc705.svh ../../common/submodules/common_cells/include/common_cells/registers.svh} + read_verilog -sv {src/kc705.svh ../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh} set file "src/kc705.svh" - set registers "../../common/submodules/common_cells/include/common_cells/registers.svh" + set registers "../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh" } elseif {$::env(BOARD) eq "vc707"} { - read_verilog -sv {src/vc707.svh ../../common/submodules/common_cells/include/common_cells/registers.svh} + read_verilog -sv {src/vc707.svh ../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh} set file "src/vc707.svh" - set registers 
"../../common/submodules/common_cells/include/common_cells/registers.svh" + set registers "../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh" } else { exit 1 } diff --git a/corev_apu/tb/tb_cva6_icache/tb.list b/corev_apu/tb/tb_cva6_icache/tb.list index db576dac4b..60f3c41aba 100644 --- a/corev_apu/tb/tb_cva6_icache/tb.list +++ b/corev_apu/tb/tb_cva6_icache/tb.list @@ -3,12 +3,12 @@ ../../riscv-dbg/src/dm_pkg.sv ../../../core/include/ariane_pkg.sv ../../../core/include/wt_cache_pkg.sv -../../../common/submodules/common_cells/src/cf_math_pkg.sv +../../../vendor/pulp-platform/common_cells/src/cf_math_pkg.sv ../../../common/local/techlib/fpga/rtl/SyncSpRamBeNx64.sv ../../../core/cache_subsystem/cva6_icache.sv -../../../common/submodules/common_cells/src/lfsr.sv -../../../common/submodules/common_cells/src/fifo_v3.sv -../../../common/submodules/common_cells/src/lzc.sv +../../../vendor/pulp-platform/common_cells/src/lfsr.sv +../../../vendor/pulp-platform/common_cells/src/fifo_v3.sv +../../../vendor/pulp-platform/common_cells/src/lzc.sv ../../../common/local/util/sram.sv hdl/mem_emul.sv hdl/tlb_emul.sv diff --git a/corev_apu/tb/tb_wb_dcache/tb.list b/corev_apu/tb/tb_wb_dcache/tb.list index f427364770..05180e4ee1 100644 --- a/corev_apu/tb/tb_wb_dcache/tb.list +++ b/corev_apu/tb/tb_wb_dcache/tb.list @@ -23,16 +23,16 @@ ../../../core/cache_subsystem/std_nbdcache.sv ../../../core/cache_subsystem/amo_alu.sv ../../../core/cache_subsystem/tag_cmp.sv -../../../common/submodules/common_cells/src/cf_math_pkg.sv -../../../common/submodules/common_cells/src/lfsr_8bit.sv -../../../common/submodules/common_cells/src/fifo_v3.sv -../../../common/submodules/common_cells/src/lzc.sv -../../../common/submodules/common_cells/src/rr_arb_tree.sv -../../../common/submodules/common_cells/src/exp_backoff.sv -../../../common/submodules/common_cells/src/stream_arbiter.sv -../../../common/submodules/common_cells/src/stream_arbiter_flushable.sv 
-../../../common/submodules/common_cells/src/stream_mux.sv -../../../common/submodules/common_cells/src/stream_demux.sv +../../../vendor/pulp-platform/common_cells/src/cf_math_pkg.sv +../../../vendor/pulp-platform/common_cells/src/lfsr_8bit.sv +../../../vendor/pulp-platform/common_cells/src/fifo_v3.sv +../../../vendor/pulp-platform/common_cells/src/lzc.sv +../../../vendor/pulp-platform/common_cells/src/rr_arb_tree.sv +../../../vendor/pulp-platform/common_cells/src/exp_backoff.sv +../../../vendor/pulp-platform/common_cells/src/stream_arbiter.sv +../../../vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv +../../../vendor/pulp-platform/common_cells/src/stream_mux.sv +../../../vendor/pulp-platform/common_cells/src/stream_demux.sv ../../../core/axi_adapter.sv ../../../common/local/util/sram.sv ../../src/axi_riscv_atomics/src/axi_res_tbl.sv diff --git a/corev_apu/tb/tb_wt_axi_dcache/tb.list b/corev_apu/tb/tb_wt_axi_dcache/tb.list index a12da819cd..1c744fcc5f 100644 --- a/corev_apu/tb/tb_wt_axi_dcache/tb.list +++ b/corev_apu/tb/tb_wt_axi_dcache/tb.list @@ -12,7 +12,7 @@ ../../axi/src/axi_pkg.sv ../../axi/src/axi_intf.sv ../../axi/src/axi_test.sv -../../../core/fpu/src/fpnew_pkg.sv +../../../vendor/pulp-platform/fpnew/src/fpnew_pkg.sv ../../../core/include/ariane_pkg.sv ../ariane_soc_pkg.sv ../ariane_axi_soc_pkg.sv @@ -28,15 +28,15 @@ ../../../core/axi_shim.sv ../../../core/cache_subsystem/wt_axi_adapter.sv ../../../core/cache_subsystem/wt_cache_subsystem.sv -../../../common/submodules/common_cells/src/cf_math_pkg.sv -../../../common/submodules/common_cells/src/lfsr.sv -../../../common/submodules/common_cells/src/fifo_v3.sv -../../../common/submodules/common_cells/src/lzc.sv -../../../common/submodules/common_cells/src/rr_arb_tree.sv -../../../common/submodules/common_cells/src/exp_backoff.sv -../../../common/submodules/common_cells/src/stream_arbiter.sv -../../../common/submodules/common_cells/src/stream_arbiter_flushable.sv 
-../../../common/submodules/common_cells/src/stream_mux.sv +../../../vendor/pulp-platform/common_cells/src/cf_math_pkg.sv +../../../vendor/pulp-platform/common_cells/src/lfsr.sv +../../../vendor/pulp-platform/common_cells/src/fifo_v3.sv +../../../vendor/pulp-platform/common_cells/src/lzc.sv +../../../vendor/pulp-platform/common_cells/src/rr_arb_tree.sv +../../../vendor/pulp-platform/common_cells/src/exp_backoff.sv +../../../vendor/pulp-platform/common_cells/src/stream_arbiter.sv +../../../vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv +../../../vendor/pulp-platform/common_cells/src/stream_mux.sv ../../src/tech_cells_generic/src/rtl/tc_sram.sv ../../../common/local/util/tc_sram_wrapper.sv ../../../common/local/util/sram.sv diff --git a/corev_apu/tb/tb_wt_dcache/tb.list b/corev_apu/tb/tb_wt_dcache/tb.list index 617eff1eda..a03beda249 100644 --- a/corev_apu/tb/tb_wt_dcache/tb.list +++ b/corev_apu/tb/tb_wt_dcache/tb.list @@ -16,12 +16,12 @@ ../../../core/cache_subsystem/wt_dcache_missunit.sv ../../../core/cache_subsystem/wt_dcache_wbuffer.sv ../../../core/cache_subsystem/wt_dcache.sv -../../../common/submodules/common_cells/src/cf_math_pkg.sv -../../../common/submodules/common_cells/src/lfsr.sv -../../../common/submodules/common_cells/src/fifo_v3.sv -../../../common/submodules/common_cells/src/lzc.sv -../../../common/submodules/common_cells/src/rr_arb_tree.sv -../../../common/submodules/common_cells/src/exp_backoff.sv +../../../vendor/pulp-platform/common_cells/src/cf_math_pkg.sv +../../../vendor/pulp-platform/common_cells/src/lfsr.sv +../../../vendor/pulp-platform/common_cells/src/fifo_v3.sv +../../../vendor/pulp-platform/common_cells/src/lzc.sv +../../../vendor/pulp-platform/common_cells/src/rr_arb_tree.sv +../../../vendor/pulp-platform/common_cells/src/exp_backoff.sv ../../src/tech_cells_generic/src/rtl/tc_sram.sv ../../../common/local/util/tc_sram_wrapper.sv ../../../common/local/util/sram.sv diff --git a/pd/synth/cva6_synth.tcl 
b/pd/synth/cva6_synth.tcl index c3a82efa7d..334557f2bc 100644 --- a/pd/synth/cva6_synth.tcl +++ b/pd/synth/cva6_synth.tcl @@ -17,7 +17,7 @@ set clk_period $PERIOD set input_delay $INPUT_DELAY set output_delay $OUTPUT_DELAY -set_app_var search_path "../../core/fpu/src/common_cells/include/ $search_path" +set_app_var search_path "../../vendor/pulp-platform/fpnew/src/common_cells/include/ $search_path" sh rm -rf work sh mkdir work diff --git a/util/README.md b/util/README.md new file mode 100644 index 0000000000..9ed2109698 --- /dev/null +++ b/util/README.md @@ -0,0 +1,5 @@ +Content: + +* vendor.py + - vendorization script + - copied from https://github.com/openhwgroup/cv32e40p/blob/master/util/vendor.py, commit 69e839e diff --git a/util/vendor.py b/util/vendor.py new file mode 100644 index 0000000000..8c677f9288 --- /dev/null +++ b/util/vendor.py @@ -0,0 +1,782 @@ +#!/usr/bin/env python3 +# Copyright lowRISC contributors. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 + +'''A tool to copy source code from upstream into this repository. + +For an introduction to using this tool, see doc/ug/vendor_hw.md in this +repository (on the internet at https://docs.opentitan.org/doc/ug/vendor_hw/). + +For full documentation, see doc/rm/vendor_in_tool.md (on the internet at +https://docs.opentitan.org/doc/rm/vendor_in_tool). + +''' + +import argparse +import fnmatch +import logging as log +import os +import re +import shutil +import subprocess +import sys +import tempfile +import textwrap +from pathlib import Path + +import hjson + +EXCLUDE_ALWAYS = ['.git'] + +LOCK_FILE_HEADER = """// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// This file is generated by the util/vendor script. Please do not modify it +// manually. 
+ +""" + +# Keys in the description (configuration) file which can be overridden through +# the command line. +OVERRIDABLE_DESC_KEYS = [ + 'patch_repo.url', + 'patch_repo.rev_base', + 'patch_repo.rev_patched', + 'upstream.url', + 'upstream.ref', +] + +verbose = False + + +def git_is_clean_workdir(git_workdir): + """Check if the git working directory is clean (no unstaged or staged changes)""" + cmd = ['git', 'status', '--untracked-files=no', '--porcelain'] + modified_files = subprocess.run(cmd, + cwd=str(git_workdir), + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE).stdout.strip() + return not modified_files + + +def github_qualify_references(log, repo_userorg, repo_name): + """ Replace "unqualified" GitHub references with "fully qualified" one + + GitHub automatically links issues and pull requests if they have a specific + format. Links can be qualified with the user/org name and the repository + name, or unqualified, if they only contain the issue or pull request number. + + This function converts all unqualified references to qualified ones. + + See https://help.github.com/en/articles/autolinked-references-and-urls#issues-and-pull-requests + for a documentation of all supported formats. 
+ """ + + r = re.compile(r"(^|[^\w])(?:#|[gG][hH]-)(\d+)\b") + repl_str = r'\1%s/%s#\2' % (repo_userorg, repo_name) + return [r.sub(repl_str, l) for l in log] + + +def test_github_qualify_references(): + repo_userorg = 'lowRISC' + repo_name = 'ibex' + + # Unqualified references, should be replaced + items_unqualified = [ + '#28', + 'GH-27', + 'klaus #27', + 'Fixes #27', + 'Fixes #27 and #28', + '(#27)', + 'something (#27) done', + '#27 and (GH-38)', + ] + exp_items_unqualified = [ + 'lowRISC/ibex#28', + 'lowRISC/ibex#27', + 'klaus lowRISC/ibex#27', + 'Fixes lowRISC/ibex#27', + 'Fixes lowRISC/ibex#27 and lowRISC/ibex#28', + '(lowRISC/ibex#27)', + 'something (lowRISC/ibex#27) done', + 'lowRISC/ibex#27 and (lowRISC/ibex#38)', + ] + assert github_qualify_references(items_unqualified, repo_userorg, + repo_name) == exp_items_unqualified + + # Qualified references, should stay as they are + items_qualified = [ + 'Fixes lowrisc/ibex#27', + 'lowrisc/ibex#2', + ] + assert github_qualify_references(items_qualified, repo_userorg, + repo_name) == items_qualified + + # Invalid references, should stay as they are + items_invalid = [ + 'something#27', + 'lowrisc/ibex#', + ] + assert github_qualify_references(items_invalid, repo_userorg, + repo_name) == items_invalid + + +def test_github_parse_url(): + assert github_parse_url('https://example.com/something/asdf.git') is None + assert github_parse_url('https://github.com/lowRISC/ibex.git') == ( + 'lowRISC', 'ibex') + assert github_parse_url('https://github.com/lowRISC/ibex') == ('lowRISC', + 'ibex') + assert github_parse_url('git@github.com:lowRISC/ibex.git') == ('lowRISC', + 'ibex') + + +def github_parse_url(github_repo_url): + """Parse a GitHub repository URL into its parts. + + Return a tuple (userorg, name), or None if the parsing failed. 
+ """ + + regex = r"(?:@github\.com\:|\/github\.com\/)([a-zA-Z\d-]+)\/([a-zA-Z\d-]+)(?:\.git)?$" + m = re.search(regex, github_repo_url) + if m is None: + return None + return (m.group(1), m.group(2)) + + +def produce_shortlog(clone_dir, mapping, old_rev, new_rev): + """ Produce a list of changes between two revisions, one revision per line + + Merges are excluded""" + + # If mapping is None, we want to list all changes below clone_dir. + # Otherwise, we want to list changes in each 'source' in the mapping. Since + # these strings are paths relative to clone_dir, we can just pass them all + # to git and let it figure out what to do. + subdirs = (['.'] if mapping is None + else [m.from_path for m in mapping.items]) + + cmd = (['git', '-C', str(clone_dir), 'log', + '--pretty=format:%s (%aN)', '--no-merges', + old_rev + '..' + new_rev] + + subdirs) + try: + proc = subprocess.run(cmd, + cwd=str(clone_dir), + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True) + return proc.stdout.splitlines() + except subprocess.CalledProcessError as e: + log.error("Unable to capture shortlog: %s", e.stderr) + return "" + + +def format_list_to_str(list, width=70): + """ Create Markdown-style formatted string from a list of strings """ + wrapper = textwrap.TextWrapper(initial_indent="* ", + subsequent_indent=" ", + width=width) + return '\n'.join([wrapper.fill(s) for s in list]) + + +class JsonError(Exception): + '''An error class for when data in the source HJSON is bad''' + def __init__(self, path, msg): + self.path = path + self.msg = msg + + def __str__(self): + return 'In hjson at {}, {}'.format(self.path, self.msg) + + +def get_field(path, where, data, name, expected_type=dict, optional=False, constructor=None): + value = data.get(name) + if value is None: + if not optional: + raise JsonError(path, '{}, missing {!r} field.'.format(where, name)) + return None + + if not isinstance(value, expected_type): + raise JsonError(path, + '{}, the {!r} 
field is {!r}, but should be of type {!r}.' + .format(where, name, value, expected_type.__name__)) + + return value if constructor is None else constructor(value) + + +class Upstream: + '''A class representing the 'upstream' field in a config or lock file''' + def __init__(self, path, data): + # Fields: 'url', 'rev', 'only_subdir' (optional). All should be strings. + where = 'in upstream dict' + self.url = get_field(path, where, data, 'url', str) + self.rev = get_field(path, where, data, 'rev', str) + self.only_subdir = get_field(path, where, data, + 'only_subdir', str, optional=True) + + def as_dict(self): + data = {'url': self.url, 'rev': self.rev} + if self.only_subdir is not None: + data['only_subdir'] = self.only_subdir + return data + + +class PatchRepo: + '''A class representing the 'patch_repo' field in a config file''' + def __init__(self, path, data): + # Fields: 'url', 'rev_base', 'rev_patched'. All should be strings. + where = 'in patch_repo dict' + self.url = get_field(path, where, data, 'url', str) + self.rev_base = get_field(path, where, data, 'rev_base', str) + self.rev_patched = get_field(path, where, data, 'rev_patched', str) + + +class Mapping1: + '''A class to represent a single item in the 'mapping' field in a config file''' + def __init__(self, from_path, to_path, patch_dir): + self.from_path = from_path + self.to_path = to_path + self.patch_dir = patch_dir + + @staticmethod + def make(path, idx, data): + assert isinstance(data, dict) + + def get_path(name, optional=False): + val = get_field(path, 'in mapping entry {}'.format(idx + 1), + data, name, expected_type=str, optional=optional) + if val is None: + return None + + # Check that the paths aren't evil ('../../../foo' or '/etc/passwd' + # are *not* ok!) 
+ val = os.path.normpath(val) + if val.startswith('/') or val.startswith('..'): + raise JsonError(path, + 'Mapping entry {} has a bad path for {!r} ' + '(must be a relative path that doesn\'t ' + 'escape the directory)' + .format(idx + 1, name)) + + return Path(val) + + from_path = get_path('from') + to_path = get_path('to') + patch_dir = get_path('patch_dir', optional=True) + + return Mapping1(from_path, to_path, patch_dir) + + @staticmethod + def make_default(have_patch_dir): + '''Make a default mapping1, which copies everything straight through''' + return Mapping1(Path('.'), Path('.'), + Path('.') if have_patch_dir else None) + + @staticmethod + def apply_patch(basedir, patchfile): + cmd = ['git', 'apply', '--directory', str(basedir), '-p1', + str(patchfile)] + if verbose: + cmd += ['--verbose'] + subprocess.run(cmd, check=True) + + def import_from_upstream(self, upstream_path, + target_path, exclude_files, patch_dir): + '''Copy from the upstream checkout to target_path''' + from_path = upstream_path / self.from_path + to_path = target_path / self.to_path + + # Make sure the target directory actually exists + to_path.parent.mkdir(exist_ok=True, parents=True) + + # Copy src to dst recursively. For directories, we can use + # shutil.copytree. This doesn't support files, though, so we have to + # check for them first. + if from_path.is_file(): + shutil.copy(str(from_path), str(to_path)) + else: + ignore = ignore_patterns(str(upstream_path), *exclude_files) + shutil.copytree(str(from_path), str(to_path), ignore=ignore) + + # Apply any patches to the copied files. If self.patch_dir is None, + # there are none to apply. Otherwise, resolve it relative to patch_dir. 
+ if self.patch_dir is not None: + patches = (patch_dir / self.patch_dir).glob('*.patch') + for patch in sorted(patches): + log.info("Applying patch {} at {}".format(patch, to_path)) + Mapping1.apply_patch(to_path, patch) + + +class Mapping: + '''A class representing the 'mapping' field in a config file + + This should be a list of dicts. + ''' + def __init__(self, items): + self.items = items + + @staticmethod + def make(path, data): + items = [] + assert isinstance(data, list) + for idx, elt in enumerate(data): + if not isinstance(elt, dict): + raise JsonError(path, 'Mapping element {!r} is not a dict.'.format(elt)) + items.append(Mapping1.make(path, idx, elt)) + + return Mapping(items) + + def has_patch_dir(self): + '''Check whether at least one item defines a patch dir''' + for item in self.items: + if item.patch_dir is not None: + return True + return False + + +class LockDesc: + '''A class representing the contents of a lock file''' + def __init__(self, handle): + data = hjson.loads(handle.read(), use_decimal=True) + self.upstream = get_field(handle.name, 'at top-level', data, 'upstream', + constructor=lambda data: Upstream(handle.name, data)) + + +class Desc: + '''A class representing the configuration file''' + + def __init__(self, handle, desc_overrides): + + # Ensure description file matches our naming rules (otherwise we don't + # know the name for the lockfile). This regex checks that we have the + # right suffix and a nonempty name. 
+ if not re.match(r'.+\.vendor\.hjson', handle.name): + raise ValueError("Description file names must have a .vendor.hjson suffix.") + + data = hjson.loads(handle.read(), use_decimal=True) + where = 'at top-level' + + self.apply_overrides(data, desc_overrides) + + path = Path(handle.name) + + def take_path(p): + return path.parent / p + + self.path = path + self.name = get_field(path, where, data, 'name', expected_type=str) + self.target_dir = get_field(path, where, data, 'target_dir', + expected_type=str, constructor=take_path) + self.upstream = get_field(path, where, data, 'upstream', + constructor=lambda data: Upstream(path, data)) + self.patch_dir = get_field(path, where, data, 'patch_dir', + optional=True, expected_type=str, constructor=take_path) + self.patch_repo = get_field(path, where, data, 'patch_repo', + optional=True, + constructor=lambda data: PatchRepo(path, data)) + self.exclude_from_upstream = (get_field(path, where, data, 'exclude_from_upstream', + optional=True, expected_type=list) or + []) + self.mapping = get_field(path, where, data, 'mapping', optional=True, + expected_type=list, + constructor=lambda data: Mapping.make(path, data)) + + # Add default exclusions + self.exclude_from_upstream += EXCLUDE_ALWAYS + + # It doesn't make sense to define a patch_repo, but not a patch_dir + # (where should we put the patches that we get?) + if self.patch_repo is not None and self.patch_dir is None: + raise JsonError(path, 'Has patch_repo but not patch_dir.') + + # We don't currently support a patch_repo and a mapping (just because + # we haven't written the code to generate the patches across subdirs + # yet). Tracked in issue #2317. 
+ if self.patch_repo is not None and self.mapping is not None: + raise JsonError(path, + "vendor.py doesn't currently support patch_repo " + "and mapping at the same time (see issue #2317).") + + # If a patch_dir is defined and there is no mapping, we will look in + # that directory for patches and apply them in (the only) directory + # that we copy stuff into. + # + # If there is a mapping check that there is a patch_dir if and only if + # least one mapping entry uses it. + if self.mapping is not None: + if self.patch_dir is not None: + if not self.mapping.has_patch_dir(): + raise JsonError(path, 'Has patch_dir, but no mapping item uses it.') + else: + if self.mapping.has_patch_dir(): + raise JsonError(path, + 'Has a mapping item with a patch directory, ' + 'but there is no global patch_dir key.') + + # Check that exclude_from_upstream really is a list of strings. Most of + # this type-checking is in the constructors for field types, but we + # don't have a "ExcludeList" class, so have to do it explicitly here. + for efu in self.exclude_from_upstream: + if not isinstance(efu, str): + raise JsonError(path, + 'exclude_from_upstream has entry {}, which is not a string.' 
+ .format(efu)) + + def apply_overrides(self, desc_data, desc_overrides): + """ Apply overrides from command line to configuration file data + + Updates are applied to the desc_data reference.""" + + for key, value in desc_overrides: + log.info("Overriding description key {!r} with value {!r}".format( + key, value)) + ref = desc_data + split_keys = key.split('.') + for key_part in split_keys[:-1]: + if key_part not in ref: + ref[key_part] = {} + ref = ref[key_part] + ref[split_keys[-1]] = value + + def lock_file_path(self): + desc_file_stem = self.path.name.rsplit('.', 2)[0] + return self.path.with_name(desc_file_stem + '.lock.hjson') + + def import_from_upstream(self, upstream_path): + log.info('Copying upstream sources to {}'.format(self.target_dir)) + + # Remove existing directories before importing them again + shutil.rmtree(str(self.target_dir), ignore_errors=True) + + items = (self.mapping.items if self.mapping is not None + else [Mapping1.make_default(self.patch_dir is not None)]) + for map1 in items: + map1.import_from_upstream(upstream_path, + self.target_dir, + self.exclude_from_upstream, + self.patch_dir) + + +def refresh_patches(desc): + if desc.patch_repo is None: + log.fatal('Unable to refresh patches, patch_repo not set in config.') + sys.exit(1) + + log.info('Refreshing patches in {}'.format(desc.patch_dir)) + + # remove existing patches + for patch in desc.patch_dir.glob('*.patch'): + os.unlink(str(patch)) + + # get current patches + _export_patches(desc.patch_repo.url, desc.patch_dir, + desc.patch_repo.rev_base, + desc.patch_repo.rev_patched) + + +def _export_patches(patchrepo_clone_url, target_patch_dir, upstream_rev, + patched_rev): + with tempfile.TemporaryDirectory() as clone_dir: + clone_git_repo(patchrepo_clone_url, clone_dir, patched_rev) + rev_range = 'origin/' + upstream_rev + '..' 
+ 'origin/' + patched_rev + cmd = [ + 'git', + 'format-patch', + '--no-signature', + '--no-stat', + '-o', + str(target_patch_dir.resolve()), + rev_range + ] + if not verbose: + cmd += ['-q'] + subprocess.run(cmd, cwd=str(clone_dir), check=True) + + +def ignore_patterns(base_dir, *patterns): + """Similar to shutil.ignore_patterns, but with support for directory excludes.""" + def _rel_to_base(path, name): + return os.path.relpath(os.path.join(path, name), base_dir) + + def _ignore_patterns(path, names): + ignored_names = [] + for pattern in patterns: + pattern_matches = [ + n for n in names + if fnmatch.fnmatch(_rel_to_base(path, n), pattern) + ] + ignored_names.extend(pattern_matches) + return set(ignored_names) + + return _ignore_patterns + + +def clone_git_repo(repo_url, clone_dir, rev='master'): + log.info('Cloning upstream repository %s @ %s', repo_url, rev) + + # Clone the whole repository + cmd = ['git', 'clone', '--no-single-branch'] + if not verbose: + cmd += ['-q'] + cmd += [repo_url, str(clone_dir)] + subprocess.run(cmd, check=True) + + # Check out exactly the revision requested + cmd = ['git', '-C', str(clone_dir), 'checkout', '--force', rev] + if not verbose: + cmd += ['-q'] + subprocess.run(cmd, check=True) + + # Get revision information + cmd = ['git', '-C', str(clone_dir), 'rev-parse', 'HEAD'] + rev = subprocess.run(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + universal_newlines=True).stdout.strip() + log.info('Cloned at revision %s', rev) + return rev + + +def git_get_short_rev(clone_dir, rev): + """ Get the shortened SHA-1 hash for a revision """ + cmd = ['git', '-C', str(clone_dir), 'rev-parse', '--short', rev] + short_rev = subprocess.run(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + universal_newlines=True).stdout.strip() + return short_rev + + +def git_add_commit(paths, commit_msg): + """ Stage and commit all changes in paths""" + + assert paths + base_dir = paths[0].parent + + # Stage all 
changes + # + # Rather than figuring out GIT_DIR properly, we cheat and use "git -C" to + # pretend that we're running in base_dir. Of course, the elements of paths + # are relative to our actual working directory. Rather than do anything + # clever, we just resolve them to absolute paths as we go. + abs_paths = [p.resolve() for p in paths] + subprocess.run(['git', '-C', base_dir, 'add'] + abs_paths, check=True) + + cmd_commit = ['git', '-C', base_dir, 'commit', '-s', '-F', '-'] + try: + subprocess.run(cmd_commit, + check=True, + universal_newlines=True, + input=commit_msg) + except subprocess.CalledProcessError: + log.warning("Unable to create commit. Are there no changes?") + + +def define_arg_type(arg): + """Sanity-check and return a config file override argument""" + try: + (key, value) = [v.strip() for v in arg.split('=', 2)] + except Exception: + raise argparse.ArgumentTypeError( + 'unable to parse {!r}: configuration overrides must be in the form key=value' + .format(arg)) + + if key not in OVERRIDABLE_DESC_KEYS: + raise argparse.ArgumentTypeError( + 'invalid configuration override: key {!r} cannot be overwritten' + .format(key)) + return (key, value) + + +def main(argv): + parser = argparse.ArgumentParser(prog="vendor", description=__doc__) + parser.add_argument( + '--update', + '-U', + dest='update', + action='store_true', + help='Update locked version of repository with upstream changes') + parser.add_argument('--refresh-patches', + action='store_true', + help='Refresh the patches from the patch repository') + parser.add_argument('--commit', + '-c', + action='store_true', + help='Commit the changes') + parser.add_argument('--desc-override', + '-D', + dest="desc_overrides", + action="append", + type=define_arg_type, + default=[], + help='Override a setting in the description file. ' + 'Format: -Dsome.key=value. 
def main(argv):
    """Command-line entry point of the vendoring tool.

    Parses the command line, loads the vendoring description file and its
    lock file (if present), clones the upstream repository into a temporary
    directory, imports the selected files, and optionally rewrites the lock
    file (``--update``) and commits the result (``--commit``).

    Args:
        argv: Full argument vector including the program name at index 0
            (i.e. the shape of ``sys.argv``).

    Raises:
        SystemExit: On invalid command-line arguments, on description/lock
            file errors, on a dirty working directory with ``--commit``, on
            a revision mismatch when re-cloning the locked revision, or if
            the requested upstream sub-directory does not exist.
    """
    parser = argparse.ArgumentParser(prog="vendor", description=__doc__)
    parser.add_argument(
        '--update',
        '-U',
        dest='update',
        action='store_true',
        help='Update locked version of repository with upstream changes')
    parser.add_argument('--refresh-patches',
                        action='store_true',
                        help='Refresh the patches from the patch repository')
    parser.add_argument('--commit',
                        '-c',
                        action='store_true',
                        help='Commit the changes')
    parser.add_argument('--desc-override',
                        '-D',
                        dest="desc_overrides",
                        action="append",
                        type=define_arg_type,
                        default=[],
                        help='Override a setting in the description file. '
                             'Format: -Dsome.key=value. '
                             'Can be used multiple times.')
    parser.add_argument('desc_file',
                        metavar='file',
                        type=argparse.FileType('r', encoding='UTF-8'),
                        help='vendoring description file (*.vendor.hjson)')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose')
    # Parse the vector we were handed (minus the program name) rather than
    # implicitly re-reading sys.argv, so callers other than the __main__
    # guard get the arguments they actually passed.
    args = parser.parse_args(argv[1:])

    global verbose
    verbose = args.verbose
    if (verbose):
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
    else:
        log.basicConfig(format="%(levelname)s: %(message)s")

    # Load input files (desc file; lock file) and check syntax etc.
    try:
        # Load description file
        desc = Desc(args.desc_file, args.desc_overrides)
        lock_file_path = desc.lock_file_path()

        # Try to load lock file (which might not exist)
        try:
            with open(str(lock_file_path), 'r') as lock_file:
                lock = LockDesc(lock_file)
        except FileNotFoundError:
            lock = None
    except (JsonError, ValueError) as err:
        log.fatal(str(err))
        raise SystemExit(1)

    # Check for a clean working directory when commit is requested
    if args.commit:
        if not git_is_clean_workdir(desc.path.parent):
            log.fatal("A clean git working directory is required for "
                      "--commit/-c. git stash your changes and try again.")
            raise SystemExit(1)

    if lock is None and not args.update:
        log.warning("No lock file at {}, so will update upstream repo."
                    .format(str(desc.lock_file_path())))
        args.update = True

    # If we have a lock file and we're not in update mode, override desc's
    # upstream field with the one from the lock file. Keep track of whether the
    # URL differs (in which case, we can't get a shortlog)
    changed_url = False
    if lock is not None:
        changed_url = desc.upstream.url != lock.upstream.url
        if not args.update:
            desc.upstream = lock.upstream

    if args.refresh_patches:
        refresh_patches(desc)

    # Everything that needs the upstream checkout (import, shortlog, short
    # revision lookup for the commit message) happens while the temporary
    # clone directory still exists.
    with tempfile.TemporaryDirectory() as clone_dir:
        # clone upstream repository
        upstream_new_rev = clone_git_repo(desc.upstream.url, clone_dir, rev=desc.upstream.rev)

        # Not updating implies a lock file exists (a missing lock file
        # forces update mode above), so `lock` is not None here.
        if not args.update:
            if upstream_new_rev != lock.upstream.rev:
                log.fatal(
                    "Revision mismatch. Unable to re-clone locked version of repository."
                )
                log.fatal("Attempted revision: %s", desc.upstream.rev)
                log.fatal("Re-cloned revision: %s", upstream_new_rev)
                raise SystemExit(1)

        clone_subdir = Path(clone_dir)
        if desc.upstream.only_subdir is not None:
            clone_subdir = clone_subdir / desc.upstream.only_subdir
            if not clone_subdir.is_dir():
                log.fatal("subdir '{}' does not exist in repo"
                          .format(desc.upstream.only_subdir))
                raise SystemExit(1)

        # copy selected files from upstream repo and apply patches as necessary
        desc.import_from_upstream(clone_subdir)

        # get shortlog
        get_shortlog = args.update
        if args.update:
            if lock is None:
                get_shortlog = False
                log.warning("No lock file %s: unable to summarize changes.", str(lock_file_path))
            elif changed_url:
                get_shortlog = False
                log.warning("The repository URL changed since the last run. "
                            "Unable to get log of changes.")

        shortlog = None
        if get_shortlog:
            shortlog = produce_shortlog(clone_subdir, desc.mapping,
                                        lock.upstream.rev, upstream_new_rev)

            # Ensure fully-qualified issue/PR references for GitHub repos
            gh_repo_info = github_parse_url(desc.upstream.url)
            if gh_repo_info:
                shortlog = github_qualify_references(shortlog, gh_repo_info[0],
                                                     gh_repo_info[1])

            log.info("Changes since the last import:\n" +
                     format_list_to_str(shortlog))

        # write lock file
        if args.update:
            lock_data = {}
            lock_data['upstream'] = desc.upstream.as_dict()
            lock_data['upstream']['rev'] = upstream_new_rev
            with open(str(lock_file_path), 'w', encoding='UTF-8') as f:
                f.write(LOCK_FILE_HEADER)
                hjson.dump(lock_data, f)
                f.write("\n")
                log.info("Wrote lock file %s", str(lock_file_path))

        # Commit changes
        if args.commit:
            sha_short = git_get_short_rev(clone_subdir, upstream_new_rev)

            repo_info = github_parse_url(desc.upstream.url)
            if repo_info is not None:
                sha_short = "%s/%s@%s" % (repo_info[0], repo_info[1],
                                          sha_short)

            commit_msg_subject = 'Update %s to %s' % (desc.name, sha_short)
            intro = ('Update code from {}upstream repository {} to revision {}'
                     .format(('' if desc.upstream.only_subdir is None else
                              'subdir {} in '.format(desc.upstream.only_subdir)),
                             desc.upstream.url,
                             upstream_new_rev))
            commit_msg_body = textwrap.fill(intro, width=70)

            if shortlog:
                commit_msg_body += "\n\n"
                commit_msg_body += format_list_to_str(shortlog, width=70)

            commit_msg = commit_msg_subject + "\n\n" + commit_msg_body

            commit_paths = []
            commit_paths.append(desc.target_dir)
            if args.refresh_patches:
                commit_paths.append(desc.patch_dir)
            commit_paths.append(lock_file_path)

            git_add_commit(commit_paths, commit_msg)

    log.info('Import finished')


if __name__ == '__main__':
    try:
        main(sys.argv)
    except subprocess.CalledProcessError as e:
        # e.cmd is whatever was handed to subprocess: either a plain string
        # or a sequence whose items need not be strings (git_add_commit, for
        # example, appends resolved Path objects). Stringify each item so
        # that reporting the failure cannot itself raise a TypeError and
        # hide the real error.
        if isinstance(e.cmd, str):
            cmd_str = e.cmd
        else:
            cmd_str = " ".join(str(part) for part in e.cmd)
        log.fatal("Called program '%s' returned with %d.\n"
                  "STDOUT:\n%s\n"
                  "STDERR:\n%s\n" %
                  (cmd_str, e.returncode, e.stdout, e.stderr))
        raise
    except KeyboardInterrupt:
        log.info("Aborting operation on user request.")
        sys.exit(1)
"STDERR:\n%s\n" % + (" ".join(e.cmd), e.returncode, e.stdout, e.stderr)) + raise + except KeyboardInterrupt: + log.info("Aborting operation on user request.") + sys.exit(1) diff --git a/vendor/pulp-platform/common_cells/.gitignore b/vendor/pulp-platform/common_cells/.gitignore new file mode 100644 index 0000000000..2d00b390c2 --- /dev/null +++ b/vendor/pulp-platform/common_cells/.gitignore @@ -0,0 +1,14 @@ +.* +!.travis.yml +!.git* +*.out +*~ +/Bender.lock +/Bender.local +build +formal/fifo_v3 +formal/counter +formal/fall_through_register +*.check +*.vcd +obj_dir/ diff --git a/vendor/pulp-platform/common_cells/CHANGELOG.md b/vendor/pulp-platform/common_cells/CHANGELOG.md new file mode 100644 index 0000000000..8513988e40 --- /dev/null +++ b/vendor/pulp-platform/common_cells/CHANGELOG.md @@ -0,0 +1,342 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). + +## Unreleased + +## 1.23.0 - 2021-09-05 +### Added +- Add `cc_onehot` +- `isochronous_4phase_handshake`: Isochronous clock domain crossing cutting all paths using a 4-phase handshake. +- Changed `isochronous_spill_register_tb` to `isochronous_crossing_tb` also covering the `isochronous_4phase_handshake` + module. +- Make reset value of `sync` module parameterizable. 
+ +### Changed +- `id_queue`: Allow simultaneous input and output requests in `FULL_BW` mode + +## 1.22.1 - 2021-06-14 +### Fixed +- Remove breaking change of `spill_register` + +## 1.22.0 - 2021-06-09 +### Added +- Add `spill_register_flushable` + +### Changed +- `registers.svh`: Merge explicit and implicit register variants into `` `FF `` and `` `FFL `` macros +- `rr_arb_tree`: Allow flushing locked decision +- Improved `verific` compatibility + +## 1.21.0 - 2021-01-28 +### Changed +- Remove `timeprecision/timeunit` arguments +- Update `common_verification` to `0.2.0` +- Update `tech_cells_generic` to `0.2.3` + +## 1.20.1 - 2021-01-21 +### Changed +- `id_queue`: Replace default or reset value of signals that were assigned `'x` with `'0`. +- `id_queue`: Use `cf_math_pkg::idx_width()` for computation of localparams. + +### Fixed +- Add `XSIM` define guard for statements incompatible with `xsim`. + +## 1.20.0 - 2020-11-04 +### Added +- assertions: Assertion include header with macros (from lowrisc) + +### Changed +- `sram.sv`: Deprecated as it has been moved to `tech_cells_generic` + +### Fixed +- `stream_register`: Fix `DATA_WIDTH` of instantiated FIFO. +- `stream_xbar`: Add missing argument in assertion error string. +- Lint style fixes +- `stream_omega`: Fix parse issue with verible. +- `src_files.yml`: Fix compile order and missing modules. + +## 1.19.0 - 2020-05-25 +### Added +- stream_to_mem: Allows to use memories with flow control (req/gnt) for requests but + without flow control for output data to be used in streams. +- isochronous_spill_register: Isochronous clock domain crossing cutting all paths. +- `rr_arb_tree_tb`: Systemverilog testbench for `rr_arb_tree`, which checks for fair throughput. +- `cf_math_pkg::idx_width`: Constant function for defining the binary representation width + of an index signal. + +### Changed +- `addr_decode`: Use `cf_math_pkg::idx_width` for computing the index width, inline documentation. 
+- `lzc`: Use `cf_math_pkg::idx_width` for computing the index width, inline documentation. +- `Bender`: Change levels of modules affected by depending on `cf_math_pkg::idx_width()`. +- `stream_xbar`: Fully connected stream-based interconnect with variable number of inputs and outputs. +- `stream_xbar`: Fully connected stream-based interconnect with a variable number of inputs and outputs. +- `stream_omega_net`: Stream-based network implementing an omega topology. Variable number of inputs, + outputs and radix. Topology is isomorphic to a butterfly network. + +### Fixed +- Improve tool compatibility. +- `rr_arb_tree`: Properly degenerate `rr_i` and `idx_o` signals. +- `rr_arb_tree`: Add parameter `FairArb` to distribute throughput of input requests evenly when + not all inputs have requests active. +- `stream_demux`: Properly degenerate `inp_sel_i` signal. + +## 1.18.0 - 2020-04-15 +### Added +- stream_fork_dynamic: Wrapper around `stream_fork` for partial forking. +- stream_join: Join multiple Ready/Valid handshakes to one common handshake.
+- SECDED (Single Error Correction, Double Error Detection) encoder and decoder +- SECDED Verilator-based testbench +- Travis build for SECDED module + +## 1.17.0 - 2020-04-09 +### Added +- stream_fifo: Ready/Valid handshake wrapper around `fifo_v3` + +## 1.16.4 - 2020-03-02 +### Fixed +- id_queue: Fix generation of `head_tail_q` registers + +## 1.16.3 - 2020-02-11 +### Fixed +- Handle degenerated `addr_decode` with `NoIndices == 1`, change default parameters to `32'd0` + +## 1.16.2 - 2020-02-04 +### Fixed +- Fix author section in Bender.yml + +## 1.16.1 - 2020-02-03 +### Fixed +- `rr_arb_tree`: Add guard SVA statement for Verilator +- Added missing sources in `Bender.yml` and `src_files.yml` + +## 1.16.0 - 2020-01-13 +### Fixed +- Handle degenerated `onehot_to_bin` with `ONEHOT_WIDTH == 1` +- Handle degenerated `id_queue` with `CAPACITY == 1` or `HT_CAPACITY == 1` +- Fix `cdc_fifo_gray` to be a safe clock domain crossing (CDC) + +## 1.15.0 - 2019-12-09 +### Added +- Added address map decoder module + +### Fixed +- Handle degenerated `lzc` with `WIDTH == 1` + +## 1.14.0 - 2019-10-08 + +### Added +- Added substitution-permutation hash function module +- Added counting-bloom-filter module +- `spill_register`: Added Bypass parameter +- `counter`: Added sticky overflow +- Added counter with variable delta +- Added counter that tracks its maximum value + +### Changed +- Added formal testbench for `fifo` and `fall_through_register` + +## 1.13.1 - 2019-06-01 + +### Changed + +- Fix path in `src_files.yml` for `stream_arbiter` and `stream_arbiter_flushable` + +## 1.13.0 - 2019-05-29 + +### Added + +- Added exponential backoff window module +- Added parametric Galois LFSR module with optional whitening feature +- Added `cf_math_pkg`: Constant Function implementations of mathematical functions for HDL elaboration + +### Changed +- Parametric payload data type for `rr_arb_tree` + +### Deprecated +- The following arbiter implementations are deprecated and superseded by
`rr_arb_tree`: +- Priority arbiter `prioarbiter` +- Round-robin arbiter `rrarbiter` + +### Fixed + +## 1.12.0 - 2019-04-09 + +### Added +- Add priority arbiter +- Add Pseudo Least Recently Used tree +- Add round robin arbiter mux tree + +### Changed +- Add selectable arbiter implementation for `stream_arbiter` and `stream_arbiter_flushable`. One can choose between priority (`prio`) and round-robin arbitration (`rr`). +- Add `$onehot0` assertion in one-hot to bin +- Rework `rrarbiter` unit (uses `rr_arb_tree` implementation underneath) + +## 1.11.0 - 2019-03-20 + +### Added +- Add stream fork +- Add fall-through register +- Add stream filter +- Add ID queue + +### Changed +- `sync_wedge` use existing synchronizer. This defines a single place where a tech-specific synchronizer can be defined. + +### Fixed +- Fix FIFO push and pop signals in `stream_register` to observe interface prerequisites. +- In `fifo_v3`, fix data output when pushing into empty fall-through FIFO. Previously, the data + output of an empty fall-through FIFO with data at its input (and `push_i=1`) depended on + `pop_i`: When `pop_i=0`, old, invalid data were visible at the output (even though `empty_o=0`, + indicating that the data output is valid). Only when `pop_i=1`, the data from the input fell + through. One consequence of this bug was that `data_o` of the `fall_through_register` could change + while `valid_o=1`, violating the basic stream specification. + +## 1.10.0 - 2018-12-18 + +### Added +- Add `fifo_v3` with generic fill count +- Add 16 bit LFSR +- Add stream delayer +- Add stream arbiter +- Add register macros for RTL +- Add shift register + +### Changed +- Make number of registers of `rstgen_bypass` a parameter. + +### Fixed +- Fix `valid_i` and `grant_i` guarantees in `generic_fifo` for backward compatibility. +- LZC: Synthesis of streaming operators in ternary operators +- Add missing entry for `popcount` to `Bender.yml`. 
+- Add default values for parameters to improve compatibility with Synopsys DC and Vivado. + +## 1.9.0 - 2018-11-02 + +### Added +- Add popcount circuit `popcount` + +## 1.8.0 - 2018-10-15 + +### Added +- Add lock feature to the rrarbiter. This prevents the arbiter from changing its decision when we have pending requests that remain unacknowledged for several cycles. +- Add deglitching circuit +- Add generic clock divider +- Add edge detector as alias to sync_wedge (name is more expressive) +- Add generic counter +- Add moving deglitcher + +## 1.7.6 - 2018-09-27 + +### Added +- Add reset synchronizer with explicit reset bypass in testmode + +## 1.7.5 - 2018-09-06 +### Fixed +- Fix incompatibility with verilator +- Fix dependency to open-source repo + +## 1.7.4 - 2018-09-06 +- Fix assertions in `fifo_v2` (write on full / read on empty did not trigger properly) + +## 1.7.3 - 2018-08-27 +### Fixed +- Use proper `fifo_v2` in `generic_fifo` module. + +## 1.7.2 - 2018-08-27 +### Added +- Almost full/empty flags to FIFO, as `fifo_v2`. + +### Changed +- FIFO moved to `fifo_v1` and instantiates `fifo_v2`. + +## 1.7.1 - 2018-08-27 +### Fixed +- Revert breaking changes to `fifo`. + +## 1.7.0 - 2018-08-24 +### Added +- Add stream register (`stream_register`). +- Add stream multiplexer and demultiplexer (`stream_mux`, `stream_demux`). +- Add round robin arbiter (`rrarbiter`). +- Add leading zero counter (`lzc`). + +### Changed +- Deprecate `find_first_one` in favor of `lzc`. + +## 1.6.0 - 2018-04-03 +### Added +- Add binary to Gray code converter. +- Add Gray code to binary converter. +- Add Gray code testbench. +- Add CDC FIFO based on Gray counters. This is a faster alternative to the 2-phase FIFO which also works if a domain's clock has stopped. + +### Changed +- Rename `cdc_fifo` to `cdc_fifo_2phase`. +- Adjust CDC FIFO testbench to cover both implementations. + +## 1.5.4 - 2018-03-31 +### Changed +- Replace explicit clock gate in `fifo` with implicit one.
+ +## 1.5.3 - 2018-03-16 +### Changed +- Remove duplicate deprecated modules. + +## 1.5.2 - 2018-03-16 +### Changed +- Remove deprecated `rstgen` and fix interface. + +## 1.5.1 - 2018-03-16 +### Changed +- Remove deprecated `onehot_to_bin`. + +## 1.5.0 - 2018-03-14 +### Added +- Add behavioural SRAM model + +## 1.4.0 - 2018-03-14 +### Added +- Clock domain crossing FIFO + +### Changed +- Re-name new sync modules to resolve namespace collisions + +## 1.3.0 - 2018-03-12 +### Added +- 2-phase clock domain crossing +- Add old common cells as deprecated legacy modules + +## 1.2.3 - 2018-03-09 +### Added +- Backwards compatibility wrapper for `generic_LFSR_8bit` + +## 1.2.2 - 2018-03-09 +### Added +- Backwards compatibility wrapper for `generic_fifo` + +## 1.2.1 - 2018-03-09 +### Fixed +- Fix an issue in the spill register which causes transactions to be lost + +## 1.2.0 - 2018-03-09 +### Added +- Add spill register + +## 1.1.0 - 2018-03-06 +### Added +- Find first zero + +## 1.0.0 - 2018-03-02 +### Added +- Re-implementation of the generic FIFO supporting all kinds of use-cases +- Testbench for FIFO + +### Changed +- Re-formatting and artistic code clean-up + +## 0.1.0 - 2018-02-23 +### Added +- Fork of PULP common cells repository diff --git a/vendor/pulp-platform/common_cells/LICENSE b/vendor/pulp-platform/common_cells/LICENSE new file mode 100644 index 0000000000..18e4f67692 --- /dev/null +++ b/vendor/pulp-platform/common_cells/LICENSE @@ -0,0 +1,176 @@ +SOLDERPAD HARDWARE LICENSE version 0.51 + +This license is based closely on the Apache License Version 2.0, but is not +approved or endorsed by the Apache Foundation. A copy of the non-modified +Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0. + +As this license is not currently OSI or FSF approved, the Licensor permits any +Work licensed under this License, at the option of the Licensee, to be treated +as licensed under the Apache License Version 2.0 (which is so approved). 
+ +This License is licensed under the terms of this License and in particular +clause 7 below (Disclaimer of Warranties) applies in relation to its use. + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the Rights owner or entity authorized by the Rights owner +that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Rights" means copyright and any similar right including design right (whether +registered or unregistered), semiconductor topography (mask) rights and +database rights (but excluding Patents and Trademarks). + +"Source" form shall mean the preferred form for making modifications, including +but not limited to source code, net lists, board layouts, CAD files, +documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object +code, generated documentation, the instantiation of a hardware design and +conversions to other media types, including intermediate forms such as +bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask +works). 
+ +"Work" shall mean the work of authorship, whether in Source form or other +Object form, made available under the License, as indicated by a Rights notice +that is included in or attached to the work (an example is provided in the +Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) or physically connect to or interoperate with the interfaces of, the Work +and Derivative Works thereof. + +"Contribution" shall mean any design or work of authorship, including the +original version of the Work and any modifications or additions to that Work or +Derivative Works thereof, that is intentionally submitted to Licensor for +inclusion in the Work by the Rights owner or by an individual or Legal Entity +authorized to submit on behalf of the Rights owner. For the purposes of this +definition, "submitted" means any form of electronic, verbal, or written +communication sent to the Licensor or its representatives, including but not +limited to communication on electronic mailing lists, source code control +systems, and issue tracking systems that are managed by, or on behalf of, the +Licensor for the purpose of discussing and improving the Work, but excluding +communication that is conspicuously marked or otherwise designated in writing +by the Rights owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of License. 
Subject to the terms and conditions of this License, each +Contributor hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable license under the Rights to reproduce, +prepare Derivative Works of, publicly display, publicly perform, sublicense, +and distribute the Work and such Derivative Works in Source or Object form and +do anything in relation to the Work as if the Rights did not exist. + +3. Grant of Patent License. Subject to the terms and conditions of this +License, each Contributor hereby grants to You a perpetual, worldwide, +non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this +section) patent license to make, have made, use, offer to sell, sell, import, +and otherwise transfer the Work, where such license applies only to those +patent claims licensable by such Contributor that are necessarily infringed by +their Contribution(s) alone or by combination of their Contribution(s) with the +Work to which such Contribution(s) was submitted. If You institute patent +litigation against any entity (including a cross-claim or counterclaim in a +lawsuit) alleging that the Work or a Contribution incorporated within the Work +constitutes direct or contributory patent infringement, then any patent +licenses granted to You under this License for that Work shall terminate as of +the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the Work or +Derivative Works thereof in any medium, with or without modifications, and in +Source or Object form, provided that You meet the following conditions: + + You must give any other recipients of the Work or Derivative Works a copy + of this License; and + + You must cause any modified files to carry prominent notices stating that + You changed the files; and + + You must retain, in the Source form of any Derivative Works that You + distribute, all copyright, patent, trademark, and attribution notices from + the Source form of the Work, excluding those notices that do not pertain to + any part of the Derivative Works; and + + If the Work includes a "NOTICE" text file as part of its distribution, then + any Derivative Works that You distribute must include a readable copy of + the attribution notices contained within such NOTICE file, excluding those + notices that do not pertain to any part of the Derivative Works, in at + least one of the following places: within a NOTICE text file distributed as + part of the Derivative Works; within the Source form or documentation, if + provided along with the Derivative Works; or, within a display generated by + the Derivative Works, if and wherever such third-party notices normally + appear. The contents of the NOTICE file are for informational purposes only + and do not modify the License. You may add Your own attribution notices + within Derivative Works that You distribute, alongside or as an addendum to + the NOTICE text from the Work, provided that such additional attribution + notices cannot be construed as modifying the License. 
You may add Your own + copyright statement to Your modifications and may provide additional or + different license terms and conditions for use, reproduction, or + distribution of Your modifications, or for any such Derivative Works as a + whole, provided Your use, reproduction, and distribution of the Work + otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any +Contribution intentionally submitted for inclusion in the Work by You to the +Licensor shall be under the terms and conditions of this License, without any +additional terms or conditions. Notwithstanding the above, nothing herein shall +supersede or modify the terms of any separate license agreement you may have +executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, +trademarks, service marks, or product names of the Licensor, except as required +for reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in +writing, Licensor provides the Work (and each Contributor provides its +Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied, including, without limitation, any warranties +or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. You are solely responsible for determining the +appropriateness of using or redistributing the Work and assume any risks +associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in +tort (including negligence), contract, or otherwise, unless required by +applicable law (such as deliberate and grossly negligent acts) or agreed to in +writing, shall any Contributor be liable to You for damages, including any +direct, indirect, special, incidental, or consequential damages of any +character arising as a result of this License or out of the use or inability to +use the Work (including but not limited to damages for loss of goodwill, work +stoppage, computer failure or malfunction, or any and all other commercial +damages or losses), even if such Contributor has been advised of the +possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or +Derivative Works thereof, You may choose to offer, and charge a fee for, +acceptance of support, warranty, indemnity, or other liability obligations +and/or rights consistent with this License. However, in accepting such +obligations, You may act only on Your own behalf and on Your sole +responsibility, not on behalf of any other Contributor, and only if You agree +to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS diff --git a/vendor/pulp-platform/common_cells/README.md b/vendor/pulp-platform/common_cells/README.md new file mode 100644 index 0000000000..e147638071 --- /dev/null +++ b/vendor/pulp-platform/common_cells/README.md @@ -0,0 +1,181 @@ +[![Build Status](https://travis-ci.com/pulp-platform/common_cells.svg?branch=master)](https://travis-ci.com/pulp-platform/common_cells) +[![GitHub tag (latest SemVer)](https://img.shields.io/github/v/tag/pulp-platform/common_cells?color=blue&label=current&sort=semver)](CHANGELOG.md) +[![SHL-0.51 license](https://img.shields.io/badge/license-SHL--0.51-green)](LICENSE) + +# Common Cells Repository + +Maintainer: Florian Zaruba + +This repository contains commonly used cells and headers for use in various projects. + +## Cell Contents + +This repository currently contains the following cells, ordered by categories. +Please note that cells with status *deprecated* are not to be used for new designs and only serve to provide compatibility with old code. 
+ +### Clocks and Resets + +| Name | Description | Status | Superseded By | +| ----------------------- | --------------------------------------------------- | ------------ | ------------- | +| `clk_div` | Clock divider with integer divisor | active | | +| `clock_divider` | Clock divider with configuration registers | *deprecated* | `clk_div` | +| `clock_divider_counter` | Clock divider using a counter | *deprecated* | `clk_div` | +| `rstgen` | Reset synchronizer | active | | +| `rstgen_bypass` | Reset synchronizer with dedicated test reset bypass | active | | + +### Clock Domains and Asynchronous Crossings + +| Name | Description | Status | Superseded By | +|--------------------------------|----------------------------------------------------------------------------------|--------------|---------------| +| `cdc_2phase` | Clock domain crossing using two-phase handshake, with ready/valid interface | active | | +| `cdc_fifo_2phase` | Clock domain crossing FIFO using two-phase handshake, with ready/valid interface | active | | +| `cdc_fifo_gray` | Clock domain crossing FIFO using a gray-counter, with ready/valid interface | active | | +| `edge_detect` | Rising/falling edge detector | active | | +| `edge_propagator` | **ANTONIO ADD DESCRIPTION** | active | | +| `edge_propagator_rx` | **ANTONIO ADD DESCRIPTION** | active | | +| `edge_propagator_tx` | **ANTONIO ADD DESCRIPTION** | active | | +| `isochronous_spill_register` | Isochronous clock domain crossing and full handshake (like `spill_register`) | active | | +| `isochronous_4phase_handshake` | Isochronous four-phase handshake. 
| active | | +| `pulp_sync` | Serial line synchronizer | *deprecated* | `sync` | +| `pulp_sync_wedge` | Serial line synchronizer with edge detector | *deprecated* | `sync_wedge` | +| `serial_deglitch` | Serial line deglitcher | active | | +| `sync` | Serial line synchronizer | active | | +| `sync_wedge` | Serial line synchronizer with edge detector | active | | + +### Counters and Shift Registers + +| Name | Description | Status | Superseded By | +| ------------------- | ----------------------------------------------------------------- | ------------ | ------------- | +| `counter` | Generic up/down counter with overflow detection | active | | +| `delta_counter` | Up/down counter with variable delta and overflow detection | active | | +| `generic_LFSR_8bit` | 8-bit linear feedback shift register (LFSR) | *deprecated* | `lfsr_8bit` | +| `lfsr_8bit` | 8-bit linear feedback shift register (LFSR) | active | | +| `lfsr_16bit` | 16-bit linear feedback shift register (LFSR) | active | | +| `lfsr` | 4...64-bit parametric Galois LFSR with optional whitening feature | active | | +| `max_counter` | Up/down counter with variable delta that tracks its maximum value | active | | +| `mv_filter` | **ZARUBAF ADD DESCRIPTION** | active | | + +### Data Path Elements + +| Name | Description | Status | Superseded By | +| -------------------------- | --------------------------------------------------------------------------------------------------------- | ------------ | ------------- | +| `addr_decode ` | Address map decoder | active | | +| `ecc_decode` | SECDED Decoder (Single Error Correction, Double Error Detection) | active | | +| `ecc_encode` | SECDED Encoder (Single Error Correction, Double Error Detection) | active | | +| `binary_to_gray` | Binary to gray code converter | active | | +| `find_first_one` | Leading-one finder / leading-zero counter | *deprecated* | `lzc` | +| `gray_to_binary` | Gray code to binary converter | active | | +| `lzc` | Leading/trailing-zero counter | 
active | | +| `onehot_to_bin` | One-hot to binary converter | active | | +| `shift_reg` | Shift register for arbitrary types | active | | +| `rr_arb_tree` | Round-robin arbiter for req/gnt and vld/rdy interfaces with optional priority | active | | +| `rrarbiter` | Round-robin arbiter for req/ack interface with look-ahead | *deprecated* | `rr_arb_tree` | +| `prioarbiter` | Priority arbiter for req/ack interface with look-ahead | *deprecated* | `rr_arb_tree` | +| `fall_through_register` | Fall-through register with ready/valid interface | active | | +| `spill_register_flushable` | Register with ready/valid interface to cut all combinational interface paths and additional flush signal. | active | | +| `spill_register` | Register with ready/valid interface to cut all combinational interface paths | active | | +| `stream_arbiter` | Round-robin arbiter for ready/valid stream interface | active | | +| `stream_arbiter_flushable` | Round-robin arbiter for ready/valid stream interface and flush functionality | active | | +| `stream_demux` | Ready/valid interface demultiplexer | active | | +| `stream_join` | Ready/valid handshake join multiple to one common | active | | +| `stream_mux` | Ready/valid interface multiplexer | active | | +| `stream_register` | Register with ready/valid interface | active | | +| `stream_fork` | Ready/valid fork | active | | +| `stream_fork_dynamic` | Ready/valid fork, with selection mask for partial forking | active | | +| `stream_filter` | Ready/valid filter | active | | +| `stream_delay` | Randomize or delay ready/valid interface | active | | +| `stream_to_mem` | Use memories without flow control for output data in streams. | active | | +| `stream_xbar` | Fully connected crossbar with ready/valid interface. | active | | +| `stream_omega_net` | One-way stream omega-net with ready/valid interface. Isomorphic to a butterfly.
| active | | +| `sub_per_hash` | Substitution-permutation hash function | active | | +| `popcount` | Combinatorial popcount (hamming weight) | active | | + +### Data Structures + +| Name | Description | Status | Superseded By | +| ------------------ | ----------------------------------------------- | ------------ | ------------- | +| `cb_filter` | Counting-Bloom-Filter with combinational lookup | active | | +| `fifo` | FIFO register with upper threshold | *deprecated* | `fifo_v3` | +| `fifo_v2` | FIFO register with upper and lower threshold | *deprecated* | `fifo_v3` | +| `fifo_v3` | FIFO register with generic fill counts | active | | +| `stream_fifo` | FIFO register with ready/valid interface | active | | +| `generic_fifo` | FIFO register without thresholds | *deprecated* | `fifo_v3` | +| `generic_fifo_adv` | FIFO register without thresholds | *deprecated* | `fifo_v3` | +| `sram` | SRAM behavioral model | active | | +| `plru_tree` | Pseudo least recently used tree | active | | +| `unread` | Empty module to sink unconnected outputs into | active | | + + +## Header Contents + +This repository currently contains the following header files. + +### RTL Register Macros + +The header file `registers.svh` contains macros that expand to descriptions of registers. +To avoid misuse of `always_ff` blocks, only the following macros shall be used to describe sequential behavior. +The use of linter rules that flag explicit uses of `always_ff` in source code is encouraged. 
+ +| Macro | Arguments | Description | +| ------------ | ----------------------------------------------------------------- | ------------------------------------------------------------------------- | +| `` `FF`` | `q_sig`, `d_sig`, `rst_val`, (`clk_sig`, `arstn_sig`) | Flip-flop with asynchronous active-low reset | +| `` `FFAR`` | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `arst_sig` | Flip-flop with asynchronous active-high reset | +| `` `FFARN`` | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `arstn_sig` | *deprecated* Flip-flop with asynchronous active-low reset | +| `` `FFSR`` | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `rst_sig` | Flip-flop with synchronous active-high reset | +| `` `FFSRN`` | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `rstn_sig` | Flip-flop with synchronous active-low reset | +| `` `FFNR`` | `q_sig`, `d_sig`, `clk_sig` | Flip-flop without reset | +| | | | +| `` `FFL`` | `q_sig`, `d_sig`, `load_ena`, `rst_val`, (`clk_sig`, `arstn_sig`) | Flip-flop with load-enable and asynchronous active-low reset | +| `` `FFLAR`` | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `arst_sig` | Flip-flop with load-enable and asynchronous active-high reset | +| `` `FFLARN`` | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `arstn_sig` | *deprecated* Flip-flop with load-enable and asynchronous active-low reset | +| `` `FFLSR`` | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `rst_sig` | Flip-flop with load-enable and synchronous active-high reset | +| `` `FFLSRN`` | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `rstn_sig` | Flip-flop with load-enable and synchronous active-low reset | +| `` `FFLNR`` | `q_sig`, `d_sig`, `load_ena`, `clk_sig` | Flip-flop with load-enable without reset | +- *The name of the clock and reset signals for implicit variants is `clk_i` and `rst_ni`, respectively.* +- *Argument suffix `_sig` indicates signal names for present and next state as well as clocks and resets.* +- *Argument `rst_val` specifies the value literal to be 
assigned upon reset.* +- *Argument `load_ena` specifies the boolean expression that forms the load enable of the register.* + +### SystemVerilog Assertion Macros + +The header file `assertions.svh` contains macros that expand to assertion blocks. +These macros should reduce the effort in writing many assertions and make it +easier to use them. They are identical with the macros used by [lowrisc](https://github.com/lowRISC/opentitan/blob/master/hw/ip/prim/rtl/prim_assert.sv) +and just re-implemented here for the sake of easier use in PULP projects (the same include guard is used so they should not clash). + +#### Simple Assertion and Cover Macros +| Macro | Arguments | Description | +| ----------------------------------------------------------- | -------------------------------------------------------------------------- | ----------- | +| `` `ASSERT_I`` | `__name`, `__prop` | Immediate assertion | +| `` `ASSERT_INIT`` | `__name`, `__prop` | Assertion in initial block. Can be used for things like parameter checking | +| `` `ASSERT_FINAL`` | `__name`, `__prop` | Assertion in final block | +| `` `ASSERT`` | `__name`, `__prop`, (`__clk`, `__rst`) | Assert a concurrent property directly | +| `` `ASSERT_NEVER`` | `__name`, `__prop`, (`__clk`, `__rst`) | Assert a concurrent property NEVER happens | +| `` `ASSERT_KNOWN`` | `__name`, `__sig`, (`__clk`, `__rst`) | Concurrent clocked assertion with custom error message | +| `` `COVER`` | `__name`, `__prop`, (`__clk`, `__rst`) | Cover a concurrent property | +- *The name of the clock and reset signals for implicit variants is `clk_i` and `rst_ni`, respectively.* + +#### Complex Assertion Macros +| Macro | Arguments | Description | +| -------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ----------- | +| `` `ASSERT_PULSE`` | `__name`, `__sig`, (`__clk`, `__rst`) | Assert that signal is an active-high pulse 
with pulse length of 1 clock cycle | +| `` `ASSERT_IF`` | `__name`, `__prop`, `__enable`, (`__clk`, `__rst`) | Assert that a property is true only when an enable signal is set | +| `` `ASSERT_KNOWN_IF`` | `__name`, `__sig`, `__enable`, (`__clk`, `__rst`) | Assert that signal has a known value (each bit is either '0' or '1') after reset if enable is set | +- *The name of the clock and reset signals for implicit variants is `clk_i` and `rst_ni`, respectively.* + +#### Assumption Macros + +| Macro | Arguments | Description | +| ------------------------------------------------------- | ---------------------------- | ----------- | +| `` `ASSUME`` | `__name`, `__prop`, (`__clk`, `__rst`) | Assume a concurrent property | +| `` `ASSUME_I`` | `__name`, `__prop` | Assume an immediate property | +- *The name of the clock and reset signals for implicit variants is `clk_i` and `rst_ni`, respectively.* + +#### Formal Verification Macros + +| Macro | Arguments | Description | +| ----------------------------------------------------------- | ------------------------------------------------------------ | ----------- | +| `` `ASSUME_FPV`` | `__name`, `__prop`, (`__clk`, `__rst`) | Assume a concurrent property during formal verification only | +| `` `ASSUME_I_FPV`` | `__name`, `__prop` | Assume a concurrent property during formal verification only | +| `` `COVER_FPV`` | `__name`, `__prop`, (`__clk`, `__rst`) | Cover a concurrent property during formal verification | +- *The name of the clock and reset signals for implicit variants is `clk_i` and `rst_ni`, respectively.* diff --git a/vendor/pulp-platform/common_cells/include/common_cells/assertions.svh b/vendor/pulp-platform/common_cells/include/common_cells/assertions.svh new file mode 100644 index 0000000000..b6b4b73782 --- /dev/null +++ b/vendor/pulp-platform/common_cells/include/common_cells/assertions.svh @@ -0,0 +1,201 @@ +// Copyright lowRISC contributors. 
+// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Macros and helper code for using assertions. +// - Provides default clk and rst options to simplify code +// - Provides boiler plate template for common assertions + +`ifndef PRIM_ASSERT_SV +`define PRIM_ASSERT_SV + +`ifdef UVM + // report assertion error with UVM if compiled + package assert_rpt_pkg; + import uvm_pkg::*; + `include "uvm_macros.svh" + function void assert_rpt(string msg); + `uvm_error("ASSERT FAILED", msg) + endfunction + endpackage +`endif + +/////////////////// +// Helper macros // +/////////////////// + +// local helper macro to reduce code clutter. undefined at the end of this file +`ifndef VERILATOR +`ifndef SYNTHESIS +`ifndef XSIM +`define INC_ASSERT +`endif +`endif +`endif + +// Converts an arbitrary block of code into a Verilog string +`define PRIM_STRINGIFY(__x) `"__x`" + +// ASSERT_RPT is available to change the reporting mechanism when an assert fails +`define ASSERT_RPT(__name) \ +`ifdef UVM \ + assert_rpt_pkg::assert_rpt($sformatf("[%m] %s (%s:%0d)", \ + __name, `__FILE__, `__LINE__)); \ +`else \ + $error("[ASSERT FAILED] [%m] %s (%s:%0d)", __name, `__FILE__, `__LINE__); \ +`endif + +/////////////////////////////////////// +// Simple assertion and cover macros // +/////////////////////////////////////// + +// Default clk and reset signals used by assertion macros below. +`define ASSERT_DEFAULT_CLK clk_i +`define ASSERT_DEFAULT_RST !rst_ni + +// Immediate assertion +// Note that immediate assertions are sensitive to simulation glitches. +`define ASSERT_I(__name, __prop) \ +`ifdef INC_ASSERT \ + __name: assert (__prop) \ + else begin \ + `ASSERT_RPT(`PRIM_STRINGIFY(__name)) \ + end \ +`endif + +// Assertion in initial block. Can be used for things like parameter checking. 
+`define ASSERT_INIT(__name, __prop) \ +`ifdef INC_ASSERT \ + initial begin \ + __name: assert (__prop) \ + else begin \ + `ASSERT_RPT(`PRIM_STRINGIFY(__name)) \ + end \ + end \ +`endif + +// Assertion in final block. Can be used for things like queues being empty +// at end of sim, all credits returned at end of sim, state machines in idle +// at end of sim. +`define ASSERT_FINAL(__name, __prop) \ +`ifdef INC_ASSERT \ + final begin \ + __name: assert (__prop || $test$plusargs("disable_assert_final_checks")) \ + else begin \ + `ASSERT_RPT(`PRIM_STRINGIFY(__name)) \ + end \ + end \ +`endif + +// Assert a concurrent property directly. +// It can be called as a module (or interface) body item. +`define ASSERT(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef INC_ASSERT \ + __name: assert property (@(posedge __clk) disable iff ((__rst) !== '0) (__prop)) \ + else begin \ + `ASSERT_RPT(`PRIM_STRINGIFY(__name)) \ + end \ +`endif +// Note: Above we use (__rst !== '0) in the disable iff statements instead of +// (__rst == '1). This properly disables the assertion in cases when reset is X at +// the beginning of a simulation. For that case, (reset == '1) does not disable the +// assertion. + +// Assert a concurrent property NEVER happens +`define ASSERT_NEVER(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef INC_ASSERT \ + __name: assert property (@(posedge __clk) disable iff ((__rst) !== '0) not (__prop)) \ + else begin \ + `ASSERT_RPT(`PRIM_STRINGIFY(__name)) \ + end \ +`endif + +// Assert that signal has a known value (each bit is either '0' or '1') after reset. +// It can be called as a module (or interface) body item. 
+`define ASSERT_KNOWN(__name, __sig, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef INC_ASSERT \ + `ASSERT(__name, !$isunknown(__sig), __clk, __rst) \ +`endif + +// Cover a concurrent property +`define COVER(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef INC_ASSERT \ + __name: cover property (@(posedge __clk) disable iff ((__rst) !== '0) (__prop)); \ +`endif + +////////////////////////////// +// Complex assertion macros // +////////////////////////////// + +// Assert that signal is an active-high pulse with pulse length of 1 clock cycle +`define ASSERT_PULSE(__name, __sig, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef INC_ASSERT \ + `ASSERT(__name, $rose(__sig) |=> !(__sig), __clk, __rst) \ +`endif + +// Assert that a property is true only when an enable signal is set. It can be called as a module +// (or interface) body item. +`define ASSERT_IF(__name, __prop, __enable, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef INC_ASSERT \ + `ASSERT(__name, (__enable) |-> (__prop), __clk, __rst) \ +`endif + +// Assert that signal has a known value (each bit is either '0' or '1') after reset if enable is +// set. It can be called as a module (or interface) body item. 
+`define ASSERT_KNOWN_IF(__name, __sig, __enable, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef INC_ASSERT \ + `ASSERT_KNOWN(__name``KnownEnable, __enable, __clk, __rst) \ + `ASSERT_IF(__name, !$isunknown(__sig), __enable, __clk, __rst) \ +`endif + +/////////////////////// +// Assumption macros // +/////////////////////// + +// Assume a concurrent property +`define ASSUME(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef INC_ASSERT \ + __name: assume property (@(posedge __clk) disable iff ((__rst) !== '0) (__prop)) \ + else begin \ + `ASSERT_RPT(`PRIM_STRINGIFY(__name)) \ + end \ +`endif + +// Assume an immediate property +`define ASSUME_I(__name, __prop) \ +`ifdef INC_ASSERT \ + __name: assume (__prop) \ + else begin \ + `ASSERT_RPT(`PRIM_STRINGIFY(__name)) \ + end \ +`endif + +////////////////////////////////// +// For formal verification only // +////////////////////////////////// + +// Note that the existing set of ASSERT macros specified above shall be used for FPV, +// thereby ensuring that the assertions are evaluated during DV simulations as well. + +// ASSUME_FPV +// Assume a concurrent property during formal verification only. +`define ASSUME_FPV(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef FPV_ON \ + `ASSUME(__name, __prop, __clk, __rst) \ +`endif + +// ASSUME_I_FPV +// Assume a concurrent property during formal verification only. 
+`define ASSUME_I_FPV(__name, __prop) \ +`ifdef FPV_ON \ + `ASSUME_I(__name, __prop) \ +`endif + +// COVER_FPV +// Cover a concurrent property during formal verification +`define COVER_FPV(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \ +`ifdef FPV_ON \ + `COVER(__name, __prop, __clk, __rst) \ +`endif + +`endif // PRIM_ASSERT_SV diff --git a/vendor/pulp-platform/common_cells/include/common_cells/registers.svh b/vendor/pulp-platform/common_cells/include/common_cells/registers.svh new file mode 100644 index 0000000000..b64f31a013 --- /dev/null +++ b/vendor/pulp-platform/common_cells/include/common_cells/registers.svh @@ -0,0 +1,221 @@ +// Copyright 2018, 2021 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Stefan Mach +// Description: Common register defines for RTL designs + +`ifndef COMMON_CELLS_REGISTERS_SVH_ +`define COMMON_CELLS_REGISTERS_SVH_ + +// Abridged Summary of available FF macros: +// `FF: asynchronous active-low reset +// `FFAR: asynchronous active-high reset +// `FFARN: [deprecated] asynchronous active-low reset +// `FFSR: synchronous active-high reset +// `FFSRN: synchronous active-low reset +// `FFNR: without reset +// `FFL: load-enable and asynchronous active-low reset +// `FFLAR: load-enable and asynchronous active-high reset +// `FFLARN: [deprecated] load-enable and asynchronous active-low reset +// `FFLARNC: load-enable and asynchronous active-low reset and synchronous active-high clear +// `FFLSR: load-enable and synchronous active-high reset +// `FFLSRN: load-enable and synchronous active-low reset +// `FFLNR: load-enable without reset + +`ifdef VERILATOR +`define NO_SYNOPSYS_FF 1 +`endif + +`define REG_DFLT_CLK clk_i +`define REG_DFLT_RST rst_ni + +// Flip-Flop with asynchronous active-low reset +// __q: Q output of FF +// __d: D input of FF +// __reset_value: value assigned upon reset +// (__clk: clock input) +// (__arst_n: asynchronous reset, active-low) +`define FF(__q, __d, __reset_value, __clk = `REG_DFLT_CLK, __arst_n = `REG_DFLT_RST) \ + always_ff @(posedge (__clk) or negedge (__arst_n)) begin \ + if (!__arst_n) begin \ + __q <= (__reset_value); \ + end else begin \ + __q <= (__d); \ + end \ + end + +// Flip-Flop with asynchronous active-high reset +// __q: Q output of FF +// __d: D input of FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __arst: asynchronous reset, active-high +`define FFAR(__q, __d, __reset_value, __clk, __arst) \ + always_ff @(posedge (__clk) or posedge (__arst)) begin \ + if (__arst) begin \ + __q <= (__reset_value); \ + end else begin \ + __q <= (__d); \ + end \ + end + +// DEPRECATED - use `FF instead +// Flip-Flop with asynchronous 
active-low reset +// __q: Q output of FF +// __d: D input of FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __arst_n: asynchronous reset, active-low +`define FFARN(__q, __d, __reset_value, __clk, __arst_n) \ + `FF(__q, __d, __reset_value, __clk, __arst_n) + +// Flip-Flop with synchronous active-high reset +// __q: Q output of FF +// __d: D input of FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __reset_clk: reset input, active-high +`define FFSR(__q, __d, __reset_value, __clk, __reset_clk) \ + `ifndef NO_SYNOPSYS_FF \ + /``* synopsys sync_set_reset `"__reset_clk`" *``/ \ + `endif \ + always_ff @(posedge (__clk)) begin \ + __q <= (__reset_clk) ? (__reset_value) : (__d); \ + end + +// Flip-Flop with synchronous active-low reset +// __q: Q output of FF +// __d: D input of FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __reset_n_clk: reset input, active-low +`define FFSRN(__q, __d, __reset_value, __clk, __reset_n_clk) \ + `ifndef NO_SYNOPSYS_FF \ + /``* synopsys sync_set_reset `"__reset_n_clk`" *``/ \ + `endif \ + always_ff @(posedge (__clk)) begin \ + __q <= (!__reset_n_clk) ? (__reset_value) : (__d); \ + end + +// Always-enable Flip-Flop without reset +// __q: Q output of FF +// __d: D input of FF +// __clk: clock input +`define FFNR(__q, __d, __clk) \ + always_ff @(posedge (__clk)) begin \ + __q <= (__d); \ + end + +// Flip-Flop with load-enable and asynchronous active-low reset (implicit clock and reset) +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __reset_value: value assigned upon reset +// (__clk: clock input) +// (__arst_n: asynchronous reset, active-low) +`define FFL(__q, __d, __load, __reset_value, __clk = `REG_DFLT_CLK, __arst_n = `REG_DFLT_RST) \ + always_ff @(posedge (__clk) or negedge (__arst_n)) begin \ + if (!__arst_n) begin \ + __q <= (__reset_value); \ + end else begin \ + __q <= (__load) ? 
(__d) : (__q); \ + end \ + end + +// Flip-Flop with load-enable and asynchronous active-high reset +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __arst: asynchronous reset, active-high +`define FFLAR(__q, __d, __load, __reset_value, __clk, __arst) \ + always_ff @(posedge (__clk) or posedge (__arst)) begin \ + if (__arst) begin \ + __q <= (__reset_value); \ + end else begin \ + __q <= (__load) ? (__d) : (__q); \ + end \ + end + +// DEPRECATED - use `FFL instead +// Flip-Flop with load-enable and asynchronous active-low reset +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __arst_n: asynchronous reset, active-low +`define FFLARN(__q, __d, __load, __reset_value, __clk, __arst_n) \ + `FFL(__q, __d, __load, __reset_value, __clk, __arst_n) + +// Flip-Flop with load-enable and synchronous active-high reset +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __reset_clk: reset input, active-high +`define FFLSR(__q, __d, __load, __reset_value, __clk, __reset_clk) \ + `ifndef NO_SYNOPSYS_FF \ + /``* synopsys sync_set_reset `"__reset_clk`" *``/ \ + `endif \ + always_ff @(posedge (__clk)) begin \ + __q <= (__reset_clk) ? (__reset_value) : ((__load) ? (__d) : (__q)); \ + end + +// Flip-Flop with load-enable and synchronous active-low reset +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __reset_n_clk: reset input, active-low +`define FFLSRN(__q, __d, __load, __reset_value, __clk, __reset_n_clk) \ + `ifndef NO_SYNOPSYS_FF \ + /``* synopsys sync_set_reset `"__reset_n_clk`" *``/ \ + `endif \ + always_ff @(posedge (__clk)) begin \ + __q <= (!__reset_n_clk) ? 
(__reset_value) : ((__load) ? (__d) : (__q)); \ + end + +// Flip-Flop with load-enable and asynchronous active-low reset and synchronous clear +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __clear: assign reset value into FF +// __reset_value: value assigned upon reset +// __clk: clock input +// __arst_n: asynchronous reset, active-low +`define FFLARNC(__q, __d, __load, __clear, __reset_value, __clk, __arst_n) \ + `ifndef NO_SYNOPSYS_FF \ + /``* synopsys sync_set_reset `"__clear`" *``/ \ + `endif \ + always_ff @(posedge (__clk) or negedge (__arst_n)) begin \ + if (!__arst_n) begin \ + __q <= (__reset_value); \ + end else begin \ + __q <= (__clear) ? (__reset_value) : (__load) ? (__d) : (__q); \ + end \ + end + +// Load-enable Flip-Flop without reset +// __q: Q output of FF +// __d: D input of FF +// __load: load d value into FF +// __clk: clock input +`define FFLNR(__q, __d, __load, __clk) \ + always_ff @(posedge (__clk)) begin \ + __q <= (__load) ? (__d) : (__q); \ + end + +`endif diff --git a/vendor/pulp-platform/common_cells/src/addr_decode.sv b/vendor/pulp-platform/common_cells/src/addr_decode.sv new file mode 100644 index 0000000000..90a43a0da2 --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/addr_decode.sv @@ -0,0 +1,161 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Author: Wolfgang Roenninger + +/// Address Decoder: Maps the input address combinatorially to an index. +/// The address map `addr_map_i` is a packed array of rule_t structs. +/// The ranges of any two rules may overlap. If so, the rule at the higher (more significant) +/// position in `addr_map_i` prevails. +/// +/// There can be an arbitrary number of address rules. There can be multiple +/// ranges defined for the same index. The start address has to be less than the end address. +/// +/// There is the possibility to add a default mapping: +/// `en_default_idx_i`: Driving this port to `1'b1` maps all input addresses +/// for which no rule in `addr_map_i` exists to the default index specified by +/// `default_idx_i`. In this case, `dec_error_o` is always `1'b0`. +/// +/// Assertions: The module checks every time there is a change in the address mapping +/// if the resulting map is valid. It fatals if `start_addr` is higher than `end_addr` +/// or if a mapping targets an index that is outside the number of allowed indices. +/// It issues warnings if the address regions of any two mappings overlap. +module addr_decode #( + /// Highest index which can happen in a rule. + parameter int unsigned NoIndices = 32'd0, + /// Total number of rules. + parameter int unsigned NoRules = 32'd0, + /// Address type inside the rules and to decode. + parameter type addr_t = logic, + /// Rule packed struct type. + /// The address decoder expects three fields in `rule_t`: + /// + /// typedef struct packed { + /// int unsigned idx; + /// addr_t start_addr; + /// addr_t end_addr; + /// } rule_t; + /// + /// - `idx`: index of the rule, has to be < `NoIndices` + /// - `start_addr`: start address of the range the rule describes, value is included in range + /// - `end_addr`: end address of the range the rule describes, value is NOT included in range + parameter type rule_t = logic, + /// Dependent parameter, do **not** overwite! + /// + /// Width of the `idx_o` output port. 
+ parameter int unsigned IdxWidth = cf_math_pkg::idx_width(NoIndices), + /// Dependent parameter, do **not** overwite! + /// + /// Type of the `idx_o` output port. + parameter type idx_t = logic [IdxWidth-1:0] +) ( + /// Address to decode. + input addr_t addr_i, + /// Address map: rule with the highest array position wins on collision + input rule_t [NoRules-1:0] addr_map_i, + /// Decoded index. + output idx_t idx_o, + /// Decode is valid. + output logic dec_valid_o, + /// Decode is not valid, no matching rule found. + output logic dec_error_o, + /// Enable default port mapping. + /// + /// When not used, tie to `0`. + input logic en_default_idx_i, + /// Default port index. + /// + /// When `en_default_idx_i` is `1`, this will be the index when no rule matches. + /// + /// When not used, tie to `0`. + input idx_t default_idx_i +); + + logic [NoRules-1:0] matched_rules; // purely for address map debugging + + always_comb begin + // default assignments + matched_rules = '0; + dec_valid_o = 1'b0; + dec_error_o = (en_default_idx_i) ? 1'b0 : 1'b1; + idx_o = (en_default_idx_i) ? 
default_idx_i : '0; + + // match the rules + for (int unsigned i = 0; i < NoRules; i++) begin + if ((addr_i >= addr_map_i[i].start_addr) && (addr_i < addr_map_i[i].end_addr)) begin + matched_rules[i] = 1'b1; + dec_valid_o = 1'b1; + dec_error_o = 1'b0; + idx_o = idx_t'(addr_map_i[i].idx); + end + end + end + + // Assumptions and assertions + `ifndef VERILATOR + `ifndef XSIM + // pragma translate_off + initial begin : proc_check_parameters + assume ($bits(addr_i) == $bits(addr_map_i[0].start_addr)) else + $warning($sformatf("Input address has %d bits and address map has %d bits.", + $bits(addr_i), $bits(addr_map_i[0].start_addr))); + assume (NoRules > 0) else + $fatal(1, $sformatf("At least one rule needed")); + assume (NoIndices > 0) else + $fatal(1, $sformatf("At least one index needed")); + end + + assert final ($onehot0(matched_rules)) else + $warning("More than one bit set in the one-hot signal, matched_rules"); + + // These following assumptions check the validity of the address map. + // The assumptions gets generated for each distinct pair of rules. + // Each assumption is present two times, as they rely on one rules being + // effectively ordered. Only one of the rules with the same function is + // active at a time for a given pair. + // check_start: Enforces a smaller start than end address. + // check_idx: Enforces a valid index in the rule. + // check_overlap: Warns if there are overlapping address regions. 
+ always @(addr_map_i) #0 begin : proc_check_addr_map + if (!$isunknown(addr_map_i)) begin + for (int unsigned i = 0; i < NoRules; i++) begin + check_start : assume (addr_map_i[i].start_addr < addr_map_i[i].end_addr) else + $fatal(1, $sformatf("This rule has a higher start than end address!!!\n\ + Violating rule %d.\n\ + Rule> IDX: %h START: %h END: %h\n\ + #####################################################", + i ,addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr)); + // check the SLV ids + check_idx : assume (addr_map_i[i].idx < NoIndices) else + $fatal(1, $sformatf("This rule has a IDX that is not allowed!!!\n\ + Violating rule %d.\n\ + Rule> IDX: %h START: %h END: %h\n\ + Rule> MAX_IDX: %h\n\ + #####################################################", + i, addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr, + (NoIndices-1))); + for (int unsigned j = i + 1; j < NoRules; j++) begin + // overlap check + check_overlap : assume (!((addr_map_i[j].start_addr < addr_map_i[i].end_addr) && + (addr_map_i[j].end_addr > addr_map_i[i].start_addr))) else + $warning($sformatf("Overlapping address region found!!!\n\ + Rule %d: IDX: %h START: %h END: %h\n\ + Rule %d: IDX: %h START: %h END: %h\n\ + #####################################################", + i, addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr, + j, addr_map_i[j].idx, addr_map_i[j].start_addr, addr_map_i[j].end_addr)); + end + end + end + end + // pragma translate_on + `endif + `endif +endmodule diff --git a/vendor/pulp-platform/common_cells/src/binary_to_gray.sv b/vendor/pulp-platform/common_cells/src/binary_to_gray.sv new file mode 100644 index 0000000000..f4e4efd3b4 --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/binary_to_gray.sv @@ -0,0 +1,22 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Fabian Schuiki + +/// A binary to gray code converter. +module binary_to_gray #( + parameter int N = -1 +)( + input logic [N-1:0] A, + output logic [N-1:0] Z +); + assign Z = A ^ (A >> 1); +endmodule diff --git a/vendor/pulp-platform/common_cells/src/cb_filter.sv b/vendor/pulp-platform/common_cells/src/cb_filter.sv new file mode 100644 index 0000000000..9fbc5269ed --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/cb_filter.sv @@ -0,0 +1,246 @@ +// Copyright (c) 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Wolfgang Roenninger + +// `cb_filter`: This module implements a counting bloom filter with parameterizable hash functions. 
+// +// Functionality: A counting bloom filter is a data structure to efficiently implement +// set lookups. It does so by hashing its data inputs onto multiple pointers +// which serve as indicators for an array of buckets. Lookups can yield +// false positives, but no false negatives. +// - Seeding: The pseudo random generators need seeds at elaboration time to generate +// different hashes. In principle any combination of seeds can be used. +// But one should look that the hash outputs give sufficient different patterns, +// such that the resulting collision rate is low. The package `cb_filter_pkg` +// contains the struct for seeding the PRG's in the hash functions. +// - Lookup: +// - Ports: `look_data_i`, `look_valid_o` +// - Description: Lookup combinational, `look_valid_o` is high, when `look_data_i` was +// previously put into the filter. +// - Increment: +// - Ports: `incr_data_i`, `incr_valid_i` +// - Description: Put data into the counting bloom filter, when valid is high. +// - Decrement: +// - Ports: `decr_data_i`, `decr_valid_i` +// - Description: Remove data from the counting bloom filter. Only remove data that was +// previously put in, otherwise will go in a wrong state. +// - Status: +// - `filter_clear_i`: Clears the filter and sets all counters to 0. +// - `filter_usage_o`: How many data items are currently in the filter. +// - `filter_full_o`: Filter is full, can no longer hold more items. +// - `filter_empty_o`: Filter is empty. +// - `filter_error_o`: One of the internal counters or buckets overflowed. 
+
+/// This is a counting bloom filter
+module cb_filter #(
+  parameter int unsigned KHashes     = 32'd3,  // Number of hash functions
+  parameter int unsigned HashWidth   = 32'd4,  // Number of counters is 2**HashWidth
+  parameter int unsigned HashRounds  = 32'd1,  // Number of permutation substitution rounds
+  parameter int unsigned InpWidth    = 32'd32, // Input data width
+  parameter int unsigned BucketWidth = 32'd4,  // Width of Bucket counters
+  // the seeds used for seeding the PRG's inside each hash, one `cb_seed_t` per hash function.
+  parameter cb_filter_pkg::cb_seed_t [KHashes-1:0] Seeds = cb_filter_pkg::EgSeeds
+) (
+  input  logic                 clk_i,   // Clock
+  input  logic                 rst_ni,  // Active low reset
+  // data lookup
+  input  logic [InpWidth-1:0]  look_data_i,
+  output logic                 look_valid_o,
+  // data increment
+  input  logic [InpWidth-1:0]  incr_data_i,
+  input  logic                 incr_valid_i,
+  // data decrement
+  input  logic [InpWidth-1:0]  decr_data_i,
+  input  logic                 decr_valid_i,
+  // status signals
+  input  logic                 filter_clear_i,
+  output logic [HashWidth-1:0] filter_usage_o,
+  output logic                 filter_full_o,
+  output logic                 filter_empty_o,
+  output logic                 filter_error_o
+);
+
+  localparam int unsigned NoCounters = 2**HashWidth;
+
+  // signal declarations
+  logic [NoCounters-1:0] look_ind; // hash function pointers
+  logic [NoCounters-1:0] incr_ind; // hash function pointers
+  logic [NoCounters-1:0] decr_ind; // hash function pointers
+  // bucket (counter signals)
+  logic [NoCounters-1:0] bucket_en;
+  logic [NoCounters-1:0] bucket_down;
+  logic [NoCounters-1:0] bucket_occupied;
+  logic [NoCounters-1:0] bucket_overflow;
+  logic [NoCounters-1:0] bucket_full;
+  logic [NoCounters-1:0] bucket_empty;
+  // membership lookup signals
+  logic [NoCounters-1:0] data_in_bucket;
+  // tot count signals (filter usage)
+  logic cnt_en;
+  logic cnt_down;
+  logic cnt_overflow;
+
+  // -----------------------------------------
+  // Lookup Hash - Membership Detection
+  // -----------------------------------------
+  hash_block #(
+    .NoHashes  ( KHashes    ),
+    .InpWidth  ( InpWidth   ),
+    .HashWidth ( HashWidth  ),
+    .NoRounds  ( HashRounds ),
+    .Seeds     ( Seeds      )
+  ) i_look_hashes (
+    .data_i      ( look_data_i ),
+    .indicator_o ( look_ind    )
+  );
+  // A hit requires every bucket selected by the lookup hashes to be non-zero.
+  assign data_in_bucket = look_ind & bucket_occupied;
+  assign look_valid_o   = (data_in_bucket == look_ind) ? 1'b1 : 1'b0;
+
+  // -----------------------------------------
+  // Increment Hash - Add Member to Set
+  // -----------------------------------------
+  hash_block #(
+    .NoHashes  ( KHashes    ),
+    .InpWidth  ( InpWidth   ),
+    .HashWidth ( HashWidth  ),
+    .NoRounds  ( HashRounds ),
+    .Seeds     ( Seeds      )
+  ) i_incr_hashes (
+    .data_i      ( incr_data_i ),
+    .indicator_o ( incr_ind    )
+  );
+
+  // -----------------------------------------
+  // Decrement Hash - Remove Member from Set
+  // -----------------------------------------
+  hash_block #(
+    .NoHashes  ( KHashes    ),
+    .InpWidth  ( InpWidth   ),
+    .HashWidth ( HashWidth  ),
+    .NoRounds  ( HashRounds ),
+    .Seeds     ( Seeds      )
+  ) i_decr_hashes (
+    .data_i      ( decr_data_i ),
+    .indicator_o ( decr_ind    )
+  );
+
+  // -----------------------------------------
+  // Control the incr/decr of buckets
+  // -----------------------------------------
+  assign bucket_down = decr_valid_i ? decr_ind : '0;
+
+  always_comb begin : proc_bucket_control
+    case ({incr_valid_i, decr_valid_i})
+      2'b00  : bucket_en = '0;
+      2'b10  : bucket_en = incr_ind;
+      2'b01  : bucket_en = decr_ind;
+      // simultaneous incr and decr: a bucket hit by both keeps its value
+      2'b11  : bucket_en = incr_ind ^ decr_ind;
+      default: bucket_en = '0; // unreachable
+    endcase
+  end
+
+  // -----------------------------------------
+  // Counters
+  // -----------------------------------------
+  for (genvar i = 0; i < NoCounters; i++) begin : gen_buckets
+    logic [BucketWidth-1:0] bucket_content;
+    counter #(
+      .WIDTH( BucketWidth )
+    ) i_bucket (
+      .clk_i      ( clk_i             ),
+      .rst_ni     ( rst_ni            ),
+      .clear_i    ( filter_clear_i    ),
+      .en_i       ( bucket_en[i]      ),
+      .load_i     ( '0                ),
+      .down_i     ( bucket_down[i]    ),
+      .d_i        ( '0                ),
+      .q_o        ( bucket_content    ),
+      .overflow_o ( bucket_overflow[i])
+    );
+    // a bucket counts as full once its counter is all ones or has overflowed
+    assign bucket_full[i]     = bucket_overflow[i] | (&bucket_content);
+    assign bucket_occupied[i] = |bucket_content;
+    assign bucket_empty[i]    = ~bucket_occupied[i];
+  end
+
+  // -----------------------------------------
+  // Filter tot item counter
+  // -----------------------------------------
+  // simultaneous incr and decr leave the item count unchanged
+  assign cnt_en   = incr_valid_i ^ decr_valid_i;
+  assign cnt_down = decr_valid_i;
+  counter #(
+    .WIDTH ( HashWidth )
+  ) i_tot_count (
+    .clk_i     ( clk_i          ),
+    .rst_ni    ( rst_ni         ),
+    .clear_i   ( filter_clear_i ),
+    .en_i      ( cnt_en         ),
+    .load_i    ( '0             ),
+    .down_i    ( cnt_down       ),
+    .d_i       ( '0             ),
+    .q_o       ( filter_usage_o ),
+    .overflow_o( cnt_overflow   )
+  );
+
+  // -----------------------------------------
+  // Filter Output Flags
+  // -----------------------------------------
+  assign filter_full_o  = |bucket_full;
+  assign filter_empty_o = &bucket_empty;
+  assign filter_error_o = |bucket_overflow | cnt_overflow;
+endmodule
+
+// gives out the or 'onehots' of all hash functions
+module hash_block #(
+  parameter int unsigned NoHashes  = 32'd3,
+  parameter int unsigned InpWidth  = 32'd11,
+  parameter int unsigned HashWidth = 32'd5,
+  parameter int unsigned NoRounds  = 32'd1,
+  parameter cb_filter_pkg::cb_seed_t [NoHashes-1:0] Seeds = cb_filter_pkg::EgSeeds
+) (
+  input  logic [InpWidth-1:0]     data_i,
+  output logic [2**HashWidth-1:0] indicator_o
+);
+
+  logic [NoHashes-1:0][2**HashWidth-1:0] hashes;
+
+  // one `sub_per_hash` instance per seed pair, all fed with the same input data
+  for (genvar i = 0; i < NoHashes; i++) begin : gen_hashes
+    sub_per_hash #(
+      .InpWidth   ( InpWidth             ),
+      .HashWidth  ( HashWidth            ),
+      .NoRounds   ( NoRounds             ),
+      .PermuteKey ( Seeds[i].PermuteSeed ),
+      .XorKey     ( Seeds[i].XorSeed     )
+    ) i_hash (
+      .data_i        ( data_i    ),
+      .hash_o        (           ), // not used, because we want the onehot
+      .hash_onehot_o ( hashes[i] )
+    );
+  end
+
+  // output assignment
+  always_comb begin : proc_hash_or
+    indicator_o = '0;
+    for (int unsigned i = 0; i < (2**HashWidth); i++) begin
+      for (int unsigned j = 0; j < NoHashes; j++) begin
+        indicator_o[i] = indicator_o[i] | hashes[j][i];
+      end
+    end
+  end
+
+  // assertions
+  // pragma translate_off
+  initial begin
+    // the parameters are integers, so they are printed with %0d (with %s they
+    // would be interpreted as character codes)
+    hash_conf: assume (InpWidth > HashWidth) else
+      $fatal(1, "%m:\nA Hash Function reduces the width of the input>\nInpWidth: %0d\nOUT_WIDTH: %0d",
+          InpWidth, HashWidth);
+  end
+  // pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/cb_filter_pkg.sv b/vendor/pulp-platform/common_cells/src/cb_filter_pkg.sv
new file mode 100644
index 0000000000..97334475e8
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cb_filter_pkg.sv
@@ -0,0 +1,26 @@
+// Copyright (c) 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Wolfgang Roenninger
+
+/// Package with the struct definition for the seeds and an example.
+package cb_filter_pkg;
+  typedef struct packed { // one seed pair, consumed by a single hash function
+    int unsigned PermuteSeed;
+    int unsigned XorSeed;
+  } cb_seed_t;
+
+  // example seeding struct: three seed pairs, listed from index 2 down to index 0
+  localparam cb_seed_t [2:0] EgSeeds = '{
+    '{PermuteSeed: 32'd299034753, XorSeed: 32'd4094834 },
+    '{PermuteSeed: 32'd19921030, XorSeed: 32'd995713 },
+    '{PermuteSeed: 32'd294388, XorSeed: 32'd65146511 }
+  };
+endpackage
diff --git a/vendor/pulp-platform/common_cells/src/cc_onehot.sv b/vendor/pulp-platform/common_cells/src/cc_onehot.sv
new file mode 100644
index 0000000000..69cdf86756
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cc_onehot.sv
@@ -0,0 +1,50 @@
+// Copyright 2021 ETH Zurich.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Hardware implementation of SystemVerilog's `$onehot()` function.
+/// It uses a tree of half adders and a separate
+/// or reduction tree for the carry.
+
+// Author: Florian Zaruba
+// Author: Fabian Schuiki
+// Author: Stefan Mach
+module cc_onehot #(
+  parameter int unsigned Width = 4
+) (
+  input logic [Width-1:0] d_i,
+  output logic is_onehot_o
+);
+  // trivial base case: a single bit is one-hot exactly when it is set
+  if (Width == 1) begin : gen_degenerated_onehot
+    assign is_onehot_o = d_i;
+  end else begin : gen_onehot
+    localparam int LVLS = $clog2(Width) + 1; // tree levels, including input level 0
+
+    logic [LVLS-1:0][2**(LVLS-1)-1:0] sum, carry;
+    logic [LVLS-2:0] carry_array; // one "any carry" flag per half-adder level
+
+    // Extend to a power of two (the unused upper bits of level 0 are zero).
+    assign sum[0] = $unsigned(d_i);
+
+    // generate half adders for each lvl
+    // lvl 0 is the input level; every level pairs up the sums of the level below
+    for (genvar i = 1; i < LVLS; i++) begin : gen_lvl
+      localparam int unsigned LVLWidth = 2**LVLS / 2**i; // number of inputs consumed at this level
+      for (genvar j = 0; j < LVLWidth; j+=2) begin : gen_width
+        assign sum[i][j/2] = sum[i-1][j] ^ sum[i-1][j+1];
+        assign carry[i][j/2] = sum[i-1][j] & sum[i-1][j+1];
+      end
+      // generate carry tree: OR of all carries produced at this level
+      assign carry_array[i-1] = |carry[i][LVLWidth/2-1:0];
+    end
+    assign is_onehot_o = sum[LVLS-1][0] & ~|carry_array; // odd parity and no half adder saw two ones
+  end
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/cdc_2phase.sv b/vendor/pulp-platform/common_cells/src/cdc_2phase.sv
new file mode 100644
index 0000000000..8e770abfa1
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cdc_2phase.sv
@@ -0,0 +1,175 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki
+
+/// A two-phase clock domain crossing.
+///
+/// CONSTRAINT: Requires max_delay of min_period(src_clk_i, dst_clk_i) through
+/// the paths async_req, async_ack, async_data.
+/* verilator lint_off DECLFILENAME */
+module cdc_2phase #(
+  parameter type T = logic
+)(
+  input logic src_rst_ni,
+  input logic src_clk_i,
+  input T src_data_i,
+  input logic src_valid_i,
+  output logic src_ready_o,
+
+  input logic dst_rst_ni,
+  input logic dst_clk_i,
+  output T dst_data_o,
+  output logic dst_valid_o,
+  input logic dst_ready_i
+);
+
+  // Asynchronous handshake signals (the only nets that cross between the two halves).
+  (* dont_touch = "true" *) logic async_req;
+  (* dont_touch = "true" *) logic async_ack;
+  (* dont_touch = "true" *) T async_data;
+
+  // The sender in the source domain.
+  cdc_2phase_src #(.T(T)) i_src (
+    .rst_ni ( src_rst_ni ),
+    .clk_i ( src_clk_i ),
+    .data_i ( src_data_i ),
+    .valid_i ( src_valid_i ),
+    .ready_o ( src_ready_o ),
+    .async_req_o ( async_req ),
+    .async_ack_i ( async_ack ),
+    .async_data_o ( async_data )
+  );
+
+  // The receiver in the destination domain.
+  cdc_2phase_dst #(.T(T)) i_dst (
+    .rst_ni ( dst_rst_ni ),
+    .clk_i ( dst_clk_i ),
+    .data_o ( dst_data_o ),
+    .valid_o ( dst_valid_o ),
+    .ready_i ( dst_ready_i ),
+    .async_req_i ( async_req ),
+    .async_ack_o ( async_ack ),
+    .async_data_i ( async_data )
+  );
+
+endmodule
+
+
+/// Half of the two-phase clock domain crossing located in the source domain.
+module cdc_2phase_src #(
+  parameter type T = logic
+)(
+  input logic rst_ni,
+  input logic clk_i,
+  input T data_i,
+  input logic valid_i,
+  output logic ready_o,
+  output logic async_req_o,
+  input logic async_ack_i,
+  output T async_data_o
+);
+
+  (* dont_touch = "true" *)
+  logic req_src_q, ack_src_q, ack_q;
+  (* dont_touch = "true" *)
+  T data_src_q;
+
+  // The req_src and data_src registers change when a new data item is accepted.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      req_src_q <= 0;
+      data_src_q <= '0;
+    end else if (valid_i && ready_o) begin
+      req_src_q <= ~req_src_q; // two-phase protocol: every toggle announces one new item
+      data_src_q <= data_i;
+    end
+  end
+
+  // The ack_src and ack registers act as synchronization stages.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ack_src_q <= 0;
+      ack_q <= 0;
+    end else begin
+      ack_src_q <= async_ack_i;
+      ack_q <= ack_src_q;
+    end
+  end
+
+  // Output assignments.
+  assign ready_o = (req_src_q == ack_q); // ready once the synchronized ack level matches the request level
+  assign async_req_o = req_src_q;
+  assign async_data_o = data_src_q;
+
+endmodule
+
+
+/// Half of the two-phase clock domain crossing located in the destination
+/// domain.
+module cdc_2phase_dst #(
+  parameter type T = logic
+)(
+  input logic rst_ni,
+  input logic clk_i,
+  output T data_o,
+  output logic valid_o,
+  input logic ready_i,
+  input logic async_req_i,
+  output logic async_ack_o,
+  input T async_data_i
+);
+
+  (* dont_touch = "true" *)
+  (* async_reg = "true" *)
+  logic req_dst_q, req_q0, req_q1, ack_dst_q;
+  (* dont_touch = "true" *)
+  T data_dst_q;
+
+  // The ack_dst register changes when a new data item is accepted.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ack_dst_q <= 0;
+    end else if (valid_o && ready_i) begin
+      ack_dst_q <= ~ack_dst_q; // toggling the ack level acknowledges the item
+    end
+  end
+
+  // The data_dst register changes when a new data item is presented. This is
+  // indicated by the async_req line changing levels.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      data_dst_q <= '0;
+    end else if (req_q0 != req_q1 && !valid_o) begin
+      data_dst_q <= async_data_i;
+    end
+  end
+
+  // The req_dst and req registers act as synchronization stages.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      req_dst_q <= 0;
+      req_q0 <= 0;
+      req_q1 <= 0;
+    end else begin
+      req_dst_q <= async_req_i;
+      req_q0 <= req_dst_q;
+      req_q1 <= req_q0;
+    end
+  end
+
+  // Output assignments.
+  assign valid_o = (ack_dst_q != req_q1); // a request level not yet acknowledged means an item is pending
+  assign data_o = data_dst_q;
+  assign async_ack_o = ack_dst_q;
+
+endmodule
+/* verilator lint_on DECLFILENAME */
diff --git a/vendor/pulp-platform/common_cells/src/cdc_fifo_2phase.sv b/vendor/pulp-platform/common_cells/src/cdc_fifo_2phase.sv
new file mode 100644
index 0000000000..acbb7b0a70
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cdc_fifo_2phase.sv
@@ -0,0 +1,134 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki
+
+/// A clock domain crossing FIFO, using 2-phase hand shakes.
+///
+/// This FIFO has its push and pop ports in two separate clock domains. Its size
+/// can only be powers of two, which is why its depth is given as 2**LOG_DEPTH.
+/// LOG_DEPTH must be at least 1.
+///
+/// CONSTRAINT: See the constraints for `cdc_2phase`. An additional maximum
+/// delay path needs to be specified from fifo_data_q to dst_data_o.
+module cdc_fifo_2phase #(
+  /// The data type of the payload transported by the FIFO.
+  parameter type T = logic,
+  /// The FIFO's depth given as 2**LOG_DEPTH.
+  parameter int LOG_DEPTH = 3
+)(
+  input logic src_rst_ni,
+  input logic src_clk_i,
+  input T src_data_i,
+  input logic src_valid_i,
+  output logic src_ready_o,
+
+  input logic dst_rst_ni,
+  input logic dst_clk_i,
+  output T dst_data_o,
+  output logic dst_valid_o,
+  input logic dst_ready_i
+);
+
+  // Check the invariants.
+  //pragma translate_off
+  initial begin
+    assert(LOG_DEPTH > 0);
+  end
+  //pragma translate_on
+
+  localparam int PtrWidth = LOG_DEPTH+1; // one wrap bit on top of the word index
+  typedef logic [PtrWidth-1:0] pointer_t;
+  typedef logic [LOG_DEPTH-1:0] index_t;
+
+  localparam pointer_t PtrFull = (1 << LOG_DEPTH); // pointers differ only in the topmost bit
+  localparam pointer_t PtrEmpty = '0; // pointers are identical
+
+  // Allocate the registers for the FIFO memory with its separate write and read
+  // ports. The FIFO has the following ports:
+  //
+  // - write: fifo_widx, fifo_wdata, fifo_write, src_clk_i
+  // - read: fifo_ridx, fifo_rdata
+  index_t fifo_widx, fifo_ridx;
+  logic fifo_write;
+  T fifo_wdata, fifo_rdata;
+  T fifo_data_q [2**LOG_DEPTH];
+
+  assign fifo_rdata = fifo_data_q[fifo_ridx]; // combinational read of storage clocked by src_clk_i
+
+  for (genvar i = 0; i < 2**LOG_DEPTH; i++) begin : g_word
+    always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+      if (!src_rst_ni)
+        fifo_data_q[i] <= '0;
+      else if (fifo_write && fifo_widx == i)
+        fifo_data_q[i] <= fifo_wdata;
+    end
+  end
+
+  // Allocate the read and write pointers in the source and destination domain.
+  pointer_t src_wptr_q, dst_wptr, src_rptr, dst_rptr_q;
+
+  always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+    if (!src_rst_ni)
+      src_wptr_q <= 0;
+    else if (src_valid_i && src_ready_o)
+      src_wptr_q <= src_wptr_q + 1;
+  end
+
+  always_ff @(posedge dst_clk_i, negedge dst_rst_ni) begin
+    if (!dst_rst_ni)
+      dst_rptr_q <= 0;
+    else if (dst_valid_o && dst_ready_i)
+      dst_rptr_q <= dst_rptr_q + 1;
+  end
+
+  // The pointers into the FIFO are one bit wider than the actual address into
+  // the FIFO. This makes detecting critical states very simple: if all but the
+  // topmost bit of rptr and wptr agree, the FIFO is in a critical state. If the
+  // topmost bit is equal, the FIFO is empty, otherwise it is full.
+  assign src_ready_o = ((src_wptr_q ^ src_rptr) != PtrFull);
+  assign dst_valid_o = ((dst_rptr_q ^ dst_wptr) != PtrEmpty);
+
+  // Transport the read and write pointers across the clock domain boundary.
+  cdc_2phase #( .T(pointer_t) ) i_cdc_wptr (
+    .src_rst_ni ( src_rst_ni ),
+    .src_clk_i ( src_clk_i ),
+    .src_data_i ( src_wptr_q ),
+    .src_valid_i ( 1'b1 ), // the current write pointer is offered continuously
+    .src_ready_o ( ),
+    .dst_rst_ni ( dst_rst_ni ),
+    .dst_clk_i ( dst_clk_i ),
+    .dst_data_o ( dst_wptr ),
+    .dst_valid_o ( ),
+    .dst_ready_i ( 1'b1 ) // every transferred pointer value is accepted immediately
+  );
+
+  cdc_2phase #( .T(pointer_t) ) i_cdc_rptr (
+    .src_rst_ni ( dst_rst_ni ),
+    .src_clk_i ( dst_clk_i ),
+    .src_data_i ( dst_rptr_q ),
+    .src_valid_i ( 1'b1 ), // the current read pointer is offered continuously
+    .src_ready_o ( ),
+    .dst_rst_ni ( src_rst_ni ),
+    .dst_clk_i ( src_clk_i ),
+    .dst_data_o ( src_rptr ),
+    .dst_valid_o ( ),
+    .dst_ready_i ( 1'b1 ) // every transferred pointer value is accepted immediately
+  );
+
+  // Drive the FIFO write and read ports.
+  assign fifo_widx = src_wptr_q; // pointer_t to index_t: the assignment drops the wrap bit
+  assign fifo_wdata = src_data_i;
+  assign fifo_write = src_valid_i && src_ready_o;
+  assign fifo_ridx = dst_rptr_q; // pointer_t to index_t: the assignment drops the wrap bit
+  assign dst_data_o = fifo_rdata;
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/cdc_fifo_gray.sv b/vendor/pulp-platform/common_cells/src/cdc_fifo_gray.sv
new file mode 100644
index 0000000000..802f29545b
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cdc_fifo_gray.sv
@@ -0,0 +1,269 @@
+// Copyright 2018-2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki
+// Florian Zaruba
+
+/// A clock domain crossing FIFO, using gray counters.
+///
+/// # Architecture
+///
+/// The design is split into two parts, each one being clocked and reset
+/// separately.
+/// 1. The data to be transferred over the clock domain boundary
+///    is stored in a FIFO. The corresponding write pointer is managed
+///    (incremented) in the source clock domain.
+/// 2. The entire FIFO content is exposed over the `async_data` port.
+///    The destination clock domain increments its read pointer
+///    in its own clock domain.
+///
+/// Read and write pointers are then gray coded, communicated
+/// and synchronized using a classic multi-stage FF synchronizer
+/// in the other clock domain. The gray coding ensures that only
+/// one bit changes at each pointer increment, preventing the
+/// synchronizer from accidentally latching an inconsistent state
+/// on a multi-bit bus.
+///
+/// The not-full signal, i.e. `src_ready_o` (on the sending side),
+/// is generated using the local write pointer and the pessimistic
+/// read pointer from the destination clock domain (pessimistic
+/// because it is delayed at least two cycles because of the synchronizer
+/// stages). This prevents the FIFO from overflowing.
+///
+/// The not-empty signal, i.e. `dst_valid_o`, is generated using
+/// the pessimistic write pointer and the local read pointer in
+/// the destination clock domain. This means the FIFO content
+/// does not need to be synchronized as we are sure we are reading
+/// data which has been written at least two cycles earlier.
+/// Furthermore, the read select logic into the FIFO is completely
+/// clocked by the destination clock domain which avoids
+/// inefficient data synchronization.
+///
+/// The FIFO size must be a power of two, which is why its depth is
+/// given as 2**LOG_DEPTH. LOG_DEPTH must be at least 1.
+///
+/// # Constraints
+///
+/// We need to make sure that the propagation delay of the
+/// data, read and write pointer is bound to the minimum of
+/// either the sending or receiving clock period to prevent
+/// an inconsistent state to be latched (if for example the one
+/// bit of the read/write pointer have an excessive delay).
+/// Furthermore, we should deactivate setup and hold checks on
+/// the asynchronous signals.
+///
+/// ```
+/// set_ungroup [get_designs cdc_fifo_gray*] false
+/// set_boundary_optimization [get_designs cdc_fifo_gray*] false
+/// set_max_delay min(T_src, T_dst) \
+///     -through [get_pins -hierarchical -filter async] \
+///     -through [get_pins -hierarchical -filter async]
+/// set_false_path -hold \
+///     -through [get_pins -hierarchical -filter async] \
+///     -through [get_pins -hierarchical -filter async]
+/// ```
+
+`include "common_cells/registers.svh"
+
+(* no_ungroup *)
+(* no_boundary_optimization *)
+module cdc_fifo_gray #(
+  /// The width of the default logic type.
+  parameter int unsigned WIDTH = 1,
+  /// The data type of the payload transported by the FIFO.
+  parameter type T = logic [WIDTH-1:0],
+  /// The FIFO's depth given as 2**LOG_DEPTH.
+  parameter int LOG_DEPTH = 3,
+  /// The number of synchronization registers to insert on the async pointers.
+  parameter int SYNC_STAGES = 2
+) (
+  input  logic src_rst_ni,
+  input  logic src_clk_i,
+  input  T     src_data_i,
+  input  logic src_valid_i,
+  output logic src_ready_o,
+
+  input  logic dst_rst_ni,
+  input  logic dst_clk_i,
+  output T     dst_data_o,
+  output logic dst_valid_o,
+  input  logic dst_ready_i
+);
+
+  // Nets crossing between the two clock domains: the whole FIFO storage plus
+  // the gray-coded write and read pointers.
+  T     [2**LOG_DEPTH-1:0] async_data;
+  logic [LOG_DEPTH:0]      async_wptr;
+  logic [LOG_DEPTH:0]      async_rptr;
+
+  // SYNC_STAGES must be forwarded to both halves; otherwise they silently fall
+  // back to their own default and the top-level parameter has no effect.
+  cdc_fifo_gray_src #(
+    .T           ( T           ),
+    .LOG_DEPTH   ( LOG_DEPTH   ),
+    .SYNC_STAGES ( SYNC_STAGES )
+  ) i_src (
+    .src_rst_ni,
+    .src_clk_i,
+    .src_data_i,
+    .src_valid_i,
+    .src_ready_o,
+
+    (* async *) .async_data_o ( async_data ),
+    (* async *) .async_wptr_o ( async_wptr ),
+    (* async *) .async_rptr_i ( async_rptr )
+  );
+
+  cdc_fifo_gray_dst #(
+    .T           ( T           ),
+    .LOG_DEPTH   ( LOG_DEPTH   ),
+    .SYNC_STAGES ( SYNC_STAGES )
+  ) i_dst (
+    .dst_rst_ni,
+    .dst_clk_i,
+    .dst_data_o,
+    .dst_valid_o,
+    .dst_ready_i,
+
+    (* async *) .async_data_i ( async_data ),
+    (* async *) .async_wptr_i ( async_wptr ),
+    (* async *) .async_rptr_o ( async_rptr )
+  );
+
+  // Check the invariants.
+  // pragma translate_off
+  `ifndef VERILATOR
+  initial assert(LOG_DEPTH > 0);
+  initial assert(SYNC_STAGES >= 2);
+  `endif
+  // pragma translate_on
+
+endmodule
+
+
+/// Source-domain half of `cdc_fifo_gray`: holds the FIFO storage and the
+/// gray-coded write pointer, and synchronizes the incoming read pointer.
+(* no_ungroup *)
+(* no_boundary_optimization *)
+module cdc_fifo_gray_src #(
+  parameter type T = logic,
+  parameter int LOG_DEPTH = 3,
+  parameter int SYNC_STAGES = 2
+)(
+  input  logic src_rst_ni,
+  input  logic src_clk_i,
+  input  T     src_data_i,
+  input  logic src_valid_i,
+  output logic src_ready_o,
+
+  output T     [2**LOG_DEPTH-1:0] async_data_o,
+  output logic [LOG_DEPTH:0]      async_wptr_o,
+  input  logic [LOG_DEPTH:0]      async_rptr_i
+);
+
+  localparam int PtrWidth = LOG_DEPTH+1;
+  localparam logic [PtrWidth-1:0] PtrFull = (1 << LOG_DEPTH);
+
+  T     [2**LOG_DEPTH-1:0] data_q;
+  logic [PtrWidth-1:0]     wptr_q, wptr_d, wptr_bin, wptr_next, rptr, rptr_bin;
+
+  // Data FIFO.
+  assign async_data_o = data_q;
+  for (genvar i = 0; i < 2**LOG_DEPTH; i++) begin : gen_word
+    `FFLNR(data_q[i], src_data_i,
+          src_valid_i & src_ready_o & (wptr_bin[LOG_DEPTH-1:0] == i), src_clk_i)
+  end
+
+  // Read pointer.
+  for (genvar i = 0; i < PtrWidth; i++) begin : gen_sync
+    sync #(.STAGES(SYNC_STAGES)) i_sync (
+      .clk_i    ( src_clk_i       ),
+      .rst_ni   ( src_rst_ni      ),
+      .serial_i ( async_rptr_i[i] ),
+      .serial_o ( rptr[i]         )
+    );
+  end
+  gray_to_binary #(PtrWidth) i_rptr_g2b (.A(rptr), .Z(rptr_bin));
+
+  // Write pointer.
+  assign wptr_next = wptr_bin+1;
+  gray_to_binary #(PtrWidth) i_wptr_g2b (.A(wptr_q), .Z(wptr_bin));
+  binary_to_gray #(PtrWidth) i_wptr_b2g (.A(wptr_next), .Z(wptr_d));
+  `FFLARN(wptr_q, wptr_d, src_valid_i & src_ready_o, '0, src_clk_i, src_rst_ni)
+  assign async_wptr_o = wptr_q;
+
+  // The pointers into the FIFO are one bit wider than the actual address into
+  // the FIFO. This makes detecting critical states very simple: if all but the
+  // topmost bit of rptr and wptr agree, the FIFO is in a critical state. If the
+  // topmost bit is equal, the FIFO is empty, otherwise it is full.
+  assign src_ready_o = ((wptr_bin ^ rptr_bin) != PtrFull);
+
+endmodule
+
+
+/// Destination-domain half of `cdc_fifo_gray`: holds the gray-coded read
+/// pointer, synchronizes the incoming write pointer and selects the read word.
+(* no_ungroup *)
+(* no_boundary_optimization *)
+module cdc_fifo_gray_dst #(
+  parameter type T = logic,
+  parameter int LOG_DEPTH = 3,
+  parameter int SYNC_STAGES = 2
+)(
+  input  logic dst_rst_ni,
+  input  logic dst_clk_i,
+  output T     dst_data_o,
+  output logic dst_valid_o,
+  input  logic dst_ready_i,
+
+  input  T     [2**LOG_DEPTH-1:0] async_data_i,
+  input  logic [LOG_DEPTH:0]      async_wptr_i,
+  output logic [LOG_DEPTH:0]      async_rptr_o
+);
+
+  localparam int PtrWidth = LOG_DEPTH+1;
+  localparam logic [PtrWidth-1:0] PtrEmpty = '0;
+
+  T dst_data;
+  logic [PtrWidth-1:0] rptr_q, rptr_d, rptr_bin, rptr_bin_d, rptr_next, wptr, wptr_bin;
+  logic dst_valid, dst_ready;
+  // Data selector and register.
+  assign dst_data = async_data_i[rptr_bin[LOG_DEPTH-1:0]];
+
+  // Read pointer.
+  assign rptr_next = rptr_bin+1;
+  gray_to_binary #(PtrWidth) i_rptr_g2b (.A(rptr_q), .Z(rptr_bin));
+  binary_to_gray #(PtrWidth) i_rptr_b2g (.A(rptr_next), .Z(rptr_d));
+  `FFLARN(rptr_q, rptr_d, dst_valid & dst_ready, '0, dst_clk_i, dst_rst_ni)
+  assign async_rptr_o = rptr_q;
+
+  // Write pointer.
+  for (genvar i = 0; i < PtrWidth; i++) begin : gen_sync
+    sync #(.STAGES(SYNC_STAGES)) i_sync (
+      .clk_i    ( dst_clk_i       ),
+      .rst_ni   ( dst_rst_ni      ),
+      .serial_i ( async_wptr_i[i] ),
+      .serial_o ( wptr[i]         )
+    );
+  end
+  gray_to_binary #(PtrWidth) i_wptr_g2b (.A(wptr), .Z(wptr_bin));
+
+  // The pointers into the FIFO are one bit wider than the actual address into
+  // the FIFO. This makes detecting critical states very simple: if all but the
+  // topmost bit of rptr and wptr agree, the FIFO is in a critical state. If the
+  // topmost bit is equal, the FIFO is empty, otherwise it is full.
+  assign dst_valid = ((wptr_bin ^ rptr_bin) != PtrEmpty);
+
+  // Cut the combinatorial path with a spill register.
+  spill_register #(
+    .T ( T )
+  ) i_spill_register (
+    .clk_i   ( dst_clk_i   ),
+    .rst_ni  ( dst_rst_ni  ),
+    .valid_i ( dst_valid   ),
+    .ready_o ( dst_ready   ),
+    .data_i  ( dst_data    ),
+    .valid_o ( dst_valid_o ),
+    .ready_i ( dst_ready_i ),
+    .data_o  ( dst_data_o  )
+  );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv b/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
new file mode 100644
index 0000000000..9f35a44e98
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
@@ -0,0 +1,61 @@
+// Copyright 2016 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// cf_math_pkg: Constant Function Implementations of Mathematical Functions for HDL Elaboration
+///
+/// This package contains a collection of mathematical functions that are commonly used when defining
+/// the value of constants in HDL code. These functions are implemented as Verilog constants
+/// functions. Introduced in Verilog 2001 (IEEE Std 1364-2001), a constant function (§ 10.3.5) is a
+/// function whose value can be evaluated at compile time or during elaboration. A constant function
+/// must be called with arguments that are constants.
+package cf_math_pkg;
+
+  /// Ceiled Division of Two Natural Numbers
+  ///
+  /// Returns `dividend / divisor` rounded up to the next integer. The result is found by
+  /// counting how many times `divisor` has to be subtracted until nothing positive is left.
+  function automatic integer ceil_div (input longint dividend, input longint divisor);
+    automatic longint remainder;
+
+    // pragma translate_off
+    `ifndef VERILATOR
+    if (dividend < 0) begin
+      $fatal(1, "Dividend %0d is not a natural number!", dividend);
+    end
+
+    if (divisor < 0) begin
+      $fatal(1, "Divisor %0d is not a natural number!", divisor);
+    end
+
+    if (divisor == 0) begin
+      $fatal(1, "Division by zero!");
+    end
+    `endif
+    // pragma translate_on
+
+    ceil_div  = 0;
+    remainder = dividend;
+    while (remainder > 0) begin
+      remainder -= divisor;
+      ceil_div++;
+    end
+  endfunction
+
+  /// Index width required to be able to represent up to `num_idx` indices as a binary
+  /// encoded signal.
+  /// Ensures that the minimum width of an index signal is `1`, regardless of parametrization.
+  ///
+  /// Sample usage in type definition:
+  /// As parameter:
+  ///   `parameter type idx_t = logic[cf_math_pkg::idx_width(NumIdx)-1:0]`
+  /// As typedef:
+  ///   `typedef logic [cf_math_pkg::idx_width(NumIdx)-1:0] idx_t`
+  function automatic integer unsigned idx_width (input integer unsigned num_idx);
+    idx_width = (num_idx <= 32'd1) ? 32'd1 : unsigned'($clog2(num_idx));
+  endfunction
+
+endpackage
diff --git a/vendor/pulp-platform/common_cells/src/clk_div.sv b/vendor/pulp-platform/common_cells/src/clk_div.sv
new file mode 100644
index 0000000000..b1df809f74
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/clk_div.sv
@@ -0,0 +1,42 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Divides the clock by an integer factor
+//
+// While `en_i` is high, `clk_o` carries a pulse that is one `clk_i` cycle wide once every
+// `RATIO` cycles of `clk_i`. The output is a registered pulse train, not a 50% duty-cycle
+// clock. While `en_i` is low the output is held low and the internal count is kept.
+// With `testmode_i` high the divider is bypassed and `clk_o` is `clk_i` itself.
+//
+// Parameters:
+//   RATIO      - division factor; it is also used as the bit width of the internal counter.
+//                NOTE(review): `RATIO` is a 32-bit `int unsigned`, so the part-select
+//                `RATIO[RATIO-1:0]` below reaches past the parameter for RATIO > 32.
+module clk_div #(
+    parameter int unsigned RATIO = 4
+)(
+    input  logic clk_i,      // Clock
+    input  logic rst_ni,     // Asynchronous reset active low
+    input  logic testmode_i, // testmode
+    input  logic en_i,       // enable clock divider
+    output logic clk_o       // divided clock out
+);
+    logic [RATIO-1:0] counter_q;
+    logic             clk_q;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            clk_q     <= 1'b0;
+            counter_q <= '0;
+        end else begin
+            // The pulse lasts a single cycle unless it is set again below.
+            clk_q <= 1'b0;
+            if (en_i) begin
+                if (counter_q == (RATIO[RATIO-1:0] - 1)) begin
+                    // Terminal count: emit the pulse and restart the count. Without the
+                    // restart the counter would rest at RATIO-1 and `clk_q` would be set
+                    // again on every following cycle, i.e. the output would stay high.
+                    clk_q     <= 1'b1;
+                    counter_q <= '0;
+                end else begin
+                    counter_q <= counter_q + 1;
+                end
+            end
+        end
+    end
+    // output assignment - bypass in testmode
+    assign clk_o = testmode_i ? clk_i : clk_q;
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/counter.sv b/vendor/pulp-platform/common_cells/src/counter.sv
new file mode 100644
index 0000000000..43392e4bfd
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/counter.sv
@@ -0,0 +1,43 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Generic up/down counter
+//
+// Thin wrapper around `delta_counter` whose step input is tied to the constant one.
+// Both parameters and every port are handed to the wrapped instance unchanged.
+
+module counter #(
+    parameter int unsigned WIDTH           = 4,
+    parameter bit          STICKY_OVERFLOW = 1'b0
+)(
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             clear_i, // synchronous clear
+    input  logic             en_i,    // enable the counter
+    input  logic             load_i,  // load a new value
+    input  logic             down_i,  // downcount, default is up
+    input  logic [WIDTH-1:0] d_i,
+    output logic [WIDTH-1:0] q_o,
+    output logic             overflow_o
+);
+    // Step size for the wrapped counter: the value one, WIDTH bits wide.
+    localparam logic [WIDTH-1:0] UNIT_STEP = {{WIDTH-1{1'b0}}, 1'b1};
+
+    delta_counter #(
+        .WIDTH           (WIDTH),
+        .STICKY_OVERFLOW (STICKY_OVERFLOW)
+    ) i_counter (
+        .clk_i      (clk_i),
+        .rst_ni     (rst_ni),
+        .clear_i    (clear_i),
+        .en_i       (en_i),
+        .load_i     (load_i),
+        .down_i     (down_i),
+        .delta_i    (UNIT_STEP),
+        .d_i        (d_i),
+        .q_o        (q_o),
+        .overflow_o (overflow_o)
+    );
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/delta_counter.sv b/vendor/pulp-platform/common_cells/src/delta_counter.sv
new file mode 100644
index 0000000000..90b5cffa9a
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/delta_counter.sv
@@ -0,0 +1,74 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Up/down counter with variable delta
+//
+// On every clock edge the count is, in priority order: cleared to zero (`clear_i`), loaded
+// with `d_i` (`load_i`), moved by `delta_i` up or down (`en_i`, direction from `down_i`), or
+// held. The count is kept one bit wider than `q_o`; that extra bit records a carry or borrow.
+//
+// Overflow reporting depends on STICKY_OVERFLOW:
+//   0 - `overflow_o` is the extra top bit of the stored count.
+//   1 - `overflow_o` is a separate flag that is set when an enabled step would leave the
+//       range of `q_o` and then stays set until `clear_i` or `load_i`.
+
+module delta_counter #(
+    parameter int unsigned WIDTH           = 4,
+    parameter bit          STICKY_OVERFLOW = 1'b0
+)(
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             clear_i, // synchronous clear
+    input  logic             en_i,    // enable the counter
+    input  logic             load_i,  // load a new value
+    input  logic             down_i,  // downcount, default is up
+    input  logic [WIDTH-1:0] delta_i,
+    input  logic [WIDTH-1:0] d_i,
+    output logic [WIDTH-1:0] q_o,
+    output logic             overflow_o
+);
+    logic [WIDTH:0] counter_q, counter_d;
+
+    if (STICKY_OVERFLOW) begin : gen_sticky_overflow
+        logic overflow_d, overflow_q;
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (~rst_ni) begin
+                overflow_q <= 1'b0;
+            end else begin
+                overflow_q <= overflow_d;
+            end
+        end
+
+        always_comb begin
+            overflow_d = overflow_q;
+            if (clear_i || load_i) begin
+                overflow_d = 1'b0;
+            end else if (!overflow_q && en_i) begin
+                if (down_i) begin
+                    // stepping down by more than the current count
+                    overflow_d = counter_q[WIDTH-1:0] < delta_i;
+                end else begin
+                    // stepping up past the largest WIDTH-bit value
+                    overflow_d = ({WIDTH{1'b1}} - delta_i) < counter_q[WIDTH-1:0];
+                end
+            end
+        end
+
+        assign overflow_o = overflow_q;
+    end else begin : gen_transient_overflow
+        // counter overflowed if the MSB is set
+        assign overflow_o = counter_q[WIDTH];
+    end
+
+    assign q_o = counter_q[WIDTH-1:0];
+
+    // next count: clear beats load, load beats counting, otherwise hold
+    always_comb begin
+        if (clear_i) begin
+            counter_d = '0;
+        end else if (load_i) begin
+            counter_d = {1'b0, d_i};
+        end else if (en_i && down_i) begin
+            counter_d = counter_q - delta_i;
+        end else if (en_i) begin
+            counter_d = counter_q + delta_i;
+        end else begin
+            counter_d = counter_q;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            counter_q <= '0;
+        end else begin
+            counter_q <= counter_d;
+        end
+    end
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/clock_divider.sv b/vendor/pulp-platform/common_cells/src/deprecated/clock_divider.sv
new file mode 100644
index 0000000000..343b0a2386
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/clock_divider.sv
@@ -0,0 +1,191 @@
+// Copyright 2018 ETH
Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// //
+// Company: Multitherman Laboratory @ DEIS - University of Bologna //
+// Viale Risorgimento 2 40136 //
+// Bologna - fax 0512093785 - //
+// //
+// Engineer: Antonio Pullini - pullinia@iis.ee.ethz.ch //
+// //
+// Additional contributions by: //
+// //
+// //
+// //
+// Create Date: 13/02/2013 //
+// Design Name: ULPSoC //
+// Module Name: clock_divider //
+// Project Name: ULPSoC //
+// Language: SystemVerilog //
+// //
+// Description: Clock Divider //
+// //
+// //
+// Revision: //
+// Revision v0.1 - File Created //
+// Revision v0.2 - (19/03/2015) clock_gating swapped in pulp_clock_gating //
+// //
+// //
+// //
+// //
+// //
+// //
+////////////////////////////////////////////////////////////////////////////////
+
+// Programmable clock divider with an output gate.
+//
+// A new 8-bit divider value is taken from `clk_div_data_i` when the synchronised
+// `clk_div_valid_i` handshake reports an edge. A four-state sequence then closes the output
+// clock gate, pulses the divider-update strobe towards `clock_divider_counter` for one cycle,
+// keeps the gate closed for two further cycles and finally re-opens it.
+// `clk_gate_async_i` passes through a two-flop synchroniser and must be high for the output
+// gate to be enabled.
+//
+// Parameters:
+//   DIV_INIT    - forwarded to `clock_divider_counter`.
+//   BYPASS_INIT - forwarded to `clock_divider_counter`.
+// NOTE(review): `rstgen`, `pulp_sync_wedge`, `clock_divider_counter` and `pulp_clock_gating`
+// are used as black boxes here; statements about them follow their port names only.
+module clock_divider
+#(
+    parameter DIV_INIT    = 0,
+    parameter BYPASS_INIT = 1
+)
+(
+    input  logic       clk_i,
+    input  logic       rstn_i,
+    input  logic       test_mode_i,
+    input  logic       clk_gate_async_i,
+    input  logic [7:0] clk_div_data_i,
+    input  logic       clk_div_valid_i,
+    output logic       clk_div_ack_o,
+    output logic       clk_o
+);
+
+    enum logic [1:0] {IDLE, STOP, WAIT, RELEASE} state, state_next;
+
+    logic       s_clk_out;
+    logic       s_clock_enable;
+    logic       s_clock_enable_gate;
+    logic       s_clk_div_valid;
+
+    logic [7:0] reg_clk_div;
+    logic       s_clk_div_valid_sync;
+
+    logic       s_rstn_sync;
+
+    logic [1:0] reg_ext_gate_sync;
+
+    // Gate enable = internal enable AND the synchronised external gate request. Bit 0 is the
+    // last stage of the synchroniser below. It is selected explicitly: ANDing the 1-bit enable
+    // with the whole 2-bit register produced the same bit only through implicit truncation.
+    assign s_clock_enable_gate = s_clock_enable & reg_ext_gate_sync[0];
+
+`ifndef PULP_FPGA_EMUL
+    rstgen i_rst_gen
+    (
+        // PAD FRAME SIGNALS
+        .clk_i(clk_i),
+        .rst_ni(rstn_i), //async signal coming from pads
+
+        // TEST MODE
+        .test_mode_i(test_mode_i),
+
+        // OUTPUT RESET
+        .rst_no(s_rstn_sync),
+        .init_no() //not used
+    );
+`else
+    assign s_rstn_sync = rstn_i;
+`endif
+
+
+    //handle the handshake with the soc_ctrl. Interface is now async
+    pulp_sync_wedge i_edge_prop
+    (
+        .clk_i(clk_i),
+        .rstn_i(s_rstn_sync),
+        .en_i(1'b1),
+        .serial_i(clk_div_valid_i),
+        .serial_o(clk_div_ack_o),
+        .r_edge_o(s_clk_div_valid_sync),
+        .f_edge_o()
+    );
+
+    clock_divider_counter
+    #(
+        .BYPASS_INIT(BYPASS_INIT),
+        .DIV_INIT(DIV_INIT)
+    )
+    i_clkdiv_cnt
+    (
+        .clk(clk_i),
+        .rstn(s_rstn_sync),
+        .test_mode(test_mode_i),
+        .clk_div(reg_clk_div),
+        .clk_div_valid(s_clk_div_valid),
+        .clk_out(s_clk_out)
+    );
+
+    pulp_clock_gating i_clk_gate
+    (
+        .clk_i(s_clk_out),
+        .en_i(s_clock_enable_gate),
+        .test_en_i(test_mode_i),
+        .clk_o(clk_o)
+    );
+
+    // Divider-update sequence: IDLE -> STOP -> WAIT -> RELEASE -> IDLE.
+    // The clock enable is high only in IDLE; the update strobe is high only in STOP.
+    always_comb
+    begin
+        case(state)
+        IDLE:
+        begin
+            s_clock_enable  = 1'b1;
+            s_clk_div_valid = 1'b0;
+            if (s_clk_div_valid_sync)
+                state_next = STOP;
+            else
+                state_next = IDLE;
+        end
+
+        STOP:
+        begin
+            s_clock_enable  = 1'b0;
+            s_clk_div_valid = 1'b1;
+            state_next      = WAIT;
+        end
+
+        WAIT:
+        begin
+            s_clock_enable  = 1'b0;
+            s_clk_div_valid = 1'b0;
+            state_next      = RELEASE;
+        end
+
+        RELEASE:
+        begin
+            s_clock_enable  = 1'b0;
+            s_clk_div_valid = 1'b0;
+            state_next      = IDLE;
+        end
+        endcase
+    end
+
+    always_ff @(posedge clk_i or negedge s_rstn_sync)
+    begin
+        if (!s_rstn_sync)
+            state <= IDLE;
+        else
+            state <= state_next;
+    end
+
+    //sample the data when valid has been sync and there is a rise edge
+    always_ff @(posedge clk_i or negedge s_rstn_sync)
+    begin
+        if (!s_rstn_sync)
+            reg_clk_div <= '0;
+        else if (s_clk_div_valid_sync)
+            reg_clk_div <= clk_div_data_i;
+    end
+
+    //two-flop synchroniser for the asynchronous external gate request:
+    //bit 1 samples the input, bit 0 is the stage used by the gate enable
+    always_ff @(posedge clk_i or negedge s_rstn_sync)
+    begin
+        if (!s_rstn_sync)
+            reg_ext_gate_sync <= 2'b00;
+        else
+            reg_ext_gate_sync <= {clk_gate_async_i, reg_ext_gate_sync[1]};
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/clock_divider_counter.sv b/vendor/pulp-platform/common_cells/src/deprecated/clock_divider_counter.sv
new file mode 100644
index 0000000000..e5c222af95
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/clock_divider_counter.sv
@@ -0,0 +1,211 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company: Multitherman Laboratory @ DEIS - University of Bologna //
+// Viale Risorgimento 2 40136 //
+// Bologna - fax 0512093785 - //
+// //
+// Engineer: Antonio Pullini - pullinia@iis.ee.ethz.ch //
+// //
+// Additional contributions by: //
+// //
+// //
+// //
+// Create Date: 13/02/2013 //
+// Design Name: ULPSoC //
+// Module Name: clock_divider_counter //
+// Project Name: ULPSoC //
+// Language: SystemVerilog //
+// //
+// Description: clock_divider_counter //
+// //
+// //
+// Revision: //
+// Revision v0.1 - File Created //
+// Revision v0.2 - (19/03/2015) clock_gating swapped in pulp_clock_gating //
+// //
+// //
+// //
+// //
+// //
+// //
+////////////////////////////////////////////////////////////////////////////////
+
+
+// Counter-based clock generator behind `clock_divider`.
+//
+// `clk_div` is taken over on a cycle where `clk_div_valid` is high:
+//   - 0 or 1   : bypass, `clk_out` is `clk` itself.
+//   - even > 1 : `div1` toggles every clk_div/2 cycles of `clk`; `div2` does not toggle.
+//   - odd  > 1 : `div1` toggles every clk_div cycles on the rising edge of `clk` and `div2`
+//                toggles every clk_div cycles on the inverted clock, part-way through the
+//                count (presumably to balance the duty cycle - TODO confirm).
+// `clk_out` is `div1 XOR div2`, or `clk` when bypassing or when `test_mode` is high.
+//
+// Parameters:
+//   BYPASS_INIT - reset value of the bypass flag.
+//   DIV_INIT    - reset value of the terminal count `clk_cnt`.
+// NOTE(review): the `pulp_clock_*` cells are defined outside this file; their behaviour is
+// assumed from their names.
+module clock_divider_counter
+#(
+    parameter BYPASS_INIT = 1,
+    parameter DIV_INIT = 'hFF
+)
+(
+    input logic clk,
+    input logic rstn,
+    input logic test_mode,
+    input logic [7:0] clk_div,
+    input logic clk_div_valid,
+    output logic clk_out
+);
+
+    logic [7:0] counter;       // running count, 0 .. clk_cnt
+    logic [7:0] counter_next;
+    logic [7:0] clk_cnt;       // terminal count for the active divider value
+    logic en1;                 // toggle enable for div1
+    logic en2;                 // toggle enable for div2
+
+    logic is_odd;
+
+    logic div1;
+    logic div2;
+    logic div2_neg_sync;       // declared but not used in this module
+
+    logic [7:0] clk_cnt_odd;
+    logic [7:0] clk_cnt_odd_incr; // only referenced by the commented-out assigns below
+    logic [7:0] clk_cnt_even;
+    logic [7:0] clk_cnt_en2;
+
+    logic bypass;
+
+    logic clk_out_gen;
+    logic clk_div_valid_reg;
+
+    logic clk_inv_test;
+    logic clk_inv;
+
+    // assign clk_cnt_odd_incr = clk_div + 1;
+    // assign clk_cnt_odd = {1'b0,clk_cnt_odd_incr[7:1]}; //if odd divider then clk_cnt = (clk_div+1)/2
+    assign clk_cnt_odd = clk_div - 8'h1; //if odd divider then clk_cnt = clk_div - 1
+    assign clk_cnt_even = (clk_div == 8'h2) ? 8'h0 : ({1'b0,clk_div[7:1]} - 8'h1); //if even divider then clk_cnt = clk_div/2 - 1
+    assign clk_cnt_en2 = {1'b0,clk_cnt[7:1]} + 8'h1; //count value at which div2 toggles: clk_cnt/2 + 1
+
+    always_comb
+    begin
+        // div1 may toggle whenever the count is zero
+        if (counter == 'h0)
+            en1 = 1'b1;
+        else
+            en1 = 1'b0;
+
+        // the count restarts on a divider update and after reaching clk_cnt
+        if (clk_div_valid)
+            counter_next = 'h0;
+        else if (counter == clk_cnt)
+            counter_next = 'h0;
+        else
+            counter_next = counter + 1;
+
+        // div2 may toggle at count clk_cnt_en2, never during a divider update
+        if (clk_div_valid)
+            en2 = 1'b0;
+        else if (counter == clk_cnt_en2)
+            en2 = 1'b1;
+        else
+            en2 = 1'b0;
+    end
+
+    always_ff @(posedge clk, negedge rstn)
+    begin
+        if (~rstn)
+        begin
+            counter <= 'h0;
+            div1 <= 1'b0;
+            bypass <= BYPASS_INIT;
+            clk_cnt <= DIV_INIT;
+            is_odd <= 1'b0;
+            clk_div_valid_reg <= 1'b0;
+        end
+        else
+        begin
+            // the count is frozen while bypassing
+            if (!bypass)
+                counter <= counter_next;
+
+            // delayed copy of the update strobe, used to clear div2 in the inverted-clock process
+            clk_div_valid_reg <= clk_div_valid;
+            if (clk_div_valid)
+            begin
+                if ((clk_div == 8'h0) || (clk_div == 8'h1))
+                begin
+                    bypass <= 1'b1;
+                    clk_cnt <= 'h0;
+                    is_odd <= 1'b0;
+                end
+                else
+                begin
+                    bypass <= 1'b0;
+                    if (clk_div[0])
+                    begin
+                        is_odd <= 1'b1;
+                        clk_cnt <= clk_cnt_odd;
+                    end
+                    else
+                    begin
+                        is_odd <= 1'b0;
+                        clk_cnt <= clk_cnt_even;
+                    end
+                end
+                // every divider update restarts div1 from zero
+                div1 <= 1'b0;
+            end
+            else
+            begin
+                if (en1 && !bypass)
+                    div1 <= ~div1;
+            end
+        end
+    end
+
+    pulp_clock_inverter clk_inv_i
+    (
+        .clk_i(clk),
+        .clk_o(clk_inv)
+    );
+
+    // With PULP_DFT defined (and PULP_FPGA_EMUL not), test_mode selects `clk` instead of the
+    // inverted clock for the div2 register; in every other configuration the inverted clock
+    // is used directly.
+`ifndef PULP_FPGA_EMUL
+ `ifdef PULP_DFT
+    pulp_clock_mux2 clk_muxinv_i
+    (
+        .clk0_i(clk_inv),
+        .clk1_i(clk),
+        .clk_sel_i(test_mode),
+        .clk_o(clk_inv_test)
+    );
+ `else
+    assign clk_inv_test = clk_inv;
+ `endif
+`else
+    assign clk_inv_test = clk_inv;
+`endif
+
+    always_ff @(posedge clk_inv_test or negedge rstn)
+    begin
+        if (!rstn)
+        begin
+            div2 <= 1'b0;
+        end
+        else
+        begin
+            if (clk_div_valid_reg)
+                div2 <= 1'b0;
+            else if (en2 && is_odd && !bypass)
+                div2 <= ~div2;
+        end
+    end // always_ff @ (posedge clk_inv_test or negedge rstn)
+
+    pulp_clock_xor2 clock_xor_i
+    (
+        .clk_o(clk_out_gen),
+        .clk0_i(div1),
+        .clk1_i(div2)
+    );
+
+    pulp_clock_mux2 clk_mux_i
+    (
+        .clk0_i(clk_out_gen),
+        .clk1_i(clk),
+        .clk_sel_i(bypass || test_mode),
+        .clk_o(clk_out)
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv b/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv
new file mode 100644
index 0000000000..31295e80ec
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv
@@ -0,0 +1,57 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+
+// First-generation FIFO interface, kept as a shim over `fifo_v2`.
+// `THRESHOLD` is passed on as `ALM_FULL_TH` and `threshold_o` is driven from `alm_full_o`;
+// the almost-empty output of the wrapped FIFO is left unconnected. All other parameters
+// and ports are forwarded under the same names.
+/* verilator lint_off DECLFILENAME */
+module fifo #(
+    parameter bit          FALL_THROUGH = 1'b0, // fifo is in fall-through mode
+    parameter int unsigned DATA_WIDTH   = 32,   // default data width if the fifo is of type logic
+    parameter int unsigned DEPTH        = 8,    // depth can be arbitrary from 0 to 2**32
+    parameter int unsigned THRESHOLD    = 1,    // fill count until when to assert threshold_o
+    parameter type dtype                = logic [DATA_WIDTH-1:0]
+)(
+    input  logic  clk_i,            // Clock
+    input  logic  rst_ni,           // Asynchronous reset active low
+    input  logic  flush_i,          // flush the queue
+    input  logic  testmode_i,       // test_mode to bypass clock gating
+    // status flags
+    output logic  full_o,           // queue is full
+    output logic  empty_o,          // queue is empty
+    output logic  threshold_o,      // the FIFO is above the specified threshold
+    // as long as the queue is not full we can push new data
+    input  dtype  data_i,           // data to push into the queue
+    input  logic  push_i,           // data is valid and can be pushed to the queue
+    // as long as the queue is not empty we can pop new elements
+    output dtype  data_o,           // output data
+    input  logic  pop_i             // pop head from queue
+);
+    fifo_v2 #(
+        .FALL_THROUGH ( FALL_THROUGH ),
+        .DATA_WIDTH   ( DATA_WIDTH   ),
+        .DEPTH        ( DEPTH        ),
+        .ALM_FULL_TH  ( THRESHOLD    ),
+        .dtype        ( dtype        )
+    ) impl (
+        // connected by name
+        .clk_i,
+        .rst_ni,
+        .flush_i,
+        .testmode_i,
+        .full_o,
+        .empty_o,
+        .data_i,
+        .push_i,
+        .data_o,
+        .pop_i,
+        // renamed or unused on this interface
+        .alm_full_o  ( threshold_o ),
+        .alm_empty_o (             )
+    );
+endmodule
+/* verilator lint_on DECLFILENAME */
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv b/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv
new file mode 100644
index 0000000000..9c87ed9692
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv
@@ -0,0 +1,79 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+
+// Second-generation FIFO interface: `fifo_v3` plus almost-full / almost-empty flags that
+// are derived here from the fill level reported on `usage_o`.
+//
+// The thresholds are compared after being cut down to ADDR_DEPTH bits, so only their low
+// ADDR_DEPTH bits take part. For a power-of-two DEPTH a threshold equal to DEPTH (which the
+// elaboration-time checks at the bottom accept) therefore compares as zero.
+// NOTE(review): how `fifo_v3` reports a completely full queue on its ADDR_DEPTH-bit usage
+// output is not visible from this file - confirm before relying on `alm_full_o` when full.
+module fifo_v2 #(
+    parameter bit          FALL_THROUGH = 1'b0, // fifo is in fall-through mode
+    parameter int unsigned DATA_WIDTH   = 32,   // default data width if the fifo is of type logic
+    parameter int unsigned DEPTH        = 8,    // depth can be arbitrary from 0 to 2**32
+    parameter int unsigned ALM_EMPTY_TH = 1,    // almost empty threshold (when to assert alm_empty_o)
+    parameter int unsigned ALM_FULL_TH  = 1,    // almost full threshold (when to assert alm_full_o)
+    parameter type dtype                = logic [DATA_WIDTH-1:0],
+    // DO NOT OVERWRITE THIS PARAMETER
+    parameter int unsigned ADDR_DEPTH   = (DEPTH > 1) ? $clog2(DEPTH) : 1
+)(
+    input  logic  clk_i,            // Clock
+    input  logic  rst_ni,           // Asynchronous reset active low
+    input  logic  flush_i,          // flush the queue
+    input  logic  testmode_i,       // test_mode to bypass clock gating
+    // status flags
+    output logic  full_o,           // queue is full
+    output logic  empty_o,          // queue is empty
+    output logic  alm_full_o,       // FIFO fillstate >= the specified threshold
+    output logic  alm_empty_o,      // FIFO fillstate <= the specified threshold
+    // as long as the queue is not full we can push new data
+    input  dtype  data_i,           // data to push into the queue
+    input  logic  push_i,           // data is valid and can be pushed to the queue
+    // as long as the queue is not empty we can pop new elements
+    output dtype  data_o,           // output data
+    input  logic  pop_i             // pop head from queue
+);
+
+    // fill level as reported by the wrapped FIFO
+    logic [ADDR_DEPTH-1:0] usage;
+
+    // Threshold flags. Neither flag makes any sense in a FIFO of depth 0, so both are tied
+    // low in that configuration.
+    assign alm_full_o  = (DEPTH == 0) ? 1'b0 : (usage >= ALM_FULL_TH[ADDR_DEPTH-1:0]);
+    assign alm_empty_o = (DEPTH == 0) ? 1'b0 : (usage <= ALM_EMPTY_TH[ADDR_DEPTH-1:0]);
+
+    fifo_v3 #(
+        .FALL_THROUGH ( FALL_THROUGH ),
+        .DATA_WIDTH   ( DATA_WIDTH   ),
+        .DEPTH        ( DEPTH        ),
+        .dtype        ( dtype        )
+    ) i_fifo_v3 (
+        .clk_i      ( clk_i      ),
+        .rst_ni     ( rst_ni     ),
+        .flush_i    ( flush_i    ),
+        .testmode_i ( testmode_i ),
+        .full_o     ( full_o     ),
+        .empty_o    ( empty_o    ),
+        .usage_o    ( usage      ),
+        .data_i     ( data_i     ),
+        .push_i     ( push_i     ),
+        .data_o     ( data_o     ),
+        .pop_i      ( pop_i      )
+    );
+
+    // Threshold sanity checks, excluded from synthesis and from Verilator builds.
+    // pragma translate_off
+    `ifndef VERILATOR
+    initial begin
+        assert (ALM_FULL_TH <= DEPTH) else $error("ALM_FULL_TH can't be larger than the DEPTH.");
+        assert (ALM_EMPTY_TH <= DEPTH) else $error("ALM_EMPTY_TH can't be larger than the DEPTH.");
+    end
+    `endif
+    // pragma translate_on
+
+endmodule // fifo_v2
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/find_first_one.sv b/vendor/pulp-platform/common_cells/src/deprecated/find_first_one.sv
new file mode 100644
index 0000000000..ee3ba20f70
---
/dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/find_first_one.sv
@@ -0,0 +1,83 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Deprecated, use lzc unit instead.
+
+/// A leading-one finder / leading zero counter.
+/// Set FLIP to 0 for find_first_one => first_one_o is the index of the first one (from the LSB)
+/// Set FLIP to 1 for leading zero counter => first_one_o is the number of leading zeroes (from the MSB)
+///
+/// The search is a binary tree kept in flat arrays: node `n` has its children at `2n+1` and
+/// `2n+2`, node 0 is the root, and the deepest level pairs up neighbouring input bits. Every
+/// node forwards the index coming from its lower-numbered child whenever that child has seen
+/// a one, so the lowest set position of the (possibly flipped) input wins.
+///
+/// `no_ones_o` is high when no input bit is set. For `WIDTH == 1` the tree has no levels and
+/// the outputs are the constants `first_one_o = 0` and `no_ones_o = 1`, independent of `in_i`.
+module find_first_one #(
+    /// The width of the input vector.
+    /// The default of -1 is not usable for the port declarations, so it has to be overridden.
+    parameter int WIDTH = -1,
+    /// 0: search from the LSB, 1: search from the MSB (the input is bit-reversed first).
+    parameter int FLIP = 0
+)(
+    input logic [WIDTH-1:0] in_i,
+    output logic [$clog2(WIDTH)-1:0] first_one_o,
+    output logic no_ones_o
+);
+
+    // number of tree levels, also the width of an index
+    localparam int NUM_LEVELS = $clog2(WIDTH);
+
+    // pragma translate_off
+    initial begin
+        assert(WIDTH >= 0);
+    end
+    // pragma translate_on
+
+    logic [WIDTH-1:0][NUM_LEVELS-1:0] index_lut;              // index_lut[j] == j
+    logic [2**NUM_LEVELS-1:0] sel_nodes;                      // node flag: a one exists below this node
+    logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0] index_nodes;    // node value: index of the winning bit
+
+    logic [WIDTH-1:0] in_tmp;
+
+    // optional bit reversal of the input
+    for (genvar i = 0; i < WIDTH; i++) begin
+        assign in_tmp[i] = FLIP ? in_i[WIDTH-1-i] : in_i[i];
+    end
+
+    for (genvar j = 0; j < WIDTH; j++) begin
+        assign index_lut[j] = j;
+    end
+
+    for (genvar level = 0; level < NUM_LEVELS; level++) begin
+
+        // inner levels: combine the two child nodes, preferring the lower-numbered one
+        if (level < NUM_LEVELS-1) begin
+            for (genvar l = 0; l < 2**level; l++) begin
+                assign sel_nodes[2**level-1+l] = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
+                assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ?
+                    index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1];
+            end
+        end
+
+        // deepest level: each node looks at input bits 2k and 2k+1
+        if (level == NUM_LEVELS-1) begin
+            for (genvar k = 0; k < 2**level; k++) begin
+                // if two successive indices are still in the vector...
+                if (k * 2 < WIDTH-1) begin
+                    assign sel_nodes[2**level-1+k] = in_tmp[k*2] | in_tmp[k*2+1];
+                    assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
+                end
+                // if only the first index is still in the vector...
+                if (k * 2 == WIDTH-1) begin
+                    assign sel_nodes[2**level-1+k] = in_tmp[k*2];
+                    assign index_nodes[2**level-1+k] = index_lut[k*2];
+                end
+                // if index is out of range
+                if (k * 2 > WIDTH-1) begin
+                    assign sel_nodes[2**level-1+k] = 1'b0;
+                    assign index_nodes[2**level-1+k] = '0;
+                end
+            end
+        end
+    end
+
+    // the root node carries the result; without any levels the outputs are constants
+    assign first_one_o = NUM_LEVELS > 0 ? index_nodes[0] : '0;
+    assign no_ones_o = NUM_LEVELS > 0 ? ~sel_nodes[0] : '1;
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/generic_LFSR_8bit.sv b/vendor/pulp-platform/common_cells/src/deprecated/generic_LFSR_8bit.sv
new file mode 100644
index 0000000000..fb0080accf
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/generic_LFSR_8bit.sv
@@ -0,0 +1,64 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Igor Loi
+
+// 8-bit shift register with inverted-XOR feedback, used as a pseudo-random index source.
+// Each enabled clock shifts the register left by one and inserts the inverted XOR of bits
+// 7, 3, 2 and 1 at bit 0. Register bits [BIN_WIDTH:1] (bit 1 alone when OH_WIDTH is 2) form
+// the binary output; the one-hot output has the bit with that index set.
+// With this feedback the all-zero default seed advances normally, whereas an all-ones
+// register value reproduces itself.
+module generic_LFSR_8bit
+  #(
+    parameter OH_WIDTH  = 4,
+    parameter BIN_WIDTH = $clog2(OH_WIDTH),
+    parameter SEED      = 8'b00000000
+  )
+  (
+    output logic [OH_WIDTH-1:0]  data_OH_o,  // One hot encoding
+    output logic [BIN_WIDTH-1:0] data_BIN_o, // Binary encoding
+    input  logic                 enable_i,   //
+    input  logic                 clk,        //
+    input  logic                 rst_n       //
+  );
+
+    logic [7:0]           lfsr_q;
+    logic                 feedback_bit;
+    logic [BIN_WIDTH-1:0] sel_idx;
+
+    // TAPS for XOR feedback
+    assign feedback_bit = ~(lfsr_q[7] ^ lfsr_q[3] ^ lfsr_q[2] ^ lfsr_q[1]);
+
+    // shift register: asynchronous reset to SEED, shifts only while enabled
+    always_ff @(posedge clk, negedge rst_n)
+    begin
+        if (!rst_n)
+            lfsr_q <= SEED;
+        else if (enable_i)
+            lfsr_q <= {lfsr_q[6:0], feedback_bit};
+    end
+
+    // index taken from the register bits just above bit 0
+    generate
+        if (OH_WIDTH == 2)
+            assign sel_idx = lfsr_q[1];
+        else
+            assign sel_idx = lfsr_q[BIN_WIDTH:1];
+    endgenerate
+
+    assign data_BIN_o = sel_idx;
+
+    // Bin to One Hot Encoder
+    always_comb
+    begin
+        data_OH_o          = '0;
+        data_OH_o[sel_idx] = 1'b1;
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo.sv b/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo.sv
new file mode 100644
index 0000000000..ece4aac78e
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo.sv
@@ -0,0 +1,274 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// ============================================================================= //
+// Company: Multitherman Laboratory @ DEIS - University of Bologna //
+// Viale Risorgimento 2 40136 //
+// Bologna - fax 0512093785 - //
+// //
+// Engineer: Igor Loi - igor.loi@unibo.it //
+// //
+// //
+// Additional contributions by: //
+// //
+// //
+// //
+// Create Date: 01/02/2014 //
+// Design Name: MISC //
+// Module Name: generic_fifo //
+// Project Name: PULP //
+// Language: SystemVerilog //
+// //
+// Description: A simple FIFO used in the D_address_decoder, and D_allocator //
+// to store the destinations ports //
+// //
+// Revision: //
+// Revision v0.1 - 01/02/2014 : File Created //
+// Revision v0.2 - 02/09/2015 : Updated with a global CG cell //
+// //
+// ============================================================================= //
+
+// Register-based FIFO with valid/grant handshakes on both sides.
+//
+// Push side: a word is stored on a clock edge where `valid_i` and `grant_o` are both high.
+// Pop side:  `data_o` shows the oldest word while `valid_o` is high; `grant_i` removes it.
+// The fill state is tracked by a three-state machine (EMPTY / MIDDLE / FULL) together with a
+// push and a pop pointer that wrap from DATA_DEPTH-1 back to 0. The storage registers are
+// clocked through a gating cell that is enabled in the cycles in which a word is stored
+// (the gate is bypassed when PULP_FPGA_EMUL is defined).
+//
+// Parameters:
+//   DATA_WIDTH - width of a stored word, at least 1.
+//   DATA_DEPTH - number of words, at least 2 (a depth of 1 would leave the pointers with
+//                `$clog2(1) == 0` address bits).
+module generic_fifo
+#(
+    parameter int unsigned DATA_WIDTH = 32,
+    parameter int unsigned DATA_DEPTH = 8
+)
+(
+    input  logic                  clk,
+    input  logic                  rst_n,
+    //PUSH SIDE
+    input  logic [DATA_WIDTH-1:0] data_i,
+    input  logic                  valid_i,
+    output logic                  grant_o,
+    //POP SIDE
+    output logic [DATA_WIDTH-1:0] data_o,
+    output logic                  valid_o,
+    input  logic                  grant_i,
+
+    input  logic                  test_mode_i
+);
+
+
+    // Local Parameter
+    localparam int unsigned ADDR_DEPTH = $clog2(DATA_DEPTH);
+    enum logic [1:0] { EMPTY, FULL, MIDDLE } CS, NS;
+    // Internal Signals
+
+    logic gate_clock;
+    logic clk_gated;
+
+    logic [ADDR_DEPTH-1:0] Pop_Pointer_CS, Pop_Pointer_NS;
+    logic [ADDR_DEPTH-1:0] Push_Pointer_CS, Push_Pointer_NS;
+    logic [DATA_WIDTH-1:0] FIFO_REGISTERS[DATA_DEPTH-1:0];
+    int unsigned i;
+
+    // Parameter Check
+    // synopsys translate_off
+    initial begin : parameter_check
+        if (DATA_WIDTH < 1) begin
+            $display("ERROR: %m :\n Invalid value (%d) for parameter DATA_WIDTH (legal range: greater than 0)", DATA_WIDTH );
+        end
+
+        // a single-entry FIFO cannot be built with this pointer scheme, so 1 is rejected too
+        if (DATA_DEPTH < 2) begin
+            $display("ERROR: %m :\n Invalid value (%d) for parameter DATA_DEPTH (legal range: greater than 1)", DATA_DEPTH );
+        end
+    end
+    // synopsys translate_on
+
+`ifndef PULP_FPGA_EMUL
+    cluster_clock_gating cg_cell
+    (
+        .clk_i     ( clk         ),
+        .en_i      (~gate_clock  ),
+        .test_en_i ( test_mode_i ),
+        .clk_o     ( clk_gated   )
+    );
+`else
+    assign clk_gated = clk;
+`endif
+
+    // UPDATE THE STATE
+    always_ff @(posedge clk, negedge rst_n)
+    begin
+        if(rst_n == 1'b0)
+        begin
+            CS              <= EMPTY;
+            Pop_Pointer_CS  <= {ADDR_DEPTH {1'b0}};
+            Push_Pointer_CS <= {ADDR_DEPTH {1'b0}};
+        end
+        else
+        begin
+            CS              <= NS;
+            Pop_Pointer_CS  <= Pop_Pointer_NS;
+            Push_Pointer_CS <= Push_Pointer_NS;
+        end
+    end
+
+
+    // Compute Next State
+    always_comb
+    begin
+        gate_clock = 1'b0;
+
+        case(CS)
+
+        EMPTY:
+        begin
+            grant_o = 1'b1;
+            valid_o = 1'b0;
+
+            case(valid_i)
+            1'b0 :
+            begin
+                NS              = EMPTY;
+                Push_Pointer_NS = Push_Pointer_CS;
+                Pop_Pointer_NS  = Pop_Pointer_CS;
+                gate_clock      = 1'b1;
+            end
+
+            1'b1:
+            begin
+                NS              = MIDDLE;
+
+                // The FIFO can run empty with both pointers on the last slot, so the push
+                // pointer must wrap here exactly as it does in the MIDDLE state. A plain
+                // increment would step past DATA_DEPTH-1 for depths that are not a power of
+                // two, and the following word would be written outside the register array.
+                if(Push_Pointer_CS == DATA_DEPTH-1)
+                    Push_Pointer_NS = 0;
+                else
+                    Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                Pop_Pointer_NS  = Pop_Pointer_CS;
+            end
+
+            endcase
+        end//~EMPTY
+
+        MIDDLE:
+        begin
+            grant_o = 1'b1;
+            valid_o = 1'b1;
+
+            case({valid_i,grant_i})
+
+            // pop only
+            2'b01:
+            begin
+                gate_clock = 1'b1;
+
+                // the word being removed is the last one when the pop pointer sits right
+                // behind the push pointer
+                if((Pop_Pointer_CS == Push_Pointer_CS -1 ) || ((Pop_Pointer_CS == DATA_DEPTH-1) && (Push_Pointer_CS == 0) ))
+                    NS = EMPTY;
+                else
+                    NS = MIDDLE;
+
+                Push_Pointer_NS = Push_Pointer_CS;
+
+                if(Pop_Pointer_CS == DATA_DEPTH-1)
+                    Pop_Pointer_NS = 0;
+                else
+                    Pop_Pointer_NS = Pop_Pointer_CS + 1'b1;
+            end
+
+            // idle
+            2'b00 :
+            begin
+                gate_clock      = 1'b1;
+                NS              = MIDDLE;
+                Push_Pointer_NS = Push_Pointer_CS;
+                Pop_Pointer_NS  = Pop_Pointer_CS;
+            end
+
+            // push and pop in the same cycle: fill level unchanged
+            2'b11:
+            begin
+                NS = MIDDLE;
+
+                if(Push_Pointer_CS == DATA_DEPTH-1)
+                    Push_Pointer_NS = 0;
+                else
+                    Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                if(Pop_Pointer_CS == DATA_DEPTH-1)
+                    Pop_Pointer_NS = 0;
+                else
+                    Pop_Pointer_NS = Pop_Pointer_CS + 1'b1;
+            end
+
+            // push only
+            2'b10:
+            begin
+                // the word being stored fills the last free slot when the push pointer sits
+                // right behind the pop pointer
+                if(( Push_Pointer_CS == Pop_Pointer_CS - 1) || ( (Push_Pointer_CS == DATA_DEPTH-1) && (Pop_Pointer_CS == 0) ))
+                    NS = FULL;
+                else
+                    NS = MIDDLE;
+
+                if(Push_Pointer_CS == DATA_DEPTH - 1)
+                    Push_Pointer_NS = 0;
+                else
+                    Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                Pop_Pointer_NS = Pop_Pointer_CS;
+            end
+
+            endcase
+        end
+
+        FULL:
+        begin
+            grant_o    = 1'b0;
+            valid_o    = 1'b1;
+            gate_clock = 1'b1;
+
+            case(grant_i)
+            1'b1:
+            begin
+                NS = MIDDLE;
+
+                Push_Pointer_NS = Push_Pointer_CS;
+
+                if(Pop_Pointer_CS == DATA_DEPTH-1)
+                    Pop_Pointer_NS = 0;
+                else
+                    Pop_Pointer_NS = Pop_Pointer_CS + 1'b1;
+            end
+
+            1'b0:
+            begin
+                NS              = FULL;
+                Push_Pointer_NS = Push_Pointer_CS;
+                Pop_Pointer_NS  = Pop_Pointer_CS;
+            end
+            endcase
+
+        end // end of FULL
+
+        default :
+        begin
+            gate_clock      = 1'b1;
+            grant_o         = 1'b0;
+            valid_o         = 1'b0;
+            NS              = EMPTY;
+            Pop_Pointer_NS  = 0;
+            Push_Pointer_NS = 0;
+        end
+
+        endcase
+    end
+
+    // storage registers, on the gated clock
+    always_ff @(posedge clk_gated, negedge rst_n)
+    begin
+        if(rst_n == 1'b0)
+        begin
+            for (i=0; i< DATA_DEPTH; i++)
+                FIFO_REGISTERS[i] <= {DATA_WIDTH {1'b0}};
+        end
+        else
+        begin
+            if((grant_o == 1'b1) && (valid_i == 1'b1))
+                FIFO_REGISTERS[Push_Pointer_CS] <= data_i;
+        end
+    end
+
+    assign data_o = FIFO_REGISTERS[Pop_Pointer_CS];
+
+endmodule // generic_fifo
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo_adv.sv b/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo_adv.sv
new file mode 100644
index 0000000000..df6cc0d796
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo_adv.sv
@@ -0,0 +1,264 @@
+// Copyright 2018 ETH Zurich and
University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Igor Loi
+
+// Synchronous FIFO with a valid/grant handshake on both sides, implemented as
+// a three-state FSM (EMPTY, MIDDLE, FULL) plus push/pop pointers into a
+// register array.
+//
+//  - Push side: a word is stored when valid_i and grant_o are both high.
+//  - Pop side:  data_o is a combinational read at the pop pointer; the word is
+//               consumed when valid_o and grant_i are both high.
+//  - clear_i:   synchronously returns the FSM to EMPTY and zeroes both
+//               pointers. The storage registers are only cleared by rst_n.
+//  - Storage is clocked by clk_gated. Unless PULP_FPGA_EMUL is defined,
+//    clk_gated is produced by a cluster_clock_gating cell enabled with
+//    ~gate_clock; with PULP_FPGA_EMUL it is simply clk.
+//
+// Parameters:
+//  - DATA_WIDTH: width of one FIFO word (at least 1).
+//  - DATA_DEPTH: number of words (at least 2; the pointer width is
+//                $clog2(DATA_DEPTH), which is zero for a depth of 1).
+//                Depths that are not a power of two are supported: every
+//                pointer increment wraps explicitly at DATA_DEPTH-1.
+module generic_fifo_adv
+#(
+    parameter int unsigned DATA_WIDTH = 32,
+    parameter int unsigned DATA_DEPTH = 8
+)
+(
+    input  logic                  clk,
+    input  logic                  rst_n,
+    input  logic                  clear_i,
+
+    //PUSH SIDE
+    input  logic [DATA_WIDTH-1:0] data_i,
+    input  logic                  valid_i,
+    output logic                  grant_o,
+
+    //POP SIDE
+    output logic [DATA_WIDTH-1:0] data_o,
+    output logic                  valid_o,
+    input  logic                  grant_i,
+
+    input  logic                  test_mode_i
+);
+
+
+    // Local Parameter
+    localparam int unsigned ADDR_DEPTH = $clog2(DATA_DEPTH);
+    enum logic [1:0] { EMPTY, FULL, MIDDLE } CS, NS;
+    // Internal Signals
+
+    logic gate_clock;
+    logic clk_gated;
+
+    logic [ADDR_DEPTH-1:0] Pop_Pointer_CS,  Pop_Pointer_NS;
+    logic [ADDR_DEPTH-1:0] Push_Pointer_CS, Push_Pointer_NS;
+    logic [DATA_WIDTH-1:0] FIFO_REGISTERS[DATA_DEPTH-1:0];
+    int unsigned i;
+
+    // Parameter Check
+    // synopsys translate_off
+    initial
+    begin : parameter_check
+        integer param_err_flg;
+        param_err_flg = 0;
+
+        if (DATA_WIDTH < 1)
+        begin
+            param_err_flg = 1;
+            $display("ERROR: %m :\n Invalid value (%d) for parameter DATA_WIDTH (legal range: greater than 0)", DATA_WIDTH );
+        end
+
+        // A depth of 1 gives a zero-width pointer, so the legal range really
+        // is "greater than 1" as the message states.
+        if (DATA_DEPTH < 2)
+        begin
+            param_err_flg = 1;
+            $display("ERROR: %m :\n Invalid value (%d) for parameter DATA_DEPTH (legal range: greater than 1)", DATA_DEPTH );
+        end
+    end
+    // synopsys translate_on
+
+`ifndef PULP_FPGA_EMUL
+    cluster_clock_gating cg_cell
+    (
+        .clk_i     ( clk         ),
+        .en_i      (~gate_clock  ),
+        .test_en_i ( test_mode_i ),
+        .clk_o     ( clk_gated   )
+    );
+`else
+    assign clk_gated = clk;
+`endif
+
+    // UPDATE THE STATE
+    // clear_i has priority over the regular next-state update.
+    always_ff @(posedge clk, negedge rst_n)
+    begin
+        if(rst_n == 1'b0)
+        begin
+            CS              <= EMPTY;
+            Pop_Pointer_CS  <= {ADDR_DEPTH {1'b0}};
+            Push_Pointer_CS <= {ADDR_DEPTH {1'b0}};
+        end
+        else
+        begin
+            if(clear_i)
+            begin
+                CS              <= EMPTY;
+                Pop_Pointer_CS  <= {ADDR_DEPTH {1'b0}};
+                Push_Pointer_CS <= {ADDR_DEPTH {1'b0}};
+            end
+            else
+            begin
+                CS              <= NS;
+                Pop_Pointer_CS  <= Pop_Pointer_NS;
+                Push_Pointer_CS <= Push_Pointer_NS;
+            end
+        end
+    end
+
+
+    // Compute Next State
+    always_comb
+    begin
+        gate_clock = 1'b0;
+
+        case(CS)
+
+        EMPTY:
+        begin
+            grant_o = 1'b1;
+            valid_o = 1'b0;
+
+            case(valid_i)
+            1'b0 :
+            begin
+                NS              = EMPTY;
+                Push_Pointer_NS = Push_Pointer_CS;
+                Pop_Pointer_NS  = Pop_Pointer_CS;
+                gate_clock      = 1'b1;
+            end
+
+            1'b1:
+            begin
+                NS = MIDDLE;
+
+                // The FIFO can become empty with the pointers anywhere in the
+                // array, so the push pointer must wrap here as well. Relying on
+                // natural overflow is only correct for power-of-two depths.
+                if(Push_Pointer_CS == DATA_DEPTH-1)
+                    Push_Pointer_NS = 0;
+                else
+                    Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                Pop_Pointer_NS = Pop_Pointer_CS;
+            end
+
+            endcase
+        end//~EMPTY
+
+        MIDDLE:
+        begin
+            grant_o = 1'b1;
+            valid_o = 1'b1;
+
+            case({valid_i,grant_i})
+
+            2'b01: // pop only
+            begin
+                gate_clock = 1'b1;
+
+                // Popping the last stored word empties the FIFO.
+                if((Pop_Pointer_CS == Push_Pointer_CS -1 ) || ((Pop_Pointer_CS == DATA_DEPTH-1) && (Push_Pointer_CS == 0) ))
+                    NS = EMPTY;
+                else
+                    NS = MIDDLE;
+
+                Push_Pointer_NS = Push_Pointer_CS;
+
+                if(Pop_Pointer_CS == DATA_DEPTH-1)
+                    Pop_Pointer_NS = 0;
+                else
+                    Pop_Pointer_NS = Pop_Pointer_CS + 1'b1;
+            end
+
+            2'b00 : // idle
+            begin
+                gate_clock      = 1'b1;
+                NS              = MIDDLE;
+                Push_Pointer_NS = Push_Pointer_CS;
+                Pop_Pointer_NS  = Pop_Pointer_CS;
+            end
+
+            2'b11: // simultaneous push and pop, fill level unchanged
+            begin
+                NS = MIDDLE;
+
+                if(Push_Pointer_CS == DATA_DEPTH-1)
+                    Push_Pointer_NS = 0;
+                else
+                    Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                if(Pop_Pointer_CS == DATA_DEPTH-1)
+                    Pop_Pointer_NS = 0;
+                else
+                    Pop_Pointer_NS = Pop_Pointer_CS + 1'b1;
+            end
+
+            2'b10: // push only
+            begin
+                // Writing the last free slot fills the FIFO.
+                if(( Push_Pointer_CS == Pop_Pointer_CS - 1) || ( (Push_Pointer_CS == DATA_DEPTH-1) && (Pop_Pointer_CS == 0) ))
+                    NS = FULL;
+                else
+                    NS = MIDDLE;
+
+                if(Push_Pointer_CS == DATA_DEPTH - 1)
+                    Push_Pointer_NS = 0;
+                else
+                    Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                Pop_Pointer_NS = Pop_Pointer_CS;
+            end
+
+            endcase
+        end
+
+        FULL:
+        begin
+            // No push is accepted while full, so the storage clock can be gated.
+            grant_o    = 1'b0;
+            valid_o    = 1'b1;
+            gate_clock = 1'b1;
+
+            case(grant_i)
+            1'b1:
+            begin
+                NS = MIDDLE;
+
+                Push_Pointer_NS = Push_Pointer_CS;
+
+                if(Pop_Pointer_CS == DATA_DEPTH-1)
+                    Pop_Pointer_NS = 0;
+                else
+                    Pop_Pointer_NS = Pop_Pointer_CS + 1'b1;
+            end
+
+            1'b0:
+            begin
+                NS              = FULL;
+                Push_Pointer_NS = Push_Pointer_CS;
+                Pop_Pointer_NS  = Pop_Pointer_CS;
+            end
+            endcase
+
+        end // end of FULL
+
+        default : // unused state encoding: fall back to an empty FIFO
+        begin
+            gate_clock      = 1'b1;
+            grant_o         = 1'b0;
+            valid_o         = 1'b0;
+            NS              = EMPTY;
+            Pop_Pointer_NS  = 0;
+            Push_Pointer_NS = 0;
+        end
+
+        endcase
+    end
+
+    // Storage: written at the push pointer on an accepted push.
+    always_ff @(posedge clk_gated, negedge rst_n)
+    begin
+        if(rst_n == 1'b0)
+        begin
+            for (i=0; i< DATA_DEPTH; i++)
+                FIFO_REGISTERS[i] <= {DATA_WIDTH {1'b0}};
+        end
+        else
+        begin
+            if((grant_o == 1'b1) && (valid_i == 1'b1))
+                FIFO_REGISTERS[Push_Pointer_CS] <= data_i;
+        end
+    end
+
+    assign data_o = FIFO_REGISTERS[Pop_Pointer_CS];
+
+endmodule // generic_fifo_adv
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/prioarbiter.sv b/vendor/pulp-platform/common_cells/src/deprecated/prioarbiter.sv
new file mode 100644
index 0000000000..730ceca4bf
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/prioarbiter.sv
@@ -0,0 +1,89 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba , ETH Zurich
+// Date: 16.03.2019
+// Description: Priority arbiter with Lock in. Port 0 has priority over port 1, port 1 over port2
+// and so on. If the `LOCK_IN` feature is activated the arbitration decision is kept
+// when the `en_i` is low.
+
+// Dependencies: relies on fast leading zero counter tree "onehot_to_bin" in common_cells
+//
+// Fixed-priority arbiter: the lowest-numbered requesting port is acknowledged
+// while en_i is high. idx_o reports either the freshly decoded index or, while
+// the hold register is set, the index captured on the previous cycle.
+module prioarbiter #(
+  parameter int unsigned NUM_REQ = 13,
+  parameter int unsigned LOCK_IN = 0
+) (
+  input  logic                       clk_i,
+  input  logic                       rst_ni,
+
+  input  logic                       flush_i, // clears the fsm and control signal registers
+  input  logic                       en_i,    // arbiter enable
+  input  logic [NUM_REQ-1:0]         req_i,   // request signals
+
+  output logic [NUM_REQ-1:0]         ack_o,   // acknowledge signals
+  output logic                       vld_o,   // request ack'ed
+  output logic [$clog2(NUM_REQ)-1:0] idx_o    // idx output
+);
+
+  localparam SEL_WIDTH = $clog2(NUM_REQ);
+
+  // Index decoded from the one-hot acknowledge vector.
+  logic [$clog2(NUM_REQ)-1:0] winner_idx;
+
+  // Hold flag and the index captured together with it.
+  logic                 hold_d, hold_q;
+  logic [SEL_WIDTH-1:0] held_idx_d, held_idx_q;
+
+  // Any request while enabled counts as valid.
+  assign vld_o = en_i & (|req_i);
+  // The held index takes over whenever the hold flag is set.
+  assign idx_o = (!hold_q) ? winner_idx : held_idx_q;
+
+  // Port 0 wins unconditionally when it requests.
+  assign ack_o[0] = req_i[0] & en_i;
+  // Every later port wins only if no lower-numbered port was acknowledged.
+  for (genvar i = 1; i < NUM_REQ; i++) begin : gen_arb_req_ports
+    assign ack_o[i] = en_i & req_i[i] & ~(|ack_o[i-1:0]);
+  end
+
+  onehot_to_bin #(
+    .ONEHOT_WIDTH ( NUM_REQ )
+  ) i_onehot_to_bin (
+    .onehot ( ack_o      ),
+    .bin    ( winner_idx )
+  );
+
+  if (LOCK_IN != 0) begin : gen_lock_in
+    // Set the hold flag when there is a request but the arbiter is disabled.
+    // NOTE(review): ack_o is all-zero while en_i is low, so winner_idx is
+    // decoded from an empty vector in exactly the cycles where hold_d is set;
+    // confirm that the captured index is the intended one.
+    assign held_idx_d = idx_o;
+    assign hold_d     = ~en_i & (|req_i);
+  end else begin
+    // Lock-in disabled: the hold registers stay at zero.
+    assign held_idx_d = '0;
+    assign hold_d     = '0;
+  end
+
+  // Hold registers: asynchronous reset, synchronous flush.
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      held_idx_q <= '0;
+      hold_q     <= 1'b0;
+    end else if (flush_i) begin
+      held_idx_q <= '0;
+      hold_q     <= 1'b0;
+    end else begin
+      held_idx_q <= held_idx_d;
+      hold_q     <= hold_d;
+    end
+  end
+
+endmodule : prioarbiter
+
+
+
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync.sv b/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync.sv
new file mode 100644
index 0000000000..2b436163e5
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync.sv
@@ -0,0 +1,36 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini
+
+// Chain of STAGES flip-flops clocked by clk_i with an active-low asynchronous
+// reset. serial_i enters at bit 0 and serial_o is the last stage, so the
+// output lags the input by STAGES clock edges.
+//
+// STAGES must be at least 1. A single stage degenerates to one register.
+module pulp_sync
+  #(
+    parameter STAGES = 2
+  )
+  (
+    input  logic clk_i,
+    input  logic rstn_i,
+    input  logic serial_i,
+    output logic serial_o
+  );
+
+  logic [STAGES-1:0] r_reg;
+
+  always_ff @(posedge clk_i, negedge rstn_i)
+  begin
+    if(!rstn_i)
+      r_reg <= 'h0;
+    else
+      // Shift left by one: the cast keeps the low STAGES bits of
+      // {r_reg, serial_i}. This equals {r_reg[STAGES-2:0], serial_i} for
+      // STAGES >= 2 and avoids the illegal [-1:0] part-select for STAGES == 1.
+      r_reg <= STAGES'({r_reg, serial_i});
+  end
+
+  assign serial_o = r_reg[STAGES-1];
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync_wedge.sv b/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync_wedge.sv
new file mode 100644
index 0000000000..66cee57d2c
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync_wedge.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini
+
+// Passes serial_i through a pulp_sync chain and compares the chain output with
+// a one-cycle-delayed copy to flag rising and falling edges. The delay register
+// runs on a clock gated by en_i (pulp_clock_gating, test enable tied low).
+module pulp_sync_wedge #(
+  parameter int unsigned STAGES = 2
+) (
+  input  logic clk_i,
+  input  logic rstn_i,
+  input  logic en_i,
+  input  logic serial_i,
+  output logic r_edge_o,
+  output logic f_edge_o,
+  output logic serial_o
+);
+
+  logic gated_clk;   // clk_i gated by en_i
+  logic sync_now;    // output of the pulp_sync chain
+  logic sync_prev;   // sync_now delayed by one gated clock edge
+
+  pulp_sync #(
+    .STAGES ( STAGES )
+  ) i_pulp_sync (
+    .clk_i    ( clk_i    ),
+    .rstn_i   ( rstn_i   ),
+    .serial_i ( serial_i ),
+    .serial_o ( sync_now )
+  );
+
+  pulp_clock_gating i_pulp_clock_gating (
+    .clk_i     ( clk_i     ),
+    .en_i      ( en_i      ),
+    .test_en_i ( 1'b0      ),
+    .clk_o     ( gated_clk )
+  );
+
+  always_ff @(posedge gated_clk, negedge rstn_i) begin
+    if (!rstn_i) sync_prev <= 1'b0;
+    else         sync_prev <= sync_now;
+  end
+
+  // An edge is a mismatch between the current and the delayed sample.
+  assign r_edge_o = ~sync_prev & sync_now;
+  assign f_edge_o = sync_prev & ~sync_now;
+  assign serial_o = sync_prev;
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/rrarbiter.sv b/vendor/pulp-platform/common_cells/src/deprecated/rrarbiter.sv
new file mode 100644
index 0000000000..bf806c5e42
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/rrarbiter.sv
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner , ETH Zurich
+// Date: 16.08.2018
+// Description: Fair round robin arbiter with lock feature.
+//
+// The rrarbiter employs fair round robin arbitration - i.e. the priorities
+// rotate each cycle.
+//
+// The lock-in feature prevents the arbiter from changing the arbitration
+// decision when the arbiter is disabled. I.e., the index of the first request
+// that wins the arbitration will be locked until en_i is asserted again.
+//
+// Dependencies: relies on rr_arb_tree from common_cells.
+
+// Compatibility wrapper that maps the legacy rrarbiter ports onto rr_arb_tree.
+// The tree carries a 1-bit dummy payload tied to zero; its data output is left
+// unconnected.
+module rrarbiter #(
+  parameter int unsigned NUM_REQ = 64,
+  parameter bit          LOCK_IN = 1'b0
+) (
+  input  logic                       clk_i,
+  input  logic                       rst_ni,
+
+  input  logic                       flush_i, // clears arbiter state
+  input  logic                       en_i,    // arbiter enable
+  input  logic [NUM_REQ-1:0]         req_i,   // request signals
+
+  output logic [NUM_REQ-1:0]         ack_o,   // acknowledge signals
+  output logic                       vld_o,   // request ack'ed
+  output logic [$clog2(NUM_REQ)-1:0] idx_o    // idx output
+);
+
+  // Request output of the arbitration tree, looped back into its grant input.
+  logic tree_req;
+
+  // Valid whenever any request is pending and the arbiter is enabled.
+  assign vld_o = en_i & (|req_i);
+
+  rr_arb_tree #(
+    .NumIn     ( NUM_REQ ),
+    .DataWidth ( 1       ),
+    .LockIn    ( LOCK_IN )
+  ) i_rr_arb_tree (
+    .clk_i   ( clk_i           ),
+    .rst_ni  ( rst_ni          ),
+    .flush_i ( flush_i         ),
+    .rr_i    ( '0              ),
+    // request side
+    .req_i   ( req_i           ),
+    .gnt_o   ( ack_o           ),
+    .data_i  ( '0              ),
+    // grant side: only grant while enabled and a request is presented
+    .gnt_i   ( tree_req & en_i ),
+    .req_o   ( tree_req        ),
+    .data_o  (                 ),
+    .idx_o   ( idx_o           )
+  );
+
+endmodule : rrarbiter
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/sram.sv b/vendor/pulp-platform/common_cells/src/deprecated/sram.sv
new file mode 100644
index 0000000000..fca1372bfe
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/sram.sv
@@ -0,0 +1,46 @@
+// Copyright 2017, 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Date: 13.10.2017
+// Description: SRAM Behavioral Model
+
+// Single-port behavioural memory with a per-bit write enable.
+//  - Write (req_i && we_i): every bit of ram[addr_i] whose be_i bit is set
+//    takes the matching wdata_i bit.
+//  - Read (req_i && !we_i): addr_i is registered, and rdata_o continuously
+//    shows the word at the registered address.
+// There is no reset: the array and the read address start uninitialised.
+module sram #(
+  int unsigned DATA_WIDTH = 64,
+  int unsigned NUM_WORDS  = 1024
+)(
+  input  logic                         clk_i,
+
+  input  logic                         req_i,
+  input  logic                         we_i,
+  input  logic [$clog2(NUM_WORDS)-1:0] addr_i,
+  input  logic [DATA_WIDTH-1:0]        wdata_i,
+  input  logic [DATA_WIDTH-1:0]        be_i,
+  output logic [DATA_WIDTH-1:0]        rdata_o
+);
+  localparam ADDR_WIDTH = $clog2(NUM_WORDS);
+
+  logic [ADDR_WIDTH-1:0] raddr_q;
+  logic [DATA_WIDTH-1:0] ram [NUM_WORDS-1:0];
+
+  // Notes carried over from the original source; neither is implemented here:
+  // 1. randomize array
+  // 2. randomize output when no request is active
+  always_ff @(posedge clk_i) begin
+    if (req_i) begin
+      if (!we_i) begin
+        // read: remember the address for the combinational output below
+        raddr_q <= addr_i;
+      end else begin
+        // write: update only the bits selected by the bit-enable mask
+        for (int bit_idx = 0; bit_idx < DATA_WIDTH; bit_idx++) begin
+          if (be_i[bit_idx]) begin
+            ram[addr_i][bit_idx] <= wdata_i[bit_idx];
+          end
+        end
+      end
+    end
+  end
+
+  assign rdata_o = ram[raddr_q];
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/ecc_decode.sv b/vendor/pulp-platform/common_cells/src/ecc_decode.sv
new file mode 100644
index 0000000000..40687e906f
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/ecc_decode.sv
@@ -0,0 +1,128 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba
+//
+/// # ECC Decoder
+///
+/// Implements SECDED (Single Error Correction, Double Error Detection) Hamming Code
+/// with extended parity bit [1].
+/// The module receives a data word including parity bit and decodes it according to the
+/// number of data and parity bit.
+///
+/// 1. If no error has been detected, the syndrome will be zero and all flags will be zero.
+/// 2. If a single error has been detected, the syndrome is non-zero, and `single_error_o` will be
+///    asserted. The output word contains the corrected data.
+/// 3. If the parity bit contained an error, the module will assert `parity_error_o`.
+/// 4. In case of a double fault the syndrome is non-zero, `double_error_o` will be asserted.
+///    All other status flags will be de-asserted.
+///
+/// The computed syndrome is exported on `syndrome_o`.
+///
+/// [1] https://en.wikipedia.org/wiki/Hamming_code
+
+module ecc_decode import ecc_pkg::*; #(
+  /// Data width of unencoded word.
+  parameter int unsigned DataWidth = 64,
+  // Do not change
+  parameter type data_t         = logic [DataWidth-1:0],
+  parameter type parity_t       = logic [get_parity_width(DataWidth)-1:0],
+  parameter type code_word_t    = logic [get_cw_width(DataWidth)-1:0],
+  parameter type encoded_data_t = struct packed {
+                                    logic parity;
+                                    code_word_t code_word;
+                                  }
+  ) (
+  /// Encoded data in
+  input  encoded_data_t data_i,
+  /// Corrected data out
+  output data_t         data_o,
+  /// Error syndrome indicates the erroneous bit position
+  output parity_t       syndrome_o,
+  /// A single error occurred
+  output logic          single_error_o,
+  /// Error received in parity bit (MSB)
+  output logic          parity_error_o,
+  /// A double error occurred
+  output logic          double_error_o
+);
+
+  logic       parity;
+  data_t      data_wo_parity;
+  parity_t    syndrome;
+  logic       syndrome_not_zero;
+  code_word_t correct_data;
+
+  // Check parity bit. 0 = parity equal, 1 = different parity
+  assign parity = data_i.parity ^ (^data_i.code_word);
+
+  ///!    |  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14
+  ///!    | p1  p2  d1  p4  d2  d3  d4  p8  d5  d6  d7  d8  d9 d10 d11
+  ///! ---|------------------------------------------------------------
+  ///! p1 |  x       x       x       x       x       x       x       x
+  ///! p2 |      x   x           x   x           x   x           x   x
+  ///! p4 |              x   x   x   x                   x   x   x   x
+  ///! p8 |                              x   x   x   x   x   x   x   x
+
+  ///! 1. Parity bit 1 covers all bit positions which have the least significant bit
+  ///!    set: bit 1 (the parity bit itself), 3, 5, 7, 9, etc.
+  ///! 2. Parity bit 2 covers all bit positions which have the second least
+  ///!    significant bit set: bit 2 (the parity bit itself), 3, 6, 7, 10, 11, etc.
+  ///! 3. Parity bit 4 covers all bit positions which have the third least
+  ///!    significant bit set: bits 4–7, 12–15, 20–23, etc.
+  ///! 4. Parity bit 8 covers all bit positions which have the fourth least
+  ///!    significant bit set: bits 8–15, 24–31, 40–47, etc.
+  ///! 5. In general each parity bit covers all bits where the bitwise AND of the
+  ///!    parity position and the bit position is non-zero.
+  always_comb begin : calculate_syndrome
+    syndrome = 0;
+    for (int unsigned i = 0; i < unsigned'($bits(parity_t)); i++) begin
+      for (int unsigned j = 0; j < unsigned'($bits(code_word_t)); j++) begin
+        // j is a zero-based index, so the one-based Hamming position is j + 1
+        if (|(unsigned'(2**i) & (j + 1))) syndrome[i] = syndrome[i] ^ data_i.code_word[j];
+      end
+    end
+  end
+
+  assign syndrome_not_zero = |syndrome;
+
+  // Drive the syndrome port; it was declared but left unconnected before.
+  assign syndrome_o = syndrome;
+
+  // correct the data word if the syndrome is non-zero
+  // (the bit is flipped whenever the syndrome is non-zero, i.e. also in the
+  // double-error case, where the result is not a valid correction)
+  always_comb begin
+    correct_data = data_i.code_word;
+    if (syndrome_not_zero) begin
+      correct_data[syndrome - 1] = ~data_i.code_word[syndrome - 1];
+    end
+  end
+
+  ///! Syndrome | Overall Parity (MSB) | Error Type   | Notes
+  ///! ---------|----------------------|--------------|------------------------------------------------------
+  ///!    0     |          0           | No Error     |
+  ///!   /=0    |          1           | Single Error | Correctable. Syndrome holds incorrect bit position.
+  ///!    0     |          1           | Parity Error | Overall parity, MSB is in error and can be corrected.
+  ///!   /=0    |          0           | Double Error | Not correctable.
+  assign single_error_o = parity & syndrome_not_zero;
+  assign parity_error_o = parity & ~syndrome_not_zero;
+  assign double_error_o = ~parity & syndrome_not_zero;
+
+  // Extract data vector
+  always_comb begin
+    automatic int unsigned idx; // bit index
+    data_wo_parity = '0;
+    idx = 0;
+
+    for (int unsigned i = 1; i < unsigned'($bits(code_word_t)) + 1; i++) begin
+      // if i is a power of two we are indexing a parity bit
+      if (unsigned'(2**$clog2(i)) != i) begin
+        data_wo_parity[idx] = correct_data[i - 1];
+        idx++;
+      end
+    end
+  end
+
+  assign data_o = data_wo_parity;
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/ecc_encode.sv b/vendor/pulp-platform/common_cells/src/ecc_encode.sv
new file mode 100644
index 0000000000..8669a082ed
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/ecc_encode.sv
@@ -0,0 +1,78 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba
+/// # ECC Encoder
+///
+/// Implements SECDED (Single Error Correction, Double Error Detection) Hamming Code
+/// with extended parity bit [1].
+/// The module receives a data word and encodes it using above mentioned error
+/// detection and correction code.
The corresponding decode module
+/// can be found in `ecc_decode.sv`
+///
+/// [1] https://en.wikipedia.org/wiki/Hamming_code
+
+module ecc_encode import ecc_pkg::*; #(
+  /// Data width of unencoded word.
+  parameter int unsigned DataWidth = 64,
+  // Do not change
+  parameter type data_t         = logic [DataWidth-1:0],
+  parameter type parity_t       = logic [get_parity_width(DataWidth)-1:0],
+  parameter type code_word_t    = logic [get_cw_width(DataWidth)-1:0],
+  parameter type encoded_data_t = struct packed {
+                                    logic parity;
+                                    code_word_t code_word;
+                                  }
+) (
+  /// Unencoded data in
+  input  data_t         data_i,
+  /// Encoded data out
+  output encoded_data_t data_o
+);
+
+  parity_t    hamming_parity;   // one parity bit per power-of-two position
+  code_word_t spread_data;      // data bits placed at their code word positions
+  code_word_t full_word;        // spread_data with the parity slots filled in
+
+  // Step 1: scatter the data bits over the non-power-of-two positions of the
+  // code word (positions are one-based, vector indices zero-based).
+  always_comb begin : expand_data
+    automatic int unsigned src_bit;
+    src_bit     = 0;
+    spread_data = '0;
+    for (int unsigned pos = 1; pos <= unsigned'($bits(code_word_t)); pos++) begin
+      // a position that is not a power of two holds a data bit
+      if (unsigned'(2**$clog2(pos)) != pos) begin
+        spread_data[pos - 1] = data_i[src_bit];
+        src_bit++;
+      end
+    end
+  end
+
+  // Step 2: parity bit p is the XOR of every position that has bit p set.
+  // (The block keeps its original label although it computes parity bits.)
+  always_comb begin : calculate_syndrome
+    hamming_parity = '0;
+    for (int unsigned pos = 1; pos <= unsigned'($bits(code_word_t)); pos++) begin
+      for (int unsigned p = 0; p < unsigned'($bits(parity_t)); p++) begin
+        if ((unsigned'(2**p) & pos) != 0) hamming_parity[p] ^= spread_data[pos - 1];
+      end
+    end
+  end
+
+  // Step 3: drop each parity bit into its slot at position 2**p.
+  always_comb begin : generate_codeword
+    full_word = spread_data;
+    for (int unsigned p = 0; p < unsigned'($bits(parity_t)); p++) begin
+      full_word[2**p-1] = hamming_parity[p];
+    end
+  end
+
+  // The extra parity bit is the XOR over the complete code word.
+  assign data_o.parity    = ^full_word;
+  assign data_o.code_word = full_word;
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/ecc_pkg.sv b/vendor/pulp-platform/common_cells/src/ecc_pkg.sv
new file mode 100644
index 0000000000..fde9f782b9
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/ecc_pkg.sv
@@ -0,0 +1,31 @@
+// Copyright
2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba
+//
+/// Contains common ECC definitions and helper functions.
+
+package ecc_pkg;
+
+  // Number of Hamming parity bits needed for `data_width` data bits: the
+  // smallest p >= 2 that satisfies  data_width + p + 1 <= 2**p.
+  function automatic int unsigned get_parity_width (input int unsigned data_width);
+    int unsigned n_parity;
+    for (n_parity = 2; unsigned'(2**n_parity) < n_parity + data_width + 1; n_parity++) begin
+      // search only; the loop header does all the work
+    end
+    return n_parity;
+  endfunction
+
+  // Width of the Hamming code word: data bits plus the parity bits reported by
+  // get_parity_width(). Nothing else is added to the returned value.
+  function automatic int unsigned get_cw_width (input int unsigned data_width);
+    int unsigned n_parity;
+    n_parity = get_parity_width(data_width);
+    return data_width + n_parity;
+  endfunction
+
+endpackage
diff --git a/vendor/pulp-platform/common_cells/src/edge_detect.sv b/vendor/pulp-platform/common_cells/src/edge_detect.sv
new file mode 100644
index 0000000000..c6453ba519
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/edge_detect.sv
@@ -0,0 +1,32 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Edge detector, clock needs to oversample for proper edge detection
+
+// Wrapper around sync_wedge with the enable tied high: d_i drives the serial
+// input and the rising/falling edge outputs are forwarded to re_o/fe_o. The
+// serial output of sync_wedge is not used.
+module edge_detect (
+  input  logic clk_i,   // Clock
+  input  logic rst_ni,  // Asynchronous reset active low
+  input  logic d_i,     // data stream in
+  output logic re_o,    // rising edge detected
+  output logic fe_o     // falling edge detected
+);
+
+  sync_wedge i_sync_wedge (
+    // clock, reset and permanently asserted enable
+    .clk_i    ( clk_i  ),
+    .rst_ni   ( rst_ni ),
+    .en_i     ( 1'b1   ),
+    // observed signal
+    .serial_i ( d_i    ),
+    .serial_o (        ),
+    // edge flags
+    .r_edge_o ( re_o   ),
+    .f_edge_o ( fe_o   )
+  );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/edge_propagator.sv b/vendor/pulp-platform/common_cells/src/edge_propagator.sv
new file mode 100644
index 0000000000..2e27283111
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/edge_propagator.sv
@@ -0,0 +1,50 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini
+
+// Carries a pulse from the clk_tx_i domain to the clk_rx_i domain.
+//  - TX side: edge_i sets r_input_reg, which then holds until sync_a[0] goes
+//    high. sync_a is a two-flop chain in the TX domain fed by sync_b.
+//  - RX side: a pulp_sync_wedge samples r_input_reg on clk_rx_i. Its rising
+//    edge flag drives edge_o and its serial output (sync_b) is the signal
+//    returned to the TX side.
+module edge_propagator (
+  input  logic clk_tx_i,
+  input  logic rstn_tx_i,
+  input  logic edge_i,
+  input  logic clk_rx_i,
+  input  logic rstn_rx_i,
+  output logic edge_o
+);
+
+  logic [1:0] sync_a;   // TX-domain two-flop chain for sync_b
+  logic       sync_b;   // RX-domain copy of r_input_reg
+
+  logic r_input_reg;
+  logic s_input_reg_next;
+
+  // Set on edge_i, hold while the returned signal has not yet arrived.
+  assign s_input_reg_next = edge_i | (r_input_reg & (~sync_a[0]));
+
+  // always_ff (instead of plain always) to match the rest of the library and
+  // let tools check that this process only infers flip-flops.
+  always_ff @(negedge rstn_tx_i or posedge clk_tx_i) begin
+    if (~rstn_tx_i) begin
+      r_input_reg <= 1'b0;
+      sync_a      <= 2'b00;
+    end else begin
+      r_input_reg <= s_input_reg_next;
+      sync_a      <= {sync_b,sync_a[1]};
+    end
+  end
+
+  pulp_sync_wedge i_sync_clkb (
+    .clk_i    ( clk_rx_i    ),
+    .rstn_i   ( rstn_rx_i   ),
+    .en_i     ( 1'b1        ),
+    .serial_i ( r_input_reg ),
+    .r_edge_o ( edge_o      ),
+    .f_edge_o (             ),
+    .serial_o ( sync_b      )
+  );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/edge_propagator_rx.sv b/vendor/pulp-platform/common_cells/src/edge_propagator_rx.sv
new file mode 100644
index 0000000000..89532cc27c
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/edge_propagator_rx.sv
@@ -0,0 +1,31 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini
+
+// Receive half of the edge propagator: a pulp_sync_wedge with its enable tied
+// high samples valid_i on clk_i. The rising-edge flag becomes valid_o and the
+// serial output is handed back as ack_o. The falling-edge flag is unused.
+module edge_propagator_rx (
+  input  logic clk_i,
+  input  logic rstn_i,
+  input  logic valid_i,
+  output logic ack_o,
+  output logic valid_o
+);
+
+  pulp_sync_wedge i_sync_clkb (
+    // clock, reset and permanently asserted enable
+    .clk_i    ( clk_i   ),
+    .rstn_i   ( rstn_i  ),
+    .en_i     ( 1'b1    ),
+    // incoming request and returned level
+    .serial_i ( valid_i ),
+    .serial_o ( ack_o   ),
+    // edge flags
+    .r_edge_o ( valid_o ),
+    .f_edge_o (         )
+  );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/edge_propagator_tx.sv b/vendor/pulp-platform/common_cells/src/edge_propagator_tx.sv
new file mode 100644
index 0000000000..0274a43333
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/edge_propagator_tx.sv
@@ -0,0 +1,40 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini
+
+// Transmit half of the edge propagator.
+//  - valid_i sets r_input_reg, which is exported as valid_o.
+//  - ack_i passes through the two-flop chain sync_a; once sync_a[0] is high,
+//    r_input_reg is released (unless valid_i is asserted again).
+module edge_propagator_tx (
+  input  logic clk_i,
+  input  logic rstn_i,
+  input  logic valid_i,
+  input  logic ack_i,
+  output logic valid_o
+);
+
+  logic [1:0] sync_a;   // two-flop chain for ack_i
+
+  logic r_input_reg;
+  logic s_input_reg_next;
+
+  // Set on valid_i, hold while the acknowledge has not yet arrived.
+  assign s_input_reg_next = valid_i | (r_input_reg & ~sync_a[0]);
+
+  // always_ff (instead of plain always) to match the rest of the library and
+  // let tools check that this process only infers flip-flops.
+  always_ff @(negedge rstn_i or posedge clk_i) begin
+    if (~rstn_i) begin
+      r_input_reg <= 1'b0;
+      sync_a      <= 2'b00;
+    end else begin
+      r_input_reg <= s_input_reg_next;
+      sync_a      <= {ack_i,sync_a[1]};
+    end
+  end
+
+  assign valid_o = r_input_reg;
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/exp_backoff.sv b/vendor/pulp-platform/common_cells/src/exp_backoff.sv
new file mode 100644
index 0000000000..91dccb075c
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/exp_backoff.sv
@@ -0,0 +1,98 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner , ETH Zurich
+// Date: 10.04.2019
+// Description: exponential backoff counter with randomization.
+//
+// For each failed trial (set_i pulsed), this unit exponentially increases the
+// (average) backoff time by masking an LFSR with a shifted mask in order to
+// create the backoff counter initial value.
+//
+// The shift register mask and the counter value are both reset to '0 in case of
+// a successful trial (clr_i).
+//
+
+module exp_backoff #(
+  /// Seed for 16bit LFSR
+  parameter int unsigned Seed = 'hffff,
+  /// 2**MaxExp-1 determines the maximum range from which random wait counts are drawn
+  parameter int unsigned MaxExp = 16
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  /// Sets the backoff counter (pulse) -> use when trial did not succeed
+  input logic set_i,
+  /// Clears the backoff counter (pulse) -> use when trial succeeded
+  input logic clr_i,
+  /// Indicates whether the backoff counter is equal to zero and a new trial can be launched
+  output logic is_zero_o
+);
+
+  // leave this constant
+  localparam int unsigned WIDTH = 16;
+
+  logic [WIDTH-1:0] lfsr_d, lfsr_q, cnt_d, cnt_q, mask_d, mask_q;
+  logic lfsr; // feedback bit that is shifted into the MSB of lfsr_q
+
+  // generate random wait counts
+  // note: we use a flipped lfsr here to
+  // avoid strange correlation effects between
+  // the (left-shifted) mask and the lfsr
+  assign lfsr = lfsr_q[15-15] ^
+                lfsr_q[15-13] ^
+                lfsr_q[15-12] ^
+                lfsr_q[15-10];
+
+  assign lfsr_d = (set_i) ? {lfsr, lfsr_q[$high(lfsr_q):1]} : // the LFSR only advances (right shift) when set_i is high
+                  lfsr_q;
+
+  // mask the wait counts with exponentially increasing mask (shift reg)
+  assign mask_d = (clr_i) ? '0 :
+                  (set_i) ? {{(WIDTH-MaxExp){1'b0}},mask_q[MaxExp-2:0], 1'b1} : // NOTE(review): the slice needs MaxExp >= 2, the assertion below only enforces MaxExp > 0
+                  mask_q;
+
+  assign cnt_d = (clr_i) ? '0 : // clr_i has priority over set_i
+                 (set_i) ? (mask_q & lfsr_q) : // load: registered (pre-update) mask ANDed with the registered LFSR state
+                 (!is_zero_o) ? cnt_q - 1'b1 : '0; // count down, then stay at zero
+
+  assign is_zero_o = (cnt_q=='0);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      lfsr_q <= WIDTH'(Seed); // Seed is truncated/extended to WIDTH bits
+      mask_q <= '0;
+      cnt_q <= '0;
+    end else begin
+      lfsr_q <= lfsr_d;
+      mask_q <= mask_d;
+      cnt_q <= cnt_d;
+    end
+  end
+
+///////////////////////////////////////////////////////
+// assertions
+///////////////////////////////////////////////////////
+
+//pragma translate_off
+`ifndef VERILATOR
+  initial begin
+    // assert wrong parameterizations
+    assert (MaxExp>0)
+      else $fatal(1,"MaxExp must be greater than 0");
+    assert (MaxExp<=16)
+      else $fatal(1,"MaxExp cannot be greater than 16");
+    assert (Seed>0)
+      else $fatal(1,"Zero seed is not allowed for LFSR");
+  end
+`endif
+//pragma translate_on
+
+endmodule // exp_backoff
diff --git a/vendor/pulp-platform/common_cells/src/fall_through_register.sv b/vendor/pulp-platform/common_cells/src/fall_through_register.sv
new file mode 100644
index 0000000000..fcbbe31dbc
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/fall_through_register.sv
@@ -0,0 +1,58 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Fall-through register with a simple stream-like ready/valid handshake.
+// This register does not cut combinatorial paths on any signals: in case the module at its output
+// is ready to accept data within the same clock cycle, they are forwarded. Use this module to get a
+// 'default ready' behavior towards the input.
+module fall_through_register #(
+    parameter type T = logic // Vivado requires a default value for type parameters.
+) (
+    input logic clk_i, // Clock
+    input logic rst_ni, // Asynchronous active-low reset
+    input logic clr_i, // Synchronous clear
+    input logic testmode_i, // Test mode to bypass clock gating
+    // Input port
+    input logic valid_i,
+    output logic ready_o,
+    input T data_i,
+    // Output port
+    output logic valid_o,
+    input logic ready_i,
+    output T data_o
+);
+
+    logic fifo_empty,
+          fifo_full;
+
+    fifo_v2 #(
+        .FALL_THROUGH (1'b1),
+        .DATA_WIDTH ($bits(T)), // bit width of T; $size() only queries one array dimension and is not the width in general
+        .DEPTH (1), // single storage entry
+        .dtype (T)
+    ) i_fifo (
+        .clk_i (clk_i),
+        .rst_ni (rst_ni),
+        .flush_i (clr_i),
+        .testmode_i (testmode_i),
+        .full_o (fifo_full),
+        .empty_o (fifo_empty),
+        .alm_full_o ( ),
+        .alm_empty_o ( ),
+        .data_i (data_i),
+        .push_i (valid_i & ~fifo_full), // never push while the FIFO reports full
+        .data_o (data_o),
+        .pop_i (ready_i & ~fifo_empty) // never pop while the FIFO reports empty
+    );
+
+    assign ready_o = ~fifo_full; // input side is ready whenever the FIFO is not full
+    assign valid_o = ~fifo_empty; // output side is valid whenever the FIFO is not empty
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/fifo_v3.sv b/vendor/pulp-platform/common_cells/src/fifo_v3.sv
new file mode 100644
index 0000000000..e417a3e7b0
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/fifo_v3.sv
@@ -0,0 +1,154 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+
+module fifo_v3 #(
+    parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode
+    parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic
+    parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32
+    parameter type dtype = logic [DATA_WIDTH-1:0],
+    // DO NOT OVERWRITE THIS PARAMETER
+    parameter int unsigned ADDR_DEPTH = (DEPTH > 1) ? $clog2(DEPTH) : 1
+)(
+    input logic clk_i, // Clock
+    input logic rst_ni, // Asynchronous reset active low
+    input logic flush_i, // flush the queue
+    input logic testmode_i, // test_mode to bypass clock gating
+    // status flags
+    output logic full_o, // queue is full
+    output logic empty_o, // queue is empty
+    output logic [ADDR_DEPTH-1:0] usage_o, // fill pointer
+    // as long as the queue is not full we can push new data
+    input dtype data_i, // data to push into the queue
+    input logic push_i, // data is valid and can be pushed to the queue
+    // as long as the queue is not empty we can pop new elements
+    output dtype data_o, // output data
+    input logic pop_i // pop head from queue
+);
+    // local parameter
+    // FIFO depth - handle the case of pass-through, synthesizer will do constant propagation
+    localparam int unsigned FifoDepth = (DEPTH > 0) ? DEPTH : 1;
+    // clock gating control
+    logic gate_clock;
+    // pointer to the read and write section of the queue
+    logic [ADDR_DEPTH - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q;
+    // keep a counter to keep track of the current queue status
+    // this integer will be truncated by the synthesis tool
+    logic [ADDR_DEPTH:0] status_cnt_n, status_cnt_q;
+    // actual memory
+    dtype [FifoDepth - 1:0] mem_n, mem_q;
+
+    assign usage_o = status_cnt_q[ADDR_DEPTH-1:0]; // drops the counter MSB, so usage_o reads 0 when status_cnt_q == 2**ADDR_DEPTH
+
+    if (DEPTH == 0) begin : gen_pass_through
+        assign empty_o = ~push_i; // pass-through: "not empty" exactly while data is being pushed
+        assign full_o = ~pop_i; // pass-through: "not full" exactly while data is being popped
+    end else begin : gen_fifo
+        assign full_o = (status_cnt_q == FifoDepth[ADDR_DEPTH:0]);
+        assign empty_o = (status_cnt_q == 0) & ~(FALL_THROUGH & push_i); // in fall-through mode a push makes an empty FIFO appear non-empty
+    end
+    // status flags
+
+    // read and write queue logic
+    always_comb begin : read_write_comb
+        // default assignment
+        read_pointer_n = read_pointer_q;
+        write_pointer_n = write_pointer_q;
+        status_cnt_n = status_cnt_q;
+        data_o = (DEPTH == 0) ? data_i : mem_q[read_pointer_q];
+        mem_n = mem_q;
+        gate_clock = 1'b1;
+
+        // push a new element to the queue
+        if (push_i && ~full_o) begin
+            // push the data onto the queue
+            mem_n[write_pointer_q] = data_i;
+            // un-gate the clock, we want to write something
+            gate_clock = 1'b0;
+            // increment the write counter
+            if (write_pointer_q == FifoDepth[ADDR_DEPTH-1:0] - 1)
+                write_pointer_n = '0;
+            else
+                write_pointer_n = write_pointer_q + 1;
+            // increment the overall counter
+            status_cnt_n = status_cnt_q + 1;
+        end
+
+        if (pop_i && ~empty_o) begin
+            // read from the queue is a default assignment
+            // but increment the read pointer...
+            if (read_pointer_n == FifoDepth[ADDR_DEPTH-1:0] - 1) // read_pointer_n still equals read_pointer_q here (only the default assignment precedes it)
+                read_pointer_n = '0;
+            else
+                read_pointer_n = read_pointer_q + 1;
+            // ... and decrement the overall count
+            status_cnt_n = status_cnt_q - 1;
+        end
+
+        // keep the count pointer stable if we push and pop at the same time
+        if (push_i && pop_i && ~full_o && ~empty_o)
+            status_cnt_n = status_cnt_q;
+
+        // FIFO is in pass through mode -> do not change the pointers
+        if (FALL_THROUGH && (status_cnt_q == 0) && push_i) begin
+            data_o = data_i;
+            if (pop_i) begin
+                status_cnt_n = status_cnt_q;
+                read_pointer_n = read_pointer_q;
+                write_pointer_n = write_pointer_q;
+            end
+        end
+    end
+
+    // sequential process
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if(~rst_ni) begin
+            read_pointer_q <= '0;
+            write_pointer_q <= '0;
+            status_cnt_q <= '0;
+        end else begin
+            if (flush_i) begin // synchronous flush: resets pointers and count, mem_q is left as is
+                read_pointer_q <= '0;
+                write_pointer_q <= '0;
+                status_cnt_q <= '0;
+            end else begin
+                read_pointer_q <= read_pointer_n;
+                write_pointer_q <= write_pointer_n;
+                status_cnt_q <= status_cnt_n;
+            end
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if(~rst_ni) begin
+            mem_q <= '0;
+        end else if (!gate_clock) begin // gate_clock acts as an active-low write enable for the storage
+            mem_q <= mem_n;
+        end
+    end
+
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin
+        assert (DEPTH > 0) else $error("DEPTH must be greater than 0."); // NOTE(review): reports an error for DEPTH == 0 although gen_pass_through handles that case
+    end
+
+    full_write : assert property(
+        @(posedge clk_i) disable iff (~rst_ni) (full_o |-> ~push_i))
+        else $fatal (1, "Trying to push new data although the FIFO is full.");
+
+    empty_read : assert property(
+        @(posedge clk_i) disable iff (~rst_ni) (empty_o |-> ~pop_i))
+        else $fatal (1, "Trying to pop data although the FIFO is empty.");
+`endif
+// pragma translate_on
+
+endmodule // fifo_v3
diff --git a/vendor/pulp-platform/common_cells/src/gray_to_binary.sv b/vendor/pulp-platform/common_cells/src/gray_to_binary.sv
new file mode 100644
index 0000000000..b1ad46f1ef
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/gray_to_binary.sv
@@ -0,0 +1,23 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki
+
+/// A gray code to binary converter.
+module gray_to_binary #(
+  parameter int N = -1 // width of A and Z; the default is not a usable width, so N has to be overridden
+)(
+  input logic [N-1:0] A, // Gray-coded input
+  output logic [N-1:0] Z // binary output
+);
+  for (genvar i = 0; i < N; i++)
+    assign Z[i] = ^A[N-1:i]; // bit i is the XOR reduction of input bits N-1 down to i
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/id_queue.sv b/vendor/pulp-platform/common_cells/src/id_queue.sv
new file mode 100644
index 0000000000..2ba347e30a
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/id_queue.sv
@@ -0,0 +1,419 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// ID Queue
+//
+// In an ID queue, every element has a numeric ID. Among all elements that have the same ID, the ID
+// queue preserves FIFO order.
+//
+// This ID queue implementation allows to either push (through the `inp_*` signals) or pop (through
+// the `oup_*` signals) one element per clock cycle (depending on the _FULL_BW_ operating mode
+// described below). The `inp_` port has priority and grants a request iff the queue is not full. The
+// `oup_` port dequeues an element iff `oup_pop_i` is asserted during an `oup_` handshake;
+// otherwise, it performs a non-destructive read. `oup_data_o` is valid iff `oup_data_valid_o` is
+// asserted during an `oup_` handshake. If `oup_data_valid_o` is not asserted, the queue did not
+// contain an element with the provided ID.
+//
+// The queue can work in two bandwidth modes:
+// * !FULL_BW: Input and output cannot be performed simultaneously (max bandwidth: 50%).
+// * FULL_BW: Input and output can be performed simultaneously and a popped cell can be reused
+// immediately in the same clock cycle. Area increase typically 5-10%.
+//
+// This ID queue additionally provides the `exists_` port, which searches for an element anywhere in
+// the queue. The comparison performed during the search can be masked: for every bit that is
+// asserted in `exists_mask_i`, the corresponding bit in the queue element and in `exists_data_i`
+// must be equal for a match; the other bits are not compared. If masking is not required, tie
+// `exists_mask_i` to `'1` and the synthesizer should simplify the comparisons to unmasked ones. The
+// `exists_` port operates independently of the `inp_` and `oup_` ports. If the `exists_` port is
+// unused, tie `exists_req_i` to `1'b0` and the synthesizer should remove the internal comparators.
+//
+// This ID queue can store at most `CAPACITY` elements, independent of their ID.
Let
+// - C = `CAPACITY`
+// - B = $bits(data_t)
+// - I = 2**`ID_WIDTH`
+// Then
+// - the queue element storage requires O(C * (B + log2(C))) bit
+// - the ID table requires O(H * log2(C)) bit, where H = min(C, I)
+//
+// Maintainers:
+// - Andreas Kurth
+
+module id_queue #(
+  parameter int ID_WIDTH = 0, // must be overridden with a value >= 1 (checked in validate_params)
+  parameter int CAPACITY = 0, // must be overridden with a value >= 1 (checked in validate_params)
+  parameter bit FULL_BW = 0, // 1: push and pop may be handled in the same cycle
+  parameter type data_t = logic,
+  // Dependent parameters, DO NOT OVERRIDE!
+  localparam type id_t = logic[ID_WIDTH-1:0],
+  localparam type mask_t = logic[$bits(data_t)-1:0]
+) (
+  input logic clk_i,
+  input logic rst_ni,
+
+  input id_t inp_id_i,
+  input data_t inp_data_i,
+  input logic inp_req_i,
+  output logic inp_gnt_o,
+
+  input data_t exists_data_i,
+  input mask_t exists_mask_i,
+  input logic exists_req_i,
+  output logic exists_o,
+  output logic exists_gnt_o,
+
+  input id_t oup_id_i,
+  input logic oup_pop_i,
+  input logic oup_req_i,
+  output data_t oup_data_o,
+  output logic oup_data_valid_o,
+  output logic oup_gnt_o
+);
+
+  // Capacity of the head-tail table, which associates an ID with corresponding head and tail
+  // indices.
+  localparam int NIds = 2**ID_WIDTH;
+  localparam int HtCapacity = (NIds <= CAPACITY) ? NIds : CAPACITY;
+  localparam int unsigned HtIdxWidth = cf_math_pkg::idx_width(HtCapacity);
+  localparam int unsigned LdIdxWidth = cf_math_pkg::idx_width(CAPACITY);
+
+  // Type for indexing the head-tail table.
+  typedef logic [HtIdxWidth-1:0] ht_idx_t;
+
+  // Type for indexing the linked data table.
+  typedef logic [LdIdxWidth-1:0] ld_idx_t;
+
+  // Type of an entry in the head-tail table.
+  typedef struct packed {
+    id_t id;
+    ld_idx_t head,
+             tail;
+    logic free;
+  } head_tail_t;
+
+  // Type of an entry in the linked data table.
+  typedef struct packed {
+    data_t data;
+    ld_idx_t next;
+    logic free; // last member of the packed struct, i.e. bit 0 (the code below sets it via index [0])
+  } linked_data_t;
+
+  head_tail_t [HtCapacity-1:0] head_tail_d, head_tail_q;
+
+  linked_data_t [CAPACITY-1:0] linked_data_d, linked_data_q;
+
+  logic full,
+        match_in_id_valid,
+        match_out_id_valid,
+        no_in_id_match,
+        no_out_id_match;
+
+  logic [HtCapacity-1:0] head_tail_free,
+                         idx_matches_in_id,
+                         idx_matches_out_id;
+
+  logic [CAPACITY-1:0] exists_match,
+                       linked_data_free;
+
+  id_t match_in_id, match_out_id;
+
+  ht_idx_t head_tail_free_idx,
+           match_in_idx,
+           match_out_idx;
+
+  ld_idx_t linked_data_free_idx,
+           oup_data_free_idx;
+
+  logic oup_data_popped,
+        oup_ht_popped;
+
+  // Find the index in the head-tail table that matches a given ID.
+  for (genvar i = 0; i < HtCapacity; i++) begin: gen_idx_match
+    assign idx_matches_in_id[i] = match_in_id_valid && (head_tail_q[i].id == match_in_id) &&
+        !head_tail_q[i].free;
+    assign idx_matches_out_id[i] = match_out_id_valid && (head_tail_q[i].id == match_out_id) &&
+        !head_tail_q[i].free;
+  end
+  assign no_in_id_match = !(|idx_matches_in_id);
+  assign no_out_id_match = !(|idx_matches_out_id);
+  onehot_to_bin #(
+    .ONEHOT_WIDTH ( HtCapacity )
+  ) i_id_ohb_in (
+    .onehot ( idx_matches_in_id ),
+    .bin ( match_in_idx )
+  );
+  onehot_to_bin #(
+    .ONEHOT_WIDTH ( HtCapacity )
+  ) i_id_ohb_out (
+    .onehot ( idx_matches_out_id ),
+    .bin ( match_out_idx )
+  );
+
+  // Find the first free index in the head-tail table.
+  for (genvar i = 0; i < HtCapacity; i++) begin: gen_head_tail_free
+    assign head_tail_free[i] = head_tail_q[i].free;
+  end
+  lzc #(
+    .WIDTH ( HtCapacity ),
+    .MODE ( 0 ) // Start at index 0.
+  ) i_ht_free_lzc (
+    .in_i ( head_tail_free ),
+    .cnt_o ( head_tail_free_idx ),
+    .empty_o ( )
+  );
+
+  // Find the first free index in the linked data table.
+  for (genvar i = 0; i < CAPACITY; i++) begin: gen_linked_data_free
+    assign linked_data_free[i] = linked_data_q[i].free;
+  end
+  lzc #(
+    .WIDTH ( CAPACITY ),
+    .MODE ( 0 ) // Start at index 0.
+  ) i_ld_free_lzc (
+    .in_i ( linked_data_free ),
+    .cnt_o ( linked_data_free_idx ),
+    .empty_o ( )
+  );
+
+  // The queue is full if and only if there are no free items in the linked data structure.
+  assign full = !(|linked_data_free);
+  // Data potentially freed by the output.
+  assign oup_data_free_idx = head_tail_q[match_out_idx].head;
+
+  // Data can be accepted if the linked list pool is not full, or some data is simultaneously popped (FULL_BW only).
+  assign inp_gnt_o = ~full || (oup_data_popped && FULL_BW);
+  always_comb begin
+    match_in_id = '0;
+    match_out_id = '0;
+    match_in_id_valid = 1'b0;
+    match_out_id_valid = 1'b0;
+    head_tail_d = head_tail_q;
+    linked_data_d = linked_data_q;
+    oup_gnt_o = 1'b0;
+    oup_data_o = data_t'('0);
+    oup_data_valid_o = 1'b0;
+    oup_data_popped = 1'b0;
+    oup_ht_popped = 1'b0;
+
+    if (!FULL_BW) begin
+      if (inp_req_i && !full) begin
+        match_in_id = inp_id_i;
+        match_in_id_valid = 1'b1;
+        // If the ID does not yet exist in the queue, add a new ID entry.
+        if (no_in_id_match) begin
+          head_tail_d[head_tail_free_idx] = '{
+            id: inp_id_i,
+            head: linked_data_free_idx,
+            tail: linked_data_free_idx,
+            free: 1'b0
+          };
+        // Otherwise append it to the existing ID subqueue.
+        end else begin
+          linked_data_d[head_tail_q[match_in_idx].tail].next = linked_data_free_idx;
+          head_tail_d[match_in_idx].tail = linked_data_free_idx;
+        end
+        linked_data_d[linked_data_free_idx] = '{
+          data: inp_data_i,
+          next: '0,
+          free: 1'b0
+        };
+      end else if (oup_req_i) begin // the output is only served when no input is being accepted
+        match_in_id = oup_id_i; // the "in" matcher is reused for the output lookup in this mode
+        match_in_id_valid = 1'b1;
+        if (!no_in_id_match) begin
+          oup_data_o = data_t'(linked_data_q[head_tail_q[match_in_idx].head].data);
+          oup_data_valid_o = 1'b1;
+          if (oup_pop_i) begin
+            // Set free bit of linked data entry, all other bits are don't care.
+            linked_data_d[head_tail_q[match_in_idx].head] = '0;
+            linked_data_d[head_tail_q[match_in_idx].head][0] = 1'b1;
+            if (head_tail_q[match_in_idx].head == head_tail_q[match_in_idx].tail) begin
+              head_tail_d[match_in_idx] = '{free: 1'b1, default: '0};
+            end else begin
+              head_tail_d[match_in_idx].head =
+                  linked_data_q[head_tail_q[match_in_idx].head].next;
+            end
+          end
+        end
+        // Always grant the output request. If there was no match, the default, invalid entry
+        // will be returned.
+        oup_gnt_o = 1'b1;
+      end
+    end else begin
+      // FULL_BW
+      if (oup_req_i) begin
+        match_out_id = oup_id_i;
+        match_out_id_valid = 1'b1;
+        if (!no_out_id_match) begin
+          oup_data_o = data_t'(linked_data_q[head_tail_q[match_out_idx].head].data);
+          oup_data_valid_o = 1'b1;
+          if (oup_pop_i) begin
+            oup_data_popped = 1'b1;
+            // Set free bit of linked data entry, all other bits are don't care.
+            linked_data_d[head_tail_q[match_out_idx].head] = '0;
+            linked_data_d[head_tail_q[match_out_idx].head][0] = 1'b1;
+            if (head_tail_q[match_out_idx].head
+                == head_tail_q[match_out_idx].tail) begin
+              oup_ht_popped = 1'b1;
+              head_tail_d[match_out_idx] = '{free: 1'b1, default: '0};
+            end else begin
+              head_tail_d[match_out_idx].head =
+                  linked_data_q[head_tail_q[match_out_idx].head].next;
+            end
+          end
+        end
+        // Always grant the output request. If there was no match, the default, invalid entry
+        // will be returned.
+        oup_gnt_o = 1'b1;
+      end
+      if (inp_req_i && inp_gnt_o) begin
+        match_in_id = inp_id_i;
+        match_in_id_valid = 1'b1;
+        // If the ID does not yet exist in the queue or was just popped, add a new ID entry.
+        if (oup_ht_popped && (oup_id_i==inp_id_i)) begin
+          // If output data was popped for this ID, which led the head_tail to be popped,
+          // then repopulate this head_tail immediately.
+          head_tail_d[match_out_idx] = '{
+            id: inp_id_i,
+            head: oup_data_free_idx,
+            tail: oup_data_free_idx,
+            free: 1'b0
+          };
+          linked_data_d[oup_data_free_idx] = '{
+            data: inp_data_i,
+            next: '0,
+            free: 1'b0
+          };
+        end else if (no_in_id_match) begin
+          // Else, if no head_tail corresponds to the input id.
+          if (oup_ht_popped) begin
+            head_tail_d[match_out_idx] = '{
+              id: inp_id_i,
+              head: oup_data_free_idx,
+              tail: oup_data_free_idx,
+              free: 1'b0
+            };
+            linked_data_d[oup_data_free_idx] = '{
+              data: inp_data_i,
+              next: '0,
+              free: 1'b0
+            };
+          end else begin
+            if (oup_data_popped) begin
+              head_tail_d[head_tail_free_idx] = '{
+                id: inp_id_i,
+                head: oup_data_free_idx,
+                tail: oup_data_free_idx,
+                free: 1'b0
+              };
+              linked_data_d[oup_data_free_idx] = '{
+                data: inp_data_i,
+                next: '0,
+                free: 1'b0
+              };
+            end else begin
+              head_tail_d[head_tail_free_idx] = '{
+                id: inp_id_i,
+                head: linked_data_free_idx,
+                tail: linked_data_free_idx,
+                free: 1'b0
+              };
+              linked_data_d[linked_data_free_idx] = '{
+                data: inp_data_i,
+                next: '0,
+                free: 1'b0
+              };
+            end
+          end
+        end else begin
+          // Otherwise append it to the existing ID subqueue.
+          if (oup_data_popped) begin
+            linked_data_d[head_tail_q[match_in_idx].tail].next = oup_data_free_idx;
+            head_tail_d[match_in_idx].tail = oup_data_free_idx;
+            linked_data_d[oup_data_free_idx] = '{
+              data: inp_data_i,
+              next: '0,
+              free: 1'b0
+            };
+          end else begin
+            linked_data_d[head_tail_q[match_in_idx].tail].next = linked_data_free_idx;
+            head_tail_d[match_in_idx].tail = linked_data_free_idx;
+            linked_data_d[linked_data_free_idx] = '{
+              data: inp_data_i,
+              next: '0,
+              free: 1'b0
+            };
+          end
+        end
+      end
+    end
+  end
+
+  // Exists Lookup
+  for (genvar i = 0; i < CAPACITY; i++) begin: gen_lookup
+    mask_t exists_match_bits;
+    for (genvar j = 0; j < $bits(data_t); j++) begin: gen_mask
+      always_comb begin
+        if (linked_data_q[i].free) begin
+          exists_match_bits[j] = 1'b0; // free entries never match
+        end else begin
+          if (!exists_mask_i[j]) begin
+            exists_match_bits[j] = 1'b1; // masked-out bits always count as matching
+          end else begin
+            exists_match_bits[j] = (linked_data_q[i].data[j] == exists_data_i[j]);
+          end
+        end
+      end
+    end
+    assign exists_match[i] = (&exists_match_bits);
+  end
+  always_comb begin
+    exists_gnt_o = 1'b0;
+    exists_o = '0;
+    if (exists_req_i) begin
+      exists_gnt_o = 1'b1; // a lookup request is granted in the same cycle
+      exists_o = (|exists_match);
+    end
+  end
+
+  // Registers
+  for (genvar i = 0; i < HtCapacity; i++) begin: gen_ht_ffs
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+      if (!rst_ni) begin
+        head_tail_q[i] <= '{free: 1'b1, default: '0};
+      end else begin
+        head_tail_q[i] <= head_tail_d[i];
+      end
+    end
+  end
+  for (genvar i = 0; i < CAPACITY; i++) begin: gen_data_ffs
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+      if (!rst_ni) begin
+        // Set free bit of linked data entries, all other bits are don't care.
+        linked_data_q[i] <= '0;
+        linked_data_q[i][0] <= 1'b1;
+      end else begin
+        linked_data_q[i] <= linked_data_d[i];
+      end
+    end
+  end
+
+  // Validate parameters.
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin: validate_params
+    assert (ID_WIDTH >= 1)
+      else $fatal(1, "The ID must at least be one bit wide!");
+    assert (CAPACITY >= 1)
+      else $fatal(1, "The queue must have capacity of at least one entry!");
+  end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/isochronous_4phase_handshake.sv b/vendor/pulp-platform/common_cells/src/isochronous_4phase_handshake.sv
new file mode 100644
index 0000000000..de89bd2197
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/isochronous_4phase_handshake.sv
@@ -0,0 +1,81 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba
+
+/// 4-phase handshake between isochronous clock domains
+/// (i.e., clock domains which operate on an integer multiple of each other).
+///
+/// The internals of this module are similar to a clock-domain crossing except that
+/// they do not synchronize the handshake signals as signals can not become metastable (covered by STA).
+/// The upstream circuit will only handshake iff the downstream circuit handshaked.
+///
+/// ## Optionally Passing of Data
+///
+/// If the passing of data is necessary this should be done outside the module, for example:
+/// ```
+/// `FFLNR(dst_data_o, src_data_i, (src_valid_i && src_ready_o), src_clk_i)
+/// ```
+///
+/// This module differs from `isochronous_spill_register` in that it doesn't buffer any data
+/// and only toggles the source handshake once the destination handshake has been toggled.
+///
+/// # Restrictions
+///
+/// Source and destination clock domains must be an integer multiple of each other and
+/// all timing-paths need to be covered by STA. For example a recommended SDC would be:
+///
+/// `create_generated_clock dst_clk_i -name dst_clk -source src_clk_i -divide_by 2`
+///
+/// There are _no_ restrictions on which clock domain should be the faster, any integer
+/// ratio will work.
+
+`include "common_cells/registers.svh"
+
+module isochronous_4phase_handshake (
+  input logic src_clk_i,
+  input logic src_rst_ni, // active-low reset of the source-side registers
+  input logic src_valid_i,
+  output logic src_ready_o,
+  input logic dst_clk_i,
+  input logic dst_rst_ni, // active-low reset of the destination-side registers
+  output logic dst_valid_o,
+  input logic dst_ready_i
+);
+
+  logic src_req_q, src_ack_q; // request toggle / sampled acknowledge (source clock)
+  logic dst_req_q, dst_ack_q; // sampled request / acknowledge toggle (destination clock)
+
+  // source is making a request
+  `FFLARN(src_req_q, ~src_req_q, (src_valid_i && src_ready_o), 1'b0, src_clk_i, src_rst_ni)
+  // "synchronize" the acknowledge into the sending clock-domain
+  `FFARN(src_ack_q, dst_ack_q, 1'b0, src_clk_i, src_rst_ni)
+  // source is ready if there is no outstanding (not yet acknowledged) request
+  assign src_ready_o = (src_req_q == src_ack_q);
+
+  // down-stream circuit is acknowledging the handshake
+  `FFLARN(dst_ack_q, ~dst_ack_q, (dst_valid_o && dst_ready_i), 1'b0, dst_clk_i, dst_rst_ni)
+  // "synchronize" the request into the receiving clock domain
+  `FFARN(dst_req_q, src_req_q, 1'b0, dst_clk_i, dst_rst_ni)
+  // destination is valid if we didn't yet get acknowledge
+  assign dst_valid_o = (dst_req_q != dst_ack_q);
+
+  // pragma translate_off
+  // stability guarantees (the resets are active low, so the checks are disabled while a reset is asserted)
+  `ifndef VERILATOR
+  assert property (@(posedge src_clk_i) disable iff (~src_rst_ni)
+    (src_valid_i && !src_ready_o |=> $stable(src_valid_i))) else $error("src_valid_i is unstable");
+  assert property (@(posedge dst_clk_i) disable iff (~dst_rst_ni)
+    (dst_valid_o && !dst_ready_i |=> $stable(dst_valid_o))) else $error("dst_valid_o is unstable");
+  `endif
+  // pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/isochronous_spill_register.sv b/vendor/pulp-platform/common_cells/src/isochronous_spill_register.sv
new file mode 100644
index 0000000000..35c9d6d728
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/isochronous_spill_register.sv
@@ -0,0 +1,111 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Florian Zaruba
+
+`include "common_cells/registers.svh"
+
+/// A register with handshakes that completely cuts any combinatorial paths
+/// between the input and output in isochronous clock domains.
+///
+/// > Definition of isochronous: In telecommunication, an isochronous signal is a signal
+/// > in which the time interval separating any two significant instants is equal to the
+/// > unit interval or a multiple of the unit interval.
+///
+/// The source and destination clock domains must be derived from the same clock
+/// but can vary in frequency by a constant factor (e.g., double the frequency).
+///
+/// The module is basically a two deep dual-clock fifo with read and write pointers
+/// in different clock domains. As we know the static timing relationship between the
+/// clock domains we can rely on static timing analysis (STA) to get the sampling windows
+/// right and therefore don't need any synchronization.
+///
+/// # Restrictions
+///
+/// Source and destination clock domains must be an integer multiple of each other and
+/// all timing-paths need to be covered by STA. For example a recommended SDC would be:
+///
+/// `create_generated_clock dst_clk_i -name dst_clk -source src_clk_i -divide_by 2`
+///
+/// There are _no_ restrictions on which clock domain should be the faster, any integer
+/// ratio will work.
+module isochronous_spill_register #(
+  /// Data type of spill register.
+  parameter type T = logic,
+  /// Make this spill register transparent.
+  parameter bit Bypass = 1'b0
+) (
+  /// Clock of source clock domain.
+  input logic src_clk_i,
+  /// Active low async reset in source domain.
+  input logic src_rst_ni,
+  /// Source input data is valid.
+  input logic src_valid_i,
+  /// Source is ready to accept.
+  output logic src_ready_o,
+  /// Source input data.
+  input T src_data_i,
+  /// Clock of destination clock domain.
+  input logic dst_clk_i,
+  /// Active low async reset in destination domain.
+  input logic dst_rst_ni,
+  /// Destination output data is valid.
+  output logic dst_valid_o,
+  /// Destination is ready to accept.
+  input logic dst_ready_i,
+  /// Destination output data.
+  output T dst_data_o
+);
+  // Don't generate the spill register.
+  if (Bypass) begin : gen_bypass
+    assign dst_valid_o = src_valid_i;
+    assign src_ready_o = dst_ready_i;
+    assign dst_data_o = src_data_i;
+  // Generate the spill register
+  end else begin : gen_isochronous_spill_register
+    /// Read/write pointer are one bit wider than necessary.
+    /// We implicitly capture the full and empty state with the second bit:
+    /// If all but the topmost bit of `rd_pointer_q` and `wr_pointer_q` agree, the
+    /// FIFO is in a critical state. If the topmost bit is equal, the FIFO is
+    /// empty, otherwise it is full.
+    logic [1:0] rd_pointer_q, wr_pointer_q;
+    // Advance write pointer if we pushed a new item into the FIFO. (Source clock domain)
+    `FFLARN(wr_pointer_q, wr_pointer_q+1, (src_valid_i && src_ready_o), '0, src_clk_i, src_rst_ni)
+    // Advance read pointer if downstream consumed an item. (Destination clock domain)
+    `FFLARN(rd_pointer_q, rd_pointer_q+1, (dst_valid_o && dst_ready_i), '0, dst_clk_i, dst_rst_ni)
+
+    T [1:0] mem_d, mem_q; // two storage slots, selected by the pointer LSBs
+    `FFLNR(mem_q, mem_d, (src_valid_i && src_ready_o), src_clk_i)
+    always_comb begin
+      mem_d = mem_q;
+      mem_d[wr_pointer_q[0]] = src_data_i; // only the slot addressed by the write pointer changes
+    end
+
+    assign src_ready_o = (rd_pointer_q ^ wr_pointer_q) != 2'b10; // full: pointers differ only in the topmost bit
+
+    assign dst_valid_o = (rd_pointer_q ^ wr_pointer_q) != '0; // empty: pointers are identical
+    assign dst_data_o = mem_q[rd_pointer_q[0]];
+  end
+
+  // pragma translate_off
+  // stability guarantees (the resets are active low, so the checks are disabled while a reset is asserted)
+  `ifndef VERILATOR
+  assert property (@(posedge src_clk_i) disable iff (~src_rst_ni)
+    (src_valid_i && !src_ready_o |=> $stable(src_valid_i))) else $error("src_valid_i is unstable");
+  assert property (@(posedge src_clk_i) disable iff (~src_rst_ni)
+    (src_valid_i && !src_ready_o |=> $stable(src_data_i))) else $error("src_data_i is unstable");
+  assert property (@(posedge dst_clk_i) disable iff (~dst_rst_ni)
+    (dst_valid_o && !dst_ready_i |=> $stable(dst_valid_o))) else $error("dst_valid_o is unstable");
+  assert property (@(posedge dst_clk_i) disable iff (~dst_rst_ni)
+    (dst_valid_o && !dst_ready_i |=> $stable(dst_data_o))) else $error("dst_data_o is unstable");
+  `endif
+  // pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/lfsr.sv b/vendor/pulp-platform/common_cells/src/lfsr.sv
new file mode 100644
index 0000000000..aae2e2df83
--- /dev/null
+++
b/vendor/pulp-platform/common_cells/src/lfsr.sv @@ -0,0 +1,315 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Michael Schaffner , ETH Zurich +// Date: 26.04.2019 +// +// Description: This is a parametric LFSR with precomputed coefficients for +// LFSR lengths from 4 to 64bit. + +// Additional block cipher layers can be instantiated to non-linearly transform +// the pseudo-random LFSR sequence at the output, and hence break the shifting +// patterns. The additional cipher layers can only be used for an LFSR width +// of 64bit, since the block cipher has been designed for that block length. 
+
+// Ports:
+//   clk_i  - clock
+//   rst_ni - asynchronous, active-low reset; loads RstVal into the LFSR state
+//   en_i   - the LFSR state (and the optional cipher output register) only advance when high
+//   out_o  - lowest OutWidth bits of the LFSR state, or of the cipher layer output if enabled
+module lfsr #(
+  parameter int unsigned          LfsrWidth     = 64,   // [4,64]
+  parameter int unsigned          OutWidth      = 8,    // [1,LfsrWidth]
+  parameter logic [LfsrWidth-1:0] RstVal        = '1,   // [1,2^LfsrWidth-1]
+  // 0: disabled, the present cipher uses 31, but just a few layers (1-3) are enough
+  // to break linear shifting patterns
+  parameter int unsigned          CipherLayers  = 0,
+  parameter bit                   CipherReg     = 1'b1  // additional output reg after cipher
+) (
+  input  logic                    clk_i,
+  input  logic                    rst_ni,
+  input  logic                    en_i,
+  output logic [OutWidth-1:0]     out_o
+);
+
+// Galois LFSR feedback masks
+// Automatically generated with get_lfsr_masks.py
+// Masks are from https://users.ece.cmu.edu/~koopman/lfsr/
+// The array is indexed directly by the LFSR width (4 to 64).
+localparam logic [63:0] Masks [4:64] = '{64'hC,
+                                         64'h1E,
+                                         64'h39,
+                                         64'h7E,
+                                         64'hFA,
+                                         64'h1FD,
+                                         64'h3FC,
+                                         64'h64B,
+                                         64'hD8F,
+                                         64'h1296,
+                                         64'h2496,
+                                         64'h4357,
+                                         64'h8679,
+                                         64'h1030E,
+                                         64'h206CD,
+                                         64'h403FE,
+                                         64'h807B8,
+                                         64'h1004B2,
+                                         64'h2006A8,
+                                         64'h4004B2,
+                                         64'h800B87,
+                                         64'h10004F3,
+                                         64'h200072D,
+                                         64'h40006AE,
+                                         64'h80009E3,
+                                         64'h10000583,
+                                         64'h20000C92,
+                                         64'h400005B6,
+                                         64'h80000EA6,
+                                         64'h1000007A3,
+                                         64'h200000ABF,
+                                         64'h400000842,
+                                         64'h80000123E,
+                                         64'h100000074E,
+                                         64'h2000000AE9,
+                                         64'h400000086A,
+                                         64'h8000001213,
+                                         64'h1000000077E,
+                                         64'h2000000123B,
+                                         64'h40000000877,
+                                         64'h8000000108D,
+                                         64'h100000000AE9,
+                                         64'h200000000E9F,
+                                         64'h4000000008A6,
+                                         64'h80000000191E,
+                                         64'h100000000090E,
+                                         64'h2000000000FB3,
+                                         64'h4000000000D7D,
+                                         64'h80000000016A5,
+                                         64'h10000000000B4B,
+                                         64'h200000000010AF,
+                                         64'h40000000000DDE,
+                                         64'h8000000000181A,
+                                         64'h100000000000B65,
+                                         64'h20000000000102D,
+                                         64'h400000000000CD5,
+                                         64'h8000000000024C1,
+                                         64'h1000000000000EF6,
+                                         64'h2000000000001363,
+                                         64'h4000000000000FCD,
+                                         64'h80000000000019E2};
+
+// this S-box and permutation P has been taken from the Present Cipher,
+// a super lightweight block cipher. use the cipher layers to add additional
+// non-linearity to the LFSR output. note one layer does not fully correspond
+// to the present cipher round, since the key and rekeying function is not applied here.
+//
+// See also:
+// "PRESENT: An Ultra-Lightweight Block Cipher", A. Bogdanov et al., Ches 2007
+// http://www.lightweightcrypto.org/present/present_ches2007.pdf
+
+// this is the sbox from the present cipher
+// (packed array: the leftmost literal is entry 15, the rightmost literal is entry 0)
+localparam logic[15:0][3:0] Sbox4 = {4'h2, 4'h1, 4'h7, 4'h4,
+                                     4'h8, 4'hF, 4'hE, 4'h3,
+                                     4'hD, 4'hA, 4'h0, 4'h9,
+                                     4'hB, 4'h6, 4'h5, 4'hC };
+
+// these are the permutation indices of the present cipher
+// (packed array: the leftmost literal is entry 63, the rightmost literal is entry 0)
+localparam logic[63:0][5:0] Perm = {6'd63, 6'd47, 6'd31, 6'd15, 6'd62, 6'd46, 6'd30, 6'd14,
+                                    6'd61, 6'd45, 6'd29, 6'd13, 6'd60, 6'd44, 6'd28, 6'd12,
+                                    6'd59, 6'd43, 6'd27, 6'd11, 6'd58, 6'd42, 6'd26, 6'd10,
+                                    6'd57, 6'd41, 6'd25, 6'd09, 6'd56, 6'd40, 6'd24, 6'd08,
+                                    6'd55, 6'd39, 6'd23, 6'd07, 6'd54, 6'd38, 6'd22, 6'd06,
+                                    6'd53, 6'd37, 6'd21, 6'd05, 6'd52, 6'd36, 6'd20, 6'd04,
+                                    6'd51, 6'd35, 6'd19, 6'd03, 6'd50, 6'd34, 6'd18, 6'd02,
+                                    6'd49, 6'd33, 6'd17, 6'd01, 6'd48, 6'd32, 6'd16, 6'd00};
+
+
+// Applies the 4-bit S-box to each of the 16 nibbles of the 64-bit input.
+function automatic logic [63:0] sbox4_layer(logic [63:0] in);
+  logic [63:0] out;
+  //for (logic [4:0] j = '0; j<16; j++) out[j*4 +: 4] = sbox4[in[j*4 +: 4]];
+  // this simulates much faster than the loop
+  out[0*4  +: 4] = Sbox4[in[0*4  +: 4]];
+  out[1*4  +: 4] = Sbox4[in[1*4  +: 4]];
+  out[2*4  +: 4] = Sbox4[in[2*4  +: 4]];
+  out[3*4  +: 4] = Sbox4[in[3*4  +: 4]];
+
+  out[4*4  +: 4] = Sbox4[in[4*4  +: 4]];
+  out[5*4  +: 4] = Sbox4[in[5*4  +: 4]];
+  out[6*4  +: 4] = Sbox4[in[6*4  +: 4]];
+  out[7*4  +: 4] = Sbox4[in[7*4  +: 4]];
+
+  out[8*4  +: 4] = Sbox4[in[8*4  +: 4]];
+  out[9*4  +: 4] = Sbox4[in[9*4  +: 4]];
+  out[10*4 +: 4] = Sbox4[in[10*4 +: 4]];
+  out[11*4 +: 4] = Sbox4[in[11*4 +: 4]];
+
+  out[12*4 +: 4] = Sbox4[in[12*4 +: 4]];
+  out[13*4 +: 4] = Sbox4[in[13*4 +: 4]];
+  out[14*4 +: 4] = Sbox4[in[14*4 +: 4]];
+  out[15*4 +: 4] = Sbox4[in[15*4 +: 4]];
+  return out;
+endfunction : sbox4_layer
+
+// Bit permutation: input bit j is moved to output position Perm[j].
+function automatic logic [63:0] perm_layer(logic [63:0] in);
+  logic [63:0] out;
+  // for (logic [7:0] j = '0; j<64; j++) out[perm[j]] = in[j];
+  // this simulates much faster than the loop
+  out[Perm[0]] = in[0];
+  out[Perm[1]] = in[1];
+  out[Perm[2]] = in[2];
+  out[Perm[3]] = in[3];
+  out[Perm[4]] = in[4];
+  out[Perm[5]] = in[5];
+  out[Perm[6]] = in[6];
+  out[Perm[7]] = in[7];
+  out[Perm[8]] = in[8];
+  out[Perm[9]] = in[9];
+
+  out[Perm[10]] = in[10];
+  out[Perm[11]] = in[11];
+  out[Perm[12]] = in[12];
+  out[Perm[13]] = in[13];
+  out[Perm[14]] = in[14];
+  out[Perm[15]] = in[15];
+  out[Perm[16]] = in[16];
+  out[Perm[17]] = in[17];
+  out[Perm[18]] = in[18];
+  out[Perm[19]] = in[19];
+
+  out[Perm[20]] = in[20];
+  out[Perm[21]] = in[21];
+  out[Perm[22]] = in[22];
+  out[Perm[23]] = in[23];
+  out[Perm[24]] = in[24];
+  out[Perm[25]] = in[25];
+  out[Perm[26]] = in[26];
+  out[Perm[27]] = in[27];
+  out[Perm[28]] = in[28];
+  out[Perm[29]] = in[29];
+
+  out[Perm[30]] = in[30];
+  out[Perm[31]] = in[31];
+  out[Perm[32]] = in[32];
+  out[Perm[33]] = in[33];
+  out[Perm[34]] = in[34];
+  out[Perm[35]] = in[35];
+  out[Perm[36]] = in[36];
+  out[Perm[37]] = in[37];
+  out[Perm[38]] = in[38];
+  out[Perm[39]] = in[39];
+
+  out[Perm[40]] = in[40];
+  out[Perm[41]] = in[41];
+  out[Perm[42]] = in[42];
+  out[Perm[43]] = in[43];
+  out[Perm[44]] = in[44];
+  out[Perm[45]] = in[45];
+  out[Perm[46]] = in[46];
+  out[Perm[47]] = in[47];
+  out[Perm[48]] = in[48];
+  out[Perm[49]] = in[49];
+
+  out[Perm[50]] = in[50];
+  out[Perm[51]] = in[51];
+  out[Perm[52]] = in[52];
+  out[Perm[53]] = in[53];
+  out[Perm[54]] = in[54];
+  out[Perm[55]] = in[55];
+  out[Perm[56]] = in[56];
+  out[Perm[57]] = in[57];
+  out[Perm[58]] = in[58];
+  out[Perm[59]] = in[59];
+
+  out[Perm[60]] = in[60];
+  out[Perm[61]] = in[61];
+  out[Perm[62]] = in[62];
+  out[Perm[63]] = in[63];
+  return out;
+endfunction : perm_layer
+
+////////////////////////////////////////////////////////////////////////
+// lfsr
+////////////////////////////////////////////////////////////////////////
+
+logic [LfsrWidth-1:0] lfsr_d, lfsr_q;
+// Galois step: shift right by one and XOR in the feedback mask when the bit shifted
+// out (the LSB) is 1. The state is held while en_i is low.
+assign lfsr_d =
+  (en_i) ? (lfsr_q>>1) ^ ({LfsrWidth{lfsr_q[0]}} & Masks[LfsrWidth][LfsrWidth-1:0]) : lfsr_q;
+
+always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+  //$display("%b %h", en_i, lfsr_d);
+  if (!rst_ni) begin
+    lfsr_q <= LfsrWidth'(RstVal);
+  end else begin
+    lfsr_q <= lfsr_d;
+  end
+end
+
+////////////////////////////////////////////////////////////////////////
+// block cipher layers
+////////////////////////////////////////////////////////////////////////
+
+if (CipherLayers > unsigned'(0)) begin : g_cipher_layers
+  logic [63:0] ciph_layer;
+  // Number of copies of lfsr_q concatenated before the cast to 64 bit below.
+  localparam int unsigned NumRepl = ((64+LfsrWidth)/LfsrWidth);
+
+  // Apply CipherLayers rounds of S-box followed by permutation to the LFSR state.
+  always_comb begin : p_ciph_layer
+    automatic logic [63:0] tmp;
+    tmp = 64'({NumRepl{lfsr_q}});
+    for(int unsigned k = 0; k < CipherLayers; k++) begin
+      tmp = perm_layer(sbox4_layer(tmp));
+    end
+    ciph_layer = tmp;
+  end
+
+  // additional output reg after cipher
+  // (resets to all-zero and only captures a new cipher output while en_i is high)
+  if (CipherReg) begin : g_cipher_reg
+    logic [OutWidth-1:0] out_d, out_q;
+
+    assign out_d = (en_i) ? ciph_layer[OutWidth-1:0] : out_q;
+    assign out_o = out_q[OutWidth-1:0];
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+      if (!rst_ni) begin
+        out_q <= '0;
+      end else begin
+        out_q <= out_d;
+      end
+    end
+  // no outreg
+  end else begin : g_no_out_reg
+    assign out_o = ciph_layer[OutWidth-1:0];
+  end
+
+// no block cipher
+end else begin : g_no_cipher_layers
+  assign out_o = lfsr_q[OutWidth-1:0];
+end
+
+////////////////////////////////////////////////////////////////////////
+// assertions
+////////////////////////////////////////////////////////////////////////
+
+// pragma translate_off
+// Elaboration-time parameter checks.
+initial begin
+  // these are the LUT limits
+  assert(OutWidth <= LfsrWidth) else
+    $fatal(1,"OutWidth must be smaller equal the LfsrWidth.");
+  assert(RstVal > unsigned'(0)) else
+    $fatal(1,"RstVal must be nonzero.");
+  assert((LfsrWidth >= $low(Masks)) && (LfsrWidth <= $high(Masks))) else
+    $fatal(1,"Unsupported LfsrWidth.");
+  assert(Masks[LfsrWidth][LfsrWidth-1]) else
+    $fatal(1, "LFSR mask is not correct. The MSB must be 1." );
+  assert((CipherLayers > 0) && (LfsrWidth == 64) || (CipherLayers == 0)) else
+    $fatal(1, "Use additional cipher layers only in conjunction with an LFSR width of 64 bit." );
+end
+
+// Runtime check: whenever the LFSR is enabled, its next state must be nonzero.
+`ifndef VERILATOR
+  all_zero: assert property (
+    @(posedge clk_i) disable iff (!rst_ni) en_i |-> lfsr_d)
+    else $fatal(1,"Lfsr must not be all-zero.");
+`endif
+// pragma translate_on
+
+endmodule // lfsr
diff --git a/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv b/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv
new file mode 100644
index 0000000000..3fc93c7710
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv
@@ -0,0 +1,68 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba, ETH Zurich
+// Date: 5.11.2018
+// Description: 16-bit LFSR
+
+// --------------
+// 16-bit LFSR
+// --------------
+//
+// Description: 16-bit shift register with XNOR feedback from bits 15, 12, 5 and 1.
+// The lowest $clog2(WIDTH) state bits are output as a binary index (`refill_way_bin`)
+// and as a one-hot vector (`refill_way_oh`).
+//
+// Parameters:
+//   SEED  - reset value of the shift register
+//   WIDTH - width of the one-hot output (must be <= 16)
+// Ports:
+//   clk_i / rst_ni - clock and asynchronous active-low reset
+//   en_i           - shift the register by one position when high
+//
+module lfsr_16bit #(
+    parameter logic [15:0] SEED  = 16'b0,
+    parameter int unsigned WIDTH = 16
+)(
+    input  logic                      clk_i,
+    input  logic                      rst_ni,
+    input  logic                      en_i,
+    output logic [WIDTH-1:0]          refill_way_oh,
+    output logic [$clog2(WIDTH)-1:0]  refill_way_bin
+);
+
+    localparam int unsigned LogWidth = $clog2(WIDTH);
+
+    logic [15:0] shift_d, shift_q;
+
+    always_comb begin
+
+        automatic logic shift_in;
+        // XNOR feedback
+        shift_in = !(shift_q[15] ^ shift_q[12] ^ shift_q[5] ^ shift_q[1]);
+
+        shift_d = shift_q;
+
+        if (en_i)
+            shift_d = {shift_q[14:0], shift_in};
+
+        // output assignment
+        // NOTE(review): the index uses LogWidth bits, so for a WIDTH that is not a power
+        // of two it can exceed WIDTH-1 - confirm that callers only use power-of-two widths.
+        refill_way_oh = 'b0;
+        refill_way_oh[shift_q[LogWidth-1:0]] = 1'b1;
+        // Select the low bits explicitly instead of relying on implicit truncation.
+        refill_way_bin = shift_q[LogWidth-1:0];
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+        if(~rst_ni) begin
+            shift_q <= SEED;
+        end else begin
+            shift_q <= shift_d;
+        end
+    end
+
+    //pragma translate_off
+    initial begin
+        assert (WIDTH <= 16)
+            else $fatal(1, "WIDTH needs to be less than or equal to 16 because of the 16-bit LFSR");
+    end
+    //pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv b/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
new file mode 100644
index 0000000000..60fdf19f7f
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Igor Loi - University of Bologna
+// Author: Florian Zaruba, ETH Zurich
+// Date: 12.11.2017
+// Description: 8-bit LFSR
+
+/// 8 bit Linear Feedback Shift register
+///
+/// Shift register with XNOR feedback from bits 7, 3, 2 and 1. The lowest $clog2(WIDTH)
+/// state bits are output as a binary index (`refill_way_bin`) and as a one-hot vector
+/// (`refill_way_oh`). `SEED` is the reset value, `WIDTH` (<= 8) the one-hot output width.
+/// The register shifts by one position in every cycle in which `en_i` is high.
+module lfsr_8bit #(
+  parameter logic [7:0]  SEED  = 8'b0,
+  parameter int unsigned WIDTH = 8
+) (
+  input  logic                     clk_i,
+  input  logic                     rst_ni,
+  input  logic                     en_i,
+  output logic [        WIDTH-1:0] refill_way_oh,
+  output logic [$clog2(WIDTH)-1:0] refill_way_bin
+);
+
+  localparam int unsigned LogWidth = $clog2(WIDTH);
+
+  logic [7:0] shift_d, shift_q;
+
+  always_comb begin
+
+    automatic logic shift_in;
+    // XNOR feedback
+    shift_in = !(shift_q[7] ^ shift_q[3] ^ shift_q[2] ^ shift_q[1]);
+
+    shift_d = shift_q;
+
+    if (en_i) shift_d = {shift_q[6:0], shift_in};
+
+    // output assignment
+    // NOTE(review): the index uses LogWidth bits, so for a WIDTH that is not a power
+    // of two it can exceed WIDTH-1 - confirm that callers only use power-of-two widths.
+    refill_way_oh = 'b0;
+    refill_way_oh[shift_q[LogWidth - 1:0]] = 1'b1;
+    // Select the low bits explicitly instead of relying on implicit truncation.
+    refill_way_bin = shift_q[LogWidth - 1:0];
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+    if (~rst_ni) begin
+      shift_q <= SEED;
+    end else begin
+      shift_q <= shift_d;
+    end
+  end
+
+  //pragma translate_off
+  initial begin
+    assert (WIDTH <= 8)
+      else $fatal(1, "WIDTH needs to be less than or equal to 8 because of the 8-bit LFSR");
+  end
+  //pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/lzc.sv b/vendor/pulp-platform/common_cells/src/lzc.sv
new file mode 100644
index 0000000000..424eb2ef62
--- /dev/null
+++
b/vendor/pulp-platform/common_cells/src/lzc.sv
@@ -0,0 +1,112 @@
+// Copyright (c) 2018 - 2019 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+
+/// A trailing zero counter / leading zero counter.
+/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
+/// Set MODE to 1 for leading zero counter  => cnt_o is the number of leading zeros  (from the MSB)
+/// If the input does not contain a one (i.e. all bits are zero), `empty_o` is asserted.
+/// In that case `cnt_o` is not a valid count; in the 7-bit example below it is WIDTH - 1.
+/// For example:
+///   in_i = 000_0000, empty_o = 1, cnt_o = 6 (mode = 0)
+///   in_i = 000_0001, empty_o = 0, cnt_o = 0 (mode = 0)
+///   in_i = 000_1000, empty_o = 0, cnt_o = 3 (mode = 0)
+/// NOTE(review): an earlier revision of this comment mentioned a separate, faster
+/// implementation for Verilator; no such implementation exists in this module.
+module lzc #(
+  /// The width of the input vector.
+  parameter int unsigned WIDTH = 2,
+  /// Mode selection: 0 -> trailing zero, 1 -> leading zero
+  parameter bit          MODE  = 1'b0,
+  /// Dependent parameter. Do **not** change!
+  ///
+  /// Width of the output signal with the zero count.
+  parameter int unsigned CNT_WIDTH = cf_math_pkg::idx_width(WIDTH)
+) (
+  /// Input vector to be counted.
+  input  logic [WIDTH-1:0]     in_i,
+  /// Count of the leading / trailing zeros.
+  output logic [CNT_WIDTH-1:0] cnt_o,
+  /// Counter is empty: Asserted if all bits in in_i are zero.
+  output logic                 empty_o
+);
+
+  // Single-bit input: count and empty flag are both the inverted input bit.
+  if (WIDTH == 1) begin : gen_degenerate_lzc
+
+    assign cnt_o[0] = !in_i[0];
+    assign empty_o  = !in_i[0];
+
+  end else begin : gen_lzc
+
+    // Depth of the binary reduction tree.
+    localparam int unsigned NumLevels = $clog2(WIDTH);
+
+    // pragma translate_off
+    initial begin
+      assert(WIDTH > 0) else $fatal(1, "input must be at least one bit wide");
+    end
+    // pragma translate_on
+
+    // index_lut[j]   : constant j, the position of input bit j
+    // sel_nodes[n]   : tree node n has at least one set bit below it
+    // index_nodes[n] : position selected by tree node n
+    // Tree nodes of level `level` are stored at offsets 2**level - 1 + k; node 0 is the root.
+    logic [WIDTH-1:0][NumLevels-1:0]          index_lut;
+    logic [2**NumLevels-1:0]                  sel_nodes;
+    logic [2**NumLevels-1:0][NumLevels-1:0]   index_nodes;
+
+    logic [WIDTH-1:0] in_tmp;
+
+    // reverse vector if required
+    // (MODE = 1 mirrors the input, so the same tree then counts from the MSB)
+    always_comb begin : flip_vector
+      for (int unsigned i = 0; i < WIDTH; i++) begin
+        in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
+      end
+    end
+
+    for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
+      assign index_lut[j] = (NumLevels)'(unsigned'(j));
+    end
+
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : g_levels
+      // The last level reads the (possibly mirrored) input bits pairwise.
+      if (unsigned'(level) == NumLevels - 1) begin : g_last_level
+        for (genvar k = 0; k < 2 ** level; k++) begin : g_level
+          // if two successive indices are still in the vector...
+          // (the lower index wins when its bit is set)
+          if (unsigned'(k) * 2 < WIDTH - 1) begin : g_reduce
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2] | in_tmp[k * 2 + 1];
+            assign index_nodes[2 ** level - 1 + k] = (in_tmp[k * 2] == 1'b1)
+              ? index_lut[k * 2] :
+                index_lut[k * 2 + 1];
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(k) * 2 == WIDTH - 1) begin : g_base
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2];
+            assign index_nodes[2 ** level - 1 + k] = index_lut[k * 2];
+          end
+          // if index is out of range
+          if (unsigned'(k) * 2 > WIDTH - 1) begin : g_out_of_range
+            assign sel_nodes[2 ** level - 1 + k] = 1'b0;
+            assign index_nodes[2 ** level - 1 + k] = '0;
+          end
+        end
+      // Inner levels combine two child nodes; the left child wins when it has a set bit.
+      end else begin : g_not_last_level
+        for (genvar l = 0; l < 2 ** level; l++) begin : g_level
+          assign sel_nodes[2 ** level - 1 + l] =
+              sel_nodes[2 ** (level + 1) - 1 + l * 2] | sel_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+          assign index_nodes[2 ** level - 1 + l] = (sel_nodes[2 ** (level + 1) - 1 + l * 2] == 1'b1)
+            ? index_nodes[2 ** (level + 1) - 1 + l * 2] :
+              index_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+        end
+      end
+    end
+
+    // The root node carries the result.
+    assign cnt_o   = NumLevels > unsigned'(0) ? index_nodes[0] : {($clog2(WIDTH)) {1'b0}};
+    assign empty_o = NumLevels > unsigned'(0) ? ~sel_nodes[0] : ~(|in_i);
+
+  end : gen_lzc
+
+endmodule : lzc
diff --git a/vendor/pulp-platform/common_cells/src/max_counter.sv b/vendor/pulp-platform/common_cells/src/max_counter.sv
new file mode 100644
index 0000000000..0081fab163
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/max_counter.sv
@@ -0,0 +1,77 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Up/down counter that tracks its maximum value
+//
+// Wraps a `delta_counter` (sticky overflow enabled) and keeps the largest counter value
+// observed since reset or since the last `clear_max_i`. `max_o` already reflects a new
+// maximum in the cycle in which it is reached.
+
+module max_counter #(
+    parameter int unsigned WIDTH = 4
+) (
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             clear_i,        // synchronous clear for counter
+    input  logic             clear_max_i,    // synchronous clear for maximum value
+    input  logic             en_i,           // enable the counter
+    input  logic             load_i,         // load a new value
+    input  logic             down_i,         // downcount, default is up
+    input  logic [WIDTH-1:0] delta_i,        // counter delta
+    input  logic [WIDTH-1:0] d_i,
+    output logic [WIDTH-1:0] q_o,
+    output logic [WIDTH-1:0] max_o,
+    output logic             overflow_o,
+    output logic             overflow_max_o
+);
+    // State: running maximum and its overflow flag
+    logic [WIDTH-1:0] max_q, max_d;
+    logic             overflow_max_q, overflow_max_d;
+
+    // The overflow flag is driven directly from its register.
+    assign overflow_max_o = overflow_max_q;
+
+    // Underlying up/down counter
+    delta_counter #(
+        .WIDTH           (WIDTH),
+        .STICKY_OVERFLOW (1'b1)
+    ) i_counter (
+        .clk_i      (clk_i),
+        .rst_ni     (rst_ni),
+        .clear_i    (clear_i),
+        .en_i       (en_i),
+        .load_i     (load_i),
+        .down_i     (down_i),
+        .delta_i    (delta_i),
+        .d_i        (d_i),
+        .q_o        (q_o),
+        .overflow_o (overflow_o)
+    );
+
+    // Next-state logic for the maximum tracker
+    always_comb begin
+        // default: hold state and present the stored maximum
+        overflow_max_d = overflow_max_q;
+        max_d          = max_q;
+        max_o          = max_q;
+
+        if (clear_max_i) begin
+            // a clear takes priority over a new maximum
+            overflow_max_d = 1'b0;
+            max_d          = '0;
+        end else begin
+            if (q_o > max_q) begin
+                // new maximum: store it and forward it combinationally
+                max_o = q_o;
+                max_d = q_o;
+                if (overflow_o) begin
+                    overflow_max_d = 1'b1;
+                end
+            end
+        end
+    end
+
+    // State registers
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            overflow_max_q <= 1'b0;
+            max_q          <= '0;
+        end else begin
+            overflow_max_q <= overflow_max_d;
+            max_q          <= max_d;
+        end
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/mv_filter.sv b/vendor/pulp-platform/common_cells/src/mv_filter.sv
new file mode 100644
index 0000000000..ddb81f0774
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/mv_filter.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+
+// Counts the cycles in which `sample_i` and `d_i` are both high. Once the count has
+// reached THRESHOLD, `q_o` is set in the following cycle and stays set until `clear_i`.
+// The count is not reset by samples with `d_i` low, only by `clear_i` or reset.
+//
+// Parameters:
+//   WIDTH     - width of the sample counter in bits
+//   THRESHOLD - count at which the output is set; must fit into WIDTH bits
+module mv_filter #(
+    parameter int unsigned WIDTH     = 4,
+    parameter int unsigned THRESHOLD = 10
+)(
+    input  logic clk_i,
+    input  logic rst_ni,
+    input  logic sample_i,
+    input  logic clear_i,
+    input  logic d_i,
+    output logic q_o
+);
+    logic [WIDTH-1:0] counter_q, counter_d;
+    logic d, q;
+
+    assign q_o = q;
+
+    always_comb begin
+        counter_d = counter_q;
+        d = q;
+
+        // Only the lowest WIDTH bits of THRESHOLD take part in the comparison.
+        if (counter_q >= THRESHOLD[WIDTH-1:0]) begin
+            d = 1'b1;
+        end else if (sample_i && d_i) begin
+            counter_d = counter_q + 1;
+        end
+
+        // sync reset
+        if (clear_i) begin
+            counter_d = '0;
+            d = 1'b0;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            counter_q <= '0;
+            q         <= 1'b0;
+        end else begin
+            counter_q <= counter_d;
+            q         <= d;
+        end
+    end
+
+    // pragma translate_off
+    // Simulation-only parameter checks (non-fatal): a THRESHOLD that does not fit into
+    // WIDTH bits would be truncated above and silently alias to a smaller threshold.
+    initial begin
+        assert ((WIDTH >= 1) && (WIDTH <= 32))
+            else $error("[mv_filter] WIDTH must be in the range [1,32]");
+        assert ((WIDTH >= 32) || (THRESHOLD < (32'd1 << WIDTH)))
+            else $error("[mv_filter] THRESHOLD does not fit into WIDTH bits");
+    end
+    // pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/onehot_to_bin.sv b/vendor/pulp-platform/common_cells/src/onehot_to_bin.sv
new file mode 100644
index 0000000000..0c33f084b9
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/onehot_to_bin.sv
@@ -0,0 +1,38 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Franceco Conti
+
+// Converts a one-hot (or all-zero) vector into the binary index of the set bit.
+// An all-zero input yields index 0. If several bits are set, the output is the
+// bitwise OR of their indices, which the assertion below reports as an error.
+module onehot_to_bin #(
+    parameter int unsigned ONEHOT_WIDTH = 16,
+    // Do Not Change
+    parameter int unsigned BIN_WIDTH    = ONEHOT_WIDTH == 1 ? 1 : $clog2(ONEHOT_WIDTH)
+)   (
+    input  logic [ONEHOT_WIDTH-1:0] onehot,
+    output logic [BIN_WIDTH-1:0]    bin
+);
+
+    // Output bit j is the OR of all one-hot inputs whose index has bit j set.
+    for (genvar j = 0; j < BIN_WIDTH; j++) begin : jl
+        logic [ONEHOT_WIDTH-1:0] tmp_mask;
+            for (genvar i = 0; i < ONEHOT_WIDTH; i++) begin : il
+                logic [BIN_WIDTH-1:0] tmp_i;
+                // explicit size cast of the genvar avoids an implicit truncation
+                assign tmp_i = BIN_WIDTH'(i);
+                assign tmp_mask[i] = tmp_i[j];
+            end
+        assign bin[j] = |(tmp_mask & onehot);
+    end
+
+// pragma translate_off
+`ifndef VERILATOR
+    // $onehot0 holds for zero or exactly one set bit.
+    assert final ($onehot0(onehot)) else
+        $fatal(1, "[onehot_to_bin] More than one bit set in the one-hot signal");
+`endif
+// pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/plru_tree.sv b/vendor/pulp-platform/common_cells/src/plru_tree.sv
new file mode 100644
index 0000000000..78a0a8430e
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/plru_tree.sv
@@ -0,0 +1,120 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: David Schaffenrath, TU Graz
+// Author: Florian Zaruba, ETH Zurich
+//
+// Description: Pseudo Least Recently Used Tree (PLRU)
+// See: https://en.wikipedia.org/wiki/Pseudo-LRU
+
+// On every use of an entry the tree nodes on the path to that entry are set to point
+// away from it; `plru_o` flags the entry the registered tree currently points to.
+module plru_tree #(
+  parameter int unsigned ENTRIES = 16
+) (
+  input  logic               clk_i,
+  input  logic               rst_ni,
+  input  logic [ENTRIES-1:0] used_i, // element i was used (one hot)
+  output logic [ENTRIES-1:0] plru_o  // element i is the least recently used (one hot)
+);
+
+    localparam int unsigned LogEntries = $clog2(ENTRIES);
+
+    // NOTE(review): the loops below only index nodes 0 .. ENTRIES-2, so the upper half
+    // of this vector appears to be unused - confirm before resizing.
+    logic [2*(ENTRIES-1)-1:0] plru_tree_q, plru_tree_d;
+
+    always_comb begin : plru_replacement
+        plru_tree_d = plru_tree_q;
+        // The PLRU-tree indexing:
+        // lvl0        0
+        //            / \
+        //           /   \
+        // lvl1     1     2
+        //         / \   / \
+        // lvl2   3   4 5   6
+        //       / \ /\/\  /\
+        //      ... ... ... ...
+        // Just predefine which nodes will be set/cleared
+        // E.g. for a TLB with 8 entries, the for-loop is semantically
+        // equivalent to the following pseudo-code (each node on the path is written
+        // with the INVERTED index bit, i.e. it points away from the used entry):
+        // unique case (1'b1)
+        // used_i[7]: plru_tree_d[0, 2, 6] = {0, 0, 0};
+        // used_i[6]: plru_tree_d[0, 2, 6] = {0, 0, 1};
+        // used_i[5]: plru_tree_d[0, 2, 5] = {0, 1, 0};
+        // used_i[4]: plru_tree_d[0, 2, 5] = {0, 1, 1};
+        // used_i[3]: plru_tree_d[0, 1, 4] = {1, 0, 0};
+        // used_i[2]: plru_tree_d[0, 1, 4] = {1, 0, 1};
+        // used_i[1]: plru_tree_d[0, 1, 3] = {1, 1, 0};
+        // used_i[0]: plru_tree_d[0, 1, 3] = {1, 1, 1};
+        // default: begin /* No hit */ end
+        // endcase
+        // If more than one bit of used_i is set, all updates are applied in ascending
+        // order of i, so the highest set index wins on shared nodes.
+        for (int unsigned i = 0; i < ENTRIES; i++) begin
+            automatic int unsigned idx_base, shift, new_index;
+            // we got a hit, so entry i is now the most recently used one:
+            // make every node on its path point away from it
+            if (used_i[i]) begin
+                // Set the nodes to the values we would expect
+                for (int unsigned lvl = 0; lvl < LogEntries; lvl++) begin
+                  idx_base = $unsigned((2**lvl)-1);
+                  // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                  shift = LogEntries - lvl;
+                  // to circumvent the 32 bit integer arithmetic assignment
+                  new_index =  ~((i >> (shift-1)) & 1);
+                  plru_tree_d[idx_base + (i >> shift)] = new_index[0];
+                end
+            end
+        end
+        // Decode tree to write enable signals
+        // Next for-loop basically creates the following logic for e.g. an 8 entry
+        // TLB (note: pseudo-code obviously):
+        // plru_o[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
+        // plru_o[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
+        // plru_o[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
+        // plru_o[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
+        // plru_o[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
+        // plru_o[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
+        // plru_o[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
+        // plru_o[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
+        // For each entry traverse the tree. If every tree-node on the path matches
+        // the corresponding bit of the entry's index, this is
+        // the next entry to replace.
+        for (int unsigned i = 0; i < ENTRIES; i += 1) begin
+            automatic logic en;
+            automatic int unsigned idx_base, shift, new_index;
+            en = 1'b1;
+            for (int unsigned lvl = 0; lvl < LogEntries; lvl++) begin
+                idx_base = $unsigned((2**lvl)-1);
+                // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                shift = LogEntries - lvl;
+                // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
+                new_index =  (i >> (shift-1)) & 1;
+                if (new_index[0]) begin
+                  en &= plru_tree_q[idx_base + (i>>shift)];
+                end else begin
+                  en &= ~plru_tree_q[idx_base + (i>>shift)];
+                end
+            end
+            plru_o[i] = en;
+        end
+    end
+
+    // Tree state register; all nodes are cleared by the asynchronous, active-low reset.
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            plru_tree_q <= '0;
+        end else begin
+            plru_tree_q <= plru_tree_d;
+        end
+    end
+
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin
+        assert (ENTRIES == 2**LogEntries) else $error("Entries must be a power of two");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/popcount.sv b/vendor/pulp-platform/common_cells/src/popcount.sv
new file mode 100644
index 0000000000..72b9b71f0f
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/popcount.sv
@@ -0,0 +1,60 @@
+// Copyright (C) 2013-2018 ETH Zurich, University of Bologna
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Manuel Eggimann
+
+// Description: This module calculates the hamming weight (number of ones) in
+// its input vector using a balanced binary adder tree. Recursive instantiation
+// is used to build the tree. Any unsigned INPUT_WIDTH larger or equal 2 is
+// legal. The module pads the signal internally to the next power of two. The
+// output result width is ceil(log2(INPUT_WIDTH))+1.
+
+module popcount #(
+    parameter  int unsigned INPUT_WIDTH   = 256,
+    localparam int unsigned PopcountWidth = $clog2(INPUT_WIDTH)+1
+) (
+    input  logic [INPUT_WIDTH-1:0]   data_i,
+    output logic [PopcountWidth-1:0] popcount_o
+);
+
+    // Input width rounded up to the next power of two, and the width of one half of it
+    localparam int unsigned PaddedWidth = 1 << $clog2(INPUT_WIDTH);
+    localparam int unsigned HalfWidth   = PaddedWidth / 2;
+
+    logic [PaddedWidth-1:0]   padded_input;
+    logic [PopcountWidth-2:0] left_child_result, right_child_result;
+
+    // Zero-extend the input to the padded width
+    assign padded_input = PaddedWidth'(data_i);
+
+    // The result is the sum of the two partial counts
+    assign popcount_o = left_child_result + right_child_result;
+
+    // Build the binary adder tree by recursive instantiation
+    case (INPUT_WIDTH)
+        // one input bit: the count is the bit itself
+        1: begin : single_node
+            assign left_child_result  = 1'b0;
+            assign right_child_result = padded_input[0];
+        end
+        // two input bits: each bit is its own partial count
+        2: begin : leaf_node
+            assign left_child_result  = padded_input[1];
+            assign right_child_result = padded_input[0];
+        end
+        // otherwise: count the upper and the lower half separately
+        default: begin : non_leaf_node
+            popcount #(
+                .INPUT_WIDTH (HalfWidth)
+            ) left_child (
+                .data_i     (padded_input[PaddedWidth-1:HalfWidth]),
+                .popcount_o (left_child_result)
+            );
+
+            popcount #(
+                .INPUT_WIDTH (HalfWidth)
+            ) right_child (
+                .data_i     (padded_input[HalfWidth-1:0]),
+                .popcount_o (right_child_result)
+            );
+        end
+    endcase
+
+endmodule : popcount
diff --git a/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv b/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
new file mode 100644
index 0000000000..90301c822c
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
@@ -0,0 +1,348 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
// Author: Michael Schaffner, ETH Zurich
//         Wolfgang Roenninger, ETH Zurich
// Date: 02.04.2019
// Description: logarithmic arbitration tree with round robin arbitration scheme.

/// The rr_arb_tree employs non-starving round-robin arbitration - i.e., the priorities
/// rotate each cycle.
///
/// ## Fair vs. unfair Arbitration
///
/// This refers to fair throughput distribution when not all inputs have active requests.
/// This module has an internal state `rr_q` which defines the highest priority input. (When
/// `ExtPrio` is `1'b1` this state is provided from the outside.) The arbitration tree will
/// choose the input with the same index as currently defined by the state if it has an active
/// request. Otherwise a *random* other active input is selected. The parameter `FairArb` is used
/// to distinguish between two methods of calculating the next state.
/// * `1'b0`: The next state is calculated by advancing the current state by one. This leads to the
///           state being calculated without the context of the active request, and hence to an
///           unfair throughput distribution if not all inputs have active requests.
/// * `1'b1`: The next state jumps to the next unserved request with higher index.
///           This is achieved by using two trailing-zero-counters (`lzc`). The upper has the masked
///           `req_i` signal with all indices which will have a higher priority in the next state.
///           The trailing zero count defines the input index with the next highest priority after
///           the current one is served. When the upper is empty the lower `lzc` provides the
///           wrapped index if there are outstanding requests with lower or same priority.
/// The implications of throughput fairness on the module timing are:
/// * The trailing zero counter (`lzc`) has a loglog relation of input to output timing. This means
///   that in this module the input to register path scales with Log(Log(`NumIn`)).
/// * The `rr_arb_tree` data multiplexing scales with Log(`NumIn`). This means that the input to
///   output timing path of this module also scales with Log(`NumIn`).
/// This implies that in this module the input to output path is always longer than the input to
/// register path. As the output data usually also terminates in a register the parameter `FairArb`
/// only has implications on the area. When it is `1'b0` a static plus one adder is instantiated.
/// If it is `1'b1` two `lzc`, a masking logic stage and a two input multiplexer are instantiated.
/// However these are small with respect to the data multiplexers needed, as the width of the
/// `req_i` signal is usually less than `DataWidth`.
module rr_arb_tree #(
  /// Number of inputs to be arbitrated.
  parameter int unsigned NumIn      = 64,
  /// Data width of the payload in bits. Not needed if `DataType` is overwritten.
  parameter int unsigned DataWidth  = 32,
  /// Data type of the payload, can be overwritten with custom type. This is the only use of
  /// `DataWidth`.
  parameter type         DataType   = logic [DataWidth-1:0],
  /// The `ExtPrio` option allows to override the internal round robin counter via the
  /// `rr_i` signal. This can be useful in case multiple arbiters need to have
  /// rotating priorities that are operating in lock-step. If static priority arbitration
  /// is needed, just connect `rr_i` to '0.
  ///
  /// Set to 1'b1 to enable.
  parameter bit          ExtPrio    = 1'b0,
  /// If `AxiVldRdy` is set, the req/gnt signals are compliant with the AXI style vld/rdy
  /// handshake. Namely, upstream vld (req) must not depend on rdy (gnt), as it can be deasserted
  /// again even though vld is asserted. Enabling `AxiVldRdy` leads to a reduction of arbiter
  /// delay and area.
  ///
  /// Set to `1'b1` to treat req/gnt as vld/rdy.
  parameter bit          AxiVldRdy  = 1'b0,
  /// The `LockIn` option prevents the arbiter from changing the arbitration
  /// decision when the arbiter is disabled. I.e., the index of the first request
  /// that wins the arbitration will be locked in case the destination is not
  /// able to grant the request in the same cycle.
  ///
  /// Must not be combined with `ExtPrio` (checked by an elaboration-time assertion below).
  ///
  /// Set to `1'b1` to enable.
  parameter bit          LockIn     = 1'b0,
  /// When set, ensures that throughput gets distributed evenly between all inputs.
  ///
  /// Set to `1'b0` to disable.
  parameter bit          FairArb    = 1'b1,
  /// Dependent parameter, do **not** overwrite.
  /// Width of the arbitration priority signal and the arbitrated index.
  parameter int unsigned IdxWidth   = (NumIn > 32'd1) ? unsigned'($clog2(NumIn)) : 32'd1,
  /// Dependent parameter, do **not** overwrite.
  /// Type for defining the arbitration priority and arbitrated index signal.
  parameter type         idx_t      = logic [IdxWidth-1:0]
) (
  /// Clock, positive edge triggered.
  input  logic                clk_i,
  /// Asynchronous reset, active low.
  input  logic                rst_ni,
  /// Clears the arbiter state. Only used if `ExtPrio` is `1'b0` or `LockIn` is `1'b1`.
  input  logic                flush_i,
  /// External round-robin priority. Only used if `ExtPrio` is `1'b1`.
  input  idx_t                rr_i,
  /// Input requests arbitration.
  input  logic    [NumIn-1:0] req_i,
  /* verilator lint_off UNOPTFLAT */
  /// Input request is granted.
  output logic    [NumIn-1:0] gnt_o,
  /* verilator lint_on UNOPTFLAT */
  /// Input data for arbitration.
  input  DataType [NumIn-1:0] data_i,
  /// Output request is valid.
  output logic                req_o,
  /// Output request is granted.
  input  logic                gnt_i,
  /// Output data.
  output DataType             data_o,
  /// Index from which input the data came from.
  output idx_t                idx_o
);

  // pragma translate_off
  `ifndef VERILATOR
  `ifndef XSIM
  // Default SVA reset
  default disable iff (!rst_ni || flush_i);
  `endif
  `endif
  // pragma translate_on

  // just pass through in this corner case
  if (NumIn == unsigned'(1)) begin : gen_pass_through
    assign req_o    = req_i[0];
    assign gnt_o[0] = gnt_i;
    assign data_o   = data_i[0];
    assign idx_o    = '0;
  // non-degenerate cases
  end else begin : gen_arbiter
    // Depth of the binary tree; the tree has 2**NumLevels - 1 internal nodes, stored
    // breadth-first with the root at index 0.
    localparam int unsigned NumLevels = unsigned'($clog2(NumIn));

    /* verilator lint_off UNOPTFLAT */
    idx_t    [2**NumLevels-2:0] index_nodes; // used to propagate the indices
    DataType [2**NumLevels-2:0] data_nodes;  // used to propagate the data
    logic    [2**NumLevels-2:0] gnt_nodes;   // used to propagate the grant to masters
    logic    [2**NumLevels-2:0] req_nodes;   // used to propagate the requests to slave
    // NOTE(review): the next comment is not a recognizable Verilator pragma; presumably
    // `/* verilator lint_on UNOPTFLAT */` was intended - confirm before changing.
    /* lint_off */
    idx_t                       rr_q;        // current highest-priority index
    logic [NumIn-1:0]           req_d;       // requests as seen by the tree (possibly locked)

    // the final arbitration decision can be taken from the root of the tree
    assign req_o  = req_nodes[0];
    assign data_o = data_nodes[0];
    assign idx_o  = index_nodes[0];

    if (ExtPrio) begin : gen_ext_rr
      // External priority: no internal state and no request locking is generated.
      assign rr_q  = rr_i;
      assign req_d = req_i;
    end else begin : gen_int_rr
      idx_t rr_d;

      // lock arbiter decision in case we got at least one req and no acknowledge
      if (LockIn) begin : gen_lock
        logic  lock_d, lock_q;
        logic [NumIn-1:0] req_q;

        // Lock when a request is presented at the output but not granted this cycle.
        assign lock_d = req_o & ~gnt_i;
        // While locked, arbitrate on the request vector captured in the previous cycle.
        assign req_d  = (lock_q) ? req_q : req_i;

        always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg
          if (!rst_ni) begin
            lock_q <= '0;
          end else begin
            if (flush_i) begin
              lock_q <= '0;
            end else begin
              lock_q <= lock_d;
            end
          end
        end

        // pragma translate_off
        `ifndef VERILATOR
          lock: assert property(
            @(posedge clk_i) LockIn |-> req_o &&
                             (!gnt_i && !flush_i) |=> idx_o == $past(idx_o)) else
                $fatal (1, "Lock implies same arbiter decision in next cycle if output is not \
                            ready.");

          logic [NumIn-1:0] req_tmp;
          assign req_tmp = req_q & req_i;
          lock_req: assume property(
            @(posedge clk_i) LockIn |-> lock_d |=> req_tmp == req_q) else
                $fatal (1, "It is disallowed to deassert unserved request signals when LockIn is \
                            enabled.");
        `endif
        // pragma translate_on

        always_ff @(posedge clk_i or negedge rst_ni) begin : p_req_regs
          if (!rst_ni) begin
            req_q  <= '0;
          end else begin
            if (flush_i) begin
              req_q  <= '0;
            end else begin
              req_q  <= req_d;
            end
          end
        end
      end else begin : gen_no_lock
        assign req_d = req_i;
      end

      if (FairArb) begin : gen_fair_arb
        logic [NumIn-1:0] upper_mask,  lower_mask;
        idx_t             upper_idx,   lower_idx,   next_idx;
        logic             upper_empty, lower_empty;

        // Split the requests into those strictly above rr_q and those at or below it.
        for (genvar i = 0; i < NumIn; i++) begin : gen_mask
          assign upper_mask[i] = (i >  rr_q) ? req_d[i] : 1'b0;
          assign lower_mask[i] = (i <= rr_q) ? req_d[i] : 1'b0;
        end

        lzc #(
          .WIDTH ( NumIn ),
          .MODE  ( 1'b0  )
        ) i_lzc_upper (
          .in_i    ( upper_mask  ),
          .cnt_o   ( upper_idx   ),
          .empty_o ( upper_empty )
        );

        lzc #(
          .WIDTH ( NumIn ),
          .MODE  ( 1'b0  )
        ) i_lzc_lower (
          .in_i    ( lower_mask  ),
          .cnt_o   ( lower_idx   ),
          .empty_o ( /*unused*/  )
        );

        // Prefer a request above the current priority; otherwise wrap around to the lower part.
        assign next_idx = upper_empty      ? lower_idx : upper_idx;
        // The priority only advances on a completed output handshake.
        assign rr_d     = (gnt_i && req_o) ? next_idx  : rr_q;

      end else begin : gen_unfair_arb
        // Plain increment with wrap-around at NumIn-1, again only on a completed handshake.
        assign rr_d = (gnt_i && req_o) ? ((rr_q == idx_t'(NumIn-1)) ? '0 : rr_q + 1'b1) : rr_q;
      end

      // this holds the highest priority
      always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs
        if (!rst_ni) begin
          rr_q   <= '0;
        end else begin
          if (flush_i) begin
            rr_q <= '0;
          end else begin
            rr_q <= rr_d;
          end
        end
      end
    end

    // The root of the grant tree is driven by the downstream grant.
    assign gnt_nodes[0] = gnt_i;

    // arbiter tree
    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels
      for (genvar l = 0; l < 2**level; l++) begin : gen_level
        // local select signal
        logic sel;
        // index calcs
        localparam int unsigned Idx0 = 2**level-1+l;// current node
        localparam int unsigned Idx1 = 2**(level+1)-1+l*2; // first (lower) child of this node
        //////////////////////////////////////////////////////////////
        // uppermost level where data is fed in from the inputs
        if (unsigned'(level) == NumLevels-1) begin : gen_first_level
          // if two successive indices are still in the vector...
          if (unsigned'(l) * 2 < NumIn-1) begin : gen_reduce
            assign req_nodes[Idx0]   = req_d[l*2] | req_d[l*2+1];

            // arbitration: round robin
            // `&` binds tighter than `|`: take the upper input if the lower one is idle, or
            // if the upper one requests and the priority bit of this level points to it.
            assign sel =  ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level];

            assign index_nodes[Idx0] = idx_t'(sel);
            assign data_nodes[Idx0]  = (sel) ? data_i[l*2+1] : data_i[l*2];
            // With AxiVldRdy the grant (ready) is not qualified by the request (valid).
            assign gnt_o[l*2]        = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2])   & ~sel;
            assign gnt_o[l*2+1]      = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2+1]) & sel;
          end
          // if only the first index is still in the vector...
          if (unsigned'(l) * 2 == NumIn-1) begin : gen_first
            assign req_nodes[Idx0]   = req_d[l*2];
            assign index_nodes[Idx0] = '0;// always zero in this case
            assign data_nodes[Idx0]  = data_i[l*2];
            assign gnt_o[l*2]        = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]);
          end
          // if index is out of range, fill up with zeros (will get pruned)
          if (unsigned'(l) * 2 > NumIn-1) begin : gen_out_of_range
            assign req_nodes[Idx0]   = 1'b0;
            assign index_nodes[Idx0] = idx_t'('0);
            assign data_nodes[Idx0]  = DataType'('0);
          end
        //////////////////////////////////////////////////////////////
        // general case for other levels within the tree
        end else begin : gen_other_levels
          assign req_nodes[Idx0]   = req_nodes[Idx1] | req_nodes[Idx1+1];

          // arbitration: round robin
          assign sel =  ~req_nodes[Idx1] | req_nodes[Idx1+1] & rr_q[NumLevels-1-level];

          // Prepend this level's decision bit to the index delivered by the selected child.
          assign index_nodes[Idx0] = (sel) ?
            idx_t'({1'b1, index_nodes[Idx1+1][NumLevels-unsigned'(level)-2:0]}) :
            idx_t'({1'b0, index_nodes[Idx1][NumLevels-unsigned'(level)-2:0]});

          assign data_nodes[Idx0]  = (sel) ? data_nodes[Idx1+1] : data_nodes[Idx1];
          // Forward the grant to the selected child only.
          assign gnt_nodes[Idx1]   = gnt_nodes[Idx0] & ~sel;
          assign gnt_nodes[Idx1+1] = gnt_nodes[Idx0] & sel;
        end
        //////////////////////////////////////////////////////////////
      end
    end

    // pragma translate_off
    `ifndef VERILATOR
    `ifndef XSIM
    initial begin : p_assert
      assert(NumIn)
        else $fatal(1, "Input must be at least one element wide.");
      assert(!(LockIn && ExtPrio))
        else $fatal(1,"Cannot use LockIn feature together with external ExtPrio.");
    end

    hot_one : assert property(
      @(posedge clk_i) $onehot0(gnt_o))
        else $fatal (1, "Grant signal must be hot1 or zero.");

    gnt0 : assert property(
      @(posedge clk_i) |gnt_o |-> gnt_i)
        else $fatal (1, "Grant out implies grant in.");

    gnt1 : assert property(
      @(posedge clk_i) req_o |-> gnt_i |-> |gnt_o)
        else $fatal (1, "Req out and grant in implies grant out.");

    gnt_idx : assert property(
      @(posedge clk_i) req_o |->  gnt_i |-> gnt_o[idx_o])
        else $fatal (1, "Idx_o / gnt_o do not match.");

    req0 : assert property(
      @(posedge clk_i) |req_i |-> req_o)
        else $fatal (1, "Req in implies req out.");

    req1 : assert property(
      @(posedge clk_i) req_o |-> |req_i)
        else $fatal (1, "Req out implies req in.");
    `endif
    `endif
    // pragma translate_on
  end

endmodule : rr_arb_tree
// Davide Rossi

/// Reset synchronizer without a dedicated test-mode reset pin.
///
/// Thin wrapper around `rstgen_bypass`: the functional reset `rst_ni` also
/// serves as the test-mode reset.
module rstgen (
    input  logic clk_i,        // clock
    input  logic rst_ni,       // asynchronous reset in, active low
    input  logic test_mode_i,  // selects the bypass path of rstgen_bypass
    output logic rst_no,       // reset out, active low
    output logic init_no       // init out, active low
);

    // Ports whose name matches a local signal use the implicit `.name` form.
    rstgen_bypass i_rstgen_bypass (
        .clk_i,
        .rst_ni,
        .rst_test_mode_ni ( rst_ni ),
        .test_mode_i,
        .rst_no,
        .init_no
    );

endmodule
/// Reset synchronizer with a dedicated reset bypass pin for test mode.
///
/// In functional mode (`test_mode_i == 0`) the asynchronous reset `rst_ni`
/// clears a chain of `NumRegs` flip-flops. After release, a constant `1` is
/// shifted through the chain, so `rst_no` and `init_no` deassert `NumRegs`
/// rising clock edges later.
///
/// In test mode the chain is bypassed: `rst_no` follows `rst_test_mode_ni`
/// directly and `init_no` is tied high.
module rstgen_bypass #(
    /// Number of synchronizer stages. Must be at least 1.
    parameter int unsigned NumRegs = 4
) (
    input  logic clk_i,             // clock
    input  logic rst_ni,            // functional asynchronous reset, active low
    input  logic rst_test_mode_ni,  // reset used while in test mode, active low
    input  logic test_mode_i,       // 1: bypass the synchronizer
    output logic rst_no,            // reset out, active low
    output logic init_no            // init out, active low
);

    // internal reset: asynchronous reset of the synchronizer chain
    logic rst_n;

    logic [NumRegs-1:0] synch_regs_q;

    // bypass mode
    always_comb begin
        if (test_mode_i == 1'b0) begin
            rst_n   = rst_ni;
            rst_no  = synch_regs_q[NumRegs-1];
            init_no = synch_regs_q[NumRegs-1];
        end else begin
            rst_n   = rst_test_mode_ni;
            rst_no  = rst_test_mode_ni;
            init_no = 1'b1;
        end
    end

    // Synchronizer chain. The procedure is kept as a plain `always`, as in the
    // original.
    always @(posedge clk_i or negedge rst_n) begin
        if (~rst_n) begin
            synch_regs_q <= '0;
        end else begin
            // Shift a 1 in from the LSB. The original wrote
            // {synch_regs_q[NumRegs-2:0], 1'b1}, which is an out-of-range
            // part-select for NumRegs == 1 although the check below allows
            // that value. Shift-or gives the same result for NumRegs > 1 and
            // is also valid for a single stage.
            synch_regs_q <= (synch_regs_q << 1) | NumRegs'(1);
        end
    end

    // pragma translate_off
    `ifndef VERILATOR
    initial begin : p_assertions
        if (NumRegs < 1) $fatal(1, "At least one register is required.");
    end
    `endif
    // pragma translate_on
endmodule
// Author: Florian Zaruba
// Description: Deglitches a serial line by taking multiple samples until
//              asserting the output high/low.

/// Deglitcher for a serial line, built from a saturating up/down counter.
///
/// While `en_i` is high the counter increments on every cycle `d_i` is 1 and
/// decrements on every cycle `d_i` is 0. It saturates at `SIZE` and at 0.
/// `q_o` goes high when the counter reaches `SIZE`, goes low when it reaches
/// 0, and keeps its previous value in between.
///
/// Fixes relative to the previous revision:
/// * The decrement branch was guarded by `count_q != SIZE` instead of
///   `count_q != 0`. The counter could therefore never leave `SIZE`, and it
///   underflowed at 0.
/// * `q_o` was assigned in an `always_comb` without a default, which infers a
///   latch. The previously unused register `q` now stores the last output
///   value, so the hold behaviour is implemented with a flip-flop.
module serial_deglitch #(
    /// Saturation value of the counter, and the counter width in bits.
    parameter int unsigned SIZE = 4
)(
    input  logic clk_i,    // clock
    input  logic rst_ni,   // asynchronous reset active low
    input  logic en_i,     // enable
    input  logic d_i,      // serial data in
    output logic q_o       // filtered data out
);
    // Upper saturation value, sized to the counter.
    localparam logic [SIZE-1:0] CountMax = SIZE'(SIZE);

    logic [SIZE-1:0] count_q;
    logic            q;      // output value of the previous cycle

    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            count_q <= '0;
            q       <= 1'b0;
        end else begin
            // Remember the current output for the "in between" counter values.
            q <= q_o;
            if (en_i) begin
                if (d_i == 1'b1 && count_q != CountMax) begin
                    count_q <= count_q + 1'b1;
                end else if (d_i == 1'b0 && count_q != '0) begin
                    count_q <= count_q - 1'b1;
                end
            end
        end
    end

    // output process
    always_comb begin
        q_o = q;  // default: hold
        if (count_q == CountMax) begin
            q_o = 1'b1;
        end else if (count_q == '0) begin
            q_o = 1'b0;
        end
    end
endmodule
// Description: Simple shift register for arbitrary depth and types

/// Delays `d_i` by `Depth` clock cycles.
///
/// * `Depth == 0`: purely combinational, `d_o` is wired to `d_i`.
/// * `Depth == 1`: a single register with asynchronous active-low reset.
/// * `Depth >= 2`: a chain of `Depth` registers; `d_o` is the last stage.
///
/// All stages reset to `'0`.
module shift_reg #(
    /// Type of the value being delayed.
    parameter type dtype         = logic,
    /// Number of register stages.
    parameter int unsigned Depth = 1
)(
    input  logic clk_i,    // Clock
    input  logic rst_ni,   // Asynchronous reset active low
    input  dtype d_i,
    output dtype d_o
);

    if (Depth == 0) begin : gen_pass_through
        // no storage at all
        assign d_o = d_i;
    end else if (Depth == 1) begin : gen_register
        // the output itself is the only flip-flop
        always_ff @(posedge clk_i or negedge rst_ni) begin
            if (!rst_ni) d_o <= '0;
            else         d_o <= d_i;
        end
    end else begin : gen_shift_reg
        // Depth is unsigned, so this branch covers exactly Depth > 1.
        dtype [Depth-1:0] reg_q;

        // oldest sample leaves at the top of the chain
        assign d_o = reg_q[Depth-1];

        always_ff @(posedge clk_i or negedge rst_ni) begin
            if (!rst_ni) begin
                reg_q <= '0;
            end else begin
                // stage 0 samples the input, every other stage its predecessor
                reg_q[0] <= d_i;
                for (int unsigned stage = 1; stage < Depth; stage++) begin
                    reg_q[stage] <= reg_q[stage-1];
                end
            end
        end
    end

endmodule
// Fabian Schuiki

/// Non-flushable spill register.
///
/// Kept for backward compatibility: instantiates `spill_register_flushable`
/// with its flush input tied low and forwards every other port unchanged.
module spill_register #(
  /// Payload type.
  parameter type T           = logic,
  /// Make this spill register transparent.
  parameter bit  Bypass      = 1'b0
) (
  input  logic clk_i   ,
  input  logic rst_ni  ,
  input  logic valid_i ,
  output logic ready_o ,
  input  T     data_i  ,
  output logic valid_o ,
  input  logic ready_i ,
  output T     data_o
);

  spill_register_flushable #(
    .T      ( T      ),
    .Bypass ( Bypass )
  ) spill_register_flushable_i (
    .clk_i   ( clk_i   ),
    .rst_ni  ( rst_ni  ),
    .valid_i ( valid_i ),
    .flush_i ( 1'b0    ),  // never flushed
    .ready_o ( ready_o ),
    .data_i  ( data_i  ),
    .valid_o ( valid_o ),
    .ready_i ( ready_i ),
    .data_o  ( data_o  )
  );

endmodule
/// A register with handshakes that completely cuts any combinational paths
/// between the input and output. This spill register can be flushed.
///
/// Two storage slots are used: A (the "slice" register, written from the
/// input) and B (the "spill" register, written from A). With `Bypass` set the
/// module is purely combinational and `flush_i`, `clk_i` and `rst_ni` are
/// unused.
module spill_register_flushable #(
  /// Payload type.
  parameter type T           = logic,
  parameter bit  Bypass      = 1'b0   // make this spill register transparent
) (
  input  logic clk_i   ,
  input  logic rst_ni  ,
  input  logic valid_i ,
  input  logic flush_i ,
  output logic ready_o ,
  input  T     data_i  ,
  output logic valid_o ,
  input  logic ready_i ,
  output T     data_o
);

  if (Bypass) begin : gen_bypass
    assign valid_o = valid_i;
    assign ready_o = ready_i;
    assign data_o  = data_i;
  end else begin : gen_spill_reg
    // The A register.
    T a_data_q;
    logic a_full_q;
    logic a_fill, a_drain;

    // A data: only written when A is filled, never cleared except by reset.
    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_data
      if (!rst_ni)
        a_data_q <= '0;
      else if (a_fill)
        a_data_q <= data_i;
    end

    // A full flag: on a fill or a drain it takes the value of a_fill, so a
    // simultaneous fill and drain leaves A full with the new data.
    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_full
      if (!rst_ni)
        a_full_q <= 0;
      else if (a_fill || a_drain)
        a_full_q <= a_fill;
    end

    // The B register.
    T b_data_q;
    logic b_full_q;
    logic b_fill, b_drain;

    // B data: loaded from A (not from the input) when B is filled.
    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_data
      if (!rst_ni)
        b_data_q <= '0;
      else if (b_fill)
        b_data_q <= a_data_q;
    end

    // B full flag: same update scheme as the A full flag.
    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_full
      if (!rst_ni)
        b_full_q <= 0;
      else if (b_fill || b_drain)
        b_full_q <= b_fill;
    end

    // Fill the A register on an accepted input handshake, unless a flush is
    // requested. Drain the A register whenever it is full while the B register
    // is empty, or if a flush is requested.
    assign a_fill = valid_i && ready_o && (!flush_i);
    assign a_drain = (a_full_q && !b_full_q) || flush_i;

    // Fill the B register whenever the A register is drained, but the downstream
    // circuit is not ready. Drain the B register whenever it is full and the
    // downstream circuit is ready, or if a flush is requested.
    assign b_fill = a_drain && (!ready_i) && (!flush_i);
    assign b_drain = (b_full_q && ready_i) || flush_i;

    // We can accept input as long as the A and B registers are not both full.
    // Note: flush_i and valid_i must not be high at the same time,
    // otherwise an invalid handshake may occur
    assign ready_o = !a_full_q || !b_full_q;

    // The unit provides output as long as one of the registers is filled.
    assign valid_o = a_full_q | b_full_q;

    // We empty the spill register before the slice register.
    assign data_o = b_full_q ? b_data_q : a_data_q;

    // pragma translate_off
    `ifndef VERILATOR
    flush_valid : assert property (
      @(posedge clk_i) disable iff (~rst_ni) (flush_i |-> ~valid_i)) else
      $warning("Trying to flush and feed the spill register simultaneously. You will lose data!");
    `endif
    // pragma translate_on
  end
endmodule
/// Non-flushable stream arbiter.
///
/// Instantiates `stream_arbiter_flushable` with its flush input tied low and
/// forwards all parameters and every other port unchanged.
module stream_arbiter #(
    parameter type      DATA_T = logic,   // Vivado requires a default value for type parameters.
    parameter integer   N_INP = -1,       // Synopsys DC requires a default value for parameters.
    parameter           ARBITER = "rr"    // "rr" or "prio"
) (
    input  logic              clk_i,
    input  logic              rst_ni,

    input  DATA_T [N_INP-1:0] inp_data_i,
    input  logic  [N_INP-1:0] inp_valid_i,
    output logic  [N_INP-1:0] inp_ready_o,

    output DATA_T             oup_data_o,
    output logic              oup_valid_o,
    input  logic              oup_ready_i
);

  // All ports except the flush share their name with a local signal, so the
  // implicit `.name` connection form is used for them.
  stream_arbiter_flushable #(
    .DATA_T  ( DATA_T  ),
    .N_INP   ( N_INP   ),
    .ARBITER ( ARBITER )
  ) i_arb (
    .clk_i,
    .rst_ni,
    .flush_i ( 1'b0 ),
    .inp_data_i,
    .inp_valid_i,
    .inp_ready_o,
    .oup_data_o,
    .oup_valid_o,
    .oup_ready_i
  );

endmodule
// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
// handshaking with dependency rules as in AXI4) to a single output stream. The arbitration is
// performed by an `rr_arb_tree`; see that module for details.

/// Flushable stream arbiter built on `rr_arb_tree`.
///
/// * `ARBITER == "rr"`: internal round-robin priority with `LockIn` enabled, so the arbitration
///   decision is held until the output handshake has occurred.
/// * `ARBITER == "prio"`: external priority tied to `'0`.
///   `rr_arb_tree` forbids combining `LockIn` with `ExtPrio` (it raises `$fatal` in simulation)
///   and generates no lock logic when `ExtPrio` is set. `LockIn` is therefore disabled in this
///   branch. The generated hardware is the same as before; only the assertion failure is gone.
///   NOTE(review): without a lock, the selected input (and thus `oup_data_o`) can presumably
///   change while `oup_valid_o` is high if a higher-priority input becomes valid - confirm
///   whether callers of "prio" mode rely on output stability.
/// * Any other value is rejected at elaboration.
module stream_arbiter_flushable #(
    parameter type      DATA_T = logic,   // Vivado requires a default value for type parameters.
    parameter integer   N_INP = -1,       // Synopsys DC requires a default value for parameters.
    parameter           ARBITER = "rr"    // "rr" or "prio"
) (
    input  logic              clk_i,
    input  logic              rst_ni,
    input  logic              flush_i,      // clears the arbiter state

    input  DATA_T [N_INP-1:0] inp_data_i,
    input  logic  [N_INP-1:0] inp_valid_i,
    output logic  [N_INP-1:0] inp_ready_o,

    output DATA_T             oup_data_o,
    output logic              oup_valid_o,
    input  logic              oup_ready_i
);

  if (ARBITER == "rr") begin : gen_rr_arb
    rr_arb_tree #(
      .NumIn      (N_INP),
      .DataType   (DATA_T),
      .ExtPrio    (1'b0),
      .AxiVldRdy  (1'b1),
      .LockIn     (1'b1)
    ) i_arbiter (
      .clk_i,
      .rst_ni,
      .flush_i,
      .rr_i   ('0),
      .req_i  (inp_valid_i),
      .gnt_o  (inp_ready_o),
      .data_i (inp_data_i),
      .gnt_i  (oup_ready_i),
      .req_o  (oup_valid_o),
      .data_o (oup_data_o),
      .idx_o  ()
    );

  end else if (ARBITER == "prio") begin : gen_prio_arb
    rr_arb_tree #(
      .NumIn      (N_INP),
      .DataType   (DATA_T),
      .ExtPrio    (1'b1),
      .AxiVldRdy  (1'b1),
      .LockIn     (1'b0)   // LockIn is illegal (and unused) together with ExtPrio
    ) i_arbiter (
      .clk_i,
      .rst_ni,
      .flush_i,
      .rr_i   ('0),        // static priority
      .req_i  (inp_valid_i),
      .gnt_o  (inp_ready_o),
      .data_i (inp_data_i),
      .gnt_i  (oup_ready_i),
      .req_o  (oup_valid_o),
      .data_o (oup_data_o),
      .idx_o  ()
    );

  end else begin : gen_arb_error
    // pragma translate_off
    $fatal(1, "Invalid value for parameter 'ARBITER'!");
    // pragma translate_on
  end

endmodule
// Author: Florian Zaruba, zarubaf@iis.ee.ethz.ch
// Description: Delay (or randomize) AXI-like handshaking

/// Delays a valid/ready handshake by a fixed or a pseudo-random number of cycles.
///
/// The payload is never stored: `payload_o` is always wired to `payload_i`.
/// With `FixedDelay == 0` and `StallRandom == 0` the handshake is passed through
/// as well. Otherwise a three-state FSM (Idle, Valid, Ready) and a 4-bit down
/// counter hold back `valid_o`/`ready_o`.
module stream_delay #(
    /// Take the delay from an LFSR instead of `FixedDelay`.
    parameter bit   StallRandom = 0,
    /// Fixed delay value loaded into the counter.
    // NOTE(review): the value is assigned to a 4-bit signal (CounterBits), so values above 15
    // are presumably truncated - confirm.
    parameter int   FixedDelay  = 1,
    /// Payload type.
    parameter type  payload_t  = logic
)(
    input  logic     clk_i,
    input  logic     rst_ni,

    input  payload_t payload_i,
    output logic     ready_o,
    input  logic     valid_i,

    output payload_t payload_o,
    input  logic     ready_i,
    output logic     valid_o
);

    if (FixedDelay == 0 && !StallRandom) begin : gen_pass_through
        assign ready_o = ready_i;
        assign valid_o = valid_i;
        assign payload_o = payload_i;
    end else begin : gen_delay

        localparam int unsigned CounterBits = 4;

        typedef enum logic [1:0] {
            Idle, Valid, Ready
        } state_e;

        state_e state_d, state_q;

        logic       load;       // load the counter with counter_load (also advances the LFSR)
        logic [3:0] count_out;  // current counter value; width equals CounterBits
        logic       en;         // enable counting down

        logic [CounterBits-1:0] counter_load;

        assign payload_o = payload_i;

        always_comb begin
            // defaults: stay in the current state, no handshake, counter idle
            state_d = state_q;
            valid_o = 1'b0;
            ready_o = 1'b0;
            load    = 1'b0;
            en      = 1'b0;

            unique case (state_q)
                Idle: begin
                    if (valid_i) begin
                        load = 1'b1;
                        state_d = Valid;
                        // Just one cycle delay
                        // NOTE(review): the FixedDelay == 1 term is not qualified by
                        // !StallRandom, so with StallRandom set and FixedDelay left at its
                        // default the Valid state looks unreachable - confirm intent.
                        if (FixedDelay == 1 || (StallRandom && counter_load == 1)) begin
                            state_d = Ready;
                        end

                        // A random delay of zero: handshake in this very cycle. These
                        // assignments override the state chosen above.
                        if (StallRandom && counter_load == 0) begin
                            valid_o = 1'b1;
                            ready_o = ready_i;
                            if (ready_i) state_d = Idle;
                            else state_d = Ready;
                        end
                    end
                end
                // Count down; move on once the counter output reads zero.
                Valid: begin
                    en = 1'b1;
                    if (count_out == 0) begin
                        state_d = Ready;
                    end
                end

                // Present the handshake and wait for the downstream ready.
                Ready: begin
                    valid_o = 1'b1;
                    ready_o = ready_i;
                    if (ready_i) state_d = Idle;
                end
                default : /* default */;
            endcase

        end

        if (StallRandom) begin : gen_random_stall
            // The LFSR is enabled by `load`; its binary output is the next delay value.
            lfsr_16bit #(
                .WIDTH ( 16 )
            ) i_lfsr_16bit (
                .clk_i          ( clk_i        ),
                .rst_ni         ( rst_ni       ),
                .en_i           ( load         ),
                .refill_way_oh  (              ),
                .refill_way_bin ( counter_load )
            );
        end else begin : gen_fixed_delay
            assign counter_load = FixedDelay;
        end

        // Down counter, loaded in Idle and enabled in Valid.
        counter #(
            .WIDTH      ( CounterBits )
        ) i_counter (
            .clk_i      ( clk_i        ),
            .rst_ni     ( rst_ni       ),
            .clear_i    ( 1'b0         ),
            .en_i       ( en           ),
            .load_i     ( load         ),
            .down_i     ( 1'b1         ),
            .d_i        ( counter_load ),
            .q_o        ( count_out    ),
            .overflow_o (              )
        );

        // FSM state register
        always_ff @(posedge clk_i or negedge rst_ni) begin
            if (~rst_ni) begin
                state_q <= Idle;
            end else begin
                state_q <= state_d;
            end
        end
    end

endmodule
See the License for the +// specific language governing permissions and limitations under the License. + +/// Connects the input stream (valid-ready) handshake to one of `N_OUP` output stream handshakes. +/// +/// This module has no data ports because stream data does not need to be demultiplexed: the data of +/// the input stream can just be applied at all output streams. +module stream_demux #( + /// Number of connected outputs. + parameter int unsigned N_OUP = 32'd1, + /// Dependent parameters, DO NOT OVERRIDE! + parameter int unsigned LOG_N_OUP = (N_OUP > 32'd1) ? unsigned'($clog2(N_OUP)) : 1'b1 // at least one select bit +) ( + input logic inp_valid_i, + output logic inp_ready_o, + + input logic [LOG_N_OUP-1:0] oup_sel_i, // index of the output the input stream is connected to + + output logic [N_OUP-1:0] oup_valid_o, + input logic [N_OUP-1:0] oup_ready_i +); + + always_comb begin + oup_valid_o = '0; // default: no output is valid + oup_valid_o[oup_sel_i] = inp_valid_i; // only the selected output sees the input valid + end + assign inp_ready_o = oup_ready_i[oup_sel_i]; // ready is taken from the selected output only + +endmodule diff --git a/vendor/pulp-platform/common_cells/src/stream_fifo.sv b/vendor/pulp-platform/common_cells/src/stream_fifo.sv new file mode 100644 index 0000000000..e7c60e57e2 --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/stream_fifo.sv @@ -0,0 +1,66 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Author: Georg Rutishauser +/// Stream (valid/ready) wrapper around `fifo_v3`: `ready_o` is `~full`, `valid_o` is `~empty`. +module stream_fifo #( + /// FIFO is in fall-through mode + parameter bit FALL_THROUGH = 1'b0, + /// Default data width if the fifo is of type logic + parameter int unsigned DATA_WIDTH = 32, + /// Depth can be arbitrary from 0 to 2**32 + parameter int unsigned DEPTH = 8, + parameter type T = logic [DATA_WIDTH-1:0], + // DO NOT OVERWRITE THIS PARAMETER + parameter int unsigned ADDR_DEPTH = (DEPTH > 1) ? $clog2(DEPTH) : 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the fifo + input logic testmode_i, // test_mode to bypass clock gating + output logic [ADDR_DEPTH-1:0] usage_o, // fill pointer + // input interface + input T data_i, // data to push into the fifo + input logic valid_i, // input data valid + output logic ready_o, // fifo is not full + // output interface + output T data_o, // output data + output logic valid_o, // fifo is not empty + input logic ready_i // pop head from fifo +); + + logic push, pop; + logic empty, full; + + assign push = valid_i & ~full; // push only when the input handshake happens + assign pop = ready_i & ~empty; // pop only when the output handshake happens + assign ready_o = ~full; + assign valid_o = ~empty; + + fifo_v3 #( + .FALL_THROUGH (FALL_THROUGH), + .DATA_WIDTH (DATA_WIDTH), + .DEPTH (DEPTH), + .dtype(T) + ) fifo_i ( + .clk_i, + .rst_ni, + .flush_i, + .testmode_i, + .full_o (full), + .empty_o (empty), + .usage_o, + .data_i, + .push_i (push), + .data_o, + .pop_i (pop) + ); + +endmodule diff --git a/vendor/pulp-platform/common_cells/src/stream_filter.sv b/vendor/pulp-platform/common_cells/src/stream_filter.sv new file mode 100644 index 0000000000..52a5835e77 --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/stream_filter.sv @@ -0,0 +1,26 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Stream filter: If `drop_i` is `1`, signal `ready` to the upstream regardless of the downstream, +// and do not propagate `valid` downstream. Otherwise, connect upstream to downstream. +module stream_filter ( + input logic valid_i, + output logic ready_o, + + input logic drop_i, // 1: upstream beats are accepted but not forwarded + + output logic valid_o, + input logic ready_i +); + + assign valid_o = drop_i ? 1'b0 : valid_i; // hide the beat from downstream while dropping + assign ready_o = drop_i ? 1'b1 : ready_i; // always accept from upstream while dropping + +endmodule diff --git a/vendor/pulp-platform/common_cells/src/stream_fork.sv b/vendor/pulp-platform/common_cells/src/stream_fork.sv new file mode 100644 index 0000000000..650038d263 --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/stream_fork.sv @@ -0,0 +1,133 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Stream fork: Connects the input stream (ready-valid) handshake to *all* of `N_OUP` output stream +// handshakes. 
For each input stream handshake, every output stream handshakes exactly once. The +// input stream only handshakes when all output streams have handshaked, but the output streams do +// not have to handshake simultaneously. +// +// This module has no data ports because stream data does not need to be forked: the data of the +// input stream can just be applied at all output streams. + +module stream_fork #( + parameter int unsigned N_OUP = 0 // Synopsys DC requires a default value for parameters. +) ( + input logic clk_i, + input logic rst_ni, + input logic valid_i, + output logic ready_o, + output logic [N_OUP-1:0] valid_o, + input logic [N_OUP-1:0] ready_i +); + + typedef enum logic {READY, WAIT} state_t; + + logic [N_OUP-1:0] oup_ready, + all_ones; + + state_t inp_state_d, inp_state_q; + + // Input control FSM + always_comb begin + // No default for ready_o here: it is assigned in every branch of the case below. + inp_state_d = inp_state_q; + + unique case (inp_state_q) + READY: begin + if (valid_i) begin + if (valid_o == all_ones && ready_i == all_ones) begin + // If handshake on all outputs, handshake on input. + ready_o = 1'b1; + end else begin + ready_o = 1'b0; + // Otherwise, wait for outputs that did not handshake yet. 
+ inp_state_d = WAIT; + end + end else begin + ready_o = 1'b0; + end + end + WAIT: begin + if (valid_i && oup_ready == all_ones) begin // all outputs are done with this beat + ready_o = 1'b1; + inp_state_d = READY; + end else begin + ready_o = 1'b0; + end + end + default: begin + inp_state_d = READY; + ready_o = 1'b0; + end + endcase + end + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + inp_state_q <= READY; + end else begin + inp_state_q <= inp_state_d; + end + end + + // Output control FSM + for (genvar i = 0; i < N_OUP; i++) begin: gen_oup_state + state_t oup_state_d, oup_state_q; + + always_comb begin + oup_ready[i] = 1'b1; // only cleared while output i still has to handshake + valid_o[i] = 1'b0; + oup_state_d = oup_state_q; + + unique case (oup_state_q) + READY: begin + if (valid_i) begin + valid_o[i] = 1'b1; + if (ready_i[i]) begin // Output handshake + if (!ready_o) begin // No input handshake yet + oup_state_d = WAIT; + end + end else begin // No output handshake + oup_ready[i] = 1'b0; + end + end + end + WAIT: begin // valid_o[i] stays low until the input handshakes + if (valid_i && ready_o) begin // Input handshake + oup_state_d = READY; + end + end + default: begin + oup_state_d = READY; + end + endcase + end + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + oup_state_q <= READY; + end else begin + oup_state_q <= oup_state_d; + end + end + end + + assign all_ones = '1; // Synthesis fix for Vivado, which does not correctly compute the width + // of the '1 literal when assigned to a port of parametrized width. 
+ +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (N_OUP >= 1) else $fatal(1, "Number of outputs must be at least 1!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/vendor/pulp-platform/common_cells/src/stream_fork_dynamic.sv b/vendor/pulp-platform/common_cells/src/stream_fork_dynamic.sv new file mode 100644 index 0000000000..e4720f704f --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/stream_fork_dynamic.sv @@ -0,0 +1,95 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Authors: +// - Andreas Kurth + +/// Dynamic stream fork: Connects the input stream (ready-valid) handshake to a combination of output +/// stream handshake. The combination is determined dynamically through another stream, which +/// provides a bitmask for the fork. For each input stream handshake, every output stream selected +/// by `sel_i` handshakes exactly once. The input stream only handshakes when all selected output +/// streams have handshaked, but the output streams do not have to handshake simultaneously. +/// +/// This module has no data ports because stream data does not need to be forked: the data of the +/// input stream can just be applied at all output streams. +module stream_fork_dynamic #( + /// Number of output streams + parameter int unsigned N_OUP = 32'd0 // Synopsys DC requires a default value for parameters. 
+) ( + /// Clock + input logic clk_i, + /// Asynchronous reset, active low + input logic rst_ni, + /// Input stream valid handshake + input logic valid_i, + /// Input stream ready handshake + output logic ready_o, + /// Selection mask for the output handshake + input logic [N_OUP-1:0] sel_i, + /// Selection mask valid + input logic sel_valid_i, + /// Selection mask ready + output logic sel_ready_o, + /// Output streams valid handshakes + output logic [N_OUP-1:0] valid_o, + /// Output streams ready handshakes + input logic [N_OUP-1:0] ready_i +); + + logic int_inp_valid, int_inp_ready; + logic [N_OUP-1:0] int_oup_valid, int_oup_ready; + + // Output handshaking + for (genvar i = 0; i < N_OUP; i++) begin : gen_oups + always_comb begin + valid_o[i] = 1'b0; + int_oup_ready[i] = 1'b0; + if (sel_valid_i) begin + if (sel_i[i]) begin // selected output: connect it to the internal fork + valid_o[i] = int_oup_valid[i]; + int_oup_ready[i] = ready_i[i]; + end else begin // unselected output: report ready to the fork, keep `valid_o[i]` low + int_oup_ready[i] = 1'b1; + end + end + end + end + + // Input handshaking + always_comb begin + int_inp_valid = 1'b0; + ready_o = 1'b0; + sel_ready_o = 1'b0; + if (sel_valid_i) begin // nothing moves until a selection mask is available + int_inp_valid = valid_i; + ready_o = int_inp_ready; + sel_ready_o = int_inp_ready; + end + end + + stream_fork #( + .N_OUP ( N_OUP ) + ) i_fork ( + .clk_i, + .rst_ni, + .valid_i ( int_inp_valid ), + .ready_o ( int_inp_ready ), + .valid_o ( int_oup_valid ), + .ready_i ( int_oup_ready ) + ); + +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (N_OUP >= 1) else $fatal(1, "N_OUP must be at least 1!"); + end +`endif +// pragma translate_on +endmodule diff --git a/vendor/pulp-platform/common_cells/src/stream_intf.sv b/vendor/pulp-platform/common_cells/src/stream_intf.sv new file mode 100644 index 0000000000..32f2d8b6b8 --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/stream_intf.sv @@ -0,0 +1,49 @@ +// Copyright 2020 ETH Zurich and University of Bologna. 
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Florian Zaruba + +/// A stream interface with custom payload of type `payload_t`. +/// Handshaking rules as defined in the AXI standard. +interface STREAM_DV #( + /// Custom payload type. + parameter type payload_t = logic +)( + /// Interface clock. + input logic clk_i +); + payload_t data; + logic valid; + logic ready; + + modport In ( // receiving side: drives `ready` + output ready, + input valid, data + ); + + modport Out ( // sending side: drives `valid` and `data` + output valid, data, + input ready + ); + + /// Passive modport for scoreboard and monitors. + modport Passive ( + input valid, ready, data + ); + + // Make sure that the handshake and payload are stable while `valid` is high without `ready` + // pragma translate_off + `ifndef VERILATOR + assert property (@(posedge clk_i) (valid && !ready |=> $stable(data))); + assert property (@(posedge clk_i) (valid && !ready |=> valid)); + `endif + // pragma translate_on +endinterface diff --git a/vendor/pulp-platform/common_cells/src/stream_join.sv b/vendor/pulp-platform/common_cells/src/stream_join.sv new file mode 100644 index 0000000000..2f210bc7e6 --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/stream_join.sv @@ -0,0 +1,43 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Authors: +// - Andreas Kurth + +/// Stream join: Joins a parametrizable number of input streams (i.e., valid-ready handshaking with +/// dependency rules as in AXI4) to a single output stream. The output handshake happens only once +/// all inputs are valid. The data channel flows outside of this module. +module stream_join #( + /// Number of input streams + parameter int unsigned N_INP = 32'd0 // Synopsys DC requires a default value for parameters. +) ( + /// Input streams valid handshakes + input logic [N_INP-1:0] inp_valid_i, + /// Input streams ready handshakes + output logic [N_INP-1:0] inp_ready_o, + /// Output stream valid handshake + output logic oup_valid_o, + /// Output stream ready handshake + input logic oup_ready_i +); + + assign oup_valid_o = (&inp_valid_i); // valid only when every input is valid + for (genvar i = 0; i < N_INP; i++) begin : gen_inp_ready + assign inp_ready_o[i] = oup_valid_o & oup_ready_i; // all inputs handshake together with the output + end + +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (N_INP >= 1) else $fatal(1, "N_INP must be at least 1!"); + end +`endif +// pragma translate_on +endmodule diff --git a/vendor/pulp-platform/common_cells/src/stream_mux.sv b/vendor/pulp-platform/common_cells/src/stream_mux.sv new file mode 100644 index 0000000000..34607d916d --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/stream_mux.sv @@ -0,0 +1,46 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Stream multiplexer: connects the output to one of `N_INP` data streams with valid-ready +/// handshaking. + +module stream_mux #( + parameter type DATA_T = logic, // Vivado requires a default value for type parameters. + parameter integer N_INP = 0, // Synopsys DC requires a default value for value parameters. + /// Dependent parameters, DO NOT OVERRIDE! + parameter integer LOG_N_INP = $clog2(N_INP) // NOTE(review): 0 for N_INP == 1, giving a `[-1:0]` select - confirm callers +) ( + input DATA_T [N_INP-1:0] inp_data_i, + input logic [N_INP-1:0] inp_valid_i, + output logic [N_INP-1:0] inp_ready_o, + + input logic [LOG_N_INP-1:0] inp_sel_i, // index of the input connected to the output + + output DATA_T oup_data_o, + output logic oup_valid_o, + input logic oup_ready_i +); + + always_comb begin + inp_ready_o = '0; // default: no input is ready + inp_ready_o[inp_sel_i] = oup_ready_i; // only the selected input sees the output ready + end + assign oup_data_o = inp_data_i[inp_sel_i]; + assign oup_valid_o = inp_valid_i[inp_sel_i]; + +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (N_INP >= 1) else $fatal (1, "The number of inputs must be at least 1!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/vendor/pulp-platform/common_cells/src/stream_omega_net.sv b/vendor/pulp-platform/common_cells/src/stream_omega_net.sv new file mode 100644 index 0000000000..ad8e11d8ea --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/stream_omega_net.sv @@ -0,0 +1,301 @@ +// Copyright (c) 2020 ETH Zurich and University 
of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Wolfgang Roenninger + +/// Omega network using multiple `stream_xbar` as switches. +/// +/// An omega network is isomorphic to a butterfly network. +/// +/// Handshaking rules as defined by the `AMBA AXI` standard on default. +module stream_omega_net #( + /// Number of inputs into the network (`> 0`). + parameter int unsigned NumInp = 32'd0, + /// Number of outputs from the network (`> 0`). + parameter int unsigned NumOut = 32'd0, + /// Radix of the individual switch points of the network. + /// Currently supported are `32'd2` and `32'd4`. + parameter int unsigned Radix = 32'd2, + /// Data width of the stream. Can be overwritten by defining the type parameter `payload_t`. + parameter int unsigned DataWidth = 32'd1, + /// Payload type of the data ports, only usage of parameter `DataWidth`. + parameter type payload_t = logic [DataWidth-1:0], + /// Adds a spill register stage at each output. + parameter bit SpillReg = 1'b0, + /// Use external priority for the individual `rr_arb_trees`. + parameter int unsigned ExtPrio = 1'b0, + /// Use strict AXI valid ready handshaking. + /// To be protocol conform also the parameter `LockIn` has to be set. + parameter int unsigned AxiVldRdy = 1'b1, + /// Lock in the arbitration decision of the `rr_arb_tree`. 
+ /// When this is set, valids have to be asserted until the corresponding transaction is indicated + /// by ready. + parameter int unsigned LockIn = 1'b1, + /// Derived parameter, do **not** overwrite! + /// + /// Width of the output selection signal. + parameter int unsigned SelWidth = (NumOut > 32'd1) ? unsigned'($clog2(NumOut)) : 32'd1, + /// Derived parameter, do **not** overwrite! + /// + /// Signal type definition for selecting the output at the inputs. + parameter type sel_oup_t = logic[SelWidth-1:0], + /// Derived parameter, do **not** overwrite! + /// + /// Width of the input index signal. + parameter int unsigned IdxWidth = (NumInp > 32'd1) ? unsigned'($clog2(NumInp)) : 32'd1, + /// Derived parameter, do **not** overwrite! + /// + /// Signal type definition indicating from which input the output came. + parameter type idx_inp_t = logic[IdxWidth-1:0] +) ( + /// Clock, positive edge triggered. + input logic clk_i, + /// Asynchronous reset, active low. + input logic rst_ni, + /// Flush the state of the internal `rr_arb_tree` modules. + /// If not used set to `0`. + /// Flush should only be used if there are no active `valid_i`, otherwise it will + /// not adhere to the AXI handshaking. + input logic flush_i, + /// Provide an external state for the `rr_arb_tree` models. + /// Only forwarded in the degenerate single-`stream_xbar` case (with ExtPrio `1`), otherwise tie to `0`. + input idx_inp_t [NumOut-1:0] rr_i, + /// Input data ports. + /// Has to be stable as long as `valid_i` is asserted when parameter `AxiVldRdy` is set. + input payload_t [NumInp-1:0] data_i, + /// Selection of the output port where the data should be routed. + /// Has to be stable as long as `valid_i` is asserted and parameter `AxiVldRdy` is set. + input sel_oup_t [NumInp-1:0] sel_i, + /// Input is valid. + input logic [NumInp-1:0] valid_i, + /// Input is ready to accept data. + output logic [NumInp-1:0] ready_o, + /// Output data ports. 
Valid if `valid_o = 1` + output payload_t [NumOut-1:0] data_o, + /// Index of the input port where data came from. + output idx_inp_t [NumOut-1:0] idx_o, + /// Output is valid. + output logic [NumOut-1:0] valid_o, + /// Output can be accepted. + input logic [NumOut-1:0] ready_i +); + if (NumInp <= Radix && NumOut <= Radix) begin : gen_degenerate_omega_net + // If both Number of inputs and number of outputs are smaller or the same as the radix + // just instantiate a `stream_xbar`. + stream_xbar #( + .NumInp ( NumInp ), + .NumOut ( NumOut ), + .payload_t ( payload_t ), + .OutSpillReg ( SpillReg ), + .ExtPrio ( ExtPrio ), + .AxiVldRdy ( AxiVldRdy ), + .LockIn ( LockIn ) + ) i_stream_xbar ( + .clk_i, + .rst_ni, + .flush_i, + .rr_i ( rr_i ), + .data_i ( data_i ), + .sel_i ( sel_i ), + .valid_i ( valid_i ), + .ready_o ( ready_o ), + .data_o ( data_o ), + .idx_o ( idx_o ), + .valid_o ( valid_o ), + .ready_i ( ready_i ) + ); + end else begin : gen_omega_net + // Find the next power of radix of either the number of inputs or number of outputs. + // This normalizes the network to a power of the radix. Unused inputs and outputs are tied off. + // If the radix is poorly chosen with respect to the number of input/outputs ports + // will lead to an explosion of tied off lanes, which will be removed during optimization. + // Can lead however to RTL simulation overhead. + // Dividing through the log base 2 of `Radix` leads to a change of base. + localparam int unsigned NumLanes = (NumOut > NumInp) ? + unsigned'(Radix**(cf_math_pkg::ceil_div($clog2(NumOut), $clog2(Radix)))) : + unsigned'(Radix**(cf_math_pkg::ceil_div($clog2(NumInp), $clog2(Radix)))); + + // Find the number of routing levels needed. + localparam int unsigned NumLevels = unsigned'(($clog2(NumLanes)+$clog2(Radix)-1)/$clog2(Radix)); + + // Find the number of routes per network stage. Can use a normal division here, as + // `NumLanes % Radix == 0`. 
+ localparam int unsigned NumRouters = NumLanes / Radix; + + // Define the type of sel signal to send through the network. It has to be sliced for the + // individual sel signals of a stage. This slicing has to align with `$clog2(Radix)`. + // For example `Radix = 4`, `NumOut = 17` will lead to the sel signal of an individual stage to + // be 2 bit wide, whereas signal `sel_i` of the module will be 5 bit wide. + // To prevent slicing into an undefined field the overall sel signal is then defined with + // width 6. + typedef logic [$clog2(NumLanes)-1:0] sel_dst_t; + + // Selection signal type of an individual router + localparam int unsigned SelW = unsigned'($clog2(Radix)); + initial begin : proc_selw + $display("SelW is: %0d", SelW); + $display("SelDstW is: %0d", $bits(sel_dst_t)); + end + typedef logic [SelW-1:0] sel_t; + + // Define the payload which should be routed through the network. + typedef struct packed { + sel_dst_t sel_oup; // Selection of output, where it should be routed + payload_t payload; // External payload data + idx_inp_t idx_inp; // Index of the input of this packet + } omega_data_t; + + // signal definitions + omega_data_t [NumLevels-1:0][NumRouters-1:0][Radix-1:0] inp_router_data; + logic [NumLevels-1:0][NumRouters-1:0][Radix-1:0] inp_router_valid, inp_router_ready; + omega_data_t [NumLevels-1:0][NumRouters-1:0][Radix-1:0] out_router_data; + logic [NumLevels-1:0][NumRouters-1:0][Radix-1:0] out_router_valid, out_router_ready; + + // Generate the shuffling between the routers + for (genvar i = 0; unsigned'(i) < NumLevels-1; i++) begin : gen_shuffle_levels + for (genvar j = 0; unsigned'(j) < NumRouters; j++) begin : gen_shuffle_routers + for (genvar k = 0; unsigned'(k) < Radix; k++) begin : gen_shuffle_radix + // This parameter is from `0` to `NumLanes-1` + localparam int unsigned IdxLane = Radix * j + k; + // Do the perfect shuffle + assign inp_router_data[i+1][IdxLane%NumRouters][IdxLane/NumRouters] = + out_router_data[i][j][k]; + + assign 
inp_router_valid[i+1][IdxLane%NumRouters][IdxLane/NumRouters] = + out_router_valid[i][j][k]; + + assign out_router_ready[i][j][k] = + inp_router_ready[i+1][IdxLane%NumRouters][IdxLane/NumRouters]; + + // Do the first input shuffle of layer 0. + // The inputs are connected in reverse, so that the optimization + // then leaves the biggest possible network diameter. + if (i == 0) begin : gen_shuffle_inp + // Reverse the order of the input ports + if ((NumLanes-IdxLane) <= NumInp) begin : gen_inp_ports + localparam int unsigned IdxInp = NumLanes - IdxLane - 32'd1; + assign inp_router_data[0][IdxLane%NumRouters][IdxLane/NumRouters] = '{ + sel_oup: sel_dst_t'(sel_i[IdxInp]), + payload: data_i[IdxInp], + idx_inp: idx_inp_t'(IdxInp) + }; + + assign inp_router_valid[0][IdxLane%NumRouters][IdxLane/NumRouters] = valid_i[IdxInp]; + assign ready_o[IdxInp] = inp_router_ready[0][IdxLane%NumRouters][IdxLane/NumRouters]; + + end else begin : gen_tie_off + assign inp_router_data[0][IdxLane%NumRouters][IdxLane/NumRouters] = '{ default: '0}; + assign inp_router_valid[0][IdxLane%NumRouters][IdxLane/NumRouters] = 1'b0; + end + end + end + end + end + + // Generate the `stream_xbar_routers` + for (genvar i = 0; unsigned'(i) < NumLevels; i++) begin : gen_router_levels + for (genvar j = 0; unsigned'(j) < NumRouters; j++) begin : gen_routers + sel_t [Radix-1:0] sel_router; + for (genvar k = 0; unsigned'(k) < Radix; k++) begin : gen_router_sel + // For the inter stage routing some bits of the overall selection are important. + // The `MSB` is for stage `0`, `MSB-1` for stage `1` and so on for the `Radix=2` case. + // For higher radices a bit slice following the same pattern is used. + // This is the reason that the internal network is expanded to a power of two, so that + // the selection slicing always has a valid index. 
+ assign sel_router[k] = inp_router_data[i][j][k].sel_oup[SelW*(NumLevels-i-1)+:SelW]; + end + + stream_xbar #( + .NumInp ( Radix ), + .NumOut ( Radix ), + .payload_t ( omega_data_t ), + .OutSpillReg ( SpillReg ), + .ExtPrio ( 1'b0 ), + .AxiVldRdy ( AxiVldRdy ), + .LockIn ( LockIn ) + ) i_stream_xbar ( + .clk_i, + .rst_ni, + .flush_i, + .rr_i ( '0 ), + .data_i ( inp_router_data[i][j] ), + .sel_i ( sel_router ), + .valid_i ( inp_router_valid[i][j] ), + .ready_o ( inp_router_ready[i][j] ), + .data_o ( out_router_data[i][j] ), + .idx_o ( /* not used */ ), + .valid_o ( out_router_valid[i][j] ), + .ready_i ( out_router_ready[i][j] ) + ); + end + end + + // outputs are on the last level + for (genvar i = 0; unsigned'(i) < NumLanes; i++) begin : gen_outputs + if (i < NumOut) begin : gen_connect + assign data_o[i] = out_router_data[NumLevels-1][i/Radix][i%Radix].payload; + assign idx_o[i] = out_router_data[NumLevels-1][i/Radix][i%Radix].idx_inp; + assign valid_o[i] = out_router_valid[NumLevels-1][i/Radix][i%Radix]; + assign out_router_ready[NumLevels-1][i/Radix][i%Radix] = ready_i[i]; + end else begin : gen_tie_off + assign out_router_ready[NumLevels-1][i/Radix][i%Radix] = 1'b0; + end + end + + initial begin : proc_debug_print + $display("NumInp: %0d", NumInp); + $display("NumOut: %0d", NumOut); + $display("Radix: %0d", Radix); + $display("NumLanes: %0d", NumLanes); + $display("NumLevels: %0d", NumLevels); + $display("NumRouters: %0d", NumRouters); + end + + // Assertions + // Make sure that the handshake and payload is stable + // pragma translate_off + `ifndef VERILATOR + default disable iff (!rst_ni); // rst_ni is active low: only disable the checks while in reset + for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_sel_assertions + assert property (@(posedge clk_i) (valid_i[i] |-> sel_i[i] < NumOut)) else // uncast: `sel_oup_t'(NumOut)` wraps to 0 for power-of-two NumOut + $fatal(1, "Non-existing output is selected!"); + end + + if (AxiVldRdy) begin : gen_handshake_assertions + for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_inp_assertions + assert property (@(posedge 
clk_i) (valid_i[i] && !ready_o[i] |=> $stable(data_i[i]))) else + $error("data_i is unstable at input: %0d", i); + assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> $stable(sel_i[i]))) else + $error("sel_i is unstable at input: %0d", i); + assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> valid_i[i])) else + $error("valid_i at input %0d has been taken away without a ready.", i); + end + for (genvar i = 0; unsigned'(i) < NumOut; i++) begin : gen_out_assertions + assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> $stable(data_o[i]))) else + $error("data_o is unstable at output: %0d Check that parameter LockIn is set.", i); + assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> $stable(idx_o[i]))) else + $error("idx_o is unstable at output: %0d Check that parameter LockIn is set.", i); + assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> valid_o[i])) else + $error("valid_o at output %0d has been taken away without a ready.", i); + end + end + + initial begin : proc_parameter_assertions + assert ((2**$clog2(Radix) == Radix) && (Radix > 32'd1)) else + $fatal(1, "Radix %0d is not power of two.", Radix); + assert (2**$clog2(NumRouters) == NumRouters) else + $fatal(1, "NumRouters %0d is not power of two.", NumRouters); + assert ($clog2(NumLanes) % SelW == 0) else + $fatal(1, "Bit slicing of the internal selection signal is broken."); + end + `endif + // pragma translate_on + end +endmodule diff --git a/vendor/pulp-platform/common_cells/src/stream_register.sv b/vendor/pulp-platform/common_cells/src/stream_register.sv new file mode 100644 index 0000000000..f529d6a291 --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/stream_register.sv @@ -0,0 +1,57 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Register with a simple stream-like ready/valid handshake. +/// This register does not cut combinatorial paths on all control signals; if you need a complete +/// cut, use the `spill_register`. +module stream_register #( + parameter type T = logic // Vivado requires a default value for type parameters. +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous active-low reset + input logic clr_i, // Synchronous clear + input logic testmode_i, // Test mode to bypass clock gating + // Input port + input logic valid_i, + output logic ready_o, + input T data_i, + // Output port + output logic valid_o, + input logic ready_i, + output T data_o +); + + logic fifo_empty, + fifo_full; + + fifo_v2 #( + .FALL_THROUGH (1'b0), + .DATA_WIDTH ($bits(T)), + .DEPTH (1), + .dtype (T) + ) i_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (clr_i), + .testmode_i (testmode_i), + .full_o (fifo_full), + .empty_o (fifo_empty), + .alm_full_o ( ), + .alm_empty_o ( ), + .data_i (data_i), + .push_i (valid_i & ~fifo_full), + .data_o (data_o), + .pop_i (ready_i & ~fifo_empty) + ); + + assign ready_o = ~fifo_full; + assign valid_o = ~fifo_empty; + +endmodule diff --git a/vendor/pulp-platform/common_cells/src/stream_to_mem.sv b/vendor/pulp-platform/common_cells/src/stream_to_mem.sv new file mode 100644 index 0000000000..00c30863fa --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/stream_to_mem.sv @@ -0,0 +1,134 @@ +// Copyright 2020 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Authors: +// - Andreas Kurth + +/// `stream_to_mem`: Allows to use memories with flow control (`valid`/`ready`) for requests but without flow +/// control for output data to be used in streams. +`include "common_cells/registers.svh" +module stream_to_mem #( + /// Memory request payload type, usually write enable, write data, etc. + parameter type mem_req_t = logic, + /// Memory response payload type, usually read data + parameter type mem_resp_t = logic, + /// Number of buffered responses (fall-through, thus no additional latency). This defines the + /// maximum number of outstanding requests on the memory interface. If the attached memory + /// responds in the same cycle a request is applied, this MUST be 0. If the attached memory + /// responds at least one cycle after a request, this MUST be >= 1 and should be equal to the + /// response latency of the memory to saturate bandwidth. 
+ parameter int unsigned BufDepth = 32'd1 +) ( + /// Clock + input logic clk_i, + /// Asynchronous reset, active low + input logic rst_ni, + /// Request stream interface, payload + input mem_req_t req_i, + /// Request stream interface, payload is valid for transfer + input logic req_valid_i, + /// Request stream interface, payload can be accepted + output logic req_ready_o, + /// Response stream interface, payload + output mem_resp_t resp_o, + /// Response stream interface, payload is valid for transfer + output logic resp_valid_o, + /// Response stream interface, payload can be accepted + input logic resp_ready_i, + /// Memory request interface, payload + output mem_req_t mem_req_o, + /// Memory request interface, payload is valid for transfer + output logic mem_req_valid_o, + /// Memory request interface, payload can be accepted + input logic mem_req_ready_i, + /// Memory response interface, payload + input mem_resp_t mem_resp_i, + /// Memory response interface, payload is valid + input logic mem_resp_valid_i +); + + typedef logic [$clog2(BufDepth+1):0] cnt_t; // One bit wider than BufDepth needs; presumably so the overflow assertion can observe one past BufDepth + + cnt_t cnt_d, cnt_q; + logic buf_ready, + req_ready; + + if (BufDepth > 0) begin : gen_buf + // Count number of outstanding requests. + always_comb begin + cnt_d = cnt_q; + if (req_valid_i && req_ready_o) begin + cnt_d++; + end + if (resp_valid_o && resp_ready_i) begin + cnt_d--; + end + end + + // Can issue another request if the counter is not at its limit or a response is delivered in + // the current cycle. + assign req_ready = (cnt_q < BufDepth) | (resp_valid_o & resp_ready_i); + + // Control request and memory request interface handshakes. + assign req_ready_o = mem_req_ready_i & req_ready; + assign mem_req_valid_o = req_valid_i & req_ready; + + // Buffer responses. 
+ stream_fifo #( + .FALL_THROUGH ( 1'b1 ), + .DEPTH ( BufDepth ), + .T ( mem_resp_t ) + ) i_resp_buf ( + .clk_i, + .rst_ni, + .flush_i ( 1'b0 ), + .testmode_i ( 1'b0 ), + .data_i ( mem_resp_i ), + .valid_i ( mem_resp_valid_i ), + .ready_o ( buf_ready ), + .data_o ( resp_o ), + .valid_o ( resp_valid_o ), + .ready_i ( resp_ready_i ), + .usage_o ( /* unused */ ) + ); + + // Register + `FFARN(cnt_q, cnt_d, '0, clk_i, rst_ni) + + end else begin : gen_no_buf + // Control request, memory request, and response interface handshakes. + assign mem_req_valid_o = req_valid_i; + assign resp_valid_o = mem_req_valid_o & mem_req_ready_i & mem_resp_valid_i; + assign req_ready_o = resp_ready_i & resp_valid_o; + + // Forward responses. + assign resp_o = mem_resp_i; + end + + // Forward requests. + assign mem_req_o = req_i; + +// Assertions +// pragma translate_off +`ifndef VERILATOR + if (BufDepth > 0) begin : gen_buf_asserts + assert property (@(posedge clk_i) mem_resp_valid_i |-> buf_ready) + else $error("Memory response lost!"); + assert property (@(posedge clk_i) cnt_q == '0 |=> cnt_q != '1) + else $error("Counter underflowed!"); + assert property (@(posedge clk_i) cnt_q == BufDepth |=> cnt_q != BufDepth + 1) + else $error("Counter overflowed!"); + end else begin : gen_no_buf_asserts + assume property (@(posedge clk_i) mem_req_valid_o & mem_req_ready_i |-> mem_resp_valid_i) + else $error("With BufDepth = 0, the memory must respond in the same cycle!"); + end +`endif +// pragma translate_on +endmodule diff --git a/vendor/pulp-platform/common_cells/src/stream_xbar.sv b/vendor/pulp-platform/common_cells/src/stream_xbar.sv new file mode 100644 index 0000000000..957400680b --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/stream_xbar.sv @@ -0,0 +1,198 @@ +// Copyright (c) 2020 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Wolfgang Roenninger + +/// Fully connected stream crossbar. +/// +/// Handshaking rules as defined by the `AMBA AXI` standard on default. +module stream_xbar #( + /// Number of inputs into the crossbar (`> 0`). + parameter int unsigned NumInp = 32'd0, + /// Number of outputs from the crossbar (`> 0`). + parameter int unsigned NumOut = 32'd0, + /// Data width of the stream. Can be overwritten by defining the type parameter `payload_t`. + parameter int unsigned DataWidth = 32'd1, + /// Payload type of the data ports, only usage of parameter `DataWidth`. + parameter type payload_t = logic [DataWidth-1:0], + /// Adds a spill register stage at each output. + parameter bit OutSpillReg = 1'b0, + /// Use external priority for the individual `rr_arb_trees`. + parameter int unsigned ExtPrio = 1'b0, + /// Use strict AXI valid ready handshaking. + /// To be protocol conform also the parameter `LockIn` has to be set. + parameter int unsigned AxiVldRdy = 1'b1, + /// Lock in the arbitration decision of the `rr_arb_tree`. + /// When this is set, valids have to be asserted until the corresponding transaction is indicated + /// by ready. + parameter int unsigned LockIn = 1'b1, + /// Derived parameter, do **not** overwrite! + /// + /// Width of the output selection signal. + parameter int unsigned SelWidth = (NumOut > 32'd1) ? 
unsigned'($clog2(NumOut)) : 32'd1, + /// Derived parameter, do **not** overwrite! + /// + /// Signal type definition for selecting the output at the inputs. + parameter type sel_oup_t = logic[SelWidth-1:0], + /// Derived parameter, do **not** overwrite! + /// + /// Width of the input index signal. + parameter int unsigned IdxWidth = (NumInp > 32'd1) ? unsigned'($clog2(NumInp)) : 32'd1, + /// Derived parameter, do **not** overwrite! + /// + /// Signal type definition indicating from which input the output came. + parameter type idx_inp_t = logic[IdxWidth-1:0] +) ( + /// Clock, positive edge triggered. + input logic clk_i, + /// Asynchronous reset, active low. + input logic rst_ni, + /// Flush the state of the internal `rr_arb_tree` modules. + /// If not used set to `0`. + /// Flush should only be used if there are no active `valid_i`, otherwise it will + /// not adhere to the AXI handshaking. + input logic flush_i, + /// Provide an external state for the `rr_arb_tree` models. + /// Will only do something if ExtPrio is `1` otherwise tie to `0`. + input idx_inp_t [NumOut-1:0] rr_i, + /// Input data ports. + /// Has to be stable as long as `valid_i` is asserted when parameter `AxiVldRdy` is set. + input payload_t [NumInp-1:0] data_i, + /// Selection of the output port where the data should be routed. + /// Has to be stable as long as `valid_i` is asserted and parameter `AxiVldRdy` is set. + input sel_oup_t [NumInp-1:0] sel_i, + /// Input is valid. + input logic [NumInp-1:0] valid_i, + /// Input is ready to accept data. + output logic [NumInp-1:0] ready_o, + /// Output data ports. Valid if `valid_o = 1` + output payload_t [NumOut-1:0] data_o, + /// Index of the input port where data came from. + output idx_inp_t [NumOut-1:0] idx_o, + /// Output is valid. + output logic [NumOut-1:0] valid_o, + /// Output can be accepted. 
+ input logic [NumOut-1:0] ready_i +); + typedef struct packed { + payload_t data; + idx_inp_t idx; + } spill_data_t; + + logic [NumInp-1:0][NumOut-1:0] inp_valid; + logic [NumInp-1:0][NumOut-1:0] inp_ready; + + payload_t [NumOut-1:0][NumInp-1:0] out_data; + logic [NumOut-1:0][NumInp-1:0] out_valid; + logic [NumOut-1:0][NumInp-1:0] out_ready; + + // Generate the input selection + for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_inps + stream_demux #( + .N_OUP ( NumOut ) + ) i_stream_demux ( + .inp_valid_i ( valid_i[i] ), + .inp_ready_o ( ready_o[i] ), + .oup_sel_i ( sel_i[i] ), + .oup_valid_o ( inp_valid[i] ), + .oup_ready_i ( inp_ready[i] ) + ); + + // Do the switching cross of the signals. + for (genvar j = 0; unsigned'(j) < NumOut; j++) begin : gen_cross + // Propagate the data from this input to all outputs. + assign out_data[j][i] = data_i[i]; + // switch handshaking + assign out_valid[j][i] = inp_valid[i][j]; + assign inp_ready[i][j] = out_ready[j][i]; + end + end + + // Generate the output arbitration. + for (genvar j = 0; unsigned'(j) < NumOut; j++) begin : gen_outs + spill_data_t arb; + logic arb_valid, arb_ready; + + rr_arb_tree #( + .NumIn ( NumInp ), + .DataType ( payload_t ), + .ExtPrio ( ExtPrio ), + .AxiVldRdy ( AxiVldRdy ), + .LockIn ( LockIn ) + ) i_rr_arb_tree ( + .clk_i, + .rst_ni, + .flush_i, + .rr_i ( rr_i[j] ), + .req_i ( out_valid[j] ), + .gnt_o ( out_ready[j] ), + .data_i ( out_data[j] ), + .req_o ( arb_valid ), + .gnt_i ( arb_ready ), + .data_o ( arb.data ), + .idx_o ( arb.idx ) + ); + + spill_data_t spill; + + spill_register #( + .T ( spill_data_t ), + .Bypass ( !OutSpillReg ) + ) i_spill_register ( + .clk_i, + .rst_ni, + .valid_i ( arb_valid ), + .ready_o ( arb_ready ), + .data_i ( arb ), + .valid_o ( valid_o[j] ), + .ready_i ( ready_i[j] ), + .data_o ( spill ) + ); + // Assign the outputs (deaggregate the data). 
+ assign data_o[j] = spill.data; + assign idx_o[j] = spill.idx; + end + + // Assertions + // Make sure that the handshake and payload is stable + // pragma translate_off + `ifndef VERILATOR + default disable iff (~rst_ni); // rst_ni is active low, so the checks are suspended only while it is 0 + for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_sel_assertions + assert property (@(posedge clk_i) (valid_i[i] |-> sel_i[i] < sel_oup_t'(NumOut))) else + $fatal(1, "Non-existing output is selected!"); + end + + if (AxiVldRdy) begin : gen_handshake_assertions + for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_inp_assertions + assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> $stable(data_i[i]))) else + $error("data_i is unstable at input: %0d", i); + assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> $stable(sel_i[i]))) else + $error("sel_i is unstable at input: %0d", i); + assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> valid_i[i])) else + $error("valid_i at input %0d has been taken away without a ready.", i); + end + for (genvar i = 0; unsigned'(i) < NumOut; i++) begin : gen_out_assertions + assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> $stable(data_o[i]))) else + $error("data_o is unstable at output: %0d Check that parameter LockIn is set.", i); + assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> $stable(idx_o[i]))) else + $error("idx_o is unstable at output: %0d Check that parameter LockIn is set.", i); + assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> valid_o[i])) else + $error("valid_o at output %0d has been taken away without a ready.", i); + end + end + + initial begin : proc_parameter_assertions + assert (NumInp > 32'd0) else $fatal(1, "NumInp has to be > 0!"); + assert (NumOut > 32'd0) else $fatal(1, "NumOut has to be > 0!"); + end + `endif + // pragma translate_on +endmodule diff --git a/vendor/pulp-platform/common_cells/src/sub_per_hash.sv 
b/vendor/pulp-platform/common_cells/src/sub_per_hash.sv new file mode 100644 index 
0000000000..d4938ad4fe --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/sub_per_hash.sv @@ -0,0 +1,173 @@ +// Copyright (c) 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Wolfgang Roenninger + +// This module implements a fully parameterizable substitution-permutation hash +// function. The hash is structured in stages consisting of a shuffle of the input bits +// and then xoring for each bit 3 pseudo-random bits of the shuffled vector. +// The hash function is NOT cryptographically secure! +// From the keys it computes a sequence of pseudo-random numbers, which determine the permutations +// and substitutions. As pseudo random generator a multiplicative linear congruential +// generator is used and uses different constants for the computation of the permutation +// and substitution respectively. +// The permutation shuffles the bits using a variant of the Fisher-Yates shuffle algorithm. +// The substitution per stage is the xor of 3 pseudo random bits of the previous stage. +// As shifting and xoring of a signal do not change its distribution, the distribution +// of the output hash is the same as the one of the input data. +// +// Parameters: +// - `InpWidth`: The input width of the vector `data_i`. +// - `HashWidth`: The output width of the substitution-permutation hash. 
+// - `NoRounds`: The amount of permutation, substitution stages generated. Translates +// into how many levels of xor's there will be before optimization. +// - `PermuteKey`: The Key for the pseudo-random generator used for determining the exact +// permutation (shuffled wiring between each xor stage) at compile/elaboration. +// Any `int unsigned` value can be used as key, however one should examine the +// output of the hash function. +// - `XorKey`: The Key for the pseudo-random generator used for determining the xor +// of bits between stages. The same principles as for `PermuteKey` applies, +// however one should look that both keys have a greatest common divisor of 1. + +module sub_per_hash #( + parameter int unsigned InpWidth = 32'd11, + parameter int unsigned HashWidth = 32'd5, + parameter int unsigned NoRounds = 32'd1, + parameter int unsigned PermuteKey = 32'd299034753, + parameter int unsigned XorKey = 32'd4094834 +) ( + // is purely combinational + input logic [InpWidth-1:0] data_i, + output logic [HashWidth-1:0] hash_o, + output logic [2**HashWidth-1:0] hash_onehot_o +); + + // typedefs and respective localparams + typedef int unsigned perm_lists_t [NoRounds][InpWidth]; + localparam perm_lists_t PERMUTATIONS = get_permutations(PermuteKey); + // encoding for inner most array: + // positions 0 - 2 each hold the index of one bit of the permuted vector + // all three indexed bits are xored together to form the output bit + typedef int unsigned xor_stages_t [NoRounds][InpWidth][3]; + localparam xor_stages_t XorStages = get_xor_stages(XorKey); + + // stage signals + logic [NoRounds-1:0][InpWidth-1:0] permuted, xored; + + // for each round + for (genvar r = 0; r < NoRounds; r++) begin : gen_round + // for each bit + for (genvar i = 0; i < InpWidth ; i++) begin : gen_sub_per + + // assign the permutation; NOTE(review): rounds after the first read permuted[r-1], not xored[r-1], so the xor result of earlier rounds does not feed later rounds - confirm this is intended + if (r 
== 0) begin : gen_input + assign permuted[r][i] = data_i[PERMUTATIONS[r][i]]; + end else begin : gen_permutation + assign permuted[r][i] = permuted[r-1][PERMUTATIONS[r][i]]; + end + + // 
assign the xor substitution + assign xored[r][i] = permuted[r][XorStages[r][i][0]] ^ + permuted[r][XorStages[r][i][1]] ^ + permuted[r][XorStages[r][i][2]]; + end + end + + // output assignment, take the bottom bits of the last round + assign hash_o = xored[NoRounds-1][HashWidth-1:0]; + // for onehot run through a decoder + assign hash_onehot_o = 1 << hash_o; + + // PRG is MLCG (multiplicative linear congruential generator) + // Constant values the same as RtlUniform from Native API + // X(n+1) = (a*X(n)+c) mod m + // a: large prime + // c: increment + // m: range + // Shuffling is a variation of the Fisher-Yates shuffle algorithm + function automatic perm_lists_t get_permutations(input int unsigned seed); + perm_lists_t indices; + perm_lists_t perm_array; + longint unsigned A = 2147483629; + longint unsigned C = 2147483587; + longint unsigned M = 2**31 - 1; + longint unsigned index = 0; + longint unsigned advance = 0; + longint unsigned rand_number = (A * seed + C) % M; + + // do it for each round + for (int unsigned r = 0; r < NoRounds; r++) begin + // initialize the index array + for (int unsigned i = 0; i < InpWidth; i++) begin + indices[r][i] = i; + end + // do the shuffling + for (int unsigned i = 0; i < InpWidth; i++) begin + // get the 'random' number; NOTE(review): for i == 0 no new index is drawn, so index keeps its previous value (0 in the first round) - confirm this is harmless + if (i > 0) begin + rand_number = (A * rand_number + C) % M; + index = rand_number % i; + end + // move the entry stored at index to position i, then store i at position index + if (i != index) begin + perm_array[r][i] = perm_array[r][index]; + perm_array[r][index] = indices[r][i]; + end + end + // advance the PRG a bit + rand_number = (A * rand_number + C) % M; + advance = rand_number % NoRounds; + for (int unsigned i = 0; i < advance; i++) begin + rand_number = (A * rand_number + C) % M; + end + end + return perm_array; + endfunction : get_permutations + + // PRG is MLCG 
(multiplicative linear congruential generator) + // Constant values the same as Numerical Recipes + // X(n+1) = (a*X(n)+c) mod m + // a: large prime + // c: increment + // m: range + function 
automatic xor_stages_t get_xor_stages(input int unsigned seed); + xor_stages_t xor_array; + longint unsigned A = 1664525; + longint unsigned C = 1013904223; + longint unsigned M = 2**32; + longint unsigned index = 0; + // int unsigned even = 0; + longint unsigned advance = 0; + longint unsigned rand_number = (A * seed + C) % M; + + // fill the array with 'random' inputs + // every xor takes three inputs; the even/uneven choice between two and three inputs is commented out + // for each round + for (int unsigned r = 0; r < NoRounds; r++) begin + // for each bit + for (int unsigned i = 0; i < InpWidth; i++) begin + rand_number = (A * rand_number + C) % M; + // even = rand_number[3]; + for (int unsigned j = 0; j < 3; j++) begin + rand_number = (A * rand_number + C) % M; + index = rand_number % InpWidth; + xor_array[r][i][j] = index; + end + end + // advance the PRG a bit + rand_number = (A * rand_number + C) % M; + advance = rand_number % NoRounds; + for (int unsigned i = 0; i < advance; i++) begin + rand_number = (A * rand_number + C) % M; + end + end + return xor_array; + endfunction : get_xor_stages +endmodule diff --git a/vendor/pulp-platform/common_cells/src/sync.sv b/vendor/pulp-platform/common_cells/src/sync.sv new file mode 100644 index 0000000000..7d8e0a1f42 --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/sync.sv @@ -0,0 +1,35 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. + +// Antonio Pullini + +module sync #( // Shift-register chain of STAGES flip-flops between serial_i and serial_o + parameter int unsigned STAGES = 2, // Number of flip-flops in the chain + parameter bit ResetValue = 1'b0 // Value every stage takes while rst_ni is low +) ( + input logic clk_i, // Clock, stages update on the rising edge + input logic rst_ni, // Asynchronous reset, active low + input logic serial_i, // Bit shifted into stage 0 + output logic serial_o // Bit taken from the last stage +); + + logic [STAGES-1:0] reg_q; + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + reg_q <= {STAGES{ResetValue}}; + end else begin + reg_q <= {reg_q[STAGES-2:0], serial_i}; // Shift by one stage; NOTE(review): the slice STAGES-2:0 looks like it needs STAGES >= 2 - confirm + end + end + + assign serial_o = reg_q[STAGES-1]; + +endmodule diff --git a/vendor/pulp-platform/common_cells/src/sync_wedge.sv b/vendor/pulp-platform/common_cells/src/sync_wedge.sv new file mode 100644 index 0000000000..58f1279808 --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/sync_wedge.sv @@ -0,0 +1,56 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Antonio Pullini + +module sync_wedge #( // Passes serial_i through a sync instance and flags rising and falling edges of the result + parameter int unsigned STAGES = 2 // Forwarded to the STAGES parameter of i_sync +) ( + input logic clk_i, + input logic rst_ni, + input logic en_i, // Enables the clock gate and the serial_q update + input logic serial_i, + output logic r_edge_o, // High while serial is 1 and serial_q is 0 + output logic f_edge_o, // High while serial is 0 and serial_q is 1 + output logic serial_o // Copy of serial_q +); + logic clk; // Gated clock produced by i_pulp_clock_gating + logic serial, serial_q; // Output of i_sync and its registered sample + + assign serial_o = serial_q; + assign f_edge_o = (~serial) & serial_q; + assign r_edge_o = serial & (~serial_q); + + sync #( + .STAGES (STAGES) + ) i_sync ( + .clk_i, + .rst_ni, + .serial_i, + .serial_o ( serial ) + ); + + pulp_clock_gating i_pulp_clock_gating ( + .clk_i, + .en_i, + .test_en_i ( 1'b0 ), + .clk_o ( clk ) + ); + + always_ff @(posedge clk, negedge rst_ni) begin + if (!rst_ni) begin + serial_q <= 1'b0; + end else begin + if (en_i) begin + serial_q <= serial; + end + end + end +endmodule diff --git a/vendor/pulp-platform/common_cells/src/unread.sv b/vendor/pulp-platform/common_cells/src/unread.sv new file mode 100644 index 0000000000..80e7356237 --- /dev/null +++ b/vendor/pulp-platform/common_cells/src/unread.sv @@ -0,0 +1,21 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 29.10.2018 +// Description: Dummy circuit to mitigate Open Pin warnings + +/* verilator lint_off UNUSED */ +module unread ( + input logic d_i // Only port; the module body is empty, so nothing reads it +); + +endmodule +/* verilator lint_on UNUSED */ diff --git a/vendor/pulp-platform/fpnew/.gitignore b/vendor/pulp-platform/fpnew/.gitignore new file mode 100644 index 0000000000..0e866a8180 --- /dev/null +++ b/vendor/pulp-platform/fpnew/.gitignore @@ -0,0 +1,3 @@ +*~ +html +Bender.lock diff --git a/vendor/pulp-platform/fpnew/LICENSE b/vendor/pulp-platform/fpnew/LICENSE new file mode 100644 index 0000000000..5ca76ba6b9 --- /dev/null +++ b/vendor/pulp-platform/fpnew/LICENSE @@ -0,0 +1,176 @@ +SOLDERPAD HARDWARE LICENSE version 0.51 + +This license is based closely on the Apache License Version 2.0, but is not +approved or endorsed by the Apache Foundation. A copy of the non-modified +Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0. + +As this license is not currently OSI or FSF approved, the Licensor permits any +Work licensed under this License, at the option of the Licensee, to be treated +as licensed under the Apache License Version 2.0 (which is so approved). + +This License is licensed under the terms of this License and in particular +clause 7 below (Disclaimer of Warranties) applies in relation to its use. + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +“License” shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +“Licensor” shall mean the Rights owner or entity authorized by the Rights owner +that is granting the License. + +“Legal Entity” shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. 
+For the purposes of this definition, “control” means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +“You” (or “Your”) shall mean an individual or Legal Entity exercising +permissions granted by this License. + +“Rights” means copyright and any similar right including design right (whether +registered or unregistered), semiconductor topography (mask) rights and +database rights (but excluding Patents and Trademarks). + +“Source” form shall mean the preferred form for making modifications, including +but not limited to source code, net lists, board layouts, CAD files, +documentation source, and configuration files. + +“Object” form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object +code, generated documentation, the instantiation of a hardware design and +conversions to other media types, including intermediate forms such as +bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask +works). + +“Work” shall mean the work of authorship, whether in Source form or other +Object form, made available under the License, as indicated by a Rights notice +that is included in or attached to the work (an example is provided in the +Appendix below). + +“Derivative Works” shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) or physically connect to or interoperate with the interfaces of, the Work +and Derivative Works thereof. 
+ +“Contribution” shall mean any design or work of authorship, including the +original version of the Work and any modifications or additions to that Work or +Derivative Works thereof, that is intentionally submitted to Licensor for +inclusion in the Work by the Rights owner or by an individual or Legal Entity +authorized to submit on behalf of the Rights owner. For the purposes of this +definition, “submitted” means any form of electronic, verbal, or written +communication sent to the Licensor or its representatives, including but not +limited to communication on electronic mailing lists, source code control +systems, and issue tracking systems that are managed by, or on behalf of, the +Licensor for the purpose of discussing and improving the Work, but excluding +communication that is conspicuously marked or otherwise designated in writing +by the Rights owner as “Not a Contribution.” + +“Contributor” shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of License. Subject to the terms and conditions of this License, each +Contributor hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable license under the Rights to reproduce, +prepare Derivative Works of, publicly display, publicly perform, sublicense, +and distribute the Work and such Derivative Works in Source or Object form and +do anything in relation to the Work as if the Rights did not exist. + +3. Grant of Patent License. 
Subject to the terms and conditions of this +License, each Contributor hereby grants to You a perpetual, worldwide, +non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this +section) patent license to make, have made, use, offer to sell, sell, import, +and otherwise transfer the Work, where such license applies only to those +patent claims licensable by such Contributor that are necessarily infringed by +their Contribution(s) alone or by combination of their Contribution(s) with the +Work to which such Contribution(s) was submitted. If You institute patent +litigation against any entity (including a cross-claim or counterclaim in a +lawsuit) alleging that the Work or a Contribution incorporated within the Work +constitutes direct or contributory patent infringement, then any patent +licenses granted to You under this License for that Work shall terminate as of +the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or +Derivative Works thereof in any medium, with or without modifications, and in +Source or Object form, provided that You meet the following conditions: + + You must give any other recipients of the Work or Derivative Works a copy + of this License; and + + You must cause any modified files to carry prominent notices stating that + You changed the files; and + + You must retain, in the Source form of any Derivative Works that You + distribute, all copyright, patent, trademark, and attribution notices from + the Source form of the Work, excluding those notices that do not pertain to + any part of the Derivative Works; and + + If the Work includes a “NOTICE” text file as part of its distribution, then + any Derivative Works that You distribute must include a readable copy of + the attribution notices contained within such NOTICE file, excluding those + notices that do not pertain to any part of the Derivative Works, in at + least one of the following places: within a NOTICE text 
file distributed as + part of the Derivative Works; within the Source form or documentation, if + provided along with the Derivative Works; or, within a display generated by + the Derivative Works, if and wherever such third-party notices normally + appear. The contents of the NOTICE file are for informational purposes only + and do not modify the License. You may add Your own attribution notices + within Derivative Works that You distribute, alongside or as an addendum to + the NOTICE text from the Work, provided that such additional attribution + notices cannot be construed as modifying the License. You may add Your own + copyright statement to Your modifications and may provide additional or + different license terms and conditions for use, reproduction, or + distribution of Your modifications, or for any such Derivative Works as a + whole, provided Your use, reproduction, and distribution of the Work + otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any +Contribution intentionally submitted for inclusion in the Work by You to the +Licensor shall be under the terms and conditions of this License, without any +additional terms or conditions. Notwithstanding the above, nothing herein shall +supersede or modify the terms of any separate license agreement you may have +executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, +trademarks, service marks, or product names of the Licensor, except as required +for reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in +writing, Licensor provides the Work (and each Contributor provides its +Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied, including, without limitation, any warranties +or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. You are solely responsible for determining the +appropriateness of using or redistributing the Work and assume any risks +associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, whether in +tort (including negligence), contract, or otherwise, unless required by +applicable law (such as deliberate and grossly negligent acts) or agreed to in +writing, shall any Contributor be liable to You for damages, including any +direct, indirect, special, incidental, or consequential damages of any +character arising as a result of this License or out of the use or inability to +use the Work (including but not limited to damages for loss of goodwill, work +stoppage, computer failure or malfunction, or any and all other commercial +damages or losses), even if such Contributor has been advised of the +possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or +Derivative Works thereof, You may choose to offer, and charge a fee for, +acceptance of support, warranty, indemnity, or other liability obligations +and/or rights consistent with this License. However, in accepting such +obligations, You may act only on Your own behalf and on Your sole +responsibility, not on behalf of any other Contributor, and only if You agree +to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS \ No newline at end of file diff --git a/vendor/pulp-platform/fpnew/README.md b/vendor/pulp-platform/fpnew/README.md new file mode 100644 index 0000000000..7bcb9ee0dc --- /dev/null +++ b/vendor/pulp-platform/fpnew/README.md @@ -0,0 +1,147 @@ +# FPnew - New Floating-Point Unit with Transprecision Capabilities + +Parametric floating-point unit with support for standard RISC-V formats and operations as well as transprecision formats, written in SystemVerilog. + +Maintainer: Stefan Mach + +## Features + +The FPU is a parametric design that allows generating FP hardware units for various use cases. +Even though mainly designed for use in RISC-V processors, the FPU or its sub-blocks can easily be utilized in other environments. +Our design aims to be compliant with IEEE 754-2008 and provides the following features: + +### Formats +Any IEEE 754-2008 style binary floating-point format can be supported, including single-, double-, quad- and half-precision (`binary32`, `binary64`, `binary128`, `binary16`). +Formats can be defined with arbitrary number of exponent and mantissa bits through parameters and are always symmetrically biased. +Multiple FP formats can be supported concurrently, and the number of formats supported is not limited. + +Multiple integer formats with arbitrary number of bits (as source or destination of conversions) can also be defined. + +### Operations +- Addition/Subtraction +- Multiplication +- Fused multiply-add in four flavours (`fmadd`, `fmsub`, `fnmadd`, `fnmsub`) +- Division<sup>1</sup> +- Square root<sup>1</sup> +- Minimum/Maximum<sup>2</sup> +- Comparisons +- Sign-Injections (`copy`, `abs`, `negate`, `copySign` etc.) +- Conversions among all supported FP formats +- Conversions between FP formats and integers (signed & unsigned) and vice versa +- Classification + +Multi-format FMA operations (i.e. multiplication in one format, accumulation in another) are optionally supported. 
+ +Optionally, *packed-SIMD* versions of all the above operations can be generated for formats narrower than the FPU datapath width. +E.g.: Support for double-precision (64bit) operations and two simultaneous single-precision (32bit) operations. + +It is also possible to generate only a subset of operations if e.g. divisions are not needed. + +<sup>1</sup>Some compliance issues with IEEE 754-2008 are currently known to exist<br>
+<sup>2</sup>Implementing IEEE 754-201x `minimumNumber` and `maximumNumber`, respectively + +### Rounding modes +All IEEE 754-2008 rounding modes are supported, namely +- `roundTiesToEven` +- `roundTiesToAway` +- `roundTowardPositive` +- `roundTowardNegative` +- `roundTowardZero` + +### Status Flags +All IEEE 754-2008 status flags are supported, namely +- Invalid operation (`NV`) +- Division by zero (`DZ`) +- Overflow (`OF`) +- Underflow (`UF`) +- Inexact (`NX`) + +## Getting Started + +### Dependencies + +FPnew currently depends on the following: +- `lzc` and `rr_arb_tree` from the `common_cells` repository (https://github.com/pulp-platform/common_cells.git) +- optional: Divider and square-root unit from the `fpu_div_sqrt_mvp` repository (https://github.com/pulp-platform/fpu_div_sqrt_mvp.git) + +These two repositories are included in the source code directory as git submodules, use +```bash +git submodule update --init --recursive +``` +if you want to load these dependencies there. + +Consider using [Bender](https://github.com/fabianschuiki/bender.git) for managing dependencies in your projects. FPnew comes with Bender support! + +### Usage + +The top-level module of the FPU is called `fpnew_top` and can be directly instantiated in your design. +Make sure you compile the package `fpnew_pkg` ahead of any files making references to types, parameters or functions defined there. + +It is discouraged to `import` all of `fpnew_pkg` into your source files. Instead, explicitly scope references into the package like so: `fpnew_pkg::foo`. 
+ +#### Example Instantiation + +```SystemVerilog +// FPU instance +fpnew_top #( + .Features ( fpnew_pkg::RV64D ), + .Implementation ( fpnew_pkg::DEFAULT_NOREGS ), + .TagType ( logic ) +) i_fpnew_top ( + .clk_i, + .rst_ni, + .operands_i, + .rnd_mode_i, + .op_i, + .op_mod_i, + .src_fmt_i, + .dst_fmt_i, + .int_fmt_i, + .vectorial_op_i, + .tag_i, + .in_valid_i, + .in_ready_o, + .flush_i, + .result_o, + .status_o, + .tag_o, + .out_valid_o, + .out_ready_i, + .busy_o +); +``` + +### Documentation + +More in-depth documentation on the FPnew configuration, interfaces and architecture is provided in [`docs/README.md`](docs/README.md). + +### Issues and Contributing + +In case you find any issues with FPnew that have not been reported yet, don't hesitate to open a new [issue](https://github.com/pulp-platform/fpnew/issues) here on Github. +Please, don't use the issue tracker for support questions. +Instead, consider contacting the maintainers or consulting the [PULP forums](https://pulp-platform.org/community/index.php). + +In case you would like to contribute to the project, please refer to the contributing guidelines in [`docs/CONTRIBUTING.md`](docs/CONTRIBUTING.md) before opening a pull request. + + +### Repository Structure + +HDL source code can be found in the `src` directory while documentation is located in `docs`. +A changelog is kept at [`docs/CHANGELOG.md`](docs/CHANGELOG.md). + +This repository loosely follows the [GitFlow](https://nvie.com/posts/a-successful-git-branching-model/) branching model. +This means that the `master` branch is considered stable and used to publish releases of the FPU while the `develop` branch contains features and bugfixes that have not yet been properly released. + +Furthermore, this repository tries to adhere to [SemVer](https://semver.org/), as outlined in the [changelog](docs/CHANGELOG.md). + +## Licensing + +FPnew is released under the *SolderPad Hardware License*, which is a permissive license based on Apache 2.0. 
Please refer to the [license file](LICENSE) for further information. + +## Acknowledgement + +This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement No 732631. + +For further information, visit [oprecomp.eu](http://oprecomp.eu). + +![OPRECOMP](docs/fig/oprecomp_logo_inline1.png) diff --git a/vendor/pulp-platform/fpnew/src/common_cells/.gitignore b/vendor/pulp-platform/fpnew/src/common_cells/.gitignore new file mode 100644 index 0000000000..10a88888c1 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/.gitignore @@ -0,0 +1,7 @@ +.* +!.git* +*.out +*~ +/Bender.lock +/Bender.local +build diff --git a/vendor/pulp-platform/fpnew/src/common_cells/CHANGELOG.md b/vendor/pulp-platform/fpnew/src/common_cells/CHANGELOG.md new file mode 100644 index 0000000000..70cb337bce --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/CHANGELOG.md @@ -0,0 +1,210 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
+ +## Unreleased + +## 1.13.1 - 2019-06-01 + +### Changed + +- Fix path in `src_files.yml` for `stream_arbiter` and `stream_arbiter_flushable` + +## 1.13.0 - 2019-05-29 + +### Added + +- Added exponential backoff window module +- Added parametric Galois LFSR module with optional whitening feature +- Added `cf_math_pkg`: Constant Function implementations of mathematical functions for HDL elaboration + +### Changed +- Parametric payload data type for `rr_arb_tree` + +### Deprecated +- The following arbiter implementations are deprecated and superseded by `rr_arb_tree`: +- Priority arbiter `prioarbiter` +- Round-robin arbiter `rrarbiter` + +### Fixed + +## 1.12.0 - 2019-04-09 + +### Added +- Add priority arbiter +- Add Pseudo Least Recently Used tree +- Add round robin arbiter mux tree + +### Changed +- Add selectable arbiter implementation for `stream_arbiter` and `stream_arbiter_flushable`. One can choose between priority (`prio`) and round-robin arbitration (`rr`). +- Add `$onehot0` assertion in one-hot to bin +- Rework `rrarbiter` unit (uses `rr_arb_tree` implementation underneath) + +## 1.11.0 - 2019-03-20 + +### Added +- Add stream fork +- Add fall-through register +- Add stream filter +- Add ID queue + +### Changed +- `sync_wedge` use existing synchronizer. This defines a single place where a tech-specific synchronizer can be defined. + +### Fixed +- Fix FIFO push and pop signals in `stream_register` to observe interface prerequisites. +- In `fifo_v3`, fix data output when pushing into empty fall-through FIFO. Previously, the data + output of an empty fall-through FIFO with data at its input (and `push_i=1`) depended on + `pop_i`: When `pop_i=0`, old, invalid data were visible at the output (even though `empty_o=0`, + indicating that the data output is valid). Only when `pop_i=1`, the data from the input fell + through. 
One consequence of this bug was that `data_o` of the `fall_through_register` could change + while `valid_o=1`, violating the basic stream specification. + +## 1.10.0 - 2018-12-18 + +### Added +- Add `fifo_v3` with generic fill count +- Add 16 bit LFSR +- Add stream delayer +- Add stream arbiter +- Add register macros for RTL +- Add shift register + +### Changed +- Make number of registers of `rstgen_bypass` a parameter. + +### Fixed +- Fix `valid_i` and `grant_i` guarantees in `generic_fifo` for backward compatibility. +- LZC: Synthesis of streaming operators in ternary operators +- Add missing entry for `popcount` to `Bender.yml`. +- Add default values for parameters to improve compatibility with Synopsys DC and Vivado. + +## 1.9.0 - 2018-11-02 + +### Added +- Add popcount circuit `popcount` + +## 1.8.0 - 2018-10-15 + +### Added +- Add lock feature to the rrarbiter. This prevents the arbiter from changing its decision when we have pending requests that remain unacknowledged for several cycles. +- Add deglitching circuit +- Add generic clock divider +- Add edge detector as alias to sync_wedge (name is more expressive) +- Add generic counter +- Add moving deglitcher + +## 1.7.6 - 2018-09-27 + +### Added +- Add reset synchronizer with explicit reset bypass in testmode + +## 1.7.5 - 2018-09-06 +### Fixed +- Fix incompatibility with verilator +- Fix dependency to open-source repo + +## 1.7.4 - 2018-09-06 +- Fix assertions in `fifo_v2` (write on full / read on empty did not trigger properly) + +## 1.7.3 - 2018-08-27 +### Fixed +- Use proper `fifo_v2` in `generic_fifo` module. + +## 1.7.2 - 2018-08-27 +### Added +- Almost full/empty flags to FIFO, as `fifo_v2`. + +### Changed +- FIFO moved to `fifo_v1` and instantiates `fifo_v2`. + +## 1.7.1 - 2018-08-27 +### Fixed +- Revert breaking changes to `fifo`. + +## 1.7.0 - 2018-08-24 +### Added +- Add stream register (`stream_register`). +- Add stream multiplexer and demultiplexer (`stream_mux`, `stream_demux`). 
+- Add round robin arbiter (`rrarbiter`). +- Add leading zero counter (`lzc`). + +### Changed +- Deprecate `find_first_one` in favor of `lzc`. + +## 1.6.0 - 2018-04-03 +### Added +- Add binary to Gray code converter. +- Add Gray code to binary converter. +- Add Gray code testbench. +- Add CDC FIFO based on Gray counters. This is a faster alternative to the 2-phase FIFO which also works if a domain's clock has stopped. + +### Changed +- Rename `cdc_fifo` to `cdc_fifo_2phase`. +- Adjust CDC FIFO testbench to cover both implementations. + +## 1.5.4 - 2018-03-31 +### Changed +- Replace explicit clock gate in `fifo` with implicit one. + +## 1.5.3 - 2018-03-16 +### Changed +- Remove duplicate deprecated modules. + +## 1.5.2 - 2018-03-16 +### Changed +- Remove deprecated `rstgen` and fix interface. + +## 1.5.1 - 2018-03-16 +### Changed +- Remove deprecated `onehot_to_bin`. + +## 1.5.0 - 2018-03-14 +### Added +- Add behavioural SRAM model + +## 1.4.0 - 2018-03-14 +### Added +- Clock domain crossing FIFO + +### Changed +- Re-name new sync modules to resolve namespace collisions + +## 1.3.0 - 2018-03-12 +### Added +- 2-phase clock domain crossing +- Add old common cells as deprecated legacy modules + +## 1.2.3 - 2018-03-09 +### Added +- Backwards compatibility wrapper for `generic_LFSR_8bit` + +## 1.2.2 - 2018-03-09 +### Added +- Backwards compatibility wrapper for `generic_fifo` + +## 1.2.1 - 2018-03-09 +### Fixed +- Fix an issue in the spill register which causes transactions to be lost + +## 1.2.0 - 2018-03-09 +### Added +- Add spill register + +## 1.1.0 - 2018-03-06 +### Added +- Find first zero + +## 1.0.0 - 2018-03-02 +### Added +- Re-implementation of the generic FIFO supporting all kinds of use-cases +- Testbench for FIFO + +### Changed +- Re-formatting and artistic code clean-up + +## 0.1.0 - 2018-02-23 +### Added +- Fork of PULP common cells repository diff --git a/vendor/pulp-platform/fpnew/src/common_cells/LICENSE 
b/vendor/pulp-platform/fpnew/src/common_cells/LICENSE new file mode 100644 index 0000000000..18e4f67692 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/LICENSE @@ -0,0 +1,176 @@ +SOLDERPAD HARDWARE LICENSE version 0.51 + +This license is based closely on the Apache License Version 2.0, but is not +approved or endorsed by the Apache Foundation. A copy of the non-modified +Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0. + +As this license is not currently OSI or FSF approved, the Licensor permits any +Work licensed under this License, at the option of the Licensee, to be treated +as licensed under the Apache License Version 2.0 (which is so approved). + +This License is licensed under the terms of this License and in particular +clause 7 below (Disclaimer of Warranties) applies in relation to its use. + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the Rights owner or entity authorized by the Rights owner +that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Rights" means copyright and any similar right including design right (whether +registered or unregistered), semiconductor topography (mask) rights and +database rights (but excluding Patents and Trademarks). 
+ +"Source" form shall mean the preferred form for making modifications, including +but not limited to source code, net lists, board layouts, CAD files, +documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object +code, generated documentation, the instantiation of a hardware design and +conversions to other media types, including intermediate forms such as +bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask +works). + +"Work" shall mean the work of authorship, whether in Source form or other +Object form, made available under the License, as indicated by a Rights notice +that is included in or attached to the work (an example is provided in the +Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) or physically connect to or interoperate with the interfaces of, the Work +and Derivative Works thereof. + +"Contribution" shall mean any design or work of authorship, including the +original version of the Work and any modifications or additions to that Work or +Derivative Works thereof, that is intentionally submitted to Licensor for +inclusion in the Work by the Rights owner or by an individual or Legal Entity +authorized to submit on behalf of the Rights owner. 
For the purposes of this +definition, "submitted" means any form of electronic, verbal, or written +communication sent to the Licensor or its representatives, including but not +limited to communication on electronic mailing lists, source code control +systems, and issue tracking systems that are managed by, or on behalf of, the +Licensor for the purpose of discussing and improving the Work, but excluding +communication that is conspicuously marked or otherwise designated in writing +by the Rights owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of License. Subject to the terms and conditions of this License, each +Contributor hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable license under the Rights to reproduce, +prepare Derivative Works of, publicly display, publicly perform, sublicense, +and distribute the Work and such Derivative Works in Source or Object form and +do anything in relation to the Work as if the Rights did not exist. + +3. Grant of Patent License. Subject to the terms and conditions of this +License, each Contributor hereby grants to You a perpetual, worldwide, +non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this +section) patent license to make, have made, use, offer to sell, sell, import, +and otherwise transfer the Work, where such license applies only to those +patent claims licensable by such Contributor that are necessarily infringed by +their Contribution(s) alone or by combination of their Contribution(s) with the +Work to which such Contribution(s) was submitted. 
If You institute patent +litigation against any entity (including a cross-claim or counterclaim in a +lawsuit) alleging that the Work or a Contribution incorporated within the Work +constitutes direct or contributory patent infringement, then any patent +licenses granted to You under this License for that Work shall terminate as of +the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or +Derivative Works thereof in any medium, with or without modifications, and in +Source or Object form, provided that You meet the following conditions: + + You must give any other recipients of the Work or Derivative Works a copy + of this License; and + + You must cause any modified files to carry prominent notices stating that + You changed the files; and + + You must retain, in the Source form of any Derivative Works that You + distribute, all copyright, patent, trademark, and attribution notices from + the Source form of the Work, excluding those notices that do not pertain to + any part of the Derivative Works; and + + If the Work includes a "NOTICE" text file as part of its distribution, then + any Derivative Works that You distribute must include a readable copy of + the attribution notices contained within such NOTICE file, excluding those + notices that do not pertain to any part of the Derivative Works, in at + least one of the following places: within a NOTICE text file distributed as + part of the Derivative Works; within the Source form or documentation, if + provided along with the Derivative Works; or, within a display generated by + the Derivative Works, if and wherever such third-party notices normally + appear. The contents of the NOTICE file are for informational purposes only + and do not modify the License. 
You may add Your own attribution notices + within Derivative Works that You distribute, alongside or as an addendum to + the NOTICE text from the Work, provided that such additional attribution + notices cannot be construed as modifying the License. You may add Your own + copyright statement to Your modifications and may provide additional or + different license terms and conditions for use, reproduction, or + distribution of Your modifications, or for any such Derivative Works as a + whole, provided Your use, reproduction, and distribution of the Work + otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any +Contribution intentionally submitted for inclusion in the Work by You to the +Licensor shall be under the terms and conditions of this License, without any +additional terms or conditions. Notwithstanding the above, nothing herein shall +supersede or modify the terms of any separate license agreement you may have +executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, +trademarks, service marks, or product names of the Licensor, except as required +for reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in +writing, Licensor provides the Work (and each Contributor provides its +Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied, including, without limitation, any warranties +or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. You are solely responsible for determining the +appropriateness of using or redistributing the Work and assume any risks +associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in +tort (including negligence), contract, or otherwise, unless required by +applicable law (such as deliberate and grossly negligent acts) or agreed to in +writing, shall any Contributor be liable to You for damages, including any +direct, indirect, special, incidental, or consequential damages of any +character arising as a result of this License or out of the use or inability to +use the Work (including but not limited to damages for loss of goodwill, work +stoppage, computer failure or malfunction, or any and all other commercial +damages or losses), even if such Contributor has been advised of the +possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or +Derivative Works thereof, You may choose to offer, and charge a fee for, +acceptance of support, warranty, indemnity, or other liability obligations +and/or rights consistent with this License. However, in accepting such +obligations, You may act only on Your own behalf and on Your sole +responsibility, not on behalf of any other Contributor, and only if You agree +to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS diff --git a/vendor/pulp-platform/fpnew/src/common_cells/README.md b/vendor/pulp-platform/fpnew/src/common_cells/README.md new file mode 100644 index 0000000000..cf68ec1dcc --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/README.md @@ -0,0 +1,117 @@ +# Common Cells Repository + +Maintainer: Florian Zaruba + +This repository contains commonly used cells and headers for use in various projects. + +## Cell Contents + +This repository currently contains the following cells, ordered by categories. 
+Please note that cells with status *deprecated* are not to be used for new designs and only serve to provide compatibility with old code. + +### Clocks and Resets + +| Name | Description | Status | Superseded By | +|-------------------------|-----------------------------------------------------|--------------|---------------| +| `clk_div` | Clock divider with integer divisor | active | | +| `clock_divider` | Clock divider with configuration registers | *deprecated* | `clk_div` | +| `clock_divider_counter` | Clock divider using a counter | *deprecated* | `clk_div` | +| `rstgen` | Reset synchronizer | active | | +| `rstgen_bypass` | Reset synchronizer with dedicated test reset bypass | active | | + +### Clock Domains and Asynchronous Crossings + +| Name | Description | Status | Superseded By | +|----------------------|----------------------------------------------------------------------------------|--------------|---------------| +| `cdc_2phase` | Clock domain crossing using two-phase handshake, with ready/valid interface | active | | +| `cdc_fifo_2phase` | Clock domain crossing FIFO using two-phase handshake, with ready/valid interface | active | | +| `cdc_fifo_gray` | Clock domain crossing FIFO using a gray-counter, with ready/valid interface | active | | +| `edge_detect` | Rising/falling edge detector | active | | +| `edge_propagator` | **ANTONIO ADD DESCRIPTION** | active | | +| `edge_propagator_rx` | **ANTONIO ADD DESCRIPTION** | active | | +| `edge_propagator_tx` | **ANTONIO ADD DESCRIPTION** | active | | +| `pulp_sync` | Serial line synchronizer | *deprecated* | `sync` | +| `pulp_sync_wedge` | Serial line synchronizer with edge detector | *deprecated* | `sync_wedge` | +| `serial_deglitch` | Serial line deglitcher | active | | +| `sync` | Serial line synchronizer | active | | +| `sync_wedge` | Serial line synchronizer with edge detector | active | | + +### Counters and Shift Registers + +| Name | Description | Status | Superseded By | 
+|---------------------|-------------------------------------------------------------------|--------------|---------------| +| `counter` | Generic up/down counter with overflow detection | active | | +| `generic_LFSR_8bit` | 8-bit linear feedback shift register (LFSR) | *deprecated* | `lfsr_8bit` | +| `lfsr_8bit` | 8-bit linear feedback shift register (LFSR) | active | | +| `lfsr_16bit` | 16-bit linear feedback shift register (LFSR) | active | | +| `lfsr` | 4...64-bit parametric Galois LFSR with optional whitening feature | active | | +| `mv_filter` | **ZARUBAF ADD DESCRIPTION** | active | | + +### Data Path Elements + +| Name | Description | Status | Superseded By | +| :--------------------------- | :----------------------------------------------------------------------------- | :------------- | :------------ | +| `binary_to_gray` | Binary to gray code converter | active | | +| `find_first_one` | Leading-one finder / leading-zero counter | *deprecated* | `lzc` | +| `gray_to_binary` | Gray code to binary converter | active | | +| `lzc` | Leading/trailing-zero counter | active | | +| `onehot_to_bin` | One-hot to binary converter | active | | +| `shift_reg` | Shift register for arbitrary types | active | | +| `rr_arb_tree` | Round-robin arbiter for req/gnt and vld/rdy interfaces with optional priority | active | | +| `rrarbiter` | Round-robin arbiter for req/ack interface with look-ahead | *deprecated* | `rr_arb_tree` | +| `prioarbiter` | Priority arbiter for req/ack interface with look-ahead | *deprecated* | `rr_arb_tree` | +| `fall_through_register` | Fall-through register with ready/valid interface | active | | +| `spill_register` | Register with ready/valid interface to cut all combinational interface paths | active | | +| `stream_arbiter` | Round-robin arbiter for ready/valid stream interface | active | | +| `stream_arbiter_flushable` | Round-robin arbiter for ready/valid stream interface and flush functionality | active | | +| `stream_demux` | 
Ready/valid interface demultiplexer | active | | +| `stream_mux` | Ready/valid interface multiplexer | active | | +| `stream_register` | Register with ready/valid interface | active | | +| `stream_fork` | Ready/valid fork | active | | +| `stream_filter` | Ready/valid filter | active | | +| `stream_delay` | Randomize or delay ready/valid interface | active | | +| `popcount` | Combinatorial popcount (hamming weight) | active | | + +### Data Structures + +| Name | Description | Status | Superseded By | +| :------------------- | :---------------------------------------------- | :------------- | :------------ | +| `fifo` | FIFO register with upper threshold | *deprecated* | `fifo_v3` | +| `fifo_v2` | FIFO register with upper and lower threshold | *deprecated* | `fifo_v3` | +| `fifo_v3` | FIFO register with generic fill counts | active | | +| `generic_fifo` | FIFO register without thresholds | *deprecated* | `fifo_v3` | +| `generic_fifo_adv` | FIFO register without thresholds | *deprecated* | `fifo_v3` | +| `sram` | SRAM behavioral model | active | | +| `plru_tree` | Pseudo least recently used tree | active | | +| `unread` | Empty module to sink unconnected outputs into | active | | + + +## Header Contents + +This repository currently contains the following header files. + +### RTL Register Macros + +The header file `registers.svh` contains macros that expand to descriptions of registers. +To avoid misuse of `always_ff` blocks, only the following macros shall be used to describe sequential behavior. +The use of linter rules that flag explicit uses of `always_ff` in source code is encouraged. 
+ +| Macro | Arguments | Description | +|-----------------------|-----------------------------------------------------------------|-------------------------------------------------------------------------| +| \`FF | `q_sig`, `d_sig`, `rst_val` | Flip-flop with asynchronous active-low reset (implicit) | +| \`FFAR | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `arst_sig` | Flip-flop with asynchronous active-high reset | +| \`FFARN | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `arstn_sig` | Flip-flop with asynchronous active-low reset | +| \`FFSR | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `rst_sig` | Flip-flop with synchronous active-high reset | +| \`FFSRN | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `rstn_sig` | Flip-flop with synchronous active-low reset | +| \`FFNR | `q_sig`, `d_sig`, `clk_sig` | Flip-flop without reset | +| | | | +| \`FFL | `q_sig`, `d_sig`, `load_ena`, `rst_val` | Flip-flop with load-enable and asynchronous active-low reset (implicit) | +| \`FFLAR | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `arst_sig` | Flip-flop with load-enable and asynchronous active-high reset | +| \`FFLARN | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `arstn_sig` | Flip-flop with load-enable and asynchronous active-low reset | +| \`FFLSR | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `rst_sig` | Flip-flop with load-enable and synchronous active-high reset | +| \`FFLSRN | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `rstn_sig` | Flip-flop with load-enable and synchronous active-low reset | +| \`FFLNR | `q_sig`, `d_sig`, `load_ena`, `clk_sig` | Flip-flop with load-enable without reset | +- *The name of the clock and reset signals for implicit variants is `clk_i` and `rst_ni`, respectively.* +- *Argument suffix `_sig` indicates signal names for present and next state as well as clocks and resets.* +- *Argument `rst_val` specifies the value literal to be assigned upon reset.* +- *Argument `load_ena` specifies the boolean expression that forms the load enable 
of the register.* diff --git a/vendor/pulp-platform/fpnew/src/common_cells/include/common_cells/registers.svh b/vendor/pulp-platform/fpnew/src/common_cells/include/common_cells/registers.svh new file mode 100644 index 0000000000..c1975edcb3 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/include/common_cells/registers.svh @@ -0,0 +1,224 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+
+// Common register defines for RTL designs
+`ifndef COMMON_CELLS_REGISTERS_SVH_
+`define COMMON_CELLS_REGISTERS_SVH_
+
+// Abridged Summary of available FF macros:
+// `FF:      asynchronous active-low reset (implicit clock and reset)
+// `FFAR:    asynchronous active-high reset
+// `FFARN:   asynchronous active-low reset
+// `FFSR:    synchronous active-high reset
+// `FFSRN:   synchronous active-low reset
+// `FFNR:    without reset
+// `FFL:     load-enable and asynchronous active-low reset (implicit clock and reset)
+// `FFLAR:   load-enable and asynchronous active-high reset
+// `FFLARN:  load-enable and asynchronous active-low reset
+// `FFLARNC: load-enable and asynchronous active-low reset and synchronous active-high clear
+// `FFLSR:   load-enable and synchronous active-high reset
+// `FFLSRN:  load-enable and synchronous active-low reset
+// `FFLNR:   load-enable without reset
+//
+// General notes on the macros below:
+// - Every macro expands to exactly one `always_ff` block that assigns `__q` with
+//   non-blocking assignments; `__q` must therefore not be driven anywhere else.
+// - In the load-enable variants the register is assigned its own value (`__q`) whenever
+//   `__load` is false, i.e. it holds its state.
+// - The synchronous-reset variants (and `FFLARNC) additionally emit a token-pasted
+//   `synopsys sync_set_reset` directive comment naming the reset/clear signal. That line is
+//   wrapped in `ifndef VERILATOR and is therefore left out when VERILATOR is defined.
+//   NOTE(review): presumably a hint for the synthesis tool to map the signal onto the
+//   flip-flop's synchronous reset pin - confirm against the tool documentation.
+
+
+// Flip-Flop with asynchronous active-low reset (implicit clock and reset)
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// Implicit:
+// clk_i: clock input
+// rst_ni: reset input (asynchronous, active low)
+`define FF(__q, __d, __reset_value) \
+  always_ff @(posedge clk_i or negedge rst_ni) begin \
+    if (!rst_ni) begin \
+      __q <= (__reset_value); \
+    end else begin \
+      __q <= (__d); \
+    end \
+  end
+
+// Flip-Flop with asynchronous active-high reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst: asynchronous reset
+`define FFAR(__q, __d, __reset_value, __clk, __arst) \
+  always_ff @(posedge (__clk) or posedge (__arst)) begin \
+    if (__arst) begin \
+      __q <= (__reset_value); \
+    end else begin \
+      __q <= (__d); \
+    end \
+  end
+
+// Flip-Flop with asynchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst_n: asynchronous reset
+`define FFARN(__q, __d, __reset_value, __clk, __arst_n) \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin \
+    if (!__arst_n) begin \
+      __q <= (__reset_value); \
+    end else begin \
+      __q <= (__d); \
+    end \
+  end
+
+// Flip-Flop with synchronous active-high reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_clk: reset input
+`define FFSR(__q, __d, __reset_value, __clk, __reset_clk) \
+  `ifndef VERILATOR \
+  /``* synopsys sync_set_reset `"__reset_clk`" *``/ \
+  `endif \
+  always_ff @(posedge (__clk)) begin \
+    __q <= (__reset_clk) ? (__reset_value) : (__d); \
+  end
+
+// Flip-Flop with synchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_n_clk: reset input
+`define FFSRN(__q, __d, __reset_value, __clk, __reset_n_clk) \
+  `ifndef VERILATOR \
+  /``* synopsys sync_set_reset `"__reset_n_clk`" *``/ \
+  `endif \
+  always_ff @(posedge (__clk)) begin \
+    __q <= (!__reset_n_clk) ? (__reset_value) : (__d); \
+  end
+
+// Always-enable Flip-Flop without reset
+// __q: Q output of FF
+// __d: D input of FF
+// __clk: clock input
+`define FFNR(__q, __d, __clk) \
+  always_ff @(posedge (__clk)) begin \
+    __q <= (__d); \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-low reset (implicit clock and reset)
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// Implicit:
+// clk_i: clock input
+// rst_ni: reset input (asynchronous, active low)
+`define FFL(__q, __d, __load, __reset_value) \
+  always_ff @(posedge clk_i or negedge rst_ni) begin \
+    if (!rst_ni) begin \
+      __q <= (__reset_value); \
+    end else begin \
+      __q <= (__load) ? (__d) : (__q); \
+    end \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-high reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst: asynchronous reset
+`define FFLAR(__q, __d, __load, __reset_value, __clk, __arst) \
+  always_ff @(posedge (__clk) or posedge (__arst)) begin \
+    if (__arst) begin \
+      __q <= (__reset_value); \
+    end else begin \
+      __q <= (__load) ? (__d) : (__q); \
+    end \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst_n: asynchronous reset
+`define FFLARN(__q, __d, __load, __reset_value, __clk, __arst_n) \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin \
+    if (!__arst_n) begin \
+      __q <= (__reset_value); \
+    end else begin \
+      __q <= (__load) ? (__d) : (__q); \
+    end \
+  end
+
+// Flip-Flop with load-enable and synchronous active-high reset
+// The reset takes priority over the load enable.
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_clk: reset input
+`define FFLSR(__q, __d, __load, __reset_value, __clk, __reset_clk) \
+  `ifndef VERILATOR \
+  /``* synopsys sync_set_reset `"__reset_clk`" *``/ \
+  `endif \
+  always_ff @(posedge (__clk)) begin \
+    __q <= (__reset_clk) ? (__reset_value) : ((__load) ? (__d) : (__q)); \
+  end
+
+// Flip-Flop with load-enable and synchronous active-low reset
+// The reset takes priority over the load enable.
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_n_clk: reset input
+`define FFLSRN(__q, __d, __load, __reset_value, __clk, __reset_n_clk) \
+  `ifndef VERILATOR \
+  /``* synopsys sync_set_reset `"__reset_n_clk`" *``/ \
+  `endif \
+  always_ff @(posedge (__clk)) begin \
+    __q <= (!__reset_n_clk) ? (__reset_value) : ((__load) ? (__d) : (__q)); \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-low reset and synchronous clear
+// Priority (highest first): asynchronous reset, synchronous clear, load enable.
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __clear: assign reset value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst_n: asynchronous reset
+`define FFLARNC(__q, __d, __load, __clear, __reset_value, __clk, __arst_n) \
+  `ifndef VERILATOR \
+  /``* synopsys sync_set_reset `"__clear`" *``/ \
+  `endif \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin \
+    if (!__arst_n) begin \
+      __q <= (__reset_value); \
+    end else begin \
+      __q <= (__clear) ? (__reset_value) : (__load) ? (__d) : (__q); \
+    end \
+  end
+
+// Load-enable Flip-Flop without reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __clk: clock input
+`define FFLNR(__q, __d, __load, __clk) \
+  always_ff @(posedge (__clk)) begin \
+    __q <= (__load) ? (__d) : (__q); \
+  end
+
+`endif
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_2phase.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_2phase.sv
new file mode 100644
index 0000000000..8e770abfa1
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_2phase.sv
@@ -0,0 +1,175 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki
+
+/// A two-phase clock domain crossing.
+///
+/// Wrapper that instantiates `cdc_2phase_src` (clocked by `src_clk_i`) and
+/// `cdc_2phase_dst` (clocked by `dst_clk_i`) and connects them through the
+/// three signals async_req, async_ack and async_data. Both sides expose a
+/// valid/ready handshake carrying a payload of type `T`.
+///
+/// CONSTRAINT: Requires max_delay of min_period(src_clk_i, dst_clk_i) through
+/// the paths async_req, async_ack, async_data.
+/* verilator lint_off DECLFILENAME */
+module cdc_2phase #(
+  parameter type T = logic
+)(
+  input  logic src_rst_ni,
+  input  logic src_clk_i,
+  input  T     src_data_i,
+  input  logic src_valid_i,
+  output logic src_ready_o,
+
+  input  logic dst_rst_ni,
+  input  logic dst_clk_i,
+  output T     dst_data_o,
+  output logic dst_valid_o,
+  input  logic dst_ready_i
+);
+
+  // Asynchronous handshake signals.
+  (* dont_touch = "true" *) logic async_req;
+  (* dont_touch = "true" *) logic async_ack;
+  (* dont_touch = "true" *) T async_data;
+
+  // The sender in the source domain.
+  cdc_2phase_src #(.T(T)) i_src (
+    .rst_ni       ( src_rst_ni  ),
+    .clk_i        ( src_clk_i   ),
+    .data_i       ( src_data_i  ),
+    .valid_i      ( src_valid_i ),
+    .ready_o      ( src_ready_o ),
+    .async_req_o  ( async_req   ),
+    .async_ack_i  ( async_ack   ),
+    .async_data_o ( async_data  )
+  );
+
+  // The receiver in the destination domain.
+  cdc_2phase_dst #(.T(T)) i_dst (
+    .rst_ni       ( dst_rst_ni  ),
+    .clk_i        ( dst_clk_i   ),
+    .data_o       ( dst_data_o  ),
+    .valid_o      ( dst_valid_o ),
+    .ready_i      ( dst_ready_i ),
+    .async_req_i  ( async_req   ),
+    .async_ack_o  ( async_ack   ),
+    .async_data_i ( async_data  )
+  );
+
+endmodule
+
+
+/// Half of the two-phase clock domain crossing located in the source domain.
+///
+/// An item is accepted when `valid_i && ready_o`: the payload is stored in
+/// `data_src_q` and the request level `req_src_q` is inverted. `ready_o` is
+/// high only while `req_src_q` equals `ack_q`, the twice-registered copy of
+/// `async_ack_i`, so no new item is accepted until the acknowledge level has
+/// caught up with the request level.
+module cdc_2phase_src #(
+  parameter type T = logic
+)(
+  input  logic rst_ni,
+  input  logic clk_i,
+  input  T     data_i,
+  input  logic valid_i,
+  output logic ready_o,
+  output logic async_req_o,
+  input  logic async_ack_i,
+  output T     async_data_o
+);
+
+  (* dont_touch = "true" *)
+  logic req_src_q, ack_src_q, ack_q;
+  (* dont_touch = "true" *)
+  T data_src_q;
+
+  // The req_src and data_src registers change when a new data item is accepted.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      req_src_q  <= 0;
+      data_src_q <= '0;
+    end else if (valid_i && ready_o) begin
+      req_src_q  <= ~req_src_q;
+      data_src_q <= data_i;
+    end
+  end
+
+  // The ack_src and ack registers act as synchronization stages.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ack_src_q <= 0;
+      ack_q     <= 0;
+    end else begin
+      ack_src_q <= async_ack_i;
+      ack_q     <= ack_src_q;
+    end
+  end
+
+  // Output assignments.
+  // Equal request/acknowledge levels mean that no item is in flight.
+  assign ready_o = (req_src_q == ack_q);
+  assign async_req_o = req_src_q;
+  assign async_data_o = data_src_q;
+
+endmodule
+
+
+/// Half of the two-phase clock domain crossing located in the destination
+/// domain.
+///
+/// `async_req_i` is registered three times (`req_dst_q`, `req_q0`, `req_q1`).
+/// `valid_o` is high while `req_q1` differs from the local acknowledge level
+/// `ack_dst_q`; accepting the item (`valid_o && ready_i`) inverts `ack_dst_q`,
+/// which is returned to the source side on `async_ack_o`.
+module cdc_2phase_dst #(
+  parameter type T = logic
+)(
+  input  logic rst_ni,
+  input  logic clk_i,
+  output T     data_o,
+  output logic valid_o,
+  input  logic ready_i,
+  input  logic async_req_i,
+  output logic async_ack_o,
+  input  T     async_data_i
+);
+
+  (* dont_touch = "true" *)
+  (* async_reg = "true" *)
+  logic req_dst_q, req_q0, req_q1, ack_dst_q;
+  (* dont_touch = "true" *)
+  T data_dst_q;
+
+  // The ack_dst register changes when a new data item is accepted.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ack_dst_q  <= 0;
+    end else if (valid_o && ready_i) begin
+      ack_dst_q  <= ~ack_dst_q;
+    end
+  end
+
+  // The data_dst register changes when a new data item is presented. This is
+  // indicated by the async_req line changing levels.
+  // The capture happens in the cycle where req_q0 already shows the new level
+  // but req_q1 does not yet, and only while no item is pending (!valid_o).
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      data_dst_q <= '0;
+    end else if (req_q0 != req_q1 && !valid_o) begin
+      data_dst_q <= async_data_i;
+    end
+  end
+
+  // The req_dst and req registers act as synchronization stages.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      req_dst_q <= 0;
+      req_q0    <= 0;
+      req_q1    <= 0;
+    end else begin
+      req_dst_q <= async_req_i;
+      req_q0    <= req_dst_q;
+      req_q1    <= req_q0;
+    end
+  end
+
+  // Output assignments.
+  // Differing request/acknowledge levels mean an item is waiting to be taken.
+  assign valid_o = (ack_dst_q != req_q1);
+  assign data_o = data_dst_q;
+  assign async_ack_o = ack_dst_q;
+
+endmodule
+/* verilator lint_on DECLFILENAME */
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_2phase.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_2phase.sv
new file mode 100644
index 0000000000..58939ccaf6
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_2phase.sv
@@ -0,0 +1,134 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki
+
+/// A clock domain crossing FIFO, using 2-phase hand shakes.
+///
+/// This FIFO has its push and pop ports in two separate clock domains. Its size
+/// can only be powers of two, which is why its depth is given as 2**LOG_DEPTH.
+/// LOG_DEPTH must be at least 1.
+///
+/// The write pointer lives in the source domain and the read pointer in the
+/// destination domain; each is carried into the other domain by its own
+/// `cdc_2phase` instance.
+///
+/// CONSTRAINT: See the constraints for `cdc_2phase`. An additional maximum
+/// delay path needs to be specified from fifo_data_q to dst_data_o.
+module cdc_fifo_2phase #(
+  /// The data type of the payload transported by the FIFO.
+  parameter type T = logic,
+  /// The FIFO's depth given as 2**LOG_DEPTH.
+  parameter int LOG_DEPTH = 3
+)(
+  input  logic src_rst_ni,
+  input  logic src_clk_i,
+  input  T     src_data_i,
+  input  logic src_valid_i,
+  output logic src_ready_o,
+
+  input  logic dst_rst_ni,
+  input  logic dst_clk_i,
+  output T     dst_data_o,
+  output logic dst_valid_o,
+  input  logic dst_ready_i
+);
+
+  // Check the invariants.
+  //pragma translate_off
+  initial begin
+    assert(LOG_DEPTH > 0);
+  end
+  //pragma translate_on
+
+  localparam int PTR_WIDTH = LOG_DEPTH+1;
+  typedef logic [PTR_WIDTH-1:0] pointer_t;
+  typedef logic [LOG_DEPTH-1:0] index_t;
+
+  // XOR patterns of write and read pointer that mark the full / empty state.
+  localparam pointer_t PTR_FULL  = (1 << LOG_DEPTH);
+  localparam pointer_t PTR_EMPTY = '0;
+
+  // Allocate the registers for the FIFO memory with its separate write and read
+  // ports. The FIFO has the following ports:
+  //
+  // - write: fifo_widx, fifo_wdata, fifo_write, src_clk_i
+  // - read: fifo_ridx, fifo_rdata
+  index_t fifo_widx, fifo_ridx;
+  logic fifo_write;
+  T fifo_wdata, fifo_rdata;
+  T fifo_data_q [2**LOG_DEPTH];
+
+  // Combinational read: the storage is written in the source domain and read
+  // directly from the destination domain.
+  assign fifo_rdata = fifo_data_q[fifo_ridx];
+
+  for (genvar i = 0; i < 2**LOG_DEPTH; i++) begin : g_word
+    always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+      if (!src_rst_ni)
+        fifo_data_q[i] <= '0;
+      else if (fifo_write && fifo_widx == i)
+        fifo_data_q[i] <= fifo_wdata;
+    end
+  end
+
+  // Allocate the read and write pointers in the source and destination domain.
+  pointer_t src_wptr_q, dst_wptr, src_rptr, dst_rptr_q;
+
+  always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+    if (!src_rst_ni)
+      src_wptr_q <= 0;
+    else if (src_valid_i && src_ready_o)
+      src_wptr_q <= src_wptr_q + 1;
+  end
+
+  always_ff @(posedge dst_clk_i, negedge dst_rst_ni) begin
+    if (!dst_rst_ni)
+      dst_rptr_q <= 0;
+    else if (dst_valid_o && dst_ready_i)
+      dst_rptr_q <= dst_rptr_q + 1;
+  end
+
+  // The pointers into the FIFO are one bit wider than the actual address into
+  // the FIFO. This makes detecting critical states very simple: if all but the
+  // topmost bit of rptr and wptr agree, the FIFO is in a critical state. If the
+  // topmost bit is equal, the FIFO is empty, otherwise it is full.
+  assign src_ready_o = ((src_wptr_q ^ src_rptr) != PTR_FULL);
+  assign dst_valid_o = ((dst_rptr_q ^ dst_wptr) != PTR_EMPTY);
+
+  // Transport the read and write pointers across the clock domain boundary.
+  // Both handshake inputs of each crossing are tied to 1 and the handshake
+  // outputs are left unconnected.
+  cdc_2phase #(pointer_t) i_cdc_wptr (
+    .src_rst_ni  ( src_rst_ni ),
+    .src_clk_i   ( src_clk_i  ),
+    .src_data_i  ( src_wptr_q ),
+    .src_valid_i ( 1'b1       ),
+    .src_ready_o (            ),
+    .dst_rst_ni  ( dst_rst_ni ),
+    .dst_clk_i   ( dst_clk_i  ),
+    .dst_data_o  ( dst_wptr   ),
+    .dst_valid_o (            ),
+    .dst_ready_i ( 1'b1       )
+  );
+
+  // The read pointer travels in the opposite direction, so the destination
+  // clock/reset drive the `src_*` side of this instance.
+  cdc_2phase #(pointer_t) i_cdc_rptr (
+    .src_rst_ni  ( dst_rst_ni ),
+    .src_clk_i   ( dst_clk_i  ),
+    .src_data_i  ( dst_rptr_q ),
+    .src_valid_i ( 1'b1       ),
+    .src_ready_o (            ),
+    .dst_rst_ni  ( src_rst_ni ),
+    .dst_clk_i   ( src_clk_i  ),
+    .dst_data_o  ( src_rptr   ),
+    .dst_valid_o (            ),
+    .dst_ready_i ( 1'b1       )
+  );
+
+  // Drive the FIFO write and read ports.
+  // index_t is one bit narrower than pointer_t, so the pointers' topmost bit
+  // is dropped when they are used as memory indices.
+  assign fifo_widx  = src_wptr_q;
+  assign fifo_wdata = src_data_i;
+  assign fifo_write = src_valid_i && src_ready_o;
+  assign fifo_ridx  = dst_rptr_q;
+  assign dst_data_o = fifo_rdata;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_gray.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_gray.sv
new file mode 100644
index 0000000000..8b50e2b96c
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_gray.sv
@@ -0,0 +1,158 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki
+
+/// A clock domain crossing FIFO, using gray counters.
+///
+/// This FIFO has its push and pop ports in two separate clock domains. Its size
+/// can only be powers of two, which is why its depth is given as 2**LOG_DEPTH.
+/// LOG_DEPTH must be at least 1.
+///
+/// # Constraints
+///
+/// The following constraints need to be set:
+/// - max_delay -from src_wptr_gray_q -to dst_wptr_gray_q
+/// - max_delay -from dst_rptr_gray_q -to src_rptr_gray_q
+/// - max_delay -from fifo_data_q -to fifo_rdata
+module cdc_fifo_gray #(
+  /// The data type of the payload transported by the FIFO.
+  parameter type T = logic,
+  /// The FIFO's depth given as 2**LOG_DEPTH.
+  parameter int LOG_DEPTH = 3
+)(
+  input  logic src_rst_ni,
+  input  logic src_clk_i,
+  input  T     src_data_i,
+  input  logic src_valid_i,
+  output logic src_ready_o,
+
+  input  logic dst_rst_ni,
+  input  logic dst_clk_i,
+  output T     dst_data_o,
+  output logic dst_valid_o,
+  input  logic dst_ready_i
+);
+
+  // Check the invariants.
+  //pragma translate_off
+  initial begin
+    assert(LOG_DEPTH > 0);
+  end
+  //pragma translate_on
+
+  localparam int PTR_WIDTH = LOG_DEPTH+1;
+  typedef logic [PTR_WIDTH-1:0] pointer_t;
+  typedef logic [LOG_DEPTH-1:0] index_t;
+
+  // XOR patterns of write and read pointer that mark the full / empty state.
+  localparam pointer_t PTR_FULL  = (1 << LOG_DEPTH);
+  localparam pointer_t PTR_EMPTY = '0;
+
+  // Allocate the registers for the FIFO memory with its separate write and read
+  // ports. The FIFO has the following ports:
+  //
+  // - write: fifo_widx, fifo_wdata, fifo_write, src_clk_i
+  // - read: fifo_ridx, fifo_rdata
+  index_t fifo_widx, fifo_ridx;
+  logic fifo_write;
+  T fifo_wdata, fifo_rdata;
+  T fifo_data_q [2**LOG_DEPTH];
+
+  assign fifo_rdata = fifo_data_q[fifo_ridx];
+
+  for (genvar i = 0; i < 2**LOG_DEPTH; i++) begin : g_word
+    always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+      if (!src_rst_ni)
+        fifo_data_q[i] <= '0;
+      else if (fifo_write && fifo_widx == i)
+        fifo_data_q[i] <= fifo_wdata;
+    end
+  end
+
+  // Create the write and read pointers in the source and destination domain.
+  // These are binary counters combined with a Gray encoder. Both the binary and
+  // the Gray coded output are registered; the binary one for use in the local
+  // domain, the Gray one for synchronization into the other domain.
+  pointer_t src_wptr_bin_q, src_wptr_gray_q, dst_rptr_bin_q, dst_rptr_gray_q;
+  pointer_t src_wptr_bin_d, src_wptr_gray_d, dst_rptr_bin_d, dst_rptr_gray_d;
+
+  assign src_wptr_bin_d = src_wptr_bin_q + 1;
+  assign dst_rptr_bin_d = dst_rptr_bin_q + 1;
+
+  // The encoders work on the *next* binary value so that the binary and Gray
+  // registers are updated together in the same clock cycle.
+  binary_to_gray #(PTR_WIDTH) i_src_b2g (src_wptr_bin_d, src_wptr_gray_d);
+  binary_to_gray #(PTR_WIDTH) i_dst_b2g (dst_rptr_bin_d, dst_rptr_gray_d);
+
+  always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+    if (!src_rst_ni) begin
+      src_wptr_bin_q  <= '0;
+      src_wptr_gray_q <= '0;
+    end else if (src_valid_i && src_ready_o) begin
+      src_wptr_bin_q  <= src_wptr_bin_d;
+      src_wptr_gray_q <= src_wptr_gray_d;
+    end
+  end
+
+  always_ff @(posedge dst_clk_i, negedge dst_rst_ni) begin
+    if (!dst_rst_ni) begin
+      dst_rptr_bin_q  <= '0;
+      dst_rptr_gray_q <= '0;
+    end else if (dst_valid_o && dst_ready_i) begin
+      dst_rptr_bin_q  <= dst_rptr_bin_d;
+      dst_rptr_gray_q <= dst_rptr_gray_d;
+    end
+  end
+
+  // Move the Gray-coded pointers over into the other clock domain and
+  // synchronize them to reduce the probability of metastability.
+  pointer_t src_rptr_gray_q, src_rptr_gray_q2;
+  pointer_t dst_wptr_gray_q, dst_wptr_gray_q2;
+
+  always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+    if (!src_rst_ni) begin
+      src_rptr_gray_q  <= '0;
+      src_rptr_gray_q2 <= '0;
+    end else begin
+      src_rptr_gray_q  <= dst_rptr_gray_q;
+      src_rptr_gray_q2 <= src_rptr_gray_q;
+    end
+  end
+
+  always_ff @(posedge dst_clk_i, negedge dst_rst_ni) begin
+    if (!dst_rst_ni) begin
+      dst_wptr_gray_q  <= '0;
+      dst_wptr_gray_q2 <= '0;
+    end else begin
+      dst_wptr_gray_q  <= src_wptr_gray_q;
+      dst_wptr_gray_q2 <= dst_wptr_gray_q;
+    end
+  end
+
+  // Reverse the Gray coding of the synchronized pointers.
+  pointer_t src_rptr_bin, dst_wptr_bin;
+
+  gray_to_binary #(PTR_WIDTH) i_src_g2b (src_rptr_gray_q2, src_rptr_bin);
+  gray_to_binary #(PTR_WIDTH) i_dst_g2b (dst_wptr_gray_q2, dst_wptr_bin);
+
+  // The pointers into the FIFO are one bit wider than the actual address into
+  // the FIFO. This makes detecting critical states very simple: if all but the
+  // topmost bit of rptr and wptr agree, the FIFO is in a critical state. If the
+  // topmost bit is equal, the FIFO is empty, otherwise it is full.
+  assign src_ready_o = ((src_wptr_bin_q ^ src_rptr_bin) != PTR_FULL);
+  assign dst_valid_o = ((dst_rptr_bin_q ^ dst_wptr_bin) != PTR_EMPTY);
+
+  // Drive the FIFO write and read ports.
+  // index_t is one bit narrower than pointer_t, so the pointers' topmost bit
+  // is dropped when they are used as memory indices.
+  assign fifo_widx  = src_wptr_bin_q;
+  assign fifo_wdata = src_data_i;
+  assign fifo_write = src_valid_i && src_ready_o;
+  assign fifo_ridx  = dst_rptr_bin_q;
+  assign dst_data_o = fifo_rdata;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/cf_math_pkg.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/cf_math_pkg.sv
new file mode 100644
index 0000000000..93e92b6d81
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/cf_math_pkg.sv
@@ -0,0 +1,49 @@
+// Copyright 2016 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// cf_math_pkg: Constant Function Implementations of Mathematical Functions for HDL Elaboration
+//
+// This package contains a collection of mathematical functions that are commonly used when defining
+// the value of constants in HDL code. These functions are implemented as Verilog constant
+// functions. Introduced in Verilog 2001 (IEEE Std 1364-2001), a constant function (§ 10.3.5) is a
+// function whose value can be evaluated at compile time or during elaboration. A constant function
+// must be called with arguments that are constants.
+
+package automatic cf_math_pkg;
+
+    // Ceiled Division of Two Natural Numbers
+    //
+    // Returns the quotient of two natural numbers, rounded towards plus infinity.
+    //
+    // dividend: natural number to be divided; a dividend of 0 yields 0
+    // divisor:  natural number to divide by; must be greater than zero
+    //
+    // The quotient is determined by repeated subtraction, so the number of loop iterations
+    // equals the returned value.
+    function integer ceil_div (input longint dividend, input longint divisor);
+        automatic longint remainder;
+
+        // Argument checks; compiled only when VERILATOR is not defined and hidden from tools
+        // that honour the translate_off/translate_on pragmas.
+        // pragma translate_off
+        `ifndef VERILATOR
+        if (dividend < 0) begin
+            $fatal(1, "Dividend %0d is not a natural number!", dividend);
+        end
+
+        if (divisor < 0) begin
+            $fatal(1, "Divisor %0d is not a natural number!", divisor);
+        end
+
+        if (divisor == 0) begin
+            $fatal(1, "Division by zero!");
+        end
+        `endif
+        // pragma translate_on
+
+        remainder = dividend;
+        // The `divisor > 0` term keeps the loop finite when the checks above are compiled out:
+        // with a zero or negative divisor the remainder would never reach zero. The function
+        // then returns 0 instead of stalling elaboration.
+        for (ceil_div = 0; remainder > 0 && divisor > 0; ceil_div++) begin
+            remainder = remainder - divisor;
+        end
+    endfunction
+
+endpackage
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/clk_div.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/clk_div.sv
new file mode 100644
index 0000000000..70ed084990
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/clk_div.sv
@@ -0,0 +1,42 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Divides the clock by an integer factor
+//
+// While `en_i` is high, `clk_q` is asserted for one `clk_i` cycle every RATIO
+// cycles; in all other cycles it is low. While `en_i` is low the internal
+// counter holds its value and `clk_q` is low. With `testmode_i` set, `clk_o`
+// is driven by `clk_i` directly instead of the divided signal.
+//
+// Parameters:
+//   RATIO: division factor, i.e. number of enabled `clk_i` cycles per output pulse
+module clk_div #(
+    parameter int unsigned RATIO = 4
+)(
+    input  logic clk_i,      // Clock
+    input  logic rst_ni,     // Asynchronous reset active low
+    input  logic testmode_i, // testmode
+    input  logic en_i,       // enable clock divider
+    output logic clk_o       // divided clock out
+);
+    logic [RATIO-1:0] counter_q;
+    logic clk_q;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            clk_q     <= 1'b0;
+            counter_q <= '0;
+        end else begin
+            // default: output low, overridden below in the terminal-count cycle
+            clk_q <= 1'b0;
+            if (en_i) begin
+                if (counter_q == (RATIO[RATIO-1:0] - 1)) begin
+                    clk_q     <= 1'b1;
+                    // Wrap around so that the next pulse is again RATIO cycles away.
+                    // Without the wrap the counter would rest at RATIO-1 and `clk_q`
+                    // would stay asserted for as long as `en_i` is high.
+                    counter_q <= '0;
+                end else begin
+                    counter_q <= counter_q + 1;
+                end
+            end
+        end
+    end
+    // output assignment - bypass in testmode
+    assign clk_o = testmode_i ? clk_i : clk_q;
+endmodule
\ No newline at end of file
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/counter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/counter.sv
new file mode 100644
index 0000000000..ad5ee91b1b
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/counter.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Generic up/down counter
+//
+// The count is kept in a register that is one bit wider than WIDTH. The lower
+// WIDTH bits are presented on `q_o`, the extra top bit on `overflow_o`.
+// Control priority (highest first): `clear_i`, `load_i`, `en_i`.
+
+module counter #(
+    parameter int unsigned WIDTH = 4
+)(
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             clear_i, // synchronous clear
+    input  logic             en_i,    // enable the counter
+    input  logic             load_i,  // load a new value
+    input  logic             down_i,  // downcount, default is up
+    input  logic [WIDTH-1:0] d_i,
+    output logic [WIDTH-1:0] q_o,
+    output logic             overflow_o
+);
+    // present and next state; bit WIDTH is the overflow indication
+    logic [WIDTH:0] count_q, count_d;
+
+    // split the state into overflow flag (MSB) and visible count (remaining bits)
+    assign {overflow_o, q_o} = count_q;
+
+    // next-state logic
+    always_comb begin
+        if (clear_i) begin
+            // synchronous clear also removes the overflow indication
+            count_d = '0;
+        end else if (load_i) begin
+            // loading zero-extends the new value, i.e. starts without overflow
+            count_d = {1'b0, d_i};
+        end else if (en_i) begin
+            // step by one in the requested direction
+            count_d = down_i ? count_q - 1 : count_q + 1;
+        end else begin
+            // hold
+            count_d = count_q;
+        end
+    end
+
+    // state register, asynchronous active-low reset
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            count_q <= '0;
+        end else begin
+            count_q <= count_d;
+        end
+    end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider.sv
new file mode 100644
index 0000000000..343b0a2386
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider.sv
@@ -0,0 +1,191 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +//////////////////////////////////////////////////////////////////////////////// +// // +// Company: Multitherman Laboratory @ DEIS - University of Bologna // +// Viale Risorgimento 2 40136 // +// Bologna - fax 0512093785 - // +// // +// Engineer: Antonio Pullini - pullinia@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 13/02/2013 // +// Design Name: ULPSoC // +// Module Name: clock_divider // +// Project Name: ULPSoC // +// Language: SystemVerilog // +// // +// Description: Clock Divider // +// // +// // +// Revision: // +// Revision v0.1 - File Created // +// Revision v0.2 - (19/03/2015) clock_gating swapped in pulp_clock_gating // +// // +// // +// // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + +module clock_divider +#( + parameter DIV_INIT = 0, + parameter BYPASS_INIT = 1 +) +( + input logic clk_i, + input logic rstn_i, + input logic test_mode_i, + input logic clk_gate_async_i, + input logic [7:0] clk_div_data_i, + input logic clk_div_valid_i, + output logic clk_div_ack_o, + output logic clk_o +); + + enum logic [1:0] {IDLE, STOP, WAIT, RELEASE} state, state_next; + + logic s_clk_out; + logic s_clock_enable; + logic s_clock_enable_gate; + logic s_clk_div_valid; + + logic [7:0] reg_clk_div; + logic s_clk_div_valid_sync; + + logic s_rstn_sync; + + logic [1:0] reg_ext_gate_sync; + + assign s_clock_enable_gate = s_clock_enable & reg_ext_gate_sync; + +`ifndef PULP_FPGA_EMUL + rstgen i_rst_gen + ( + // PAD FRAME SIGNALS + 
.clk_i(clk_i), + .rst_ni(rstn_i), //async signal coming from pads + + // TEST MODE + .test_mode_i(test_mode_i), + + // OUTPUT RESET + .rst_no(s_rstn_sync), + .init_no() //not used + ); + `else + assign s_rstn_sync = rstn_i; +`endif + + + //handle the handshake with the soc_ctrl. Interface is now async + pulp_sync_wedge i_edge_prop + ( + .clk_i(clk_i), + .rstn_i(s_rstn_sync), + .en_i(1'b1), + .serial_i(clk_div_valid_i), + .serial_o(clk_div_ack_o), + .r_edge_o(s_clk_div_valid_sync), + .f_edge_o() + ); + + clock_divider_counter + #( + .BYPASS_INIT(BYPASS_INIT), + .DIV_INIT(DIV_INIT) + ) + i_clkdiv_cnt + ( + .clk(clk_i), + .rstn(s_rstn_sync), + .test_mode(test_mode_i), + .clk_div(reg_clk_div), + .clk_div_valid(s_clk_div_valid), + .clk_out(s_clk_out) + ); + + pulp_clock_gating i_clk_gate + ( + .clk_i(s_clk_out), + .en_i(s_clock_enable_gate), + .test_en_i(test_mode_i), + .clk_o(clk_o) + ); + + always_comb + begin + case(state) + IDLE: + begin + s_clock_enable = 1'b1; + s_clk_div_valid = 1'b0; + if (s_clk_div_valid_sync) + state_next = STOP; + else + state_next = IDLE; + end + + STOP: + begin + s_clock_enable = 1'b0; + s_clk_div_valid = 1'b1; + state_next = WAIT; + end + + WAIT: + begin + s_clock_enable = 1'b0; + s_clk_div_valid = 1'b0; + state_next = RELEASE; + end + + RELEASE: + begin + s_clock_enable = 1'b0; + s_clk_div_valid = 1'b0; + state_next = IDLE; + end + endcase + end + + always_ff @(posedge clk_i or negedge s_rstn_sync) + begin + if (!s_rstn_sync) + state <= IDLE; + else + state <= state_next; + end + + //sample the data when valid has been sync and there is a rise edge + always_ff @(posedge clk_i or negedge s_rstn_sync) + begin + if (!s_rstn_sync) + reg_clk_div <= '0; + else if (s_clk_div_valid_sync) + reg_clk_div <= clk_div_data_i; + end + + //sample the data when valid has been sync and there is a rise edge + always_ff @(posedge clk_i or negedge s_rstn_sync) + begin + if (!s_rstn_sync) + reg_ext_gate_sync <= 2'b00; + else + reg_ext_gate_sync <= 
{clk_gate_async_i, reg_ext_gate_sync[1]}; + end + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider_counter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider_counter.sv new file mode 100644 index 0000000000..e5c222af95 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider_counter.sv @@ -0,0 +1,211 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +//////////////////////////////////////////////////////////////////////////////// +// Company: Multitherman Laboratory @ DEIS - University of Bologna // +// Viale Risorgimento 2 40136 // +// Bologna - fax 0512093785 - // +// // +// Engineer: Antonio Pullini - pullinia@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 13/02/2013 // +// Design Name: ULPSoC // +// Module Name: clock_divider_counter // +// Project Name: ULPSoC // +// Language: SystemVerilog // +// // +// Description: clock_divider_counter // +// // +// // +// Revision: // +// Revision v0.1 - File Created // +// Revision v0.2 - (19/03/2015) clock_gating swapped in pulp_clock_gating // +// // +// // +// // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + + +module clock_divider_counter +#( + parameter BYPASS_INIT = 1, + parameter DIV_INIT = 'hFF +) +( + input logic clk, + input logic rstn, + input logic test_mode, + input logic [7:0] clk_div, + input logic clk_div_valid, + output logic clk_out +); + + logic [7:0] counter; + logic [7:0] counter_next; + logic [7:0] clk_cnt; + logic en1; + logic en2; + + logic is_odd; + + logic div1; + logic div2; + logic div2_neg_sync; + + logic [7:0] clk_cnt_odd; + logic [7:0] clk_cnt_odd_incr; + logic [7:0] clk_cnt_even; + logic [7:0] clk_cnt_en2; + + logic bypass; + + logic clk_out_gen; + logic clk_div_valid_reg; + + logic clk_inv_test; + logic clk_inv; + + // assign clk_cnt_odd_incr = clk_div + 1; + // assign clk_cnt_odd = {1'b0,clk_cnt_odd_incr[7:1]}; //if odd divider than clk_cnt = (clk_div+1)/2 + assign clk_cnt_odd = clk_div - 8'h1; //if odd divider than clk_cnt = clk_div - 1 + assign clk_cnt_even = (clk_div == 8'h2) ? 
8'h0 : ({1'b0,clk_div[7:1]} - 8'h1); //if even divider than clk_cnt = clk_div/2 + assign clk_cnt_en2 = {1'b0,clk_cnt[7:1]} + 8'h1; + + always_comb + begin + if (counter == 'h0) + en1 = 1'b1; + else + en1 = 1'b0; + + if (clk_div_valid) + counter_next = 'h0; + else if (counter == clk_cnt) + counter_next = 'h0; + else + counter_next = counter + 1; + + if (clk_div_valid) + en2 = 1'b0; + else if (counter == clk_cnt_en2) + en2 = 1'b1; + else + en2 = 1'b0; + end + + always_ff @(posedge clk, negedge rstn) + begin + if (~rstn) + begin + counter <= 'h0; + div1 <= 1'b0; + bypass <= BYPASS_INIT; + clk_cnt <= DIV_INIT; + is_odd <= 1'b0; + clk_div_valid_reg <= 1'b0; + end + else + begin + if (!bypass) + counter <= counter_next; + + clk_div_valid_reg <= clk_div_valid; + if (clk_div_valid) + begin + if ((clk_div == 8'h0) || (clk_div == 8'h1)) + begin + bypass <= 1'b1; + clk_cnt <= 'h0; + is_odd <= 1'b0; + end + else + begin + bypass <= 1'b0; + if (clk_div[0]) + begin + is_odd <= 1'b1; + clk_cnt <= clk_cnt_odd; + end + else + begin + is_odd <= 1'b0; + clk_cnt <= clk_cnt_even; + end + end + div1 <= 1'b0; + end + else + begin + if (en1 && !bypass) + div1 <= ~div1; + end + end + end + + pulp_clock_inverter clk_inv_i + ( + .clk_i(clk), + .clk_o(clk_inv) + ); + +`ifndef PULP_FPGA_EMUL + `ifdef PULP_DFT + pulp_clock_mux2 clk_muxinv_i + ( + .clk0_i(clk_inv), + .clk1_i(clk), + .clk_sel_i(test_mode), + .clk_o(clk_inv_test) + ); + `else + assign clk_inv_test = clk_inv; + `endif +`else + assign clk_inv_test = clk_inv; +`endif + + always_ff @(posedge clk_inv_test or negedge rstn) + begin + if (!rstn) + begin + div2 <= 1'b0; + end + else + begin + if (clk_div_valid_reg) + div2 <= 1'b0; + else if (en2 && is_odd && !bypass) + div2 <= ~div2; + end + end // always_ff @ (posedge clk_inv_test or negedge rstn) + + pulp_clock_xor2 clock_xor_i + ( + .clk_o(clk_out_gen), + .clk0_i(div1), + .clk1_i(div2) + ); + + pulp_clock_mux2 clk_mux_i + ( + .clk0_i(clk_out_gen), + .clk1_i(clk), + .clk_sel_i(bypass || 
test_mode), + .clk_o(clk_out) + ); + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v1.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v1.sv new file mode 100644 index 0000000000..31295e80ec --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v1.sv @@ -0,0 +1,57 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Author: Florian Zaruba + +/* verilator lint_off DECLFILENAME */ +module fifo #( + parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode + parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic + parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32 + parameter int unsigned THRESHOLD = 1, // fill count until when to assert threshold_o + parameter type dtype = logic [DATA_WIDTH-1:0] +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the queue + input logic testmode_i, // test_mode to bypass clock gating + // status flags + output logic full_o, // queue is full + output logic empty_o, // queue is empty + output logic threshold_o, // the FIFO is above the specified threshold + // as long as the queue is not full we can push new data + input dtype data_i, // data to push into the queue + input logic push_i, // data is valid and can be pushed to the queue + // as long as the queue is not empty we can pop new elements + output dtype data_o, // output data + input logic pop_i // pop head from queue +); + fifo_v2 #( + .FALL_THROUGH ( FALL_THROUGH ), + .DATA_WIDTH ( DATA_WIDTH ), + .DEPTH ( DEPTH ), + .ALM_FULL_TH ( THRESHOLD ), + .dtype ( dtype ) + ) impl ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( flush_i ), + .testmode_i ( testmode_i ), + .full_o ( full_o ), + .empty_o ( empty_o ), + .alm_full_o ( threshold_o ), + .alm_empty_o ( ), + .data_i ( data_i ), + .push_i ( push_i ), + .data_o ( data_o ), + .pop_i ( pop_i ) + ); +endmodule +/* verilator lint_on DECLFILENAME */ diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v2.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v2.sv new file mode 100644 index 0000000000..9c87ed9692 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v2.sv @@ -0,0 +1,79 @@ +// Copyright 2018 ETH Zurich and 
University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Florian Zaruba + +module fifo_v2 #( + parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode + parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic + parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32 + parameter int unsigned ALM_EMPTY_TH = 1, // almost empty threshold (when to assert alm_empty_o) + parameter int unsigned ALM_FULL_TH = 1, // almost full threshold (when to assert alm_full_o) + parameter type dtype = logic [DATA_WIDTH-1:0], + // DO NOT OVERWRITE THIS PARAMETER + parameter int unsigned ADDR_DEPTH = (DEPTH > 1) ? 
$clog2(DEPTH) : 1 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the queue + input logic testmode_i, // test_mode to bypass clock gating + // status flags + output logic full_o, // queue is full + output logic empty_o, // queue is empty + output logic alm_full_o, // FIFO fillstate >= the specified threshold + output logic alm_empty_o, // FIFO fillstate <= the specified threshold + // as long as the queue is not full we can push new data + input dtype data_i, // data to push into the queue + input logic push_i, // data is valid and can be pushed to the queue + // as long as the queue is not empty we can pop new elements + output dtype data_o, // output data + input logic pop_i // pop head from queue +); + + logic [ADDR_DEPTH-1:0] usage; + + // generate threshold parameters + if (DEPTH == 0) begin + assign alm_full_o = 1'b0; // that signal does not make any sense in a FIFO of depth 0 + assign alm_empty_o = 1'b0; // that signal does not make any sense in a FIFO of depth 0 + end else begin + assign alm_full_o = (usage >= ALM_FULL_TH[ADDR_DEPTH-1:0]); + assign alm_empty_o = (usage <= ALM_EMPTY_TH[ADDR_DEPTH-1:0]); + end + + fifo_v3 #( + .FALL_THROUGH ( FALL_THROUGH ), + .DATA_WIDTH ( DATA_WIDTH ), + .DEPTH ( DEPTH ), + .dtype ( dtype ) + ) i_fifo_v3 ( + .clk_i, + .rst_ni, + .flush_i, + .testmode_i, + .full_o, + .empty_o, + .usage_o (usage), + .data_i, + .push_i, + .data_o, + .pop_i + ); + + // pragma translate_off + `ifndef VERILATOR + initial begin + assert (ALM_FULL_TH <= DEPTH) else $error("ALM_FULL_TH can't be larger than the DEPTH."); + assert (ALM_EMPTY_TH <= DEPTH) else $error("ALM_EMPTY_TH can't be larger than the DEPTH."); + end + `endif + // pragma translate_on + +endmodule // fifo_v2 diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/find_first_one.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/find_first_one.sv new file mode 100644 index 
0000000000..ee3ba20f70 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/find_first_one.sv @@ -0,0 +1,83 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Deprecated, use lzc unit instead. + +/// A leading-one finder / leading zero counter. +/// Set FLIP to 0 for find_first_one => first_one_o is the index of the first one (from the LSB) +/// Set FLIP to 1 for leading zero counter => first_one_o is the number of leading zeroes (from the MSB) +module find_first_one #( + /// The width of the input vector. + parameter int WIDTH = -1, + parameter int FLIP = 0 +)( + input logic [WIDTH-1:0] in_i, + output logic [$clog2(WIDTH)-1:0] first_one_o, + output logic no_ones_o +); + + localparam int NUM_LEVELS = $clog2(WIDTH); + + // pragma translate_off + initial begin + assert(WIDTH >= 0); + end + // pragma translate_on + + logic [WIDTH-1:0][NUM_LEVELS-1:0] index_lut; + logic [2**NUM_LEVELS-1:0] sel_nodes; + logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0] index_nodes; + + logic [WIDTH-1:0] in_tmp; + + for (genvar i = 0; i < WIDTH; i++) begin + assign in_tmp[i] = FLIP ? 
in_i[WIDTH-1-i] : in_i[i]; + end + + for (genvar j = 0; j < WIDTH; j++) begin + assign index_lut[j] = j; + end + + for (genvar level = 0; level < NUM_LEVELS; level++) begin + + if (level < NUM_LEVELS-1) begin + for (genvar l = 0; l < 2**level; l++) begin + assign sel_nodes[2**level-1+l] = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1]; + assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ? + index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1]; + end + end + + if (level == NUM_LEVELS-1) begin + for (genvar k = 0; k < 2**level; k++) begin + // if two successive indices are still in the vector... + if (k * 2 < WIDTH-1) begin + assign sel_nodes[2**level-1+k] = in_tmp[k*2] | in_tmp[k*2+1]; + assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1]; + end + // if only the first index is still in the vector... + if (k * 2 == WIDTH-1) begin + assign sel_nodes[2**level-1+k] = in_tmp[k*2]; + assign index_nodes[2**level-1+k] = index_lut[k*2]; + end + // if index is out of range + if (k * 2 > WIDTH-1) begin + assign sel_nodes[2**level-1+k] = 1'b0; + assign index_nodes[2**level-1+k] = '0; + end + end + end + end + + assign first_one_o = NUM_LEVELS > 0 ? index_nodes[0] : '0; + assign no_ones_o = NUM_LEVELS > 0 ? ~sel_nodes[0] : '1; + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_LFSR_8bit.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_LFSR_8bit.sv new file mode 100644 index 0000000000..fb0080accf --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_LFSR_8bit.sv @@ -0,0 +1,64 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Igor Loi + +module generic_LFSR_8bit + #( + parameter OH_WIDTH = 4, + parameter BIN_WIDTH = $clog2(OH_WIDTH), + parameter SEED = 8'b00000000 + ) + ( + output logic [OH_WIDTH-1:0] data_OH_o, // One hot encoding + output logic [BIN_WIDTH-1:0] data_BIN_o, // Binary encoding + input logic enable_i, // + input logic clk, // + input logic rst_n // + ); + + logic [7:0] out; + logic linear_feedback; + logic [BIN_WIDTH-1:0] temp_ref_way; + + + //-------------Code Starts Here------- + assign linear_feedback = !(out[7] ^ out[3] ^ out[2] ^ out[1]); // TAPS for XOR feedback + + assign data_BIN_o = temp_ref_way; + + always_ff @(posedge clk, negedge rst_n) + begin + if (rst_n == 1'b0) + begin + out <= SEED ; + end + else if (enable_i) + begin + out <= {out[6],out[5],out[4],out[3],out[2],out[1],out[0], linear_feedback}; + end + end + + generate + + if(OH_WIDTH == 2) + assign temp_ref_way = out[1]; + else + assign temp_ref_way = out[BIN_WIDTH:1]; + endgenerate + + // Bin to One Hot Encoder + always_comb + begin + data_OH_o = '0; + data_OH_o[temp_ref_way] = 1'b1; + end + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo.sv new file mode 100644 index 0000000000..ece4aac78e --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo.sv @@ -0,0 +1,274 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// ============================================================================= // +// Company: Multitherman Laboratory @ DEIS - University of Bologna // +// Viale Risorgimento 2 40136 // +// Bologna - fax 0512093785 - // +// // +// Engineer: Igor Loi - igor.loi@unibo.it // +// // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 01/02/2014 // +// Design Name: MISC // +// Module Name: generic_fifo // +// Project Name: PULP // +// Language: SystemVerilog // +// // +// Description: A simple FIFO used in the D_address_decoder, and D_allocator // +// to store the destinations ports // +// // +// Revision: // +// Revision v0.1 - 01/02/2014 : File Created // +// Revision v0.2 - 02/09/2015 : Updated with a global CG cell // +// // +// ============================================================================= // + +module generic_fifo +#( + parameter int unsigned DATA_WIDTH = 32, + parameter int unsigned DATA_DEPTH = 8 +) +( + input logic clk, + input logic rst_n, + //PUSH SIDE + input logic [DATA_WIDTH-1:0] data_i, + input logic valid_i, + output logic grant_o, + //POP SIDE + output logic [DATA_WIDTH-1:0] data_o, + output logic valid_o, + input logic grant_i, + + input logic test_mode_i +); + + + // Local Parameter + localparam int unsigned ADDR_DEPTH = $clog2(DATA_DEPTH); + enum logic [1:0] { EMPTY, FULL, MIDDLE } CS, NS; + // 
Internal Signals + + logic gate_clock; + logic clk_gated; + + logic [ADDR_DEPTH-1:0] Pop_Pointer_CS, Pop_Pointer_NS; + logic [ADDR_DEPTH-1:0] Push_Pointer_CS, Push_Pointer_NS; + logic [DATA_WIDTH-1:0] FIFO_REGISTERS[DATA_DEPTH-1:0]; + int unsigned i; + + // Parameter Check + // synopsys translate_off + initial begin : parameter_check + integer param_err_flg; + param_err_flg = 0; + + if (DATA_WIDTH < 1) begin + param_err_flg = 1; + $display("ERROR: %m :\n Invalid value (%d) for parameter DATA_WIDTH (legal range: greater than 1)", DATA_WIDTH ); + end + + if (DATA_DEPTH < 1) begin + param_err_flg = 1; + $display("ERROR: %m :\n Invalid value (%d) for parameter DATA_DEPTH (legal range: greater than 1)", DATA_DEPTH ); + end + end + // synopsys translate_on + +`ifndef PULP_FPGA_EMUL + cluster_clock_gating cg_cell + ( + .clk_i ( clk ), + .en_i (~gate_clock ), + .test_en_i ( test_mode_i ), + .clk_o ( clk_gated ) + ); +`else + assign clk_gated = clk; +`endif + + // UPDATE THE STATE + always_ff @(posedge clk, negedge rst_n) + begin + if(rst_n == 1'b0) + begin + CS <= EMPTY; + Pop_Pointer_CS <= {ADDR_DEPTH {1'b0}}; + Push_Pointer_CS <= {ADDR_DEPTH {1'b0}}; + end + else + begin + CS <= NS; + Pop_Pointer_CS <= Pop_Pointer_NS; + Push_Pointer_CS <= Push_Pointer_NS; + end + end + + + // Compute Next State + always_comb + begin + gate_clock = 1'b0; + + case(CS) + + EMPTY: + begin + grant_o = 1'b1; + valid_o = 1'b0; + + case(valid_i) + 1'b0 : + begin + NS = EMPTY; + Push_Pointer_NS = Push_Pointer_CS; + Pop_Pointer_NS = Pop_Pointer_CS; + gate_clock = 1'b1; + end + + 1'b1: + begin + NS = MIDDLE; + Push_Pointer_NS = Push_Pointer_CS + 1'b1; + Pop_Pointer_NS = Pop_Pointer_CS; + end + + endcase + end//~EMPTY + + MIDDLE: + begin + grant_o = 1'b1; + valid_o = 1'b1; + + case({valid_i,grant_i}) + + 2'b01: + begin + gate_clock = 1'b1; + + if((Pop_Pointer_CS == Push_Pointer_CS -1 ) || ((Pop_Pointer_CS == DATA_DEPTH-1) && (Push_Pointer_CS == 0) )) + NS = EMPTY; + else + NS = MIDDLE; + + 
Push_Pointer_NS = Push_Pointer_CS; + + if(Pop_Pointer_CS == DATA_DEPTH-1) + Pop_Pointer_NS = 0; + else + Pop_Pointer_NS = Pop_Pointer_CS + 1'b1; + end + + 2'b00 : + begin + gate_clock = 1'b1; + NS = MIDDLE; + Push_Pointer_NS = Push_Pointer_CS; + Pop_Pointer_NS = Pop_Pointer_CS; + end + + 2'b11: + begin + NS = MIDDLE; + + if(Push_Pointer_CS == DATA_DEPTH-1) + Push_Pointer_NS = 0; + else + Push_Pointer_NS = Push_Pointer_CS + 1'b1; + + if(Pop_Pointer_CS == DATA_DEPTH-1) + Pop_Pointer_NS = 0; + else + Pop_Pointer_NS = Pop_Pointer_CS + 1'b1; + end + + 2'b10: + begin + if(( Push_Pointer_CS == Pop_Pointer_CS - 1) || ( (Push_Pointer_CS == DATA_DEPTH-1) && (Pop_Pointer_CS == 0) )) + NS = FULL; + else + NS = MIDDLE; + + if(Push_Pointer_CS == DATA_DEPTH - 1) + Push_Pointer_NS = 0; + else + Push_Pointer_NS = Push_Pointer_CS + 1'b1; + + Pop_Pointer_NS = Pop_Pointer_CS; + end + + endcase + end + + FULL: + begin + grant_o = 1'b0; + valid_o = 1'b1; + gate_clock = 1'b1; + + case(grant_i) + 1'b1: + begin + NS = MIDDLE; + + Push_Pointer_NS = Push_Pointer_CS; + + if(Pop_Pointer_CS == DATA_DEPTH-1) + Pop_Pointer_NS = 0; + else + Pop_Pointer_NS = Pop_Pointer_CS + 1'b1; + end + + 1'b0: + begin + NS = FULL; + Push_Pointer_NS = Push_Pointer_CS; + Pop_Pointer_NS = Pop_Pointer_CS; + end + endcase + + end // end of FULL + + default : + begin + gate_clock = 1'b1; + grant_o = 1'b0; + valid_o = 1'b0; + NS = EMPTY; + Pop_Pointer_NS = 0; + Push_Pointer_NS = 0; + end + + endcase + end + + always_ff @(posedge clk_gated, negedge rst_n) + begin + if(rst_n == 1'b0) + begin + for (i=0; i< DATA_DEPTH; i++) + FIFO_REGISTERS[i] <= {DATA_WIDTH {1'b0}}; + end + else + begin + if((grant_o == 1'b1) && (valid_i == 1'b1)) + FIFO_REGISTERS[Push_Pointer_CS] <= data_i; + end + end + + assign data_o = FIFO_REGISTERS[Pop_Pointer_CS]; + +endmodule // generic_fifo diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo_adv.sv 
b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo_adv.sv new file mode 100644 index 0000000000..df6cc0d796 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo_adv.sv @@ -0,0 +1,264 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Igor Loi + +module generic_fifo_adv +#( + parameter int unsigned DATA_WIDTH = 32, + parameter int unsigned DATA_DEPTH = 8 + ) + ( + input logic clk, + input logic rst_n, + input logic clear_i, + + //PUSH SIDE + input logic [DATA_WIDTH-1:0] data_i, + input logic valid_i, + output logic grant_o, + + //POP SIDE + output logic [DATA_WIDTH-1:0] data_o, + output logic valid_o, + input logic grant_i, + + input logic test_mode_i + ); + + + // Local Parameter + localparam int unsigned ADDR_DEPTH = $clog2(DATA_DEPTH); + enum logic [1:0] { EMPTY, FULL, MIDDLE } CS, NS; + // Internal Signals + + logic gate_clock; + logic clk_gated; + + logic [ADDR_DEPTH-1:0] Pop_Pointer_CS, Pop_Pointer_NS; + logic [ADDR_DEPTH-1:0] Push_Pointer_CS, Push_Pointer_NS; + logic [DATA_WIDTH-1:0] FIFO_REGISTERS[DATA_DEPTH-1:0]; + int unsigned i; + + // Parameter Check + // synopsys translate_off + initial + begin : parameter_check + integer param_err_flg; + param_err_flg = 0; + + if (DATA_WIDTH < 1) + begin + param_err_flg = 1; + $display("ERROR: %m :\n Invalid value (%d) for parameter 
DATA_WIDTH (legal range: greater than 1)", DATA_WIDTH ); + end + + if (DATA_DEPTH < 1) + begin + param_err_flg = 1; + $display("ERROR: %m :\n Invalid value (%d) for parameter DATA_DEPTH (legal range: greater than 1)", DATA_DEPTH ); + end + end + // synopsys translate_on + +`ifndef PULP_FPGA_EMUL + cluster_clock_gating cg_cell + ( + .clk_i ( clk ), + .en_i (~gate_clock ), + .test_en_i ( test_mode_i ), + .clk_o ( clk_gated ) + ); +`else + assign clk_gated = clk; +`endif + + // UPDATE THE STATE + always_ff @(posedge clk, negedge rst_n) + begin + if(rst_n == 1'b0) + begin + CS <= EMPTY; + Pop_Pointer_CS <= {ADDR_DEPTH {1'b0}}; + Push_Pointer_CS <= {ADDR_DEPTH {1'b0}}; + end + else + begin + if(clear_i) + begin + CS <= EMPTY; + Pop_Pointer_CS <= {ADDR_DEPTH {1'b0}}; + Push_Pointer_CS <= {ADDR_DEPTH {1'b0}}; + end + else + begin + CS <= NS; + Pop_Pointer_CS <= Pop_Pointer_NS; + Push_Pointer_CS <= Push_Pointer_NS; + end + end + end + + + // Compute Next State + always_comb + begin + gate_clock = 1'b0; + + case(CS) + + EMPTY: + begin + grant_o = 1'b1; + valid_o = 1'b0; + + case(valid_i) + 1'b0 : + begin + NS = EMPTY; + Push_Pointer_NS = Push_Pointer_CS; + Pop_Pointer_NS = Pop_Pointer_CS; + gate_clock = 1'b1; + end + + 1'b1: + begin + NS = MIDDLE; + Push_Pointer_NS = Push_Pointer_CS + 1'b1; + Pop_Pointer_NS = Pop_Pointer_CS; + end + + endcase + end//~EMPTY + + MIDDLE: + begin + grant_o = 1'b1; + valid_o = 1'b1; + + case({valid_i,grant_i}) + + 2'b01: + begin + gate_clock = 1'b1; + + if((Pop_Pointer_CS == Push_Pointer_CS -1 ) || ((Pop_Pointer_CS == DATA_DEPTH-1) && (Push_Pointer_CS == 0) )) + NS = EMPTY; + else + NS = MIDDLE; + + Push_Pointer_NS = Push_Pointer_CS; + + if(Pop_Pointer_CS == DATA_DEPTH-1) + Pop_Pointer_NS = 0; + else + Pop_Pointer_NS = Pop_Pointer_CS + 1'b1; + end + + 2'b00 : + begin + gate_clock = 1'b1; + NS = MIDDLE; + Push_Pointer_NS = Push_Pointer_CS; + Pop_Pointer_NS = Pop_Pointer_CS; + end + + 2'b11: + begin + NS = MIDDLE; + + if(Push_Pointer_CS == 
DATA_DEPTH-1) + Push_Pointer_NS = 0; + else + Push_Pointer_NS = Push_Pointer_CS + 1'b1; + + if(Pop_Pointer_CS == DATA_DEPTH-1) + Pop_Pointer_NS = 0; + else + Pop_Pointer_NS = Pop_Pointer_CS + 1'b1; + end + + 2'b10: + begin + if(( Push_Pointer_CS == Pop_Pointer_CS - 1) || ( (Push_Pointer_CS == DATA_DEPTH-1) && (Pop_Pointer_CS == 0) )) + NS = FULL; + else + NS = MIDDLE; + + if(Push_Pointer_CS == DATA_DEPTH - 1) + Push_Pointer_NS = 0; + else + Push_Pointer_NS = Push_Pointer_CS + 1'b1; + + Pop_Pointer_NS = Pop_Pointer_CS; + end + + endcase + end + + FULL: + begin + grant_o = 1'b0; + valid_o = 1'b1; + gate_clock = 1'b1; + + case(grant_i) + 1'b1: + begin + NS = MIDDLE; + + Push_Pointer_NS = Push_Pointer_CS; + + if(Pop_Pointer_CS == DATA_DEPTH-1) + Pop_Pointer_NS = 0; + else + Pop_Pointer_NS = Pop_Pointer_CS + 1'b1; + end + + 1'b0: + begin + NS = FULL; + Push_Pointer_NS = Push_Pointer_CS; + Pop_Pointer_NS = Pop_Pointer_CS; + end + endcase + + end // end of FULL + + default : + begin + gate_clock = 1'b1; + grant_o = 1'b0; + valid_o = 1'b0; + NS = EMPTY; + Pop_Pointer_NS = 0; + Push_Pointer_NS = 0; + end + + endcase + end + + always_ff @(posedge clk_gated, negedge rst_n) + begin + if(rst_n == 1'b0) + begin + for (i=0; i< DATA_DEPTH; i++) + FIFO_REGISTERS[i] <= {DATA_WIDTH {1'b0}}; + end + else + begin + if((grant_o == 1'b1) && (valid_i == 1'b1)) + FIFO_REGISTERS[Push_Pointer_CS] <= data_i; + end + end + + assign data_o = FIFO_REGISTERS[Pop_Pointer_CS]; + +endmodule // generic_fifo diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/prioarbiter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/prioarbiter.sv new file mode 100644 index 0000000000..730ceca4bf --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/prioarbiter.sv @@ -0,0 +1,89 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba , ETH Zurich
+// Date: 16.03.2019
+// Description: Priority arbiter with Lock in. Port 0 has priority over port 1, port 1 over port2
+//              and so on. If the `LOCK_IN` feature is activated the arbitration decision is kept
+//              when the `en_i` is low.
+
+// Dependencies: relies on fast leading zero counter tree "onehot_to_bin" in common_cells
+module prioarbiter #(
+  parameter int unsigned NUM_REQ = 13, // number of request/acknowledge ports
+  parameter int unsigned LOCK_IN = 0   // non-zero selects the gen_lock_in branch below
+) (
+  input logic                         clk_i,
+  input logic                         rst_ni,  // asynchronous, active low (see p_regs)
+
+  input logic                         flush_i, // clears the fsm and control signal registers
+  input logic                         en_i,    // arbiter enable
+  input logic [NUM_REQ-1:0]           req_i,   // request signals
+
+  output logic [NUM_REQ-1:0]          ack_o,   // acknowledge signals
+  output logic                        vld_o,   // request ack'ed
+  output logic [$clog2(NUM_REQ)-1:0]  idx_o    // idx output
+);
+
+  localparam SEL_WIDTH = $clog2(NUM_REQ);
+
+  logic [SEL_WIDTH-1:0] arb_sel_lock_d, arb_sel_lock_q;
+  logic lock_d, lock_q;
+
+  logic [$clog2(NUM_REQ)-1:0] idx; // binary index of the acknowledged port
+
+  // shared
+  assign vld_o = (|req_i) & en_i;
+  assign idx_o = (lock_q) ? arb_sel_lock_q : idx; // registered index wins while locked
+
+  // Arbiter
+  // Port 0 has priority over all other ports
+  assign ack_o[0] = (req_i[0]) ? en_i : 1'b0;
+  // a port is acknowledged only if no lower-indexed port has been acknowledged
+  for (genvar i = 1; i < NUM_REQ; i++) begin : gen_arb_req_ports
+    // for every subsequent port check the priorities of the previous port
+    assign ack_o[i] = (req_i[i] & ~(|ack_o[i-1:0])) ? en_i : 1'b0;
+  end
+
+  onehot_to_bin #(
+    .ONEHOT_WIDTH ( NUM_REQ )
+  ) i_onehot_to_bin (
+    .onehot ( ack_o ),
+    .bin    ( idx   )
+  );
+
+  if (LOCK_IN) begin : gen_lock_in
+    // latch decision in case we got at least one req and no acknowledge
+    assign lock_d         = (|req_i) & ~en_i;
+    assign arb_sel_lock_d = idx_o; // NOTE(review): ack_o is all-zero whenever en_i is low, so the value captured on entering lock comes from an all-zero one-hot vector rather than from the last grant -- confirm intended
+  end else begin
+    // disable
+    assign lock_d         = '0;
+    assign arb_sel_lock_d = '0;
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      lock_q         <= 1'b0;
+      arb_sel_lock_q <= '0;
+    end else begin
+      if (flush_i) begin // synchronous clear of the lock state
+        lock_q         <= 1'b0;
+        arb_sel_lock_q <= '0;
+      end else begin
+        lock_q         <= lock_d;
+        arb_sel_lock_q <= arb_sel_lock_d;
+      end
+    end
+  end
+
+endmodule : prioarbiter
+
+
+
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync.sv
new file mode 100644
index 0000000000..2b436163e5
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync.sv
@@ -0,0 +1,36 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini
+// Description: STAGES-bit shift register clocked by clk_i; serial_i enters at bit 0, serial_o is bit STAGES-1.
+module pulp_sync
+  #(
+    parameter STAGES = 2 // number of flops; the slice r_reg[STAGES-2:0] below needs STAGES >= 2
+  )
+  (
+    input  logic clk_i,
+    input  logic rstn_i,   // asynchronous, active low
+    input  logic serial_i,
+    output logic serial_o
+  );
+
+  logic [STAGES-1:0] r_reg;
+
+  always_ff @(posedge clk_i, negedge rstn_i)
+  begin
+    if(!rstn_i)
+      r_reg <= 'h0;
+    else
+      r_reg <= {r_reg[STAGES-2:0], serial_i}; // shift towards the MSB
+  end
+
+  assign serial_o = r_reg[STAGES-1];
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync_wedge.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync_wedge.sv
new file mode 100644
index 0000000000..66cee57d2c
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync_wedge.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini
+// Description: feeds serial_i through pulp_sync, registers the result once more on a clock gated by en_i, and flags edges by comparing the two.
+module pulp_sync_wedge #(
+  parameter int unsigned STAGES = 2 // forwarded to pulp_sync
+) (
+  input  logic clk_i,
+  input  logic rstn_i,
+  input  logic en_i,      // enable of the clock gate in front of serial_q
+  input  logic serial_i,
+  output logic r_edge_o,  // pulp_sync output is 1 while serial_q is still 0
+  output logic f_edge_o,  // pulp_sync output is 0 while serial_q is still 1
+  output logic serial_o   // the extra register stage (serial_q)
+);
+  logic clk;              // output of the clock gate
+  logic serial, serial_q;
+
+  assign serial_o = serial_q;
+  assign f_edge_o = ~serial & serial_q;
+  assign r_edge_o = serial & ~serial_q;
+
+  pulp_sync #(
+    .STAGES(STAGES)
+  ) i_pulp_sync (
+    .clk_i,
+    .rstn_i,
+    .serial_i,
+    .serial_o ( serial )
+  );
+
+  pulp_clock_gating i_pulp_clock_gating (
+    .clk_i,
+    .en_i,
+    .test_en_i ( 1'b0 ),
+    .clk_o     ( clk  )
+  );
+
+  always_ff @(posedge clk, negedge rstn_i) begin
+    if (!rstn_i) begin
+      serial_q <= 1'b0;
+    end else begin
+      serial_q <= serial;
+    end
+  end
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/rrarbiter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/rrarbiter.sv
new file mode 100644
index 0000000000..bf806c5e42
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/rrarbiter.sv
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner , ETH Zurich
+// Date: 16.08.2018
+// Description: Fair round robin arbiter with lock feature.
+//
+// The rrarbiter employs fair round robin arbitration - i.e.
the priorities
+// rotate each cycle.
+//
+// The lock-in feature prevents the arbiter from changing the arbitration
+// decision when the arbiter is disabled. I.e., the index of the first request
+// that wins the arbitration will be locked until en_i is asserted again.
+//
+// Dependencies: relies on rr_arb_tree from common_cells.
+
+module rrarbiter #(
+  parameter int unsigned NUM_REQ = 64,   // number of request ports, forwarded as NumIn
+  parameter bit          LOCK_IN = 1'b0  // forwarded as LockIn
+) (
+  input logic                         clk_i,
+  input logic                         rst_ni,
+
+  input logic                         flush_i, // clears arbiter state
+  input logic                         en_i,    // arbiter enable
+  input logic [NUM_REQ-1:0]           req_i,   // request signals
+
+  output logic [NUM_REQ-1:0]          ack_o,   // acknowledge signals
+  output logic                        vld_o,   // request ack'ed
+  output logic [$clog2(NUM_REQ)-1:0]  idx_o    // idx output
+);
+
+  logic req; // req_o of the arbiter tree, looped back into gnt_i below
+  assign vld_o = (|req_i) & en_i;
+
+  rr_arb_tree #(
+    .NumIn     ( NUM_REQ ),
+    .DataWidth ( 1       ),
+    .LockIn    ( LOCK_IN ))
+  i_rr_arb_tree (
+    .clk_i   ( clk_i      ),
+    .rst_ni  ( rst_ni     ),
+    .flush_i ( flush_i    ),
+    .rr_i    ( '0         ),
+    .req_i   ( req_i      ),
+    .gnt_o   ( ack_o      ),
+    .data_i  ( '0         ), // data path unused: inputs tied off, data_o left open
+    .gnt_i   ( en_i & req ), // grant the tree's own request only while enabled
+    .req_o   ( req        ),
+    .data_o  (            ),
+    .idx_o   ( idx_o      )
+  );
+
+endmodule : rrarbiter
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/edge_detect.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_detect.sv
new file mode 100644
index 0000000000..7fe89a7bf4
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_detect.sv
@@ -0,0 +1,32 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Edge detector, clock needs to oversample for proper edge detection
+//              Thin wrapper around sync_wedge (default parameters) with the enable tied high.
+module edge_detect (
+  input  logic clk_i,   // Clock
+  input  logic rst_ni,  // Asynchronous reset active low
+  input  logic d_i,     // data stream in
+  output logic re_o,    // rising edge detected
+  output logic fe_o     // falling edge detected
+);
+
+  sync_wedge i_sync_wedge (
+    .clk_i    ( clk_i  ),
+    .rst_ni   ( rst_ni ),
+    .en_i     ( 1'b1   ), // always enabled
+    .serial_i ( d_i    ),
+    .r_edge_o ( re_o   ),
+    .f_edge_o ( fe_o   ),
+    .serial_o (        )  // synchronized level is not used here
+  );
+
+endmodule
\ No newline at end of file
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator.sv
new file mode 100644
index 0000000000..2e27283111
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator.sv
@@ -0,0 +1,50 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini
+// Description: holds a pulse on edge_i in the clk_tx_i domain until the level returned from the clk_rx_i domain is seen; edge_o is the rising edge on the rx side.
+module edge_propagator (
+  input  logic clk_tx_i,
+  input  logic rstn_tx_i,
+  input  logic edge_i,
+  input  logic clk_rx_i,
+  input  logic rstn_rx_i,
+  output logic edge_o
+);
+
+  logic [1:0] sync_a;        // sync_b re-registered twice in the clk_tx_i domain
+  logic       sync_b;        // serial_o of the rx-side pulp_sync_wedge
+
+  logic r_input_reg;         // held request, driven into the rx side
+  logic s_input_reg_next;
+
+  // set on edge_i, keep until the returned level reaches sync_a[0]
+  assign s_input_reg_next = edge_i | (r_input_reg & (~sync_a[0]));
+
+  always @(negedge rstn_tx_i or posedge clk_tx_i) begin
+    if (~rstn_tx_i) begin
+      r_input_reg <= 1'b0;
+      sync_a      <= 2'b00;
+    end else begin
+      r_input_reg <= s_input_reg_next;
+      sync_a      <= {sync_b,sync_a[1]};
+    end
+  end
+
+  pulp_sync_wedge i_sync_clkb (
+    .clk_i    ( clk_rx_i    ),
+    .rstn_i   ( rstn_rx_i   ),
+    .en_i     ( 1'b1        ),
+    .serial_i ( r_input_reg ),
+    .r_edge_o ( edge_o      ),
+    .f_edge_o (             ),
+    .serial_o ( sync_b      )
+  );
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_rx.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_rx.sv
new file mode 100644
index 0000000000..89532cc27c
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_rx.sv
@@ -0,0 +1,31 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini
+// Description: receive side only: valid_i goes through pulp_sync_wedge; its rising edge is valid_o and its registered level is ack_o.
+module edge_propagator_rx (
+  input  logic clk_i,
+  input  logic rstn_i,
+  input  logic valid_i,
+  output logic ack_o,
+  output logic valid_o
+);
+
+  pulp_sync_wedge i_sync_clkb (
+    .clk_i    ( clk_i   ),
+    .rstn_i   ( rstn_i  ),
+    .en_i     ( 1'b1    ), // always enabled
+    .serial_i ( valid_i ),
+    .r_edge_o ( valid_o ),
+    .f_edge_o (         ), // falling edge not used
+    .serial_o ( ack_o   )
+  );
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_tx.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_tx.sv
new file mode 100644
index 0000000000..0274a43333
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_tx.sv
@@ -0,0 +1,40 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini
+// Description: transmit side only: latches valid_i into r_input_reg and keeps it set until ack_i, registered twice, reaches sync_a[0].
+module edge_propagator_tx (
+  input  logic clk_i,
+  input  logic rstn_i,
+  input  logic valid_i,
+  input  logic ack_i,
+  output logic valid_o
+);
+
+  logic [1:0] sync_a;       // ack_i registered twice in the clk_i domain
+
+  logic r_input_reg;        // held request
+  logic s_input_reg_next;
+
+  // set on valid_i, keep until the registered acknowledge arrives
+  assign s_input_reg_next = valid_i | (r_input_reg & ~sync_a[0]);
+
+  always @(negedge rstn_i or posedge clk_i) begin
+    if (~rstn_i) begin
+      r_input_reg <= 1'b0;
+      sync_a      <= 2'b00;
+    end else begin
+      r_input_reg <= s_input_reg_next;
+      sync_a      <= {ack_i,sync_a[1]};
+    end
+  end
+
+  assign valid_o = r_input_reg;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/exp_backoff.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/exp_backoff.sv
new file mode 100644
index 0000000000..fe63798cc4
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/exp_backoff.sv
@@ -0,0 +1,94 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner , ETH Zurich
+// Date: 10.04.2019
+// Description: exponential backoff counter with randomization.
+//
+// For each failed trial (set_i pulsed), this unit exponentially increases the
+// (average) backoff time by masking an LFSR with a shifted mask in order to
+// create the backoff counter initial value.
+//
+// The shift register mask and the counter value are both reset to '0 in case of
+// a successful trial (clr_i).
+//
+
+module exp_backoff #(
+  parameter int unsigned Seed   = 'hffff, // seed for 16bit lfsr
+  parameter int unsigned MaxExp = 16      // 2**MaxExp-1 determines the maximum range from which random wait counts are drawn
+) (
+  input  logic clk_i,
+  input  logic rst_ni,
+  //
+  input  logic set_i,     // sets the backoff counter (pulse) -> use when trial did not succeed
+  input  logic clr_i,     // clears the backoff counter (pulse) -> use when trial succeeded
+  output logic is_zero_o  // indicates whether the backoff counter is equal to zero and a new trial can be launched
+);
+
+  // leave this constant
+  localparam WIDTH = 16;
+
+  logic [WIDTH-1:0] lfsr_d, lfsr_q, cnt_d, cnt_q, mask_d, mask_q;
+  logic lfsr; // feedback bit shifted into the MSB of the LFSR
+
+  // generate random wait counts
+  // note: we use a flipped lfsr here to
+  // avoid strange correlation effects between
+  // the (left-shifted) mask and the lfsr
+  assign lfsr = lfsr_q[15-15] ^
+                lfsr_q[15-13] ^
+                lfsr_q[15-12] ^
+                lfsr_q[15-10];
+
+  assign lfsr_d = (set_i) ? {lfsr, lfsr_q[$high(lfsr_q):1]} : // the LFSR only advances on set_i
+                            lfsr_q;
+
+  // mask the wait counts with exponentially increasing mask (shift reg)
+  assign mask_d = (clr_i) ? '0 :
+                  (set_i) ? {{(WIDTH-MaxExp){1'b0}},mask_q[MaxExp-2:0], 1'b1} : // NOTE(review): for MaxExp == 1 the slice upper index is -1, yet the assertion below admits MaxExp == 1 -- confirm
+                            mask_q;
+
+  assign cnt_d = (clr_i) ? '0 :                    // clr_i has priority over set_i
+                 (set_i) ? (mask_q & lfsr_q) :     // load uses the mask from before this set_i
+                 (!is_zero_o) ? cnt_q - 1'b1 : '0; // otherwise count down and stop at zero
+
+  assign is_zero_o = (cnt_q=='0);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      lfsr_q <= WIDTH'(Seed); // Seed is cast to 16 bits; the Seed>0 check below is on the untruncated value
+      mask_q <= '0;
+      cnt_q  <= '0;
+    end else begin
+      lfsr_q <= lfsr_d;
+      mask_q <= mask_d;
+      cnt_q  <= cnt_d;
+    end
+  end
+
+///////////////////////////////////////////////////////
+// assertions
+///////////////////////////////////////////////////////
+
+//pragma translate_off
+`ifndef VERILATOR
+  initial begin
+    // assert wrong parameterizations
+    assert (MaxExp>0)
+      else $fatal(1,"MaxExp must be greater than 0");
+    assert (MaxExp<=16)
+      else $fatal(1,"MaxExp cannot be greater than 16");
+    assert (Seed>0)
+      else $fatal(1,"Zero seed is not allowed for LFSR");
+  end
+`endif
+//pragma translate_on
+
+endmodule // exp_backoff
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/fall_through_register.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/fall_through_register.sv
new file mode 100644
index 0000000000..fcbbe31dbc
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/fall_through_register.sv
@@ -0,0 +1,58 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Fall-through register with a simple stream-like ready/valid handshake.
+// This register does not cut combinatorial paths on any signals: in case the module at its output
+// is ready to accept data within the same clock cycle, they are forwarded. Use this module to get a
+// 'default ready' behavior towards the input.
+module fall_through_register #(
+  parameter type T = logic  // Vivado requires a default value for type parameters.
+) (
+  input  logic clk_i,       // Clock
+  input  logic rst_ni,      // Asynchronous active-low reset
+  input  logic clr_i,       // Synchronous clear
+  input  logic testmode_i,  // Test mode to bypass clock gating
+  // Input port
+  input  logic valid_i,
+  output logic ready_o,
+  input  T     data_i,
+  // Output port
+  output logic valid_o,
+  input  logic ready_i,
+  output T     data_o
+);
+
+  logic fifo_empty,
+        fifo_full;
+
+  fifo_v2 #(
+    .FALL_THROUGH (1'b1),
+    .DATA_WIDTH   ($size(T)), // presumably only feeds fifo_v2's default dtype, which is overridden below -- confirm
+    .DEPTH        (1),
+    .dtype        (T)
+  ) i_fifo (
+    .clk_i       (clk_i),
+    .rst_ni      (rst_ni),
+    .flush_i     (clr_i),
+    .testmode_i  (testmode_i),
+    .full_o      (fifo_full),
+    .empty_o     (fifo_empty),
+    .alm_full_o  (                    ),
+    .alm_empty_o (                    ),
+    .data_i      (data_i),
+    .push_i      (valid_i & ~fifo_full),  // push only while there is room
+    .data_o      (data_o),
+    .pop_i       (ready_i & ~fifo_empty)  // pop only while there is data
+  );
+
+  assign ready_o = ~fifo_full;
+  assign valid_o = ~fifo_empty;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/fifo_v3.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/fifo_v3.sv
new file mode 100644
index 0000000000..8c487730a4
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/fifo_v3.sv
@@ -0,0 +1,153 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: FIFO with a fill counter, optional fall-through mode and a DEPTH == 0 pass-through case.
+module fifo_v3 #(
+    parameter bit          FALL_THROUGH = 1'b0, // fifo is in fall-through mode
+    parameter int unsigned DATA_WIDTH   = 32,   // default data width if the fifo is of type logic
+    parameter int unsigned DEPTH        = 8,    // depth can be arbitrary from 0 to 2**32
+    parameter type dtype                = logic [DATA_WIDTH-1:0],
+    // DO NOT OVERWRITE THIS PARAMETER
+    parameter int unsigned ADDR_DEPTH   = (DEPTH > 1) ? $clog2(DEPTH) : 1
+)(
+    input  logic  clk_i,            // Clock
+    input  logic  rst_ni,           // Asynchronous reset active low
+    input  logic  flush_i,          // flush the queue
+    input  logic  testmode_i,       // test_mode to bypass clock gating (not referenced in this module body)
+    // status flags
+    output logic  full_o,           // queue is full
+    output logic  empty_o,          // queue is empty
+    output logic  [ADDR_DEPTH-1:0] usage_o,  // fill pointer
+    // as long as the queue is not full we can push new data
+    input  dtype  data_i,           // data to push into the queue
+    input  logic  push_i,           // data is valid and can be pushed to the queue
+    // as long as the queue is not empty we can pop new elements
+    output dtype  data_o,           // output data
+    input  logic  pop_i             // pop head from queue
+);
+    // local parameter
+    // FIFO depth - handle the case of pass-through, synthesizer will do constant propagation
+    localparam int unsigned FIFO_DEPTH = (DEPTH > 0) ? DEPTH : 1;
+    // clock gating control
+    logic gate_clock; // used below as an active-low write enable for mem_q, no clock-gating cell is instantiated
+    // pointer to the read and write section of the queue
+    logic [ADDR_DEPTH - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q;
+    // keep a counter to keep track of the current queue status
+    logic [ADDR_DEPTH:0] status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool
+    // actual memory
+    dtype [FIFO_DEPTH - 1:0] mem_n, mem_q;
+
+    assign usage_o = status_cnt_q[ADDR_DEPTH-1:0]; // the MSB of the fill counter is dropped here
+
+    if (DEPTH == 0) begin
+        assign empty_o     = ~push_i; // pass-through: data is available exactly while it is pushed
+        assign full_o      = ~pop_i;  // pass-through: room exists exactly while the consumer pops
+    end else begin
+        assign full_o       = (status_cnt_q == FIFO_DEPTH[ADDR_DEPTH:0]);
+        assign empty_o      = (status_cnt_q == 0) & ~(FALL_THROUGH & push_i);
+    end
+    // status flags
+
+    // read and write queue logic
+    always_comb begin : read_write_comb
+        // default assignment
+        read_pointer_n  = read_pointer_q;
+        write_pointer_n = write_pointer_q;
+        status_cnt_n    = status_cnt_q;
+        data_o          = (DEPTH == 0) ? data_i : mem_q[read_pointer_q];
+        mem_n           = mem_q;
+        gate_clock      = 1'b1;
+
+        // push a new element to the queue
+        if (push_i && ~full_o) begin
+            // push the data onto the queue
+            mem_n[write_pointer_q] = data_i;
+            // un-gate the clock, we want to write something
+            gate_clock = 1'b0;
+            // increment the write counter
+            if (write_pointer_q == FIFO_DEPTH[ADDR_DEPTH-1:0] - 1)
+                write_pointer_n = '0;
+            else
+                write_pointer_n = write_pointer_q + 1;
+            // increment the overall counter
+            status_cnt_n    = status_cnt_q + 1;
+        end
+
+        if (pop_i && ~empty_o) begin
+            // read from the queue is a default assignment
+            // but increment the read pointer...
+            if (read_pointer_n == FIFO_DEPTH[ADDR_DEPTH-1:0] - 1) // read_pointer_n still equals read_pointer_q here (default assignment above)
+                read_pointer_n = '0;
+            else
+                read_pointer_n = read_pointer_q + 1;
+            // ... and decrement the overall count
+            status_cnt_n   = status_cnt_q - 1;
+        end
+
+        // keep the count pointer stable if we push and pop at the same time
+        if (push_i && pop_i &&  ~full_o && ~empty_o)
+            status_cnt_n   = status_cnt_q;
+
+        // FIFO is in pass through mode -> do not change the pointers
+        if (FALL_THROUGH && (status_cnt_q == 0) && push_i) begin
+            data_o = data_i;
+            if (pop_i) begin
+                status_cnt_n = status_cnt_q;
+                read_pointer_n = read_pointer_q;
+                write_pointer_n = write_pointer_q;
+            end
+        end
+    end
+
+    // sequential process
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if(~rst_ni) begin
+            read_pointer_q  <= '0;
+            write_pointer_q <= '0;
+            status_cnt_q    <= '0;
+        end else begin
+            if (flush_i) begin // flush resets pointers and count; mem_q is left untouched
+                read_pointer_q  <= '0;
+                write_pointer_q <= '0;
+                status_cnt_q    <= '0;
+             end else begin
+                read_pointer_q  <= read_pointer_n;
+                write_pointer_q <= write_pointer_n;
+                status_cnt_q    <= status_cnt_n;
+            end
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if(~rst_ni) begin
+            mem_q <= '0;
+        end else if (!gate_clock) begin // memory only updates in cycles with an accepted push
+            mem_q <= mem_n;
+        end
+    end
+
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin
+        assert (DEPTH > 0)             else $error("DEPTH must be greater than 0."); // NOTE(review): the logic above handles DEPTH == 0 as pass-through, yet this reports it as an error -- confirm which is intended
+    end
+
+    full_write : assert property(
+        @(posedge clk_i) disable iff (~rst_ni) (full_o |-> ~push_i))
+        else $fatal (1, "Trying to push new data although the FIFO is full.");
+
+    empty_read : assert property(
+        @(posedge clk_i) disable iff (~rst_ni) (empty_o |-> ~pop_i))
+        else $fatal (1, "Trying to pop data although the FIFO is empty.");
+`endif
+// pragma translate_on
+
+endmodule // fifo_v3
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/graycode.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/graycode.sv
new file mode 100644
index 0000000000..b9ef43a7b0
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/graycode.sv
@@ -0,0 +1,33 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki
+
+/// A binary to gray code converter.
+module binary_to_gray #(
+    parameter int N = -1 // word width; the default of -1 is presumably a placeholder that must be overridden -- confirm
+)(
+    input  logic [N-1:0] A, // binary input
+    output logic [N-1:0] Z  // gray-coded output
+);
+    assign Z = A ^ (A >> 1); // each bit is XORed with its upper neighbour; the MSB passes through
+endmodule
+
+/// A gray code to binary converter.
+module gray_to_binary #(
+    parameter int N = -1 // word width; the default of -1 is presumably a placeholder that must be overridden -- confirm
+)(
+    input  logic [N-1:0] A, // gray-coded input
+    output logic [N-1:0] Z  // binary output
+);
+    for (genvar i = 0; i < N; i++)
+        assign Z[i] = ^A[N-1:i]; // XOR reduction of input bits i and above
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/id_queue.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/id_queue.sv
new file mode 100644
index 0000000000..c432af4b94
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/id_queue.sv
@@ -0,0 +1,268 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations under the License.
+
+// ID Queue
+//
+// In an ID queue, every element has a numeric ID. Among all elements that have the same ID, the ID
+// queue preserves FIFO order.
+//
+// This ID queue implementation allows to either push (through the `inp_*` signals) or pop (through
+// the `oup_*` signals) one element per clock cycle. The `inp_` port has priority and grants a
+// request iff the queue is not full. The `oup_` port dequeues an element iff `oup_pop_i` is
+// asserted during an `oup_` handshake; otherwise, it performs a non-destructive read. `oup_data_o`
+// is valid iff `oup_data_valid_o` is asserted during an `oup_` handshake. If `oup_data_valid_o` is
+// not asserted, the queue did not contain an element with the provided ID.
+//
+// This ID queue additionally provides the `exists_` port, which searches for an element anywhere in
+// the queue. The comparison performed during the search can be masked: for every bit that is
+// asserted in `exists_mask_i`, the corresponding bit in the queue element and in `exists_data_i`
+// must be equal for a match; the other bits are not compared. If masking is not required, tie
+// `exists_mask_i` to `'1` and the synthesizer should simplify the comparisons to unmasked ones. The
+// `exists_` port operates independently of the `inp_` and `oup_` ports. If the `exists_` port is
+// unused, tie `exists_req_i` to `1'b0` and the synthesizer should remove the internal comparators.
+//
+// This ID queue can store at most `CAPACITY` elements, independent of their ID. Let
+// - C = `CAPACITY`
+// - B = $bits(data_t)
+// - I = 2**`ID_WIDTH`
+// Then
+// - the queue element storage requires O(C * (B + log2(C))) bit
+// - the ID table requires O(H * log2(C)) bit, where H = min(C, I)
+//
+// Maintainers:
+// - Andreas Kurth
+
+module id_queue #(
+  parameter int ID_WIDTH  = 0,     // must be >= 1 (checked in validate_params)
+  parameter int CAPACITY  = 0,     // must be >= 1 (checked in validate_params)
+  parameter type data_t   = logic,
+  // Dependent parameters, DO NOT OVERRIDE!
+  localparam type id_t    = logic[ID_WIDTH-1:0],
+  localparam type mask_t  = logic[$bits(data_t)-1:0]
+) (
+  input  logic    clk_i,
+  input  logic    rst_ni,
+
+  input  id_t     inp_id_i,
+  input  data_t   inp_data_i,
+  input  logic    inp_req_i,
+  output logic    inp_gnt_o,
+
+  input  data_t   exists_data_i,
+  input  mask_t   exists_mask_i,
+  input  logic    exists_req_i,
+  output logic    exists_o,
+  output logic    exists_gnt_o,
+
+  input  id_t     oup_id_i,
+  input  logic    oup_pop_i,
+  input  logic    oup_req_i,
+  output data_t   oup_data_o,
+  output logic    oup_data_valid_o,
+  output logic    oup_gnt_o
+);
+
+  // Capacity of the head-tail table, which associates an ID with corresponding head and tail
+  // indices.
+  localparam int N_IDS = 2**ID_WIDTH;
+  localparam int HT_CAPACITY = (N_IDS <= CAPACITY) ? N_IDS : CAPACITY;
+
+  // Type for indexing the head-tail table.
+  typedef logic [$clog2(HT_CAPACITY)-1:0] ht_idx_t;
+
+  // Type for indexing the linked data table.
+  typedef logic [$clog2(CAPACITY)-1:0] ld_idx_t;
+
+  // Type of an entry in the head-tail table.
+  typedef struct packed {
+    id_t      id;
+    ld_idx_t  head,
+              tail;
+    logic     free;
+  } head_tail_t;
+
+  // Type of an entry in the linked data table.
+  typedef struct packed {
+    data_t    data;
+    ld_idx_t  next;
+    logic     free; // LSB of the packed struct; the code below relies on this when writing bit [0]
+  } linked_data_t;
+
+  head_tail_t [HT_CAPACITY-1:0]   head_tail_d,    head_tail_q;
+
+  linked_data_t [CAPACITY-1:0]    linked_data_d,  linked_data_q;
+
+  logic                           full,
+                                  match_id_valid,
+                                  no_id_match;
+
+  logic [HT_CAPACITY-1:0]         head_tail_free,
+                                  idx_matches_id;
+
+  logic [CAPACITY-1:0]            exists_match,
+                                  linked_data_free;
+
+  id_t                            match_id;
+
+  ht_idx_t                        head_tail_free_idx,
+                                  match_idx;
+
+  ld_idx_t                        linked_data_free_idx;
+
+  // Find the index in the head-tail table that matches a given ID.
+  for (genvar i = 0; i < HT_CAPACITY; i++) begin: gen_idx_match
+    assign idx_matches_id[i] = match_id_valid && (head_tail_q[i].id == match_id) &&
+        !head_tail_q[i].free;
+  end
+  assign no_id_match = !(|idx_matches_id);
+  onehot_to_bin #(
+    .ONEHOT_WIDTH (HT_CAPACITY)
+  ) i_id_ohb (
+    .onehot (idx_matches_id),
+    .bin    (match_idx)
+  );
+
+  // Find the first free index in the head-tail table.
+  for (genvar i = 0; i < HT_CAPACITY; i++) begin: gen_head_tail_free
+    assign head_tail_free[i] = head_tail_q[i].free;
+  end
+  lzc #(
+    .WIDTH  (HT_CAPACITY),
+    .MODE   (0)             // Start at index 0.
+  ) i_ht_free_lzc (
+    .in_i     (head_tail_free),
+    .cnt_o    (head_tail_free_idx),
+    .empty_o  ()
+  );
+
+  // Find the first free index in the linked data table.
+  for (genvar i = 0; i < CAPACITY; i++) begin: gen_linked_data_free
+    assign linked_data_free[i] = linked_data_q[i].free;
+  end
+  lzc #(
+    .WIDTH  (CAPACITY),
+    .MODE   (0)             // Start at index 0.
+  ) i_ld_free_lzc (
+    .in_i     (linked_data_free),
+    .cnt_o    (linked_data_free_idx),
+    .empty_o  ()
+  );
+
+  // The queue is full if and only if there are no free items in the linked data structure.
+  assign full = !(|linked_data_free);
+
+  assign inp_gnt_o = ~full;
+  always_comb begin
+    match_id          = 'x;
+    match_id_valid    = 1'b0;
+    head_tail_d       = head_tail_q;
+    linked_data_d     = linked_data_q;
+    oup_gnt_o         = 1'b0;
+    oup_data_o        = data_t'('x);
+    oup_data_valid_o  = 1'b0;
+    if (inp_req_i && !full) begin // the input port wins over a simultaneous output request
+      match_id = inp_id_i;
+      match_id_valid = 1'b1;
+      // If the ID does not yet exist in the queue, add a new ID entry.
+      if (no_id_match) begin
+        head_tail_d[head_tail_free_idx] = '{
+          id:     inp_id_i,
+          head:   linked_data_free_idx,
+          tail:   linked_data_free_idx,
+          free:   1'b0
+        };
+      // Otherwise append it to the existing ID subqueue.
+      end else begin
+        linked_data_d[head_tail_q[match_idx].tail].next = linked_data_free_idx;
+        head_tail_d[match_idx].tail = linked_data_free_idx;
+      end
+      linked_data_d[linked_data_free_idx] = '{
+        data: inp_data_i,
+        next: 'x,
+        free: 1'b0
+      };
+    end else if (oup_req_i) begin
+      match_id = oup_id_i;
+      match_id_valid = 1'b1;
+      if (!no_id_match) begin
+        oup_data_o = data_t'(linked_data_q[head_tail_q[match_idx].head].data);
+        oup_data_valid_o = 1'b1;
+        if (oup_pop_i) begin
+          // Set free bit of linked data entry, all other bits are don't care.
+          linked_data_d[head_tail_q[match_idx].head]    = 'x;
+          linked_data_d[head_tail_q[match_idx].head][0] = 1'b1;
+          if (head_tail_q[match_idx].head == head_tail_q[match_idx].tail) begin
+            head_tail_d[match_idx] = '{free: 1'b1, default: 'x}; // last element of this ID: release the ID entry
+          end else begin
+            head_tail_d[match_idx].head = linked_data_q[head_tail_q[match_idx].head].next;
+          end
+        end
+      end
+      // Always grant the output request. If there was no match, the default, invalid entry
+      // will be returned.
+      oup_gnt_o = 1'b1;
+    end
+  end
+
+  // Exists Lookup
+  for (genvar i = 0; i < CAPACITY; i++) begin: gen_lookup
+    mask_t exists_match_bits;
+    for (genvar j = 0; j < $bits(data_t); j++) begin: gen_mask
+      always_comb begin
+        if (linked_data_q[i].free) begin
+          exists_match_bits[j] = 1'b0; // free entries never match
+        end else begin
+          if (!exists_mask_i[j]) begin
+            exists_match_bits[j] = 1'b1; // masked-out bits always match
+          end else begin
+            exists_match_bits[j] = (linked_data_q[i].data[j] == exists_data_i[j]);
+          end
+        end
+      end
+    end
+    assign exists_match[i] = (&exists_match_bits);
+  end
+  always_comb begin
+    exists_gnt_o  = 1'b0;
+    exists_o      = 'x;
+    if (exists_req_i) begin
+      exists_gnt_o  = 1'b1;
+      exists_o      = (|exists_match);
+    end
+  end
+
+  // Registers (the head-tail table has only HT_CAPACITY <= CAPACITY entries).
+  for (genvar i = 0; i < CAPACITY; i++) begin: gen_ffs
+    if (i < HT_CAPACITY) begin: gen_ht_ff
+      always_ff @(posedge clk_i, negedge rst_ni)
+        if (!rst_ni) head_tail_q[i] <= '{free: 1'b1, default: 'x};
+        else         head_tail_q[i] <= head_tail_d[i];
+    end
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+      if (!rst_ni) begin
+        linked_data_q[i]    <= 'x; // Only the free bit (LSB) must be defined after reset.
+        linked_data_q[i][0] <= 1'b1;
+      end else linked_data_q[i] <= linked_data_d[i];
+    end
+  end
+
+  // Validate parameters.
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin: validate_params
+    assert (ID_WIDTH >= 1)
+      else $fatal(1, "The ID must at least be one bit wide!");
+    assert (CAPACITY >= 1)
+      else $fatal(1, "The queue must have capacity of at least one entry!");
+  end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr.sv
new file mode 100644
index 0000000000..0b730d6221
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr.sv
@@ -0,0 +1,310 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner, ETH Zurich
+// Date: 26.04.2019
+//
+// Description: This is a parametric LFSR with precomputed coefficients for
+// LFSR lengths from 4 to 64bit.
+
+// Additional block cipher layers can be instantiated to non-linearly transform
+// the pseudo-random LFSR sequence at the output, and hence break the shifting
+// patterns. The additional cipher layers can only be used for an LFSR width
+// of 64bit, since the block cipher has been designed for that block length.
+
+module lfsr #(
+  parameter int unsigned          LfsrWidth     = 64,   // [4,64]
+  parameter int unsigned          OutWidth      = 8,    // [1,LfsrWidth]
+  parameter logic [LfsrWidth-1:0] RstVal        = '1,   // [1,2^LfsrWidth-1]
+  // 0: disabled, the present cipher uses 31, but just a few layers (1-3) are enough
+  // to break linear shifting patterns
+  parameter int unsigned          CipherLayers  = 0,
+  parameter bit                   CipherReg     = 1'b1  // additional output reg after cipher
+) (
+  input  logic                    clk_i,
+  input  logic                    rst_ni,               // asynchronous reset, active low
+  input  logic                    en_i,                 // advance the LFSR by one step
+  output logic [OutWidth-1:0]     out_o                 // low OutWidth bits of the (ciphered) state
+);
+
+// Galois LFSR feedback masks, indexed by LFSR width (4 to 64)
+// Automatically generated with get_lfsr_masks.py
+// Masks are from https://users.ece.cmu.edu/~koopman/lfsr/
+localparam logic [63:0] masks [4:64] = '{64'hC,
+                                         64'h1E,
+                                         64'h39,
+                                         64'h7E,
+                                         64'hFA,
+                                         64'h1FD,
+                                         64'h3FC,
+                                         64'h64B,
+                                         64'hD8F,
+                                         64'h1296,
+                                         64'h2496,
+                                         64'h4357,
+                                         64'h8679,
+                                         64'h1030E,
+                                         64'h206CD,
+                                         64'h403FE,
+                                         64'h807B8,
+                                         64'h1004B2,
+                                         64'h2006A8,
+                                         64'h4004B2,
+                                         64'h800B87,
+                                         64'h10004F3,
+                                         64'h200072D,
+                                         64'h40006AE,
+                                         64'h80009E3,
+                                         64'h10000583,
+                                         64'h20000C92,
+                                         64'h400005B6,
+                                         64'h80000EA6,
+                                         64'h1000007A3,
+                                         64'h200000ABF,
+                                         64'h400000842,
+                                         64'h80000123E,
+                                         64'h100000074E,
+                                         64'h2000000AE9,
+                                         64'h400000086A,
+                                         64'h8000001213,
+                                         64'h1000000077E,
+                                         64'h2000000123B,
+                                         64'h40000000877,
+                                         64'h8000000108D,
+                                         64'h100000000AE9,
+                                         64'h200000000E9F,
+                                         64'h4000000008A6,
+                                         64'h80000000191E,
+                                         64'h100000000090E,
+                                         64'h2000000000FB3,
+                                         64'h4000000000D7D,
+                                         64'h80000000016A5,
+                                         64'h10000000000B4B,
+                                         64'h200000000010AF,
+                                         64'h40000000000DDE,
+                                         64'h8000000000181A,
+                                         64'h100000000000B65,
+                                         64'h20000000000102D,
+                                         64'h400000000000CD5,
+                                         64'h8000000000024C1,
+                                         64'h1000000000000EF6,
+                                         64'h2000000000001363,
+                                         64'h4000000000000FCD,
+                                         64'h80000000000019E2};
+
+// this S-box and permutation P has been taken from the Present Cipher,
+// a super lightweight block cipher. use the cipher layers to add additional
+// non-linearity to the LFSR output. note one layer does not fully correspond
+// to the present cipher round, since the key and rekeying function is not applied here.
+//
+// See also:
+// "PRESENT: An Ultra-Lightweight Block Cipher", A. Bogdanov et al., Ches 2007
+// http://www.lightweightcrypto.org/present/present_ches2007.pdf
+
+// this is the sbox from the present cipher
+localparam logic[15:0][3:0] sbox4 = {4'h2, 4'h1, 4'h7, 4'h4,
+                                     4'h8, 4'hF, 4'hE, 4'h3,
+                                     4'hD, 4'hA, 4'h0, 4'h9,
+                                     4'hB, 4'h6, 4'h5, 4'hC };
+
+// these are the permutation indices of the present cipher
+localparam logic[63:0][5:0] perm = {6'd63, 6'd47, 6'd31, 6'd15, 6'd62, 6'd46, 6'd30, 6'd14, 6'd61, 6'd45, 6'd29, 6'd13, 6'd60, 6'd44, 6'd28, 6'd12,
+                                    6'd59, 6'd43, 6'd27, 6'd11, 6'd58, 6'd42, 6'd26, 6'd10, 6'd57, 6'd41, 6'd25, 6'd09, 6'd56, 6'd40, 6'd24, 6'd08,
+                                    6'd55, 6'd39, 6'd23, 6'd07, 6'd54, 6'd38, 6'd22, 6'd06, 6'd53, 6'd37, 6'd21, 6'd05, 6'd52, 6'd36, 6'd20, 6'd04,
+                                    6'd51, 6'd35, 6'd19, 6'd03, 6'd50, 6'd34, 6'd18, 6'd02, 6'd49, 6'd33, 6'd17, 6'd01, 6'd48, 6'd32, 6'd16, 6'd00};
+
+
+function automatic logic [63:0] sbox4_layer(logic [63:0] in);
+  logic [63:0] out;
+  // loop form: for (logic [4:0] j = '0; j<16; j++) out[j*4 +: 4] = sbox4[in[j*4 +: 4]];
+  // the unrolled version below simulates much faster than the loop
+  out[0*4  +: 4] = sbox4[in[0*4  +: 4]];
+  out[1*4  +: 4] = sbox4[in[1*4  +: 4]];
+  out[2*4  +: 4] = sbox4[in[2*4  +: 4]];
+  out[3*4  +: 4] = sbox4[in[3*4  +: 4]];
+
+  out[4*4  +: 4] = sbox4[in[4*4  +: 4]];
+  out[5*4  +: 4] = sbox4[in[5*4  +: 4]];
+  out[6*4  +: 4] = sbox4[in[6*4  +: 4]];
+  out[7*4  +: 4] = sbox4[in[7*4  +: 4]];
+
+  out[8*4  +: 4] = sbox4[in[8*4  +: 4]];
+  out[9*4  +: 4] = sbox4[in[9*4  +: 4]];
+  out[10*4 +: 4] = sbox4[in[10*4 +: 4]];
+  out[11*4 +: 4] = sbox4[in[11*4 +: 4]];
+
+  out[12*4 +: 4] = sbox4[in[12*4 +: 4]];
+  out[13*4 +: 4] = sbox4[in[13*4 +: 4]];
+  out[14*4 +: 4] = sbox4[in[14*4 +: 4]];
+  out[15*4 +: 4] = sbox4[in[15*4 +: 4]];
+  return out;
+endfunction : sbox4_layer
+
+function automatic logic [63:0] perm_layer(logic [63:0] in);
+  logic [63:0] out;
+  // loop form: for (logic [7:0] j = '0; j<64; j++) out[perm[j]] = in[j];
+  // the unrolled version below simulates much faster than the loop
+  out[perm[0]] = in[0];
+  out[perm[1]] = in[1];
+  out[perm[2]] = in[2];
+  out[perm[3]] = in[3];
+  out[perm[4]] = in[4];
+  out[perm[5]] = in[5];
+  out[perm[6]] = in[6];
+  out[perm[7]] = in[7];
+  out[perm[8]] = in[8];
+  out[perm[9]] = in[9];
+
+  out[perm[10]] = in[10];
+  out[perm[11]] = in[11];
+  out[perm[12]] = in[12];
+  out[perm[13]] = in[13];
+  out[perm[14]] = in[14];
+  out[perm[15]] = in[15];
+  out[perm[16]] = in[16];
+  out[perm[17]] = in[17];
+  out[perm[18]] = in[18];
+  out[perm[19]] = in[19];
+
+  out[perm[20]] = in[20];
+  out[perm[21]] = in[21];
+  out[perm[22]] = in[22];
+  out[perm[23]] = in[23];
+  out[perm[24]] = in[24];
+  out[perm[25]] = in[25];
+  out[perm[26]] = in[26];
+  out[perm[27]] = in[27];
+  out[perm[28]] = in[28];
+  out[perm[29]] = in[29];
+
+  out[perm[30]] = in[30];
+  out[perm[31]] = in[31];
+  out[perm[32]] = in[32];
+  out[perm[33]] = in[33];
+  out[perm[34]] = in[34];
+  out[perm[35]] = in[35];
+  out[perm[36]] = in[36];
+  out[perm[37]] = in[37];
+  out[perm[38]] = in[38];
+  out[perm[39]] = in[39];
+
+  out[perm[40]] = in[40];
+  out[perm[41]] = in[41];
+  out[perm[42]] = in[42];
+  out[perm[43]] = in[43];
+  out[perm[44]] = in[44];
+  out[perm[45]] = in[45];
+  out[perm[46]] = in[46];
+  out[perm[47]] = in[47];
+  out[perm[48]] = in[48];
+  out[perm[49]] = in[49];
+
+  out[perm[50]] = in[50];
+  out[perm[51]] = in[51];
+  out[perm[52]] = in[52];
+  out[perm[53]] = in[53];
+  out[perm[54]] = in[54];
+  out[perm[55]] = in[55];
+  out[perm[56]] = in[56];
+  out[perm[57]] = in[57];
+  out[perm[58]] = in[58];
+  out[perm[59]] = in[59];
+
+  out[perm[60]] = in[60];
+  out[perm[61]] = in[61];
+  out[perm[62]] = in[62];
+  out[perm[63]] = in[63];
+  return out;
+endfunction : perm_layer
+
+////////////////////////////////////////////////////////////////////////
+// lfsr: shift right by one and XOR in the width-specific mask when the LSB is set
+////////////////////////////////////////////////////////////////////////
+
+logic [LfsrWidth-1:0] lfsr_d, lfsr_q;
+assign lfsr_d = (en_i) ? (lfsr_q>>1) ^ ({LfsrWidth{lfsr_q[0]}} & masks[LfsrWidth][LfsrWidth-1:0]) : lfsr_q;
+
+always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+  //$display("%b %h", en_i, lfsr_d);
+  if (!rst_ni) begin
+    lfsr_q <= LfsrWidth'(RstVal);
+  end else begin
+    lfsr_q <= lfsr_d;
+  end
+end
+
+////////////////////////////////////////////////////////////////////////
+// block cipher layers
+////////////////////////////////////////////////////////////////////////
+
+if (CipherLayers > unsigned'(0)) begin : g_cipher_layers
+  logic [63:0] ciph_layer;
+  localparam int unsigned NumRepl = ((64+LfsrWidth)/LfsrWidth); // copies of lfsr_q concatenated below
+
+  always_comb begin : p_ciph_layer
+    automatic logic [63:0] tmp;
+    tmp = 64'({NumRepl{lfsr_q}}); // replicate the state, then cut to the 64 bit cipher block
+    for(int unsigned k = 0; k < CipherLayers; k++) begin
+      tmp = perm_layer(sbox4_layer(tmp));
+    end
+    ciph_layer = tmp;
+  end
+
+  // additional output register after the cipher layers (updated only while en_i is high)
+  if (CipherReg) begin : g_cipher_reg
+    logic [OutWidth-1:0] out_d, out_q;
+
+    assign out_d = (en_i) ? ciph_layer[OutWidth-1:0] : out_q;
+    assign out_o = out_q[OutWidth-1:0];
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+      if (!rst_ni) begin
+        out_q <= '0;
+      end else begin
+        out_q <= out_d;
+      end
+    end
+  // no output register: the cipher result is driven out combinationally
+  end else begin : g_no_out_reg
+    assign out_o = ciph_layer[OutWidth-1:0];
+  end
+
+// no block cipher: the raw LFSR state is driven out
+end else begin : g_no_cipher_layers
+  assign out_o = lfsr_q[OutWidth-1:0];
+end
+
+////////////////////////////////////////////////////////////////////////
+// assertions
+////////////////////////////////////////////////////////////////////////
+
+// pragma translate_off
+initial begin
+  // these are the LUT limits
+  assert(OutWidth <= LfsrWidth) else
+    $fatal(1,"OutWidth must be smaller equal the LfsrWidth.");
+  assert(RstVal > unsigned'(0)) else
+    $fatal(1,"RstVal must be nonzero.");
+  assert((LfsrWidth >= $low(masks)) && (LfsrWidth <= $high(masks))) else
+    $fatal(1,"Unsupported LfsrWidth.");
+  assert(masks[LfsrWidth][LfsrWidth-1]) else
+    $fatal(1, "LFSR mask is not correct. The MSB must be 1." );
+  assert((CipherLayers > 0) && (LfsrWidth == 64) || (CipherLayers == 0)) else
+    $fatal(1, "Use additional cipher layers only in conjunction with an LFSR width of 64 bit." );
+end
+
+`ifndef VERILATOR
+  all_zero: assert property (
+    @(posedge clk_i) disable iff (!rst_ni) en_i |-> lfsr_d)
+      else $fatal(1,"Lfsr must not be all-zero.");
+`endif
+// pragma translate_on
+
+endmodule // lfsr
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_16bit.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_16bit.sv
new file mode 100644
index 0000000000..3fcf96e22e
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_16bit.sv
@@ -0,0 +1,67 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba, ETH Zurich
+// Date: 5.11.2018
+// Description: 16-bit LFSR
+
+// --------------
+// 16-bit LFSR
+// --------------
+//
+// Description: Shift register with XNOR feedback from bits 15, 12, 5 and 1
+//
+module lfsr_16bit #(
+    parameter logic [15:0] SEED  = 16'b0, // reset value; all-ones is the lock-up state of the XNOR
+    parameter int unsigned WIDTH = 16     // number of ways to select from, at most 16
+)(
+    input  logic                      clk_i,
+    input  logic                      rst_ni,         // asynchronous reset, active low
+    input  logic                      en_i,           // shift the register by one bit
+    output logic [WIDTH-1:0]          refill_way_oh,  // one-hot decode of refill_way_bin
+    output logic [$clog2(WIDTH)-1:0]  refill_way_bin  // low LOG_WIDTH bits of the register
+);
+
+    localparam int unsigned LOG_WIDTH = $clog2(WIDTH);
+
+    logic [15:0] shift_d, shift_q;
+
+
+    always_comb begin
+
+        automatic logic shift_in;
+        shift_in = !(shift_q[15] ^ shift_q[12] ^ shift_q[5] ^ shift_q[1]);
+
+        shift_d = shift_q;
+
+        if (en_i)
+            shift_d = {shift_q[14:0], shift_in};
+
+        // output assignment (both outputs are taken from the current register value)
+        refill_way_oh = 'b0;
+        refill_way_oh[shift_q[LOG_WIDTH-1:0]] = 1'b1;
+        refill_way_bin = shift_q[LOG_WIDTH-1:0]; // explicit slice instead of implicit truncation
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+        if(~rst_ni) begin
+            shift_q <= SEED;
+        end else begin
+            shift_q <= shift_d;
+        end
+    end
+
+    //pragma translate_off
+    initial begin
+        assert (WIDTH <= 16) else $fatal(1, "WIDTH needs to be less than or equal to 16 because of the 16-bit LFSR");
+    end
+    //pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_8bit.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_8bit.sv
new file mode 100644
index 0000000000..fbe5c748a1
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_8bit.sv
@@ -0,0 +1,68 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Igor Loi - University of Bologna
+// Author: Florian Zaruba, ETH Zurich
+// Date: 12.11.2017
+// Description: 8-bit LFSR
+
+// --------------
+// 8-bit LFSR
+// --------------
+//
+// Description: Shift register with XNOR feedback from bits 7, 3, 2 and 1
+//
+module lfsr_8bit #(
+    parameter logic [7:0]  SEED  = 8'b0, // reset value; all-ones is the lock-up state of the XNOR
+    parameter int unsigned WIDTH = 8     // number of ways to select from, at most 8
+)(
+    input  logic                      clk_i,
+    input  logic                      rst_ni,         // asynchronous reset, active low
+    input  logic                      en_i,           // shift the register by one bit
+    output logic [WIDTH-1:0]          refill_way_oh,  // one-hot decode of refill_way_bin
+    output logic [$clog2(WIDTH)-1:0]  refill_way_bin  // low LOG_WIDTH bits of the register
+);
+
+    localparam int unsigned LOG_WIDTH = $clog2(WIDTH);
+
+    logic [7:0] shift_d, shift_q;
+
+
+    always_comb begin
+
+        automatic logic shift_in;
+        shift_in = !(shift_q[7] ^ shift_q[3] ^ shift_q[2] ^ shift_q[1]);
+
+        shift_d = shift_q;
+
+        if (en_i)
+            shift_d = {shift_q[6:0], shift_in};
+
+        // output assignment (both outputs are taken from the current register value)
+        refill_way_oh = 'b0;
+        refill_way_oh[shift_q[LOG_WIDTH-1:0]] = 1'b1;
+        refill_way_bin = shift_q[LOG_WIDTH-1:0]; // explicit slice instead of implicit truncation
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+        if(~rst_ni) begin
+            shift_q <= SEED;
+        end else begin
+            shift_q <= shift_d;
+        end
+    end
+
+    //pragma translate_off
+    initial begin
+        assert (WIDTH <= 8) else $fatal(1, "WIDTH needs to be less than or equal to 8 because of the 8-bit LFSR");
+    end
+    //pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/lzc.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/lzc.sv
new file mode 100644
index 0000000000..4ebbb5f5ec
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/lzc.sv
@@ -0,0 +1,93 @@
+// Copyright (c) 2018 - 2019 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+
+/// A trailing zero counter / leading zero counter.
+/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
+/// Set MODE to 1 for leading zero counter  => cnt_o is the number of leading zeros  (from the MSB)
+/// If the input does not contain a one (all bits zero), `empty_o` is asserted. Additionally `cnt_o`
+/// contains the maximum number of zeros - 1 in the 7 bit example below:
+///   in_i = 000_0000, empty_o = 1, cnt_o = 6 (mode = 0)
+///   in_i = 000_0001, empty_o = 0, cnt_o = 0 (mode = 0)
+///   in_i = 000_1000, empty_o = 0, cnt_o = 3 (mode = 0)
+/// Note that this version does not contain a separate (simulation only) implementation for
+/// Verilator; every tool elaborates the same generate-based tree.
+
+module lzc #(
+  /// The width of the input vector.
+  parameter int unsigned WIDTH = 2,
+  parameter bit          MODE  = 1'b0 // 0 -> trailing zero, 1 -> leading zero
+) (
+  input  logic [WIDTH-1:0]         in_i,
+  output logic [$clog2(WIDTH)-1:0] cnt_o,
+  output logic                     empty_o // asserted if all bits in in_i are zero
+);
+
+  localparam int unsigned NUM_LEVELS = $clog2(WIDTH);
+
+  // pragma translate_off
+  initial begin
+    assert(WIDTH > 0) else $fatal(1, "input must be at least one bit wide");
+  end
+  // pragma translate_on
+
+  logic [WIDTH-1:0][NUM_LEVELS-1:0]          index_lut;   // index_lut[j] == j
+  logic [2**NUM_LEVELS-1:0]                  sel_nodes;   // node has at least one set bit below it
+  logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0]  index_nodes; // index selected at each tree node
+
+  logic [WIDTH-1:0] in_tmp;
+
+  // reverse vector if required
+  always_comb begin : flip_vector
+    for (int unsigned i = 0; i < WIDTH; i++) begin
+      in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
+    end
+  end
+
+  for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
+    assign index_lut[j] = NUM_LEVELS'(unsigned'(j));
+  end
+
+  for (genvar level = 0; unsigned'(level) < NUM_LEVELS; level++) begin : g_levels
+    if (unsigned'(level) == NUM_LEVELS-1) begin : g_last_level
+      for (genvar k = 0; k < 2**level; k++) begin : g_level
+        // if two successive indices are still in the vector...
+        if (unsigned'(k) * 2 < WIDTH-1) begin
+          assign sel_nodes[2**level-1+k]   = in_tmp[k*2] | in_tmp[k*2+1];
+          assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] :
+                                                                     index_lut[k*2+1];
+        end
+        // if only the first index is still in the vector...
+        if (unsigned'(k) * 2 == WIDTH-1) begin
+          assign sel_nodes[2**level-1+k]   = in_tmp[k*2];
+          assign index_nodes[2**level-1+k] = index_lut[k*2];
+        end
+        // if index is out of range
+        if (unsigned'(k) * 2 > WIDTH-1) begin
+          assign sel_nodes[2**level-1+k]   = 1'b0;
+          assign index_nodes[2**level-1+k] = '0;
+        end
+      end
+    end else begin
+      for (genvar l = 0; l < 2**level; l++) begin : g_level
+        assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
+        assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ? index_nodes[2**(level+1)-1+l*2] :
+                                                                                     index_nodes[2**(level+1)-1+l*2+1];
+      end
+    end
+  end
+
+  assign cnt_o   = NUM_LEVELS > unsigned'(0) ? index_nodes[0] : $clog2(WIDTH)'(0);
+  assign empty_o = NUM_LEVELS > unsigned'(0) ? ~sel_nodes[0]  : ~(|in_i);
+
+endmodule : lzc
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/mv_filter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/mv_filter.sv
new file mode 100644
index 0000000000..2254c0b059
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/mv_filter.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+
+module mv_filter #(
+    parameter int unsigned WIDTH     = 4,
+    parameter int unsigned THRESHOLD = 10
+)(
+    input  logic clk_i,
+    input  logic rst_ni,
+    input  logic sample_i,
+    input  logic clear_i,
+    input  logic d_i,
+    output logic q_o
+);
+    logic [WIDTH-1:0] cnt_q, cnt_d;   // number of samples seen with d_i high
+    logic             flag_q, flag_d; // set once the count has reached THRESHOLD
+
+    assign q_o = flag_q;
+
+    always_comb begin
+        // hold the sample count and the output flag unless one of the cases below applies
+        cnt_d  = cnt_q;
+        flag_d = flag_q;
+
+        if (clear_i) begin
+            // the synchronous clear overrides both counting and flag setting
+            cnt_d  = '0;
+            flag_d = 1'b0;
+        end else if (cnt_q >= THRESHOLD[WIDTH-1:0]) begin
+            // threshold reached: raise the flag, the count stays where it is
+            flag_d = 1'b1;
+        end else if (sample_i && d_i) begin
+            cnt_d  = cnt_q + 1;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            cnt_q  <= '0;
+            flag_q <= 1'b0;
+        end else begin
+            cnt_q  <= cnt_d;
+            flag_q <= flag_d;
+        end
+    end
+endmodule
\ No newline at end of file
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/onehot_to_bin.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/onehot_to_bin.sv
new file mode 100644
index 0000000000..35c513cd09
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/onehot_to_bin.sv
@@ -0,0 +1,39 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Francesco Conti
+
+module onehot_to_bin #(
+    parameter int unsigned ONEHOT_WIDTH = 16,
+    // Do Not Change
+    parameter int unsigned BIN_WIDTH    = $clog2(ONEHOT_WIDTH)
+)(
+    input  logic [ONEHOT_WIDTH-1:0] onehot, // at most one bit may be set (checked in simulation)
+    output logic [BIN_WIDTH-1:0]    bin     // index of the set bit, all zero if no bit is set
+);
+
+    for (genvar j = 0; j < BIN_WIDTH; j++) begin : jl
+        logic [ONEHOT_WIDTH-1:0] tmp_mask;  // tmp_mask[i] is bit j of the index i
+        for (genvar i = 0; i < ONEHOT_WIDTH; i++) begin : il
+            logic [BIN_WIDTH-1:0] tmp_i;
+            assign tmp_i = i;
+            assign tmp_mask[i] = tmp_i[j];
+        end
+        assign bin[j] = |(tmp_mask & onehot); // OR of bit j over the indices of all set inputs
+    end
+
+// pragma translate_off
+`ifndef VERILATOR
+    // Deferred assertion: re-evaluated whenever `onehot` changes, not only once at time zero.
+    assert final ($onehot0(onehot))
+        else $fatal(1, "[onehot_to_bin] More than one bit set in the one-hot signal");
+`endif
+// pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/plru_tree.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/plru_tree.sv
new file mode 100644
index 0000000000..28e0cba5b4
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/plru_tree.sv
@@ -0,0 +1,120 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: David Schaffenrath, TU Graz
+// Author: Florian Zaruba, ETH Zurich
+//
+// Description: Pseudo Least Recently Used Tree (PLRU)
+// See: https://en.wikipedia.org/wiki/Pseudo-LRU
+
+module plru_tree #(
+  parameter int unsigned ENTRIES = 16
+) (
+  input  logic               clk_i,
+  input  logic               rst_ni,
+  input  logic [ENTRIES-1:0] used_i, // element i was used (one hot)
+  output logic [ENTRIES-1:0] plru_o  // element i is the least recently used (one hot)
+);
+
+    localparam LOG_ENTRIES = $clog2(ENTRIES);
+
+    logic [2*(ENTRIES-1)-1:0] plru_tree_q, plru_tree_d; // loops below address nodes 0..ENTRIES-2
+
+    always_comb begin : plru_replacement
+        plru_tree_d = plru_tree_q;
+        // The PLRU-tree indexing:
+        // lvl0        0
+        //            / \
+        //           /   \
+        // lvl1     1     2
+        //         / \   / \
+        // lvl2   3   4 5   6
+        //       / \ /\/\  /\
+        //      ... ... ... ...
+        // Every node on the path to a used entry is set to the INVERSE of the entry's index bit
+        // at that level. E.g. for a TLB with 8 entries, the for-loop is semantically
+        // equivalent to the following pseudo-code:
+        // unique case (1'b1)
+        // used_i[7]: plru_tree_d[0, 2, 6] = {0, 0, 0};
+        // used_i[6]: plru_tree_d[0, 2, 6] = {0, 0, 1};
+        // used_i[5]: plru_tree_d[0, 2, 5] = {0, 1, 0};
+        // used_i[4]: plru_tree_d[0, 2, 5] = {0, 1, 1};
+        // used_i[3]: plru_tree_d[0, 1, 4] = {1, 0, 0};
+        // used_i[2]: plru_tree_d[0, 1, 4] = {1, 0, 1};
+        // used_i[1]: plru_tree_d[0, 1, 3] = {1, 1, 0};
+        // used_i[0]: plru_tree_d[0, 1, 3] = {1, 1, 1};
+        // default: begin /* No hit */ end
+        // endcase
+        for (int unsigned i = 0; i < ENTRIES; i++) begin
+            automatic int unsigned idx_base, shift, new_index;
+            // entry i was just used: steer every node on its path away from it
+            if (used_i[i]) begin
+                // Set the nodes to the values we would expect
+                for (int unsigned lvl = 0; lvl < LOG_ENTRIES; lvl++) begin
+                  idx_base = $unsigned((2**lvl)-1);
+                  // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                  shift = LOG_ENTRIES - lvl;
+                  // to circumvent the 32 bit integer arithmetic assignment
+                  new_index =  ~((i >> (shift-1)) & 32'b1);
+                  plru_tree_d[idx_base + (i >> shift)] = new_index[0];
+                end
+            end
+        end
+        // Decode tree to write enable signals
+        // Next for-loop basically creates the following logic for e.g. an 8 entry
+        // TLB (note: pseudo-code obviously):
+        // plru_o[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
+        // plru_o[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
+        // plru_o[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
+        // plru_o[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
+        // plru_o[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
+        // plru_o[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
+        // plru_o[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
+        // plru_o[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
+        // For each entry traverse the tree. If every tree-node on the path matches
+        // the corresponding bit of the entry's index, this is
+        // the next entry to replace.
+        for (int unsigned i = 0; i < ENTRIES; i += 1) begin
+            automatic logic en;
+            automatic int unsigned idx_base, shift, new_index;
+            en = 1'b1;
+            for (int unsigned lvl = 0; lvl < LOG_ENTRIES; lvl++) begin
+                idx_base = $unsigned((2**lvl)-1);
+                // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                shift = LOG_ENTRIES - lvl;
+                // i.e. en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
+                new_index =  (i >> (shift-1)) & 32'b1;
+                if (new_index[0]) begin
+                  en &= plru_tree_q[idx_base + (i>>shift)];
+                end else begin
+                  en &= ~plru_tree_q[idx_base + (i>>shift)];
+                end
+            end
+            plru_o[i] = en;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            plru_tree_q <= '0;
+        end else begin
+            plru_tree_q <= plru_tree_d;
+        end
+    end
+
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin
+        assert (ENTRIES == 2**LOG_ENTRIES) else $error("Entries must be a power of two");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/popcount.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/popcount.sv
new file mode 100644
index 0000000000..0e16453130
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/popcount.sv
@@ -0,0 +1,57 @@
+// Copyright (C) 2013-2018 ETH Zurich, University of Bologna
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Manuel Eggimann
+
+// Description: This module calculates the hamming weight (number of ones) in
+// its input vector using a balanced binary adder tree. Recursive instantiation
+// is used to build the tree. Any unsigned INPUT_WIDTH larger or equal 2 is
+// legal. The module pads the signal internally to the next power of two.
The
+// output result width is ceil(log2(INPUT_WIDTH))+1.
+
+module popcount #(
+    parameter int unsigned INPUT_WIDTH = 256,
+    localparam POPCOUNT_WIDTH          = $clog2(INPUT_WIDTH)+1
+) (
+    input  logic [INPUT_WIDTH-1:0]    data_i,
+    output logic [POPCOUNT_WIDTH-1:0] popcount_o
+);
+
+   localparam int unsigned PADDED_WIDTH = 2**$clog2(INPUT_WIDTH);
+   localparam int unsigned HALF_WIDTH   = PADDED_WIDTH / 2;
+
+   logic [PADDED_WIDTH-1:0]   data_padded;
+   logic [POPCOUNT_WIDTH-2:0] cnt_upper, cnt_lower;
+
+   // Bring the input to the next power of two. The size cast zero-extends the
+   // unsigned input vector, so the padding bits never contribute to the count.
+   // For a power-of-two INPUT_WIDTH the cast leaves the vector unchanged.
+   assign data_padded = PADDED_WIDTH'(data_i);
+
+   // Binary adder tree: two bits are counted directly, anything wider is split in halves
+   if (INPUT_WIDTH == 2) begin : leaf_node
+     assign cnt_upper = data_padded[1];
+     assign cnt_lower = data_padded[0];
+   end else begin : non_leaf_node
+     popcount #(.INPUT_WIDTH(HALF_WIDTH)) left_child (
+         .data_i     (data_padded[PADDED_WIDTH-1:HALF_WIDTH]),
+         .popcount_o (cnt_upper)
+     );
+     popcount #(.INPUT_WIDTH(HALF_WIDTH)) right_child (
+         .data_i     (data_padded[HALF_WIDTH-1:0]),
+         .popcount_o (cnt_lower)
+     );
+   end
+
+   // The result is the sum of both halves; the output is one bit wider than
+   // either operand, so the addition cannot overflow.
+   assign popcount_o = cnt_upper + cnt_lower;
+
+endmodule : popcount
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/rr_arb_tree.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/rr_arb_tree.sv
new file mode 100644
index 0000000000..dee5024962
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/rr_arb_tree.sv
@@ -0,0 +1,244 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Michael Schaffner , ETH Zurich +// Date: 02.04.2019 +// Description: logarithmic arbitration tree with round robin arbitration scheme. +// +// The rr_arb_tree employs fair round robin arbitration - i.e. the priorities +// rotate each cycle. +// +// The `LockIn` option prevents the arbiter from changing the arbitration +// decision when the arbiter is disabled. I.e., the index of the first request +// that wins the arbitration will be locked in case the destination is not +// able to grant the request in the same cycle. +// +// The `ExtPrio` option allows to override the internal round robin counter via the +// `rr_i` signal. This can be useful in case multiple arbiters need to have +// rotating priorities that are operating in lock-step. If static priority arbitration +// is needed, just connect `rr_i` to '0. +// +// If `AxiVldRdy` is set, the req/gnt signals are compliant with the AXI style vld/rdy +// handshake. Namely, upstream vld (req) must not depend on rdy (gnt), as it can be deasserted +// again even though vld is asserted. Enabling `AxiVldRdy` leads to a reduction of arbiter +// delay and area. 
+//
+
+// Ports:
+//   flush_i      - synchronously clears the round-robin pointer and lock state.
+//   rr_i         - external priority, only used when ExtPrio is set.
+//   req_i/gnt_o  - per-input request and grant.
+//   data_i       - per-input payload.
+//   gnt_i/req_o  - handshake of the arbitrated output.
+//   data_o/idx_o - payload and index of the winning input.
+module rr_arb_tree #(
+  parameter int unsigned NumIn      = 64,
+  parameter int unsigned DataWidth  = 32,
+  parameter type         DataType   = logic [DataWidth-1:0],
+  parameter bit          ExtPrio    = 1'b0, // set to 1'b1 to enable
+  parameter bit          AxiVldRdy  = 1'b0, // treat req/gnt as vld/rdy
+  parameter bit          LockIn     = 1'b0  // set to 1'b1 to enable
+) (
+  input  logic                     clk_i,
+  input  logic                     rst_ni,
+  input  logic                     flush_i, // clears the arbiter state
+  input  logic [$clog2(NumIn)-1:0] rr_i,    // external RR prio (needs to be enabled above)
+  // input requests and data
+  input  logic [NumIn-1:0]         req_i,
+  /* verilator lint_off UNOPTFLAT */
+  output logic [NumIn-1:0]         gnt_o,
+  /* verilator lint_on UNOPTFLAT */
+  input  DataType [NumIn-1:0]      data_i,
+  // arbitrated output
+  input  logic                     gnt_i,
+  output logic                     req_o,
+  output DataType                  data_o,
+  output logic [$clog2(NumIn)-1:0] idx_o
+);
+  // just pass through in this corner case
+  if (NumIn == unsigned'(1)) begin
+    assign req_o    = req_i[0];
+    assign gnt_o[0] = gnt_i;
+    assign data_o   = data_i[0];
+    assign idx_o    = '0;
+  // non-degenerate cases
+  end else begin
+    localparam int unsigned NumLevels = $clog2(NumIn);
+
+    /* verilator lint_off UNOPTFLAT */
+    logic [2**NumLevels-2:0][NumLevels-1:0] index_nodes; // used to propagate the indices
+    DataType [2**NumLevels-2:0]             data_nodes;  // used to propagate the data
+    logic [2**NumLevels-2:0]                gnt_nodes;   // used to propagate the grant to masters
+    logic [2**NumLevels-2:0]                req_nodes;   // used to propagate the requests to slave
+    /* lint_off */
+    logic [NumLevels-1:0]                   rr_q;
+    logic [NumIn-1:0]                       req_d;
+
+    // the final arbitration decision can be taken from the root of the tree
+    assign req_o  = req_nodes[0];
+    assign data_o = data_nodes[0];
+    assign idx_o  = index_nodes[0];
+
+    if (ExtPrio) begin : gen_ext_rr
+      // external priority: no internal pointer and no request locking
+      assign rr_q  = rr_i;
+      assign req_d = req_i;
+    end else begin : gen_int_rr
+      logic [NumLevels-1:0] rr_d;
+
+      // lock arbiter decision in case we got at least one req and no acknowledge
+      if (LockIn) begin : gen_lock
+        logic lock_d, lock_q;
+        logic [NumIn-1:0] req_q;
+
+        assign lock_d = req_o & ~gnt_i;
+        // while locked, arbitrate on the registered request vector
+        assign req_d  = (lock_q) ? req_q : req_i;
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg
+          if (!rst_ni) begin
+            lock_q <= '0;
+          end else begin
+            if (flush_i) begin
+              lock_q <= '0;
+            end else begin
+              lock_q <= lock_d;
+            end
+          end
+        end
+
+        // pragma translate_off
+        `ifndef VERILATOR
+          lock: assert property(
+            @(posedge clk_i) disable iff (!rst_ni) LockIn |-> req_o && !gnt_i |=> idx_o == $past(idx_o))
+              else $fatal (1, "Lock implies same arbiter decision in next cycle if output is not ready.");
+
+          logic [NumIn-1:0] req_tmp;
+          assign req_tmp = req_q & req_i;
+          lock_req: assert property(
+            @(posedge clk_i) disable iff (!rst_ni) LockIn |-> lock_d |=> req_tmp == req_q)
+              else $fatal (1, "It is disallowed to deassert unserved request signals when LockIn is enabled.");
+        `endif
+        // pragma translate_on
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_req_regs
+          if (!rst_ni) begin
+            req_q <= '0;
+          end else begin
+            if (flush_i) begin
+              req_q <= '0;
+            end else begin
+              req_q <= req_d;
+            end
+          end
+        end
+      end else begin : gen_no_lock
+        assign req_d = req_i;
+      end
+
+      // advance the pointer on every completed output handshake, wrapping at NumIn-1
+      assign rr_d = (gnt_i && req_o) ? ((rr_q == NumLevels'(NumIn-1)) ? '0 : rr_q + 1'b1) : rr_q;
+
+      always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs
+        if (!rst_ni) begin
+          rr_q <= '0;
+        end else begin
+          if (flush_i) begin
+            rr_q <= '0;
+          end else begin
+            rr_q <= rr_d;
+          end
+        end
+      end
+    end
+
+    assign gnt_nodes[0] = gnt_i;
+
+    // arbiter tree
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels
+      for (genvar l = 0; l < 2**level; l++) begin : gen_level
+        // local select signal
+        logic sel;
+        // index calcs
+        localparam int unsigned idx0 = 2**level-1+l;// current node
+        localparam int unsigned idx1 = 2**(level+1)-1+l*2;
+        //////////////////////////////////////////////////////////////
+        // uppermost level where data is fed in from the inputs
+        if (unsigned'(level) == NumLevels-1) begin : gen_first_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(l) * 2 < NumIn-1) begin
+            assign req_nodes[idx0]   = req_d[l*2] | req_d[l*2+1];
+
+            // arbitration: round robin
+            assign sel = ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level];
+
+            assign index_nodes[idx0] = NumLevels'(sel);
+            assign data_nodes[idx0]  = (sel) ? data_i[l*2+1] : data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[idx0] & (AxiVldRdy | req_d[l*2])   & ~sel;
+            assign gnt_o[l*2+1]      = gnt_nodes[idx0] & (AxiVldRdy | req_d[l*2+1]) & sel;
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(l) * 2 == NumIn-1) begin
+            assign req_nodes[idx0]   = req_d[l*2];
+            assign index_nodes[idx0] = '0;// always zero in this case
+            assign data_nodes[idx0]  = data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[idx0] & (AxiVldRdy | req_d[l*2]);
+          end
+          // if index is out of range, fill up with zeros (will get pruned)
+          if (unsigned'(l) * 2 > NumIn-1) begin
+            assign req_nodes[idx0]   = 1'b0;
+            // index_nodes is NumLevels bits wide regardless of DataType, so
+            // use a plain zero fill rather than a DataType cast
+            assign index_nodes[idx0] = '0;
+            assign data_nodes[idx0]  = DataType'('0);
+          end
+        //////////////////////////////////////////////////////////////
+        // general case for other levels within the tree
+        end else begin : gen_other_levels
+          assign req_nodes[idx0]   = req_nodes[idx1] | req_nodes[idx1+1];
+
+          // arbitration: round robin
+          assign sel = ~req_nodes[idx1] | req_nodes[idx1+1] & rr_q[NumLevels-1-level];
+
+          assign index_nodes[idx0] = (sel) ? NumLevels'({1'b1, index_nodes[idx1+1][NumLevels-unsigned'(level)-2:0]}) :
+                                             NumLevels'({1'b0, index_nodes[idx1][NumLevels-unsigned'(level)-2:0]});
+          assign data_nodes[idx0]  = (sel) ? data_nodes[idx1+1] : data_nodes[idx1];
+          assign gnt_nodes[idx1]   = gnt_nodes[idx0] & ~sel;
+          assign gnt_nodes[idx1+1] = gnt_nodes[idx0] & sel;
+        end
+        //////////////////////////////////////////////////////////////
+      end
+    end
+
+    // pragma translate_off
+    `ifndef VERILATOR
+    initial begin : p_assert
+      // $fatal takes the finish number as its first argument
+      assert(NumIn)
+        else $fatal(1, "Input must be at least one element wide.");
+      assert(!(LockIn && ExtPrio))
+        else $fatal(1,"Cannot use LockIn feature together with external ExtPrio.");
+    end
+
+    hot_one : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) $onehot0(gnt_o))
+        else $fatal (1, "Grant signal must be hot1 or zero.");
+
+    gnt0 : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) |gnt_o |-> gnt_i)
+        else $fatal (1, "Grant out implies grant in.");
+
+    gnt1 : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) req_o |-> gnt_i |-> |gnt_o)
+        else $fatal (1, "Req out and grant in implies grant out.");
+
+    gnt_idx : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) req_o |-> gnt_i |-> gnt_o[idx_o])
+        else $fatal (1, "Idx_o / gnt_o do not match.");
+
+    req0 : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) |req_i |-> req_o)
+        else $fatal (1, "Req in implies req out.");
+
+    // req_o is a scalar and req_i a vector: reduce the vector, not the scalar
+    req1 : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) req_o |-> |req_i)
+        else $fatal (1, "Req out implies req in.");
+    `endif
+    // pragma translate_on
+  end
+
+endmodule : rr_arb_tree
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen.sv
new file mode 100644
index 0000000000..a7dccc63b0
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen.sv
@@ -0,0 +1,30 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi
+
+// Thin wrapper around `rstgen_bypass` that uses that module's default
+// parameters and feeds `rst_ni` to both of its reset inputs.
+module rstgen (
+  input  logic clk_i,
+  input  logic rst_ni,
+  input  logic test_mode_i,
+  output logic rst_no,
+  output logic init_no
+);
+
+  // rst_test_mode_ni is driven by the same rst_ni as the functional reset, so
+  // this wrapper has no separate test-mode reset pin.
+  rstgen_bypass i_rstgen_bypass (
+    .clk_i            ( clk_i       ),
+    .rst_ni           ( rst_ni      ),
+    .rst_test_mode_ni ( rst_ni      ),
+    .test_mode_i      ( test_mode_i ),
+    .rst_no           ( rst_no      ),
+    .init_no          ( init_no     )
+  );
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen_bypass.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen_bypass.sv
new file mode 100644
index 0000000000..fc1bcfe729
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen_bypass.sv
@@ -0,0 +1,54 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Florian Zaruba
+// Description: This module is a reset synchronizer with a dedicated reset bypass pin for testmode reset.
+// Pro Tip: The wise Dr. Schaffner recommends at least 4 registers!
+
+// Parameters:
+//   NumRegs - length of the synchronizer shift register, at least 1.
+// Ports:
+//   rst_ni           - asynchronous functional reset, active low.
+//   rst_test_mode_ni - reset used while test_mode_i is high.
+//   test_mode_i      - selects the bypass path.
+//   rst_no           - synchronized reset (bypassed in test mode).
+//   init_no          - same as rst_no in functional mode, constant 1 in test mode.
+module rstgen_bypass #(
+  parameter int unsigned NumRegs = 4
+) (
+  input  logic clk_i,
+  input  logic rst_ni,
+  input  logic rst_test_mode_ni,
+  input  logic test_mode_i,
+  output logic rst_no,
+  output logic init_no
+);
+
+  // internal reset
+  logic rst_n;
+
+  logic [NumRegs-1:0] synch_regs_q;
+  // bypass mode
+  always_comb begin
+    if (test_mode_i == 1'b0) begin
+      rst_n   = rst_ni;
+      rst_no  = synch_regs_q[NumRegs-1];
+      init_no = synch_regs_q[NumRegs-1];
+    end else begin
+      rst_n   = rst_test_mode_ni;
+      rst_no  = rst_test_mode_ni;
+      init_no = 1'b1;
+    end
+  end
+
+  // Shift a one in from the LSB after reset release. The size cast keeps the
+  // low NumRegs bits of the concatenation, which equals
+  // {synch_regs_q[NumRegs-2:0], 1'b1} for NumRegs >= 2 and stays legal for
+  // NumRegs == 1, where that part-select would be [-1:0].
+  always_ff @(posedge clk_i or negedge rst_n) begin
+    if (~rst_n) begin
+      synch_regs_q <= '0;
+    end else begin
+      synch_regs_q <= NumRegs'({synch_regs_q, 1'b1});
+    end
+  end
+
+  initial begin : p_assertions
+    if (NumRegs < 1) $fatal(1, "At least one register is required.");
+  end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/serial_deglitch.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/serial_deglitch.sv
new file mode 100644
index 0000000000..22f55a5977
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/serial_deglitch.sv
@@ -0,0 +1,50 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Deglitches a serial line by taking multiple samples until
+// asserting the output high/low.
+
+// Saturating up/down counter with hysteresis: the counter moves towards SIZE
+// while d_i is high and towards 0 while d_i is low (only when en_i is set).
+// q_o goes high when the counter reaches SIZE, goes low when it reaches 0 and
+// keeps its previous value in between.
+module serial_deglitch #(
+  parameter int unsigned SIZE = 4
+)(
+  input  logic clk_i,  // clock
+  input  logic rst_ni, // asynchronous reset active low
+  input  logic en_i,   // enable
+  input  logic d_i,    // serial data in
+  output logic q_o     // filtered data out
+);
+  logic [SIZE-1:0] count_q;
+  logic            q;       // registered copy of q_o, provides the hold value
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      count_q <= '0;
+      q       <= 1'b0;
+    end else begin
+      q <= q_o;
+      if (en_i) begin
+        if (d_i == 1'b1 && count_q != SIZE[SIZE-1:0]) begin
+          count_q <= count_q + 1;
+        // saturate at zero: the lower bound is 0, not SIZE, otherwise the
+        // counter sticks at SIZE and wraps below zero
+        end else if (d_i == 1'b0 && count_q != '0) begin
+          count_q <= count_q - 1;
+        end
+      end
+    end
+  end
+
+  // output process
+  always_comb begin
+    // default to the registered value so no latch is inferred
+    q_o = q;
+    if (count_q == SIZE[SIZE-1:0]) begin
+      q_o = 1'b1;
+    end else if (count_q == '0) begin
+      q_o = 1'b0;
+    end
+  end
+endmodule
\ No newline at end of file
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/shift_reg.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/shift_reg.sv
new file mode 100644
index 0000000000..cbfab0e7ac
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/shift_reg.sv
@@ -0,0 +1,53 @@
+
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author:
+//
+// Description: Simple shift register for arbitrary depth and types
+
+// Delays `d_i` by `Depth` clock cycles; all stages reset to zero.
+module shift_reg #(
+  parameter type         dtype = logic,
+  parameter int unsigned Depth = 1
+)(
+  input  logic clk_i,  // Clock
+  input  logic rst_ni, // Asynchronous reset active low
+  input  dtype d_i,
+  output dtype d_o
+);
+
+  if (Depth == 0) begin
+    // no stage at all: the output follows the input combinationally
+    assign d_o = d_i;
+  end else if (Depth == 1) begin
+    // single stage: the output port itself is the flop
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) d_o <= '0;
+      else         d_o <= d_i;
+    end
+  end else if (Depth > 1) begin
+    // chain of Depth stages; new data enters at index 0, leaves at Depth-1
+    dtype [Depth-1:0] stage_q;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) stage_q <= '0;
+      else         stage_q <= {stage_q[Depth-2:0], d_i};
+    end
+
+    assign d_o = stage_q[Depth-1];
+  end
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/spill_register.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/spill_register.sv
new file mode 100644
index 0000000000..6a99c89e54
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/spill_register.sv
@@ -0,0 +1,89 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki
+
+
+/// A register with handshakes that completely cuts any combinational paths
+/// between the input and output.
+///
+/// Two storage slots are used: A (the slice register) takes new input data,
+/// B (the spill register) takes over A's content when the downstream side is
+/// not ready. Output data comes from B when B is full, otherwise from A.
+module spill_register #(
+  parameter type T = logic
+)(
+  input  logic clk_i   ,
+  input  logic rst_ni  ,
+  input  logic valid_i ,
+  output logic ready_o ,
+  input  T     data_i  ,
+  output logic valid_o ,
+  input  logic ready_i ,
+  output T     data_o
+);
+
+  // The A register.
+  T a_data_q;
+  logic a_full_q;
+  logic a_fill, a_drain;
+  // NOTE(review): a_en and a_en_data are declared but not used in this module.
+  logic a_en, a_en_data;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_data
+    if (!rst_ni)
+      a_data_q <= '0;
+    else if (a_fill)
+      a_data_q <= data_i;
+  end
+
+  // The full flag follows a_fill whenever A is filled or drained, so a
+  // simultaneous fill and drain leaves A full.
+  always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_full
+    if (!rst_ni)
+      a_full_q <= 0;
+    else if (a_fill || a_drain)
+      a_full_q <= a_fill;
+  end
+
+  // The B register.
+  T b_data_q;
+  logic b_full_q;
+  logic b_fill, b_drain;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_data
+    if (!rst_ni)
+      b_data_q <= '0;
+    else if (b_fill)
+      b_data_q <= a_data_q;
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_full
+    if (!rst_ni)
+      b_full_q <= 0;
+    else if (b_fill || b_drain)
+      b_full_q <= b_fill;
+  end
+
+  // Fill the A register on every input handshake. Drain the A register
+  // whenever it is full and the B register is empty.
+  assign a_fill = valid_i && ready_o;
+  assign a_drain = a_full_q && !b_full_q;
+
+  // Fill the B register whenever the A register is drained, but the downstream
+  // circuit is not ready. Drain the B register whenever it is full and the
+  // downstream circuit is ready.
+  assign b_fill = a_drain && !ready_i;
+  assign b_drain = b_full_q && ready_i;
+
+  // We can accept input as long as at least one of the two registers is empty.
+  assign ready_o = !a_full_q || !b_full_q;
+
+  // The unit provides output as long as one of the registers is filled.
+  assign valid_o = a_full_q | b_full_q;
+
+  // We empty the spill register before the slice register.
+  assign data_o = b_full_q ? b_data_q : a_data_q;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/sram.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/sram.sv
new file mode 100644
index 0000000000..fca1372bfe
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/sram.sv
@@ -0,0 +1,46 @@
+// Copyright 2017, 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Date: 13.10.2017
+// Description: SRAM Behavioral Model
+
+// Single-port memory model: a write stores the bits selected by `be_i`, a read
+// registers the address and `rdata_o` shows the word at that address.
+module sram #(
+  int unsigned DATA_WIDTH = 64,
+  int unsigned NUM_WORDS  = 1024
+)(
+  input  logic                         clk_i,
+
+  input  logic                         req_i,
+  input  logic                         we_i,
+  input  logic [$clog2(NUM_WORDS)-1:0] addr_i,
+  input  logic [DATA_WIDTH-1:0]        wdata_i,
+  input  logic [DATA_WIDTH-1:0]        be_i,
+  output logic [DATA_WIDTH-1:0]        rdata_o
+);
+  localparam ADDR_WIDTH = $clog2(NUM_WORDS);
+
+  logic [DATA_WIDTH-1:0] ram [NUM_WORDS-1:0];
+  logic [ADDR_WIDTH-1:0] raddr_q;
+
+  // NOTE(review): the original comment listed "1. randomize array" and
+  // "2. randomize output when no request is active"; neither is implemented
+  // in the code below.
+  // be_i is a per-bit write enable (one enable bit per data bit).
+  always_ff @(posedge clk_i) begin
+    if (req_i) begin
+      if (!we_i)
+        raddr_q <= addr_i;
+      else
+      for (int i = 0; i < DATA_WIDTH; i++)
+        if (be_i[i]) ram[addr_i][i] <= wdata_i[i];
+    end
+  end
+
+  // The read data is not registered: it follows ram[raddr_q] combinationally,
+  // so a later write to the same word becomes visible on rdata_o.
+  assign rdata_o = ram[raddr_q];
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter.sv
new file mode 100644
index 0000000000..c8ca2a8769
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter.sv
@@ -0,0 +1,49 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
+// handshaking with dependency rules as in AXI4) to a single output stream. Once `oup_valid_o` is
+// asserted, `oup_data_o` remains invariant until the output handshake has occurred. The
+// arbitration scheme is round-robin with "look ahead", see the `rrarbiter` for details.
+//
+// NOTE(review): the module body below only instantiates `stream_arbiter_flushable`
+// with `flush_i` tied low; the arbitration itself is implemented there.
+
+module stream_arbiter #(
+  parameter type    DATA_T = logic, // Vivado requires a default value for type parameters.
+  parameter integer N_INP = -1,     // Synopsys DC requires a default value for parameters.
+  parameter         ARBITER = "rr"  // "rr" or "prio"
+) (
+  input  logic              clk_i,
+  input  logic              rst_ni,
+
+  input  DATA_T [N_INP-1:0] inp_data_i,
+  input  logic  [N_INP-1:0] inp_valid_i,
+  output logic  [N_INP-1:0] inp_ready_o,
+
+  output DATA_T             oup_data_o,
+  output logic              oup_valid_o,
+  input  logic              oup_ready_i
+);
+
+  // All parameters and ports are passed through unchanged; the flush input is
+  // never asserted.
+  stream_arbiter_flushable #(
+    .DATA_T (DATA_T),
+    .N_INP (N_INP),
+    .ARBITER (ARBITER)
+  ) i_arb (
+    .clk_i (clk_i),
+    .rst_ni (rst_ni),
+    .flush_i (1'b0),
+    .inp_data_i (inp_data_i),
+    .inp_valid_i (inp_valid_i),
+    .inp_ready_o (inp_ready_o),
+    .oup_data_o (oup_data_o),
+    .oup_valid_o (oup_valid_o),
+    .oup_ready_i (oup_ready_i)
+  );
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter_flushable.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter_flushable.sv
new file mode 100644
index 0000000000..fd1411732a
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter_flushable.sv
@@ -0,0 +1,80 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
+// handshaking with dependency rules as in AXI4) to a single output stream. Once `oup_valid_o` is
+// asserted, `oup_data_o` remains invariant until the output handshake has occurred.
The
+// arbitration scheme is fair round-robin tree, see `rr_arb_tree` for details.
+
+// Parameters:
+//   DATA_T  - payload type of the streams.
+//   N_INP   - number of input streams.
+//   ARBITER - "rr" for round-robin, "prio" for fixed priority (rr_i tied to 0);
+//             any other value stops elaboration.
+// Ports:
+//   flush_i - forwarded to the arbiter's flush input.
+//   inp_*   - input streams, oup_* - arbitrated output stream.
+module stream_arbiter_flushable #(
+  parameter type    DATA_T = logic, // Vivado requires a default value for type parameters.
+  parameter integer N_INP = -1,     // Synopsys DC requires a default value for parameters.
+  parameter         ARBITER = "rr"  // "rr" or "prio"
+) (
+  input  logic              clk_i,
+  input  logic              rst_ni,
+  input  logic              flush_i,
+
+  input  DATA_T [N_INP-1:0] inp_data_i,
+  input  logic  [N_INP-1:0] inp_valid_i,
+  output logic  [N_INP-1:0] inp_ready_o,
+
+  output DATA_T             oup_data_o,
+  output logic              oup_valid_o,
+  input  logic              oup_ready_i
+);
+
+  if (ARBITER == "rr") begin : gen_rr_arb
+    rr_arb_tree #(
+      .NumIn      (N_INP),
+      .DataType   (DATA_T),
+      .ExtPrio    (1'b0),
+      .AxiVldRdy  (1'b1),
+      .LockIn     (1'b1)
+    ) i_arbiter (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .rr_i       ('0),
+      .req_i      (inp_valid_i),
+      .gnt_o      (inp_ready_o),
+      .data_i     (inp_data_i),
+      .gnt_i      (oup_ready_i),
+      .req_o      (oup_valid_o),
+      .data_o     (oup_data_o),
+      .idx_o      ()
+    );
+
+  end else if (ARBITER == "prio") begin : gen_prio_arb
+    // rr_arb_tree rejects LockIn together with ExtPrio (it raises $fatal in
+    // simulation) and does not build its lock logic when ExtPrio is set, so
+    // LockIn is left disabled here.
+    // NOTE(review): without locking, the winning input can change while the
+    // output is stalled - confirm this is acceptable for "prio" users.
+    rr_arb_tree #(
+      .NumIn      (N_INP),
+      .DataType   (DATA_T),
+      .ExtPrio    (1'b1),
+      .AxiVldRdy  (1'b1),
+      .LockIn     (1'b0)
+    ) i_arbiter (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .rr_i       ('0),
+      .req_i      (inp_valid_i),
+      .gnt_o      (inp_ready_o),
+      .data_i     (inp_data_i),
+      .gnt_i      (oup_ready_i),
+      .req_o      (oup_valid_o),
+      .data_o     (oup_data_o),
+      .idx_o      ()
+    );
+
+  end else begin : gen_arb_error
+    $fatal(1, "Invalid value for parameter 'ARBITER'!");
+  end
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_delay.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_delay.sv
new file mode 100644
index 0000000000..e0b6b01fb2
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_delay.sv
@@ -0,0 +1,132 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba, zarubaf@iis.ee.ethz.ch
+// Description: Delay (or randomize) AXI-like handshaking
+
+// Parameters:
+//   StallRandom - take the delay value from an LFSR instead of FixedDelay.
+//   FixedDelay  - delay value loaded into the counter when StallRandom is 0;
+//                 0 (with StallRandom off) makes the module a pure wire.
+//   payload_t   - type of the payload, which is always passed through unchanged.
+module stream_delay #(
+  parameter bit  StallRandom = 0,
+  parameter int  FixedDelay  = 1,
+  parameter type payload_t   = logic
+)(
+  input  logic     clk_i,
+  input  logic     rst_ni,
+
+  input  payload_t payload_i,
+  output logic     ready_o,
+  input  logic     valid_i,
+
+  output payload_t payload_o,
+  input  logic     ready_i,
+  output logic     valid_o
+);
+
+  if (FixedDelay == 0 && !StallRandom) begin : pass_through
+    assign ready_o = ready_i;
+    assign valid_o = valid_i;
+    assign payload_o = payload_i;
+  end else begin
+
+    localparam COUNTER_BITS = 4;
+
+    // Idle:  wait for valid_i, load the counter
+    // Valid: count until count_out reaches zero
+    // Ready: present valid_o and wait for ready_i
+    typedef enum logic [1:0] {
+      Idle, Valid, Ready
+    } state_e;
+
+    state_e state_d, state_q;
+
+    logic load;
+    // NOTE(review): hard-coded to 4 bits, which matches COUNTER_BITS today.
+    logic [3:0] count_out;
+    logic en;
+
+    logic [COUNTER_BITS-1:0] counter_load;
+
+    // the payload is not stored, only the handshake is delayed
+    assign payload_o = payload_i;
+
+    always_comb begin
+      state_d = state_q;
+      valid_o = 1'b0;
+      ready_o = 1'b0;
+      load    = 1'b0;
+      en      = 1'b0;
+
+      unique case (state_q)
+        Idle: begin
+          if (valid_i) begin
+            load = 1'b1;
+            state_d = Valid;
+            // Just one cycle delay
+            // NOTE(review): FixedDelay defaults to 1, so with StallRandom set
+            // and FixedDelay left at its default this branch is always taken,
+            // whatever counter_load is - confirm this is intended.
+            if (FixedDelay == 1 || (StallRandom && counter_load == 1)) begin
+              state_d = Ready;
+            end
+
+            // a random delay of zero completes the handshake in the same cycle
+            if (StallRandom && counter_load == 0) begin
+              valid_o = 1'b1;
+              ready_o = ready_i;
+              if (ready_i) state_d = Idle;
+              else state_d = Ready;
+            end
+          end
+        end
+        Valid: begin
+          en = 1'b1;
+          if (count_out == 0) begin
+            state_d = Ready;
+          end
+        end
+
+        Ready: begin
+          valid_o = 1'b1;
+          ready_o = ready_i;
+          if (ready_i) state_d = Idle;
+        end
+        default : /* default */;
+      endcase
+
+    end
+
+    if (StallRandom) begin : random_stall
+      // presumably refill_way_bin is a 4-bit value for WIDTH = 16; verify
+      // against lfsr_16bit, which is not visible here
+      lfsr_16bit #(
+        .WIDTH ( 16 )
+      ) i_lfsr_16bit (
+        .clk_i          ( clk_i        ),
+        .rst_ni         ( rst_ni       ),
+        .en_i           ( load         ),
+        .refill_way_oh  (              ),
+        .refill_way_bin ( counter_load )
+      );
+    end else begin
+      // the int parameter is implicitly truncated to COUNTER_BITS bits
+      assign counter_load = FixedDelay;
+    end
+
+    // loaded with counter_load on `load`, enabled while in the Valid state,
+    // with down_i tied high (presumably a down-counter; `counter` is defined
+    // elsewhere)
+    counter #(
+      .WIDTH      ( COUNTER_BITS )
+    ) i_counter (
+      .clk_i      ( clk_i        ),
+      .rst_ni     ( rst_ni       ),
+      .clear_i    ( 1'b0         ),
+      .en_i       ( en           ),
+      .load_i     ( load         ),
+      .down_i     ( 1'b1         ),
+      .d_i        ( counter_load ),
+      .q_o        ( count_out    ),
+      .overflow_o (              )
+    );
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (~rst_ni) begin
+        state_q <= Idle;
+      end else begin
+        state_q <= state_d;
+      end
+    end
+  end
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_demux.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_demux.sv
new file mode 100644
index 0000000000..0d2fed2700
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_demux.sv
@@ -0,0 +1,37 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Stream demultiplexer: Connects the input stream (valid-ready) handshake to one of `N_OUP` output
+/// stream handshakes.
+///
+/// This module has no data ports because stream data does not need to be demultiplexed: the data of
+/// the input stream can just be applied at all output streams.
+
+module stream_demux #(
+  parameter integer N_OUP = 1,
+  /// Dependent parameters, DO NOT OVERRIDE!
+  localparam integer LOG_N_OUP = $clog2(N_OUP)
+) (
+  input  logic                 inp_valid_i,
+  output logic                 inp_ready_o,
+
+  input  logic [LOG_N_OUP-1:0] oup_sel_i,
+
+  output logic [N_OUP-1:0]     oup_valid_o,
+  input  logic [N_OUP-1:0]     oup_ready_i
+);
+
+  // The upstream ready is the ready of the currently selected output.
+  assign inp_ready_o = oup_ready_i[oup_sel_i];
+
+  // Only the selected output sees the upstream valid, all others stay low.
+  always_comb begin
+    oup_valid_o            = '0;
+    oup_valid_o[oup_sel_i] = inp_valid_i;
+  end
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_filter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_filter.sv
new file mode 100644
index 0000000000..52a5835e77
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_filter.sv
@@ -0,0 +1,26 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream filter: If `drop_i` is `1`, signal `ready` to the upstream regardless of the downstream,
+// and do not propagate `valid` downstream. Otherwise, connect upstream to downstream.
+module stream_filter (
+  input  logic valid_i,
+  output logic ready_o,
+
+  input  logic drop_i,
+
+  output logic valid_o,
+  input  logic ready_i
+);
+
+  // Dropping forces ready high towards the upstream ...
+  assign ready_o = ready_i | drop_i;
+  // ... and masks valid towards the downstream.
+  assign valid_o = valid_i & ~drop_i;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_fork.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_fork.sv
new file mode 100644
index 0000000000..aebb0f5d2b
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_fork.sv
@@ -0,0 +1,133 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream fork: Connects the input stream (ready-valid) handshake to *all* of `N_OUP` output stream
+// handshakes. For each input stream handshake, every output stream handshakes exactly once. The
+// input stream only handshakes when all output streams have handshaked, but the output streams do
+// not have to handshake simultaneously.
+//
+// This module has no data ports because stream data does not need to be forked: the data of the
+// input stream can just be applied at all output streams.
+
+module stream_fork #(
+    parameter int unsigned N_OUP = 0 // Synopsys DC requires a default value for parameters.
+) ( + input logic clk_i, + input logic rst_ni, + input logic valid_i, + output logic ready_o, + output logic [N_OUP-1:0] valid_o, + input logic [N_OUP-1:0] ready_i +); + + typedef enum logic {READY, WAIT} state_t; + + logic [N_OUP-1:0] oup_ready, + all_ones; + + state_t inp_state_d, inp_state_q; + + // Input control FSM + always_comb begin + // ready_o = 1'b0; + inp_state_d = inp_state_q; + + unique case (inp_state_q) + READY: begin + if (valid_i) begin + if (valid_o == all_ones && ready_i == all_ones) begin + // If handshake on all outputs, handshake on input. + ready_o = 1'b1; + end else begin + ready_o = 1'b0; + // Otherwise, wait for inputs that did not handshake yet. + inp_state_d = WAIT; + end + end else begin + ready_o = 1'b0; + end + end + WAIT: begin + if (valid_i && oup_ready == all_ones) begin + ready_o = 1'b1; + inp_state_d = READY; + end else begin + ready_o = 1'b0; + end + end + default: begin + inp_state_d = READY; + ready_o = 1'b0; + end + endcase + end + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + inp_state_q <= READY; + end else begin + inp_state_q <= inp_state_d; + end + end + + // Output control FSM + for (genvar i = 0; i < N_OUP; i++) begin: gen_oup_state + state_t oup_state_d, oup_state_q; + + always_comb begin + oup_ready[i] = 1'b1; + valid_o[i] = 1'b0; + oup_state_d = oup_state_q; + + unique case (oup_state_q) + READY: begin + if (valid_i) begin + valid_o[i] = 1'b1; + if (ready_i[i]) begin // Output handshake + if (!ready_o) begin // No input handshake yet + oup_state_d = WAIT; + end + end else begin // No output handshake + oup_ready[i] = 1'b0; + end + end + end + WAIT: begin + if (valid_i && ready_o) begin // Input handshake + oup_state_d = READY; + end + end + default: begin + oup_state_d = READY; + end + endcase + end + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + oup_state_q <= READY; + end else begin + oup_state_q <= oup_state_d; + end + end + end + + assign all_ones = '1; // 
Synthesis fix for Vivado, which does not correctly compute the width + // of the '1 literal when assigned to a port of parametrized width. + +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (N_OUP >= 1) else $fatal("Number of outputs must be at least 1!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_mux.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_mux.sv new file mode 100644 index 0000000000..fe276075c0 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_mux.sv @@ -0,0 +1,46 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Stream multiplexer: connects the output to one of `N_INP` data streams with valid-ready +/// handshaking. + +module stream_mux #( + parameter type DATA_T = logic, // Vivado requires a default value for type parameters. + parameter integer N_INP = 0, // Synopsys DC requires a default value for value parameters. + /// Dependent parameters, DO NOT OVERRIDE! 
+ localparam integer LOG_N_INP = $clog2(N_INP) +) ( + input DATA_T [N_INP-1:0] inp_data_i, + input logic [N_INP-1:0] inp_valid_i, + output logic [N_INP-1:0] inp_ready_o, + + input logic [LOG_N_INP-1:0] inp_sel_i, + + output DATA_T oup_data_o, + output logic oup_valid_o, + input logic oup_ready_i +); + + always_comb begin + inp_ready_o = '0; + inp_ready_o[inp_sel_i] = oup_ready_i; + end + assign oup_data_o = inp_data_i[inp_sel_i]; + assign oup_valid_o = inp_valid_i[inp_sel_i]; + +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (N_INP >= 1) else $fatal ("The number of inputs must be at least 1!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_register.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_register.sv new file mode 100644 index 0000000000..e83228b361 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_register.sv @@ -0,0 +1,57 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Register with a simple stream-like ready/valid handshake. +/// This register does not cut combinatorial paths on all control signals; if you need a complete +/// cut, use the `spill_register`. +module stream_register #( + parameter type T = logic // Vivado requires a default value for type parameters. 
+) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous active-low reset + input logic clr_i, // Synchronous clear + input logic testmode_i, // Test mode to bypass clock gating + // Input port + input logic valid_i, + output logic ready_o, + input T data_i, + // Output port + output logic valid_o, + input logic ready_i, + output T data_o +); + + logic fifo_empty, + fifo_full; + + fifo_v2 #( + .FALL_THROUGH (1'b0), + .DATA_WIDTH ($size(T)), + .DEPTH (1), + .dtype (T) + ) i_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (clr_i), + .testmode_i (testmode_i), + .full_o (fifo_full), + .empty_o (fifo_empty), + .alm_full_o ( ), + .alm_empty_o ( ), + .data_i (data_i), + .push_i (valid_i & ~fifo_full), + .data_o (data_o), + .pop_i (ready_i & ~fifo_empty) + ); + + assign ready_o = ~fifo_full; + assign valid_o = ~fifo_empty; + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/sync.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/sync.sv new file mode 100644 index 0000000000..ff2ef5b5fc --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/sync.sv @@ -0,0 +1,34 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Antonio Pullini + +module sync #( + parameter int unsigned STAGES = 2 +) ( + input logic clk_i, + input logic rst_ni, + input logic serial_i, + output logic serial_o +); + + logic [STAGES-1:0] reg_q; + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + reg_q <= 'h0; + end else begin + reg_q <= {reg_q[STAGES-2:0], serial_i}; + end + end + + assign serial_o = reg_q[STAGES-1]; + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/sync_wedge.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/sync_wedge.sv new file mode 100644 index 0000000000..58f1279808 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/sync_wedge.sv @@ -0,0 +1,56 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Antonio Pullini + +module sync_wedge #( + parameter int unsigned STAGES = 2 +) ( + input logic clk_i, + input logic rst_ni, + input logic en_i, + input logic serial_i, + output logic r_edge_o, + output logic f_edge_o, + output logic serial_o +); + logic clk; + logic serial, serial_q; + + assign serial_o = serial_q; + assign f_edge_o = (~serial) & serial_q; + assign r_edge_o = serial & (~serial_q); + + sync #( + .STAGES (STAGES) + ) i_sync ( + .clk_i, + .rst_ni, + .serial_i, + .serial_o ( serial ) + ); + + pulp_clock_gating i_pulp_clock_gating ( + .clk_i, + .en_i, + .test_en_i ( 1'b0 ), + .clk_o ( clk ) + ); + + always_ff @(posedge clk, negedge rst_ni) begin + if (!rst_ni) begin + serial_q <= 1'b0; + end else begin + if (en_i) begin + serial_q <= serial; + end + end + end +endmodule diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/unread.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/unread.sv new file mode 100644 index 0000000000..80e7356237 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/common_cells/src/unread.sv @@ -0,0 +1,21 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 29.10.2018 +// Description: Dummy circuit to mitigate Open Pin warnings + +/* verilator lint_off UNUSED */ +module unread ( + input logic d_i +); + +endmodule +/* verilator lint_on UNUSED */ diff --git a/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv b/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv new file mode 100644 index 0000000000..9d54c79ed3 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv @@ -0,0 +1,760 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_cast_multi #( + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, + // FPU configuration + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig), + fpnew_pkg::max_int_width(IntFmtConfig)), + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [WIDTH-1:0] operands_i, // 1 operand + input logic [NUM_FORMATS-1:0] is_boxed_i, // 1 operand + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input fpnew_pkg::int_format_e int_fmt_i, + input TagType tag_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS; + localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig); + + localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig); + + localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits; + localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits; + localparam int unsigned SUPER_BIAS = 2**(SUPER_EXP_BITS - 1) - 1; + + // 
The internal mantissa includes normal bit or an entire integer + localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(SUPER_MAN_BITS + 1, MAX_INT_WIDTH); + // If needed, there will be a LZC for renormalization + localparam int unsigned LZC_RESULT_WIDTH = $clog2(INT_MAN_WIDTH); + // The internal exponent must be able to represent the smallest denormal input value as signed + // or the number of bits in an integer + localparam int unsigned INT_EXP_WIDTH = fpnew_pkg::maximum($clog2(MAX_INT_WIDTH), + fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SUPER_BIAS + SUPER_MAN_BITS))) + 1; + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
(NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise + + // --------------- + // Input pipeline + // --------------- + // Selected pipeline output signals as non-arrays + logic [WIDTH-1:0] operands_q; + logic [NUM_FORMATS-1:0] is_boxed_q; + logic op_mod_q; + fpnew_pkg::fp_format_e src_fmt_q; + fpnew_pkg::fp_format_e dst_fmt_q; + fpnew_pkg::int_format_e int_fmt_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][NUM_FORMATS-1:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_src_fmt_q[0] = src_fmt_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_int_fmt_q[0] = int_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready 
signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign is_boxed_q = inp_pipe_is_boxed_q[NUM_INP_REGS]; + assign op_mod_q = inp_pipe_op_mod_q[NUM_INP_REGS]; + assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + assign int_fmt_q = inp_pipe_int_fmt_q[NUM_INP_REGS]; + + // ----------------- + // Input processing + // ----------------- + logic src_is_int, dst_is_int; // if 0, it's a float + + assign src_is_int = 
(inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::I2F); + assign dst_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::F2I); + + logic [INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit + + logic [NUM_FORMATS-1:0] fmt_sign; + logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_exponent; + logic [NUM_FORMATS-1:0][INT_MAN_WIDTH-1:0] fmt_mantissa; + logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_shift_compensation; // for LZC + + fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info; + + logic [NUM_INT_FORMATS-1:0][INT_MAN_WIDTH-1:0] ifmt_input_val; + logic int_sign; + logic [INT_MAN_WIDTH-1:0] int_value, int_mantissa; + + // FP Input initialization + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + // Classify input + fpnew_classifier #( + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .NumOperands ( 1 ) + ) i_fpnew_classifier ( + .operands_i ( operands_q[FP_WIDTH-1:0] ), + .is_boxed_i ( is_boxed_q[fmt] ), + .info_o ( info[fmt] ) + ); + + assign fmt_sign[fmt] = operands_q[FP_WIDTH-1]; + assign fmt_exponent[fmt] = signed'({1'b0, operands_q[MAN_BITS+:EXP_BITS]}); + assign fmt_mantissa[fmt] = {info[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad + // Compensation for the difference in mantissa widths used for leading-zero count + assign fmt_shift_compensation[fmt] = signed'(INT_MAN_WIDTH - 1 - MAN_BITS); + end else begin : inactive_format + assign info[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled + assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_mantissa[fmt] = 
'{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_shift_compensation[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + end + end + + // Sign-extend INT input + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_sign_extend_int + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format // only active formats + always_comb begin : sign_ext_input + // sign-extend value only if it's signed + ifmt_input_val[ifmt] = '{default: operands_q[INT_WIDTH-1] & ~op_mod_q}; + ifmt_input_val[ifmt][INT_WIDTH-1:0] = operands_q[INT_WIDTH-1:0]; + end + end else begin : inactive_format + assign ifmt_input_val[ifmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + end + end + + // Construct input mantissa from integer + assign int_value = ifmt_input_val[int_fmt_q]; + assign int_sign = int_value[INT_MAN_WIDTH-1] & ~op_mod_q; // only signed ints are negative + assign int_mantissa = int_sign ? unsigned'(-int_value) : int_value; // get magnitude of negative + + // select mantissa with source format + assign encoded_mant = src_is_int ? 
int_mantissa : fmt_mantissa[src_fmt_q]; + + // -------------- + // Normalization + // -------------- + logic signed [INT_EXP_WIDTH-1:0] src_bias; // src format bias + logic signed [INT_EXP_WIDTH-1:0] src_exp; // src format exponent (biased) + logic signed [INT_EXP_WIDTH-1:0] src_subnormal; // src is subnormal + logic signed [INT_EXP_WIDTH-1:0] src_offset; // src offset within mantissa + + assign src_bias = signed'(fpnew_pkg::bias(src_fmt_q)); + assign src_exp = fmt_exponent[src_fmt_q]; + assign src_subnormal = signed'({1'b0, info[src_fmt_q].is_subnormal}); + assign src_offset = fmt_shift_compensation[src_fmt_q]; + + logic input_sign; // input sign + logic signed [INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent + logic [INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa + logic mant_is_zero; // for integer zeroes + + logic signed [INT_EXP_WIDTH-1:0] fp_input_exp; + logic signed [INT_EXP_WIDTH-1:0] int_input_exp; + + // Input mantissa needs to be normalized + logic [LZC_RESULT_WIDTH-1:0] renorm_shamt; // renormalization shift amount + logic [LZC_RESULT_WIDTH:0] renorm_shamt_sgn; // signed form for calculations + + // Leading-zero counter is needed for renormalization + lzc #( + .WIDTH ( INT_MAN_WIDTH ), + .MODE ( 1 ) // MODE = 1 counts leading zeroes + ) i_lzc ( + .in_i ( encoded_mant ), + .cnt_o ( renorm_shamt ), + .empty_o ( mant_is_zero ) + ); + assign renorm_shamt_sgn = signed'({1'b0, renorm_shamt}); + + // Get the sign from the proper source + assign input_sign = src_is_int ? int_sign : fmt_sign[src_fmt_q]; + // Realign input mantissa, append zeroes if destination is wider + assign input_mant = encoded_mant << renorm_shamt; + // Unbias exponent and compensate for shift + assign fp_input_exp = signed'(src_exp + src_subnormal - src_bias - + renorm_shamt_sgn + src_offset); // compensate for shift + assign int_input_exp = signed'(INT_MAN_WIDTH - 1 - renorm_shamt_sgn); + + assign input_exp = src_is_int ? 
int_input_exp : fp_input_exp; + + logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination + + // Rebias the exponent + assign destination_exp = input_exp + signed'(fpnew_pkg::bias(dst_fmt_q)); + + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic input_sign_q; + logic signed [INT_EXP_WIDTH-1:0] input_exp_q; + logic [INT_MAN_WIDTH-1:0] input_mant_q; + logic signed [INT_EXP_WIDTH-1:0] destination_exp_q; + logic src_is_int_q; + logic dst_is_int_q; + fpnew_pkg::fp_info_t info_q; + logic mant_is_zero_q; + logic op_mod_q2; + fpnew_pkg::roundmode_e rnd_mode_q; + fpnew_pkg::fp_format_e src_fmt_q2; + fpnew_pkg::fp_format_e dst_fmt_q2; + fpnew_pkg::int_format_e int_fmt_q2; + // Internal pipeline signals, index i holds signal after i register stages + + + logic [0:NUM_MID_REGS] mid_pipe_input_sign_q; + logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q; + logic [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q; + logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_dest_exp_q; + logic [0:NUM_MID_REGS] mid_pipe_src_is_int_q; + logic [0:NUM_MID_REGS] mid_pipe_dst_is_int_q; + fpnew_pkg::fp_info_t [0:NUM_MID_REGS] mid_pipe_info_q; + logic [0:NUM_MID_REGS] mid_pipe_mant_zero_q; + logic [0:NUM_MID_REGS] mid_pipe_op_mod_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; + fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_input_sign_q[0] = input_sign; + assign mid_pipe_input_exp_q[0] = input_exp; + 
assign mid_pipe_input_mant_q[0] = input_mant; + assign mid_pipe_dest_exp_q[0] = destination_exp; + assign mid_pipe_src_is_int_q[0] = src_is_int; + assign mid_pipe_dst_is_int_q[0] = dst_is_int; + assign mid_pipe_info_q[0] = info[src_fmt_q]; + assign mid_pipe_mant_zero_q[0] = mant_is_zero; + assign mid_pipe_op_mod_q[0] = op_mod_q; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_src_fmt_q[0] = src_fmt_q; + assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; + assign mid_pipe_int_fmt_q[0] = int_fmt_q; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_input_mant_q[i+1], mid_pipe_input_mant_q[i], reg_ena, '0) + `FFL(mid_pipe_dest_exp_q[i+1], mid_pipe_dest_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_src_is_int_q[i+1], mid_pipe_src_is_int_q[i], reg_ena, '0) + `FFL(mid_pipe_dst_is_int_q[i+1], mid_pipe_dst_is_int_q[i], reg_ena, '0) + `FFL(mid_pipe_info_q[i+1], mid_pipe_info_q[i], reg_ena, '0) + `FFL(mid_pipe_mant_zero_q[i+1], mid_pipe_mant_zero_q[i], reg_ena, '0) + `FFL(mid_pipe_op_mod_q[i+1], mid_pipe_op_mod_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS]; + assign input_exp_q = mid_pipe_input_exp_q[NUM_MID_REGS]; + assign input_mant_q = mid_pipe_input_mant_q[NUM_MID_REGS]; + assign destination_exp_q = mid_pipe_dest_exp_q[NUM_MID_REGS]; + 
assign src_is_int_q = mid_pipe_src_is_int_q[NUM_MID_REGS]; + assign dst_is_int_q = mid_pipe_dst_is_int_q[NUM_MID_REGS]; + assign info_q = mid_pipe_info_q[NUM_MID_REGS]; + assign mant_is_zero_q = mid_pipe_mant_zero_q[NUM_MID_REGS]; + assign op_mod_q2 = mid_pipe_op_mod_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign src_fmt_q2 = mid_pipe_src_fmt_q[NUM_MID_REGS]; + assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS]; + assign int_fmt_q2 = mid_pipe_int_fmt_q[NUM_MID_REGS]; + + // -------- + // Casting + // -------- + logic [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments + + logic [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift + logic [2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit + logic [SUPER_MAN_BITS-1:0] final_mant; // mantissa after adjustments + logic [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position + + logic [$clog2(INT_MAN_WIDTH+1)-1:0] denorm_shamt; // shift amount for denormalization + + logic [1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits; + logic of_before_round, uf_before_round; + + + // Perform adjustments to mantissa and exponent + always_comb begin : cast_value + // Default assignment + final_exp = unsigned'(destination_exp_q); // take exponent as is, only look at lower bits + preshift_mant = '0; // initialize mantissa container with zeroes + denorm_shamt = SUPER_MAN_BITS - fpnew_pkg::man_bits(dst_fmt_q2); // right of mantissa + of_before_round = 1'b0; + uf_before_round = 1'b0; + + // Place mantissa to the left of the shifter + preshift_mant = input_mant_q << (INT_MAN_WIDTH + 1); + + // Handle INT casts + if (dst_is_int_q) begin + // By default right shift mantissa to be an integer + denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q); + // overflow: when converting to unsigned the range is larger by one + if (input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin + denorm_shamt = '0; // 
prevent shifting + of_before_round = 1'b1; + // underflow + end else if (input_exp_q < -1) begin + denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky + uf_before_round = 1'b1; + end + // Handle FP over-/underflows + end else begin + // Overflow or infinities (for proper rounding) + if ((destination_exp_q >= signed'(2**fpnew_pkg::exp_bits(dst_fmt_q2))-1) || + (~src_is_int_q && info_q.is_inf)) begin + final_exp = unsigned'(2**fpnew_pkg::exp_bits(dst_fmt_q2)-2); // largest normal value + preshift_mant = '1; // largest normal value and RS bits set + of_before_round = 1'b1; + // Denormalize underflowing values + end else if (destination_exp_q < 1 && + destination_exp_q >= -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin + final_exp = '0; // denormal result + denorm_shamt = unsigned'(denorm_shamt + 1 - destination_exp_q); // adjust right shifting + uf_before_round = 1'b1; + // Limit the shift to retain sticky bits + end else if (destination_exp_q < -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin + final_exp = '0; // denormal result + denorm_shamt = unsigned'(denorm_shamt + 2 + fpnew_pkg::man_bits(dst_fmt_q2)); // to sticky + uf_before_round = 1'b1; + end + end + end + + localparam NUM_FP_STICKY = 2 * INT_MAN_WIDTH - SUPER_MAN_BITS - 1; // removed mantissa, 1. 
and R + localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R + + // Mantissa adjustment shift + assign destination_mant = preshift_mant >> denorm_shamt; + // Extract final mantissa and round bit, discard the normal bit (for FP) + assign {final_mant, fp_round_sticky_bits[1]} = + destination_mant[2*INT_MAN_WIDTH-1-:SUPER_MAN_BITS+1]; + assign {final_int, int_round_sticky_bits[1]} = destination_mant[2*INT_MAN_WIDTH-:MAX_INT_WIDTH+1]; + // Collapse sticky bits + assign fp_round_sticky_bits[0] = (| {destination_mant[NUM_FP_STICKY-1:0]}); + assign int_round_sticky_bits[0] = (| {destination_mant[NUM_INT_STICKY-1:0]}); + + // select RS bits for destination operation + assign round_sticky_bits = dst_is_int_q ? int_round_sticky_bits : fp_round_sticky_bits; + + // ---------------------------- + // Rounding and classification + // ---------------------------- + logic [WIDTH-1:0] pre_round_abs; // absolute value of result before rnd + logic of_after_round; // overflow + logic uf_after_round; // underflow + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_pre_round_abs; // per format + logic [NUM_FORMATS-1:0] fmt_of_after_round; + logic [NUM_FORMATS-1:0] fmt_uf_after_round; + + logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format + + logic rounded_sign; + logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding + logic result_true_zero; + + logic [WIDTH-1:0] rounded_int_res; // after possible inversion + logic rounded_int_res_zero; // after rounding + + + // Pack exponent and mantissa into proper rounding form + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble + // Set up some constants + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : assemble_result + fmt_pre_round_abs[fmt] = 
{final_exp[EXP_BITS-1:0], final_mant[MAN_BITS-1:0]}; // 0-extend + end + end else begin : inactive_format + assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Sign-extend integer result + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_res_sign_ext + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format + always_comb begin : assemble_result + // sign-extend reusult + ifmt_pre_round_abs[ifmt] = '{default: final_int[INT_WIDTH-1]}; + ifmt_pre_round_abs[ifmt][INT_WIDTH-1:0] = final_int[INT_WIDTH-1:0]; + end + end else begin : inactive_format + assign ifmt_pre_round_abs[ifmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Select output with destination format and operation + assign pre_round_abs = dst_is_int_q ? ifmt_pre_round_abs[int_fmt_q2] : fmt_pre_round_abs[dst_fmt_q2]; + + fpnew_rounding #( + .AbsWidth ( WIDTH ) + ) i_fpnew_rounding ( + .abs_value_i ( pre_round_abs ), + .sign_i ( input_sign_q ), // source format + .round_sticky_bits_i ( round_sticky_bits ), + .rnd_mode_i ( rnd_mode_q ), + .effective_subtraction_i ( 1'b0 ), // no operation happened + .abs_rounded_o ( rounded_abs ), + .sign_o ( rounded_sign ), + .exact_zero_o ( result_true_zero ) + ); + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result; + + // Detect overflows and inject sign + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : post_process + // detect of / uf + fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // 
denormal + fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. + + // Assemble regular result, nan box short ones. Int zeroes need to be detected` + fmt_result[fmt] = '1; + fmt_result[fmt][FP_WIDTH-1:0] = src_is_int_q & mant_is_zero_q + ? '0 + : {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]}; + end + end else begin : inactive_format + assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Classification after rounding select by destination format + assign uf_after_round = fmt_uf_after_round[dst_fmt_q2]; + assign of_after_round = fmt_of_after_round[dst_fmt_q2]; + + // Negative integer result needs to be brought into two's complement + assign rounded_int_res = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs; + assign rounded_int_res_zero = (rounded_int_res == '0); + + // ------------------------- + // FP Special case handling + // ------------------------- + logic [WIDTH-1:0] fp_special_result; + fpnew_pkg::status_t fp_special_status; + logic fp_result_is_special; + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result; + + // Special result construction + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1; + localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1); + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : special_results + logic [FP_WIDTH-1:0] special_res; + special_res = info_q.is_zero + ? 
input_sign_q << FP_WIDTH-1 // signed zero + : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN + + // Initialize special result with ones (NaN-box) + fmt_special_result[fmt] = '1; + fmt_special_result[fmt][FP_WIDTH-1:0] = special_res; + end + end else begin : inactive_format + assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Detect special case from source format, I2F casts don't produce a special result + assign fp_result_is_special = ~src_is_int_q & (info_q.is_zero | + info_q.is_nan | + ~info_q.is_boxed); + + // Signalling input NaNs raise invalid flag, otherwise no flags set + assign fp_special_status = '{NV: info_q.is_signalling, default: 1'b0}; + + // Assemble result according to destination format + assign fp_special_result = fmt_special_result[dst_fmt_q2]; // destination format + + // -------------------------- + // INT Special case handling + // -------------------------- + logic [WIDTH-1:0] int_special_result; + fpnew_pkg::status_t int_special_status; + logic int_result_is_special; + + logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_special_result; + + // Special result construction + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_special_results_int + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format + always_comb begin : special_results + automatic logic [INT_WIDTH-1:0] special_res; + + // Default is overflow to positive max, which is 2**INT_WIDTH-1 or 2**(INT_WIDTH-1)-1 + special_res[INT_WIDTH-2:0] = '1; // alone yields 2**(INT_WIDTH-1)-1 + special_res[INT_WIDTH-1] = op_mod_q2; // for unsigned casts yields 2**INT_WIDTH-1 + + // Negative special case (except for nans) tie to -max or 0 + if (input_sign_q && !info_q.is_nan) + special_res = ~special_res; + + // Initialize special result with sign-extension + ifmt_special_result[ifmt] = '{default: special_res[INT_WIDTH-1]}; + 
ifmt_special_result[ifmt][INT_WIDTH-1:0] = special_res; + end + end else begin : inactive_format + assign ifmt_special_result[ifmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) + assign int_result_is_special = info_q.is_nan | info_q.is_inf | + of_before_round | ~info_q.is_boxed | + (input_sign_q & op_mod_q2 & ~rounded_int_res_zero); + + // All integer special cases are invalid + assign int_special_status = '{NV: 1'b1, default: 1'b0}; + + // Assemble result according to destination format + assign int_special_result = ifmt_special_result[int_fmt_q2]; // destination format + + // ----------------- + // Result selection + // ----------------- + fpnew_pkg::status_t int_regular_status, fp_regular_status; + + logic [WIDTH-1:0] fp_result, int_result; + fpnew_pkg::status_t fp_status, int_status; + + assign fp_regular_status.NV = src_is_int_q & (of_before_round | of_after_round); // overflow is invalid for I2F casts + assign fp_regular_status.DZ = 1'b0; // no divisions + assign fp_regular_status.OF = ~src_is_int_q & (~info_q.is_inf & (of_before_round | of_after_round)); // inf casts no OF + assign fp_regular_status.UF = uf_after_round & fp_regular_status.NX; + assign fp_regular_status.NX = src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f + : (| fp_round_sticky_bits) | (~info_q.is_inf & (of_before_round | of_after_round)); + assign int_regular_status = '{NX: (| int_round_sticky_bits), default: 1'b0}; + + assign fp_result = fp_result_is_special ? fp_special_result : fmt_result[dst_fmt_q2]; + assign fp_status = fp_result_is_special ? fp_special_status : fp_regular_status; + assign int_result = int_result_is_special ? int_special_result : rounded_int_res; + assign int_status = int_result_is_special ? 
int_special_status : int_regular_status; + + // Final results for output pipeline + logic [WIDTH-1:0] result_d; + fpnew_pkg::status_t status_d; + logic extension_bit; + + // Select output depending on special case detection + assign result_d = dst_is_int_q ? int_result : fp_result; + assign status_d = dst_is_int_q ? int_status : fp_status; + + // MSB of int result decides extension, otherwise NaN box + assign extension_bit = dst_is_int_q ? int_result[WIDTH-1] : 1'b1; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_ext_bit_q[0] = extension_bit; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS]; + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); +endmodule diff --git a/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv b/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv new file mode 100644 index 0000000000..5e4fab9304 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv @@ -0,0 +1,72 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach
+
+// Combinational classifier: for each of the NumOperands input words it splits the
+// word into sign / exponent / mantissa fields of the selected FpFormat and reports
+// its class (normal, subnormal, zero, inf, NaN, signalling/quiet, boxed) in info_o.
+// An operand whose is_boxed_i bit is low is reported as a quiet NaN and nothing else.
+module fpnew_classifier #(
+  parameter fpnew_pkg::fp_format_e   FpFormat = fpnew_pkg::fp_format_e'(0),
+  parameter int unsigned             NumOperands = 1,
+  // Do not change
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat)
+) (
+  input  logic                [NumOperands-1:0][WIDTH-1:0] operands_i,
+  input  logic                [NumOperands-1:0]            is_boxed_i,
+  output fpnew_pkg::fp_info_t [NumOperands-1:0]            info_o
+);
+
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+
+  // Field view of one operand word
+  typedef struct packed {
+    logic                sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // One classifier slice per operand
+  for (genvar op = 0; op < int'(NumOperands); op++) begin : gen_num_values
+
+    fp_t  operand;      // current operand, split into fields
+    logic boxed;        // operand is properly NaN-boxed
+    logic exp_is_zero;  // exponent field is all zeroes
+    logic exp_is_ones;  // exponent field is all ones
+    logic man_is_zero;  // mantissa field is all zeroes
+    logic normal, subnormal, zero, inf, nan, signalling, quiet;
+
+    // -----------------
+    // Field predicates
+    // -----------------
+    assign operand     = operands_i[op];
+    assign boxed       = is_boxed_i[op];
+    assign exp_is_zero = (operand.exponent == '0);
+    assign exp_is_ones = (operand.exponent == '1);
+    assign man_is_zero = (operand.mantissa == '0);
+
+    // ---------------
+    // Classification
+    // ---------------
+    assign normal     = boxed && !exp_is_zero && !exp_is_ones;
+    assign zero       = boxed && exp_is_zero && man_is_zero;
+    assign subnormal  = boxed && exp_is_zero && !zero;
+    assign inf        = boxed && (exp_is_ones && man_is_zero);
+    // Badly boxed operands are treated as NaN
+    assign nan        = !boxed || (exp_is_ones && !man_is_zero);
+    // A cleared mantissa MSB marks a signalling NaN; unboxed inputs are never signalling
+    assign signalling = boxed && nan && (operand.mantissa[MAN_BITS-1] == 1'b0);
+    assign quiet      = nan && !signalling;
+
+    // Drive every field of this operand's info record
+    always_comb begin : classify_input
+      info_o[op].is_normal     = normal;
+      info_o[op].is_subnormal  = subnormal;
+      info_o[op].is_zero       = zero;
+      info_o[op].is_inf        = inf;
+      info_o[op].is_nan        = nan;
+      info_o[op].is_signalling = signalling;
+      info_o[op].is_quiet      = quiet;
+      info_o[op].is_boxed      = boxed;
+    end
+  end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv b/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
new file mode 100644
index 0000000000..1331f5feba
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
@@ -0,0 +1,340 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach
+
+`include "common_cells/registers.svh"
+
+// Multi-format division / square-root wrapper.
+//
+// Wraps a single `div_sqrt_top_mvp` instance (64-bit operand and result ports) with:
+//  - an optional input and output register pipeline (sizes derived from PipeConfig/NumPipeRegs),
+//  - a small FSM (IDLE/BUSY/HOLD) that sequences the unit and the valid/ready handshakes,
+//  - a hold register that keeps a finished result while downstream is not ready.
+//
+// Behaviour visible in this module:
+//  - op_i == fpnew_pkg::DIV starts a division, every other operation starts a square root.
+//  - FP8 operations (when enabled in FpFmtConfig) are run as FP16: operands are shifted left by
+//    8 bits on the way in and the result is shifted right by 8 bits on the way out.
+//  - flush_i clears the pipeline valid bits, kills the unit and returns the FSM to IDLE.
+//  - is_boxed_i is part of the port list but is not used inside this module.
+//  - extension_bit_o is tied to 1 (results are always NaN-boxed).
+module fpnew_divsqrt_multi #(
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig  = '1,
+  // FPU configuration
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::AFTER,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+  // Do not change
+  localparam int unsigned WIDTH       = fpnew_pkg::max_fp_width(FpFmtConfig),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input  logic                        clk_i,
+  input  logic                        rst_ni,
+  // Input signals
+  input  logic [1:0][WIDTH-1:0]       operands_i, // 2 operands
+  input  logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands
+  input  fpnew_pkg::roundmode_e       rnd_mode_i,
+  input  fpnew_pkg::operation_e       op_i,
+  input  fpnew_pkg::fp_format_e       dst_fmt_i,
+  input  TagType                      tag_i,
+  input  AuxType                      aux_i,
+  // Input Handshake
+  input  logic                        in_valid_i,
+  output logic                        in_ready_o,
+  input  logic                        flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]            result_o,
+  output fpnew_pkg::status_t          status_o,
+  output logic                        extension_bit_o,
+  output TagType                      tag_o,
+  output AuxType                      aux_o,
+  // Output handshake
+  output logic                        out_valid_o,
+  input  logic                        out_ready_i,
+  // Indication of valid data in flight
+  output logic                        busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  // Pipelines
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Selected pipeline output signals as non-arrays
+  logic [1:0][WIDTH-1:0] operands_q;
+  fpnew_pkg::roundmode_e rnd_mode_q;
+  fpnew_pkg::operation_e op_q;
+  fpnew_pkg::fp_format_e dst_fmt_q;
+  logic                  in_valid_q;
+
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                 inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                 inp_pipe_op_q;
+  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                 inp_pipe_dst_fmt_q;
+  TagType                [0:NUM_INP_REGS]                 inp_pipe_tag_q;
+  AuxType                [0:NUM_INP_REGS]                 inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_dst_fmt_q[i+1],  inp_pipe_dst_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+  assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign op_q       = inp_pipe_op_q[NUM_INP_REGS];
+  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+  assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS];
+
+  // -----------------
+  // Input processing
+  // -----------------
+  logic [1:0]       divsqrt_fmt;
+  logic [1:0][63:0] divsqrt_operands; // those are fixed to 64bit
+  logic             input_is_fp8;
+
+  // Translate fpnew formats into divsqrt formats
+  always_comb begin : translate_fmt
+    unique case (dst_fmt_q)
+      fpnew_pkg::FP32:    divsqrt_fmt = 2'b00;
+      fpnew_pkg::FP64:    divsqrt_fmt = 2'b01;
+      fpnew_pkg::FP16:    divsqrt_fmt = 2'b10;
+      fpnew_pkg::FP16ALT: divsqrt_fmt = 2'b11;
+      default:            divsqrt_fmt = 2'b10; // maps also FP8 to FP16
+    endcase
+
+    // Only if FP8 is enabled
+    input_is_fp8 = FpFmtConfig[fpnew_pkg::FP8] & (dst_fmt_q == fpnew_pkg::FP8);
+
+    // If FP8 is supported, map it to an FP16 value
+    divsqrt_operands[0] = input_is_fp8 ? operands_q[0] << 8 : operands_q[0];
+    divsqrt_operands[1] = input_is_fp8 ? operands_q[1] << 8 : operands_q[1];
+  end
+
+  // ------------
+  // Control FSM
+  // ------------
+  logic in_ready;               // input handshake with upstream
+  logic div_valid, sqrt_valid;  // input signalling with unit
+  logic unit_ready, unit_done;  // status signals from unit instance
+  logic op_starting;            // high in the cycle a new operation starts
+  logic out_valid, out_ready;   // output handshake with downstream
+  logic hold_result;            // whether to put result into hold register
+  logic data_is_held;           // data in hold register is valid
+  logic unit_busy;              // valid data in flight
+  // FSM states
+  typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
+  fsm_state_e state_q, state_d;
+
+  // Upstream ready comes from sanitization FSM
+  assign inp_pipe_ready[NUM_INP_REGS] = in_ready;
+
+  // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr.
+  assign div_valid   = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i;
+  assign sqrt_valid  = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i;
+  assign op_starting = div_valid | sqrt_valid;
+
+  // FSM to safely apply and receive data from DIVSQRT unit
+  always_comb begin : flag_fsm
+    // Default assignments
+    in_ready     = 1'b0;
+    out_valid    = 1'b0;
+    hold_result  = 1'b0;
+    data_is_held = 1'b0;
+    unit_busy    = 1'b0;
+    state_d      = state_q;
+
+    unique case (state_q)
+      // Waiting for work
+      IDLE: begin
+        // Only acknowledge upstream when the unit can actually start the operation. With an
+        // unconditional ready, an operation presented while the unit is not ready would be
+        // consumed by the handshake although the FSM stays in IDLE, i.e. it would be dropped.
+        // This matches the BUSY/HOLD states, which also gate the acknowledge with unit_ready.
+        in_ready = unit_ready;
+        if (in_valid_q && unit_ready) begin // New work arrives
+          state_d = BUSY; // go into processing state
+        end
+      end
+      // Operation in progress
+      BUSY: begin
+        unit_busy = 1'b1; // data in flight
+        // If the unit is done with processing
+        if (unit_done) begin
+          out_valid = 1'b1; // try to commit result downstream
+          // If downstream accepts our result
+          if (out_ready) begin
+            state_d = IDLE; // we anticipate going back to idling..
+            if (in_valid_q && unit_ready) begin // ..unless new work comes in
+              in_ready = 1'b1; // we acknowledge the instruction
+              state_d  = BUSY; // and stay busy with it
+            end
+          // Otherwise if downstream is not ready for the result
+          end else begin
+            hold_result = 1'b1; // activate the hold register
+            state_d     = HOLD; // wait for the pipeline to take the data
+          end
+        end
+      end
+      // Waiting with valid result for downstream
+      HOLD: begin
+        unit_busy    = 1'b1; // data in flight
+        data_is_held = 1'b1; // data in hold register is valid
+        out_valid    = 1'b1; // try to commit result downstream
+        // If the result is accepted by downstream
+        if (out_ready) begin
+          state_d = IDLE; // go back to idle..
+          if (in_valid_q && unit_ready) begin // ..unless new work comes in
+            in_ready = 1'b1; // acknowledge the new transaction
+            state_d  = BUSY; // will be busy with the next instruction
+          end
+        end
+      end
+      // fall into idle state otherwise
+      default: state_d = IDLE;
+    endcase
+
+    // Flushing overrides the other actions
+    if (flush_i) begin
+      unit_busy = 1'b0; // data is invalidated
+      out_valid = 1'b0; // cancel any valid data
+      state_d   = IDLE; // go to default state
+    end
+  end
+
+  // FSM status register (asynch active low reset)
+  `FF(state_q, state_d, IDLE)
+
+  // Hold additional information while the operation is in progress
+  logic   result_is_fp8_q;
+  TagType result_tag_q;
+  AuxType result_aux_q;
+
+  // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst).
+  // Reset values are cast to the register type, like the tag/aux pipeline registers.
+  `FFL(result_is_fp8_q, input_is_fp8,                 op_starting, '0)
+  `FFL(result_tag_q,    inp_pipe_tag_q[NUM_INP_REGS], op_starting, TagType'('0))
+  `FFL(result_aux_q,    inp_pipe_aux_q[NUM_INP_REGS], op_starting, AuxType'('0))
+
+  // -----------------
+  // DIVSQRT instance
+  // -----------------
+  logic [63:0]        unit_result;
+  logic [WIDTH-1:0]   adjusted_result, held_result_q;
+  fpnew_pkg::status_t unit_status, held_status_q;
+
+  div_sqrt_top_mvp i_divsqrt_lei (
+    .Clk_CI           ( clk_i               ),
+    .Rst_RBI          ( rst_ni              ),
+    .Div_start_SI     ( div_valid           ),
+    .Sqrt_start_SI    ( sqrt_valid          ),
+    .Operand_a_DI     ( divsqrt_operands[0] ),
+    .Operand_b_DI     ( divsqrt_operands[1] ),
+    .RM_SI            ( rnd_mode_q          ),
+    .Precision_ctl_SI ( '0                  ),
+    .Format_sel_SI    ( divsqrt_fmt         ),
+    .Kill_SI          ( flush_i             ),
+    .Result_DO        ( unit_result         ),
+    .Fflags_SO        ( unit_status         ),
+    .Ready_SO         ( unit_ready          ),
+    .Done_SO          ( unit_done           )
+  );
+
+  // Adjust result width and fix FP8 (undo the input shift by 8 bits)
+  assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result;
+
+  // The Hold register (load, no reset)
+  `FFLNR(held_result_q, adjusted_result, hold_result, clk_i)
+  `FFLNR(held_status_q, unit_status,     hold_result, clk_i)
+
+  // --------------
+  // Output Select
+  // --------------
+  logic [WIDTH-1:0]   result_d;
+  fpnew_pkg::status_t status_d;
+  // Prioritize hold register data
+  assign result_d = data_is_held ? held_result_q : adjusted_result;
+  assign status_d = data_is_held ? held_status_q : unit_status;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
+  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0]    = result_tag_q;
+  assign out_pipe_aux_q[0]    = result_aux_q;
+  assign out_pipe_valid_q[0]  = out_valid;
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign out_ready = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q});
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_fma.sv b/vendor/pulp-platform/fpnew/src/fpnew_fma.sv
new file mode 100644
index 0000000000..f9fa813bae
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_fma.sv
@@ -0,0 +1,673 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_fma #( + parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + + localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [2:0][WIDTH-1:0] operands_i, // 3 operands + input logic [2:0] is_boxed_i, // 3 operands + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input TagType tag_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat); + localparam int unsigned BIAS = fpnew_pkg::bias(FpFormat); + // Precision bits 'p' include the implicit bit + localparam int unsigned PRECISION_BITS = MAN_BITS + 1; + // The lower 2p+3 bits of the internal FMA result 
will be needed for leading-zero detection + localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3; + localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH); + // Internal exponent width of FMA must accomodate all meaningful exponent values in order to avoid + // datapath leakage. This is either given by the exponent bits or the width of the LZC result. + // In most reasonable FP formats the internal exponent will be wider than the LZC result. + localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH)); + // Shift amount width: maximum internal mantissa size is 3p+3 bits + localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3); + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
(NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise + + // ---------------- + // Type definition + // ---------------- + typedef struct packed { + logic sign; + logic [EXP_BITS-1:0] exponent; + logic [MAN_BITS-1:0] mantissa; + } fp_t; + + // --------------- + // Input pipeline + // --------------- + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][2:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + + // ----------------- + // Input processing + // ----------------- + fpnew_pkg::fp_info_t [2:0] info_q; + + // Classify input + fpnew_classifier #( + .FpFormat ( FpFormat ), + .NumOperands ( 3 ) + ) i_class_inputs ( + .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ), + .info_o ( info_q ) + ); + + fp_t operand_a, operand_b, operand_c; + fpnew_pkg::fp_info_t info_a, info_b, info_c; + + // Operation selection and operand adjustment + // | \c op_q | \c op_mod_q | Operation Adjustment + // |:--------:|:-----------:|--------------------- + // | FMADD | \c 0 | FMADD: none + // | FMADD | \c 1 | FMSUB: Invert sign of operand C + // | FNMSUB | \c 0 | FNMSUB: Invert sign of operand A + // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C + // | ADD | \c 0 | ADD: Set operand A to +1.0 + // | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C + // | MUL | \c 0 | MUL: Set operand C 
to +0.0 + // | *others* | \c - | *invalid* + // \note \c op_mod_q always inverts the sign of the addend. + always_comb begin : op_select + + // Default assignments - packing-order-agnostic + operand_a = inp_pipe_operands_q[NUM_INP_REGS][0]; + operand_b = inp_pipe_operands_q[NUM_INP_REGS][1]; + operand_c = inp_pipe_operands_q[NUM_INP_REGS][2]; + info_a = info_q[0]; + info_b = info_q[1]; + info_c = info_q[2]; + + // op_mod_q inverts sign of operand C + operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + + unique case (inp_pipe_op_q[NUM_INP_REGS]) + fpnew_pkg::FMADD: ; // do nothing + fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product + fpnew_pkg::ADD: begin // Set multiplicand to +1 + operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0}; + info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. + end + fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN) + operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; + info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value. 
+ end + default: begin // propagate don't cares + operand_a = '{default: fpnew_pkg::DONT_CARE}; + operand_b = '{default: fpnew_pkg::DONT_CARE}; + operand_c = '{default: fpnew_pkg::DONT_CARE}; + info_a = '{default: fpnew_pkg::DONT_CARE}; + info_b = '{default: fpnew_pkg::DONT_CARE}; + info_c = '{default: fpnew_pkg::DONT_CARE}; + end + endcase + end + + // --------------------- + // Input classification + // --------------------- + logic any_operand_inf; + logic any_operand_nan; + logic signalling_nan; + logic effective_subtraction; + logic tentative_sign; + + // Reduction for special case handling + assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf}); + assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan}); + assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling}); + // Effective subtraction in FMA occurs when product and addend signs differ + assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign; + // The tentative sign of the FMA shall be the sign of the product + assign tentative_sign = operand_a.sign ^ operand_b.sign; + + // ---------------------- + // Special case handling + // ---------------------- + fp_t special_result; + fpnew_pkg::status_t special_status; + logic result_is_special; + + always_comb begin : special_cases + // Default assignments + special_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN + special_status = '0; + result_is_special = 1'b0; + + // Handle potentially mixed nan & infinity input => important for the case where infinity and + // zero are multiplied and added to a qnan. 
+ // RISC-V mandates raising the NV exception in these cases: + // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs) + if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin + result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN + special_status.NV = 1'b1; // invalid operation + // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP + end else if (any_operand_nan) begin + result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN + special_status.NV = signalling_nan; // raise the invalid operation flag if signalling + // Special cases involving infinity + end else if (any_operand_inf) begin + result_is_special = 1'b1; // bypass FMA + // Effective addition of opposite infinities (±inf - ±inf) is invalid! + if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction) + special_status.NV = 1'b1; // invalid operation + // Handle cases where output will be inf because of inf product input + else if (info_a.is_inf || info_b.is_inf) begin + // Result is infinity with the sign of the product + special_result = '{sign: operand_a.sign ^ operand_b.sign, exponent: '1, mantissa: '0}; + // Handle cases where the addend is inf + end else if (info_c.is_inf) begin + // Result is inifinity with sign of the addend (= operand_c) + special_result = '{sign: operand_c.sign, exponent: '1, mantissa: '0}; + end + end + end + + // --------------------------- + // Initial exponent data path + // --------------------------- + logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c; + logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference; + logic signed [EXP_WIDTH-1:0] tentative_exponent; + + // Zero-extend exponents into signed container - implicit width extension + assign exponent_a = signed'({1'b0, operand_a.exponent}); + assign exponent_b = signed'({1'b0, operand_b.exponent}); + assign exponent_c = signed'({1'b0, operand_c.exponent}); + + // 
Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx) + // with Ex the encoded exponent and nx the implicit bit. Internal exponents stay biased. + assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm + // Biased product exponent is the sum of encoded exponents minus the bias. + assign exponent_product = (info_a.is_zero || info_b.is_zero) + ? 2 - signed'(BIAS) // in case the product is zero, set minimum exp. + : signed'(exponent_a + info_a.is_subnormal + + exponent_b + info_b.is_subnormal + - signed'(BIAS)); + // Exponent difference is the addend exponent minus the product exponent + assign exponent_difference = exponent_addend - exponent_product; + // The tentative exponent will be the larger of the product or addend exponent + assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product; + + // Shift amount for addend based on exponents (unsigned as only right shifts) + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt; + + always_comb begin : addend_shift_amount + // Product-anchored case, saturated shift (addend is only in the sticky bit) + if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1)) + addend_shamt = 3 * PRECISION_BITS + 4; + // Addend and product will have mutual bits to add + else if (exponent_difference <= signed'(PRECISION_BITS + 2)) + addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference); + // Addend-anchored case, saturated shift (product is only in the sticky bit) + else + addend_shamt = 0; + end + + // ------------------ + // Product data path + // ------------------ + logic [PRECISION_BITS-1:0] mantissa_a, mantissa_b, mantissa_c; + logic [2*PRECISION_BITS-1:0] product; // the p*p product is 2p bits wide + logic [3*PRECISION_BITS+3:0] product_shifted; // addends are 3p+4 bit wide (including G/R) + + // Add implicit bits to mantissae + assign mantissa_a = {info_a.is_normal, operand_a.mantissa}; + assign 
mantissa_b = {info_b.is_normal, operand_b.mantissa}; + assign mantissa_c = {info_c.is_normal, operand_c.mantissa}; + + // Mantissa multiplier (a*b) + assign product = mantissa_a * mantissa_b; + + // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky: + // | 000...000 | product | RS | + // <- p+2 -> <- 2p -> < 2> + assign product_shifted = product << 2; // constant shift + + // ----------------- + // Addend data path + // ----------------- + logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits are needed to go on + logic [PRECISION_BITS-1:0] addend_sticky_bits; // up to p bit of shifted addend are sticky + logic sticky_before_add; // they are compressed into a single sticky bit + logic [3*PRECISION_BITS+3:0] addend_shifted; // addends are 3p+4 bit wide (including G/R) + logic inject_carry_in; // inject carry for subtractions if needed + + // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits + // are shifted out and compressed into a sticky bit. + // BEFORE THE SHIFT: + // | mantissa_c | 000..000 | + // <- p -> <- 3p+4 -> + // AFTER THE SHIFT: + // | 000..........000 | mantissa_c | 000...............0GR | sticky bits | + // <- addend_shamt -> <- p -> <- 2p+4-addend_shamt -> <- up to p -> + assign {addend_after_shift, addend_sticky_bits} = + (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt; + + assign sticky_before_add = (| addend_sticky_bits); + // assign addend_after_shift[0] = sticky_before_add; + + // In case of a subtraction, the addend is inverted + assign addend_shifted = (effective_subtraction) ? 
~addend_after_shift : addend_after_shift; + assign inject_carry_in = effective_subtraction & ~sticky_before_add; + + // ------ + // Adder + // ------ + logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry + logic sum_carry; // observe carry bit from sum for sign fixing + logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow + logic final_sign; + + //Mantissa adder (ab+c). In normal addition, it cannot overflow. + assign sum_raw = product_shifted + addend_shifted + inject_carry_in; + assign sum_carry = sum_raw[3*PRECISION_BITS+4]; + + // Complement negative sum (can only happen in subtraction -> overflows for positive results) + assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; + + // In case of a mispredicted subtraction result, do a sign flip + assign final_sign = (effective_subtraction && (sum_carry == tentative_sign)) + ? 1'b1 + : (effective_subtraction ? 1'b0 : tentative_sign); + + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic effective_subtraction_q; + logic signed [EXP_WIDTH-1:0] exponent_product_q; + logic signed [EXP_WIDTH-1:0] exponent_difference_q; + logic signed [EXP_WIDTH-1:0] tentative_exponent_q; + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; + logic sticky_before_add_q; + logic [3*PRECISION_BITS+3:0] sum_q; + logic final_sign_q; + fpnew_pkg::roundmode_e rnd_mode_q; + logic result_is_special_q; + fp_t special_result_q; + fpnew_pkg::status_t special_status_q; + // Internal pipeline signals, index i holds signal after i register stages + logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q; + logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q; + logic [0:NUM_MID_REGS] mid_pipe_sticky_q; + logic 
[0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q; + logic [0:NUM_MID_REGS] mid_pipe_final_sign_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; + fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; + fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_eff_sub_q[0] = effective_subtraction; + assign mid_pipe_exp_prod_q[0] = exponent_product; + assign mid_pipe_exp_diff_q[0] = exponent_difference; + assign mid_pipe_tent_exp_q[0] = tentative_exponent; + assign mid_pipe_add_shamt_q[0] = addend_shamt; + assign mid_pipe_sticky_q[0] = sticky_before_add; + assign mid_pipe_sum_q[0] = sum; + assign mid_pipe_final_sign_q[0] = final_sign; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_res_is_spec_q[0] = result_is_special; + assign mid_pipe_spec_res_q[0] = special_result; + assign mid_pipe_spec_stat_q[0] = special_status; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0) + `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0) + `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0) + `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0) + `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; + assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; + assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS]; + assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS]; + assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS]; + assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS]; + 
assign sum_q = mid_pipe_sum_q[NUM_MID_REGS]; + assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS]; + assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS]; + assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS]; + + // -------------- + // Normalization + // -------------- + logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched + logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes + logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count + logic lzc_zeroes; // in case only zeroes found + + logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount + logic signed [EXP_WIDTH-1:0] normalized_exponent; + + logic [3*PRECISION_BITS+4:0] sum_shifted; // result after first normalization shift + logic [PRECISION_BITS:0] final_mantissa; // final mantissa before rounding with round bit + logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // remaining 2p+3 sticky bits after normalization + logic sticky_after_norm; // sticky bit after normalization + + logic signed [EXP_WIDTH-1:0] final_exponent; + + assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0]; + + // Leading zero counter for cancellations + lzc #( + .WIDTH ( LOWER_SUM_WIDTH ), + .MODE ( 1 ) // MODE = 1 counts leading zeroes + ) i_lzc ( + .in_i ( sum_lower ), + .cnt_o ( leading_zero_count ), + .empty_o ( lzc_zeroes ) + ); + + assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count}); + + // Normalization shift amount based on exponents and LZC (unsigned as only left shifts) + always_comb begin : norm_shift_amount + // Product-anchored case or cancellations require LZC + if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin + // Normal result (biased exponent > 0 and not a zero) + if ((exponent_product_q - leading_zero_count_sgn + 1 >= 
0) && !lzc_zeroes) begin + // Undo initial product shift, remove the counted zeroes + norm_shamt = PRECISION_BITS + 2 + leading_zero_count; + normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift + // Subnormal result + end else begin + // Cap the shift distance to align mantissa with minimum exponent + norm_shamt = unsigned'(signed'(PRECISION_BITS) + 2 + exponent_product_q); + normalized_exponent = 0; // subnormals encoded as 0 + end + // Addend-anchored case + end else begin + norm_shamt = addend_shamt_q; // Undo the initial shift + normalized_exponent = tentative_exponent_q; + end + end + + // Do the large normalization shift + assign sum_shifted = sum_q << norm_shamt; + + // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left + // or right of the (non-carry) MSB of the sum. + always_comb begin : small_norm + // Default assignment, discarding carry bit + {final_mantissa, sum_sticky_bits} = sum_shifted; + final_exponent = normalized_exponent; + + // The normalized sum has overflown, align right and fix exponent + if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit + {final_mantissa, sum_sticky_bits} = sum_shifted >> 1; + final_exponent = normalized_exponent + 1; + // The normalized sum is normal, nothing to do + end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB + // do nothing + // The normalized sum is still denormal, align left - unless the result is not already subnormal + end else if (normalized_exponent > 1) begin + {final_mantissa, sum_sticky_bits} = sum_shifted << 1; + final_exponent = normalized_exponent - 1; + // Otherwise we're denormal + end else begin + final_exponent = '0; + end + end + + // Update the sticky bit with the shifted-out bits + assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q; + + // ---------------------------- + // Rounding and classification + // ---------------------------- + logic pre_round_sign; 
+ logic [EXP_BITS-1:0] pre_round_exponent; + logic [MAN_BITS-1:0] pre_round_mantissa; + logic [EXP_BITS+MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding + logic [1:0] round_sticky_bits; + + logic of_before_round, of_after_round; // overflow + logic uf_before_round, uf_after_round; // underflow + logic result_zero; + + logic rounded_sign; + logic [EXP_BITS+MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding + + // Classification before round. RISC-V mandates checking underflow AFTER rounding! + assign of_before_round = final_exponent >= 2**(EXP_BITS)-1; // infinity exponent is all ones + assign uf_before_round = final_exponent == 0; // exponent for subnormals capped to 0 + + // Assemble result before rounding. In case of overflow, the largest normal value is set. + assign pre_round_sign = final_sign_q; + assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : unsigned'(final_exponent[EXP_BITS-1:0]); + assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[MAN_BITS:1]; // bit 0 is R bit + assign pre_round_abs = {pre_round_exponent, pre_round_mantissa}; + + // In case of overflow, the round and sticky bits are set for proper rounding + assign round_sticky_bits = (of_before_round) ? 
2'b11 : {final_mantissa[0], sticky_after_norm}; + + // Perform the rounding + fpnew_rounding #( + .AbsWidth ( EXP_BITS + MAN_BITS ) + ) i_fpnew_rounding ( + .abs_value_i ( pre_round_abs ), + .sign_i ( pre_round_sign ), + .round_sticky_bits_i ( round_sticky_bits ), + .rnd_mode_i ( rnd_mode_q ), + .effective_subtraction_i ( effective_subtraction_q ), + .abs_rounded_o ( rounded_abs ), + .sign_o ( rounded_sign ), + .exact_zero_o ( result_zero ) + ); + + // Classification after rounding + assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0 + assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones + + // ----------------- + // Result selection + // ----------------- + logic [WIDTH-1:0] regular_result; + fpnew_pkg::status_t regular_status; + + // Assemble regular result + assign regular_result = {rounded_sign, rounded_abs}; + assign regular_status.NV = 1'b0; // only valid cases are handled in regular path + assign regular_status.DZ = 1'b0; // no divisions + assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow + assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF + assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round; + + // Final results for output pipeline + fp_t result_d; + fpnew_pkg::status_t status_d; + + // Select output depending on special case detection + assign result_d = result_is_special_q ? special_result_q : regular_result; + assign status_d = result_is_special_q ? 
special_status_q : regular_status; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + fp_t [0:NUM_OUT_REGS] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); +endmodule diff --git a/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv b/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv new file mode 100644 index 0000000000..6b52237fa9 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv @@ -0,0 +1,820 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_fma_multi #( // Multi-format fused multiply-add: multiplicands use src_fmt_i, addend and result use dst_fmt_i + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [2:0][WIDTH-1:0] operands_i, // 3 operands + input logic [NUM_FORMATS-1:0][2:0] is_boxed_i, // 3 operands + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands + input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result + input TagType tag_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + // The super-format that can hold all formats + localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig); + + localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits; +
localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits; + + // Precision bits 'p' include the implicit bit + localparam int unsigned PRECISION_BITS = SUPER_MAN_BITS + 1; + // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection + localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3; + localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH); + // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid + // datapath leakage. This is either given by the exponent bits or the width of the LZC result. + // In most reasonable FP formats the internal exponent will be wider than the LZC result. + localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH); + // Shift amount width: maximum internal mantissa size is 3p+3 bits + localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3); + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ?
(NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise + + // ---------------- + // Type definition + // ---------------- + typedef struct packed { + logic sign; + logic [SUPER_EXP_BITS-1:0] exponent; + logic [SUPER_MAN_BITS-1:0] mantissa; + } fp_t; + + // --------------- + // Input pipeline + // --------------- + // Selected pipeline output signals as non-arrays + logic [2:0][WIDTH-1:0] operands_q; + fpnew_pkg::fp_format_e src_fmt_q; + fpnew_pkg::fp_format_e dst_fmt_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_src_fmt_q[0] = src_fmt_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine
the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + + // ----------------- + // Input processing + // ----------------- + logic [NUM_FORMATS-1:0][2:0] fmt_sign; + logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent; + logic [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa; + + fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2:0] info_q; + + // FP Input initialization + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs + //
Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + logic [2:0][FP_WIDTH-1:0] trimmed_ops; + + // Classify input + fpnew_classifier #( + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .NumOperands ( 3 ) + ) i_fpnew_classifier ( + .operands_i ( trimmed_ops ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt] ), + .info_o ( info_q[fmt] ) + ); + for (genvar op = 0; op < 3; op++) begin : gen_operands + assign trimmed_ops[op] = operands_q[op][FP_WIDTH-1:0]; + assign fmt_sign[fmt][op] = operands_q[op][FP_WIDTH-1]; + assign fmt_exponent[fmt][op] = signed'({1'b0, operands_q[op][MAN_BITS+:EXP_BITS]}); + assign fmt_mantissa[fmt][op] = {info_q[fmt][op].is_normal, operands_q[op][MAN_BITS-1:0]} << + (SUPER_MAN_BITS - MAN_BITS); // move to left of mantissa + end + end else begin : inactive_format + assign info_q[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled + assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + end + end + + fp_t operand_a, operand_b, operand_c; + fpnew_pkg::fp_info_t info_a, info_b, info_c; + + // Operation selection and operand adjustment + // | \c op_q | \c op_mod_q | Operation Adjustment + // |:--------:|:-----------:|--------------------- + // | FMADD | \c 0 | FMADD: none + // | FMADD | \c 1 | FMSUB: Invert sign of operand C + // | FNMSUB | \c 0 | FNMSUB: Invert sign of operand A + // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C + // | ADD | \c 0 | ADD: Set operand A to +1.0 + // | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C + // | MUL | \c 0 | MUL: Set
operand C to -0.0 + // | *others* | \c - | *invalid* + // \note \c op_mod_q always inverts the sign of the addend. + always_comb begin : op_select + + // Default assignments - packing-order-agnostic + operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]}; + operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]}; + operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]}; + info_a = info_q[src_fmt_q][0]; + info_b = info_q[src_fmt_q][1]; + info_c = info_q[dst_fmt_q][2]; + + // op_mod_q inverts sign of operand C + operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + + unique case (inp_pipe_op_q[NUM_INP_REGS]) + fpnew_pkg::FMADD: ; // do nothing + fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product + fpnew_pkg::ADD: begin // Set multiplicand to +1 + operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0}; + info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. + end + fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN) + operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; + info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value.
+ end + default: begin // propagate don't cares + operand_a = '{default: fpnew_pkg::DONT_CARE}; + operand_b = '{default: fpnew_pkg::DONT_CARE}; + operand_c = '{default: fpnew_pkg::DONT_CARE}; + info_a = '{default: fpnew_pkg::DONT_CARE}; + info_b = '{default: fpnew_pkg::DONT_CARE}; + info_c = '{default: fpnew_pkg::DONT_CARE}; + end + endcase + end + + // --------------------- + // Input classification + // --------------------- + logic any_operand_inf; + logic any_operand_nan; + logic signalling_nan; + logic effective_subtraction; + logic tentative_sign; + + // Reduction for special case handling + assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf}); + assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan}); + assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling}); + // Effective subtraction in FMA occurs when product and addend signs differ + assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign; + // The tentative sign of the FMA shall be the sign of the product + assign tentative_sign = operand_a.sign ^ operand_b.sign; + + // ---------------------- + // Special case handling + // ---------------------- + logic [WIDTH-1:0] special_result; + fpnew_pkg::status_t special_status; + logic result_is_special; + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result; + fpnew_pkg::status_t [NUM_FORMATS-1:0] fmt_special_status; + logic [NUM_FORMATS-1:0] fmt_result_is_special; + + + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1; + localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1);
+ localparam logic [MAN_BITS-1:0] ZERO_MANTISSA = '0; + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : special_results + logic [FP_WIDTH-1:0] special_res; + + // Default assignment + special_res = {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN + fmt_special_status[fmt] = '0; + fmt_result_is_special[fmt] = 1'b0; + + // Handle potentially mixed nan & infinity input => important for the case where infinity and + // zero are multiplied and added to a qnan. + // RISC-V mandates raising the NV exception in these cases: + // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs) + if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin + fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN + fmt_special_status[fmt].NV = 1'b1; // invalid operation + // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP + end else if (any_operand_nan) begin + fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN + fmt_special_status[fmt].NV = signalling_nan; // raise the invalid operation flag if signalling + // Special cases involving infinity + end else if (any_operand_inf) begin + fmt_result_is_special[fmt] = 1'b1; // bypass FMA + // Effective addition of opposite infinities (±inf - ±inf) is invalid!
+ if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction) + fmt_special_status[fmt].NV = 1'b1; // invalid operation + // Handle cases where output will be inf because of inf product input + else if (info_a.is_inf || info_b.is_inf) begin + // Result is infinity with the sign of the product + special_res = {operand_a.sign ^ operand_b.sign, QNAN_EXPONENT, ZERO_MANTISSA}; + // Handle cases where the addend is inf + end else if (info_c.is_inf) begin + // Result is infinity with sign of the addend (= operand_c) + special_res = {operand_c.sign, QNAN_EXPONENT, ZERO_MANTISSA}; + end + end + // Initialize special result with ones (NaN-box) + fmt_special_result[fmt] = '1; + fmt_special_result[fmt][FP_WIDTH-1:0] = special_res; + end + end else begin : inactive_format // NOTE(review): fmt_special_status and fmt_result_is_special are not driven in this branch - confirm intended + assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Special-case detection flag, selected by destination format + assign result_is_special = fmt_result_is_special[dst_fmt_q]; // they're all the same + // Special-case status flags (only NV is ever set), selected by destination format + assign special_status = fmt_special_status[dst_fmt_q]; + // Assemble result according to destination format + assign special_result = fmt_special_result[dst_fmt_q]; // destination format + + // --------------------------- + // Initial exponent data path + // --------------------------- + logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c; + logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference; + logic signed [EXP_WIDTH-1:0] tentative_exponent; + + // Zero-extend exponents into signed container - implicit width extension + assign exponent_a = signed'({1'b0, operand_a.exponent}); + assign exponent_b = signed'({1'b0, operand_b.exponent}); + assign exponent_c = signed'({1'b0, operand_c.exponent}); + + // Calculate internal exponents from encoded values.
Real exponents are (ex = Ex - bias + 1 - nx) + // with Ex the encoded exponent and nx the implicit bit. Internal exponents are biased to dst fmt. + assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm + // Biased product exponent is the sum of encoded exponents minus the bias. + assign exponent_product = (info_a.is_zero || info_b.is_zero) // in case the product is zero, set minimum exp. + ? 2 - signed'(fpnew_pkg::bias(dst_fmt_q)) + : signed'(exponent_a + info_a.is_subnormal + + exponent_b + info_b.is_subnormal + - 2*signed'(fpnew_pkg::bias(src_fmt_q)) + + signed'(fpnew_pkg::bias(dst_fmt_q))); // rebias for dst fmt + // Exponent difference is the addend exponent minus the product exponent + assign exponent_difference = exponent_addend - exponent_product; + // The tentative exponent will be the larger of the product or addend exponent + assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product; + + // Shift amount for addend based on exponents (unsigned as only right shifts) + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt; + + always_comb begin : addend_shift_amount + // Product-anchored case, saturated shift (addend is only in the sticky bit) + if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1)) + addend_shamt = 3 * PRECISION_BITS + 4; + // Addend and product will have mutual bits to add + else if (exponent_difference <= signed'(PRECISION_BITS + 2)) + addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference); + // Addend-anchored case, saturated shift (product is only in the sticky bit) + else + addend_shamt = 0; + end + + // ------------------ + // Product data path + // ------------------ + logic [PRECISION_BITS-1:0] mantissa_a, mantissa_b, mantissa_c; + logic [2*PRECISION_BITS-1:0] product; // the p*p product is 2p bits wide + logic [3*PRECISION_BITS+3:0] product_shifted; // addends are 3p+4 bit wide (including G/R) + + // Add implicit bits to mantissae +
assign mantissa_a = {info_a.is_normal, operand_a.mantissa}; + assign mantissa_b = {info_b.is_normal, operand_b.mantissa}; + assign mantissa_c = {info_c.is_normal, operand_c.mantissa}; + + // Mantissa multiplier (a*b) + assign product = mantissa_a * mantissa_b; + + // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky: + // | 000...000 | product | RS | + // <- p+2 -> <- 2p -> < 2> + assign product_shifted = product << 2; // constant shift + + // ----------------- + // Addend data path + // ----------------- + logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits are needed to go on + logic [PRECISION_BITS-1:0] addend_sticky_bits; // up to p bit of shifted addend are sticky + logic sticky_before_add; // they are compressed into a single sticky bit + logic [3*PRECISION_BITS+3:0] addend_shifted; // addends are 3p+4 bit wide (including G/R) + logic inject_carry_in; // inject carry for subtractions if needed + + // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits are + // shifted out and compressed into a sticky bit. + // BEFORE THE SHIFT: + // | mantissa_c | 000..000 | + // <- p -> <- 3p+4 -> + // AFTER THE SHIFT: + // | 000..........000 | mantissa_c | 000...............0GR | sticky bits | + // <- addend_shamt -> <- p -> <- 2p+4-addend_shamt -> <- up to p -> + assign {addend_after_shift, addend_sticky_bits} = + (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt; + + assign sticky_before_add = (| addend_sticky_bits); + + // In case of a subtraction, the addend is inverted + assign addend_shifted = (effective_subtraction) ?
~addend_after_shift : addend_after_shift; + assign inject_carry_in = effective_subtraction & ~sticky_before_add; + + // ------ + // Adder + // ------ + logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry + logic sum_carry; // observe carry bit from sum for sign fixing + logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow + logic final_sign; + + //Mantissa adder (ab+c). In normal addition, it cannot overflow. + assign sum_raw = product_shifted + addend_shifted + inject_carry_in; + assign sum_carry = sum_raw[3*PRECISION_BITS+4]; + + // Complement negative sum (can only happen in subtraction -> overflows for positive results) + assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; + + // In case of a mispredicted subtraction result, do a sign flip + assign final_sign = (effective_subtraction && (sum_carry == tentative_sign)) + ? 1'b1 + : (effective_subtraction ? 1'b0 : tentative_sign); + + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic effective_subtraction_q; + logic signed [EXP_WIDTH-1:0] exponent_product_q; + logic signed [EXP_WIDTH-1:0] exponent_difference_q; + logic signed [EXP_WIDTH-1:0] tentative_exponent_q; + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; + logic sticky_before_add_q; + logic [3*PRECISION_BITS+3:0] sum_q; + logic final_sign_q; + fpnew_pkg::fp_format_e dst_fmt_q2; + fpnew_pkg::roundmode_e rnd_mode_q; + logic result_is_special_q; + fp_t special_result_q; + fpnew_pkg::status_t special_status_q; + // Internal pipeline signals, index i holds signal after i register stages + logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q; + logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q; + logic [0:NUM_MID_REGS]
mid_pipe_sticky_q; + logic [0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q; + logic [0:NUM_MID_REGS] mid_pipe_final_sign_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; + logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; + fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; + fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_eff_sub_q[0] = effective_subtraction; + assign mid_pipe_exp_prod_q[0] = exponent_product; + assign mid_pipe_exp_diff_q[0] = exponent_difference; + assign mid_pipe_tent_exp_q[0] = tentative_exponent; + assign mid_pipe_add_shamt_q[0] = addend_shamt; + assign mid_pipe_sticky_q[0] = sticky_before_add; + assign mid_pipe_sum_q[0] = sum; + assign mid_pipe_final_sign_q[0] = final_sign; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; + assign mid_pipe_res_is_spec_q[0] = result_is_special; + assign mid_pipe_spec_res_q[0] = special_result; + assign mid_pipe_spec_stat_q[0] = special_status; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2.
if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0) + `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0) + `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0) + `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0) + `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; + assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; + assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS]; + assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS]; + assign addend_shamt_q =
mid_pipe_add_shamt_q[NUM_MID_REGS]; + assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS]; + assign sum_q = mid_pipe_sum_q[NUM_MID_REGS]; + assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS]; + assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS]; + assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS]; + assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS]; + + // -------------- + // Normalization + // -------------- + logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched + logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes + logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count + logic lzc_zeroes; // in case only zeroes found + + logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount + logic signed [EXP_WIDTH-1:0] normalized_exponent; + + logic [3*PRECISION_BITS+4:0] sum_shifted; // result after first normalization shift + logic [PRECISION_BITS:0] final_mantissa; // final mantissa before rounding with round bit + logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // remaining 2p+3 sticky bits after normalization + logic sticky_after_norm; // sticky bit after normalization + + logic signed [EXP_WIDTH-1:0] final_exponent; + + assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0]; + + // Leading zero counter for cancellations + lzc #( + .WIDTH ( LOWER_SUM_WIDTH ), + .MODE ( 1 ) // MODE = 1 counts leading zeroes + ) i_lzc ( + .in_i ( sum_lower ), + .cnt_o ( leading_zero_count ), + .empty_o ( lzc_zeroes ) + ); + + assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count}); + + // Normalization shift amount based on exponents and LZC (unsigned as only left shifts) + always_comb begin : norm_shift_amount + // Product-anchored case or cancellations require LZC + if ((exponent_difference_q <= 0) ||
(effective_subtraction_q && (exponent_difference_q <= 2))) begin + // Normal result (biased exponent > 0 and not a zero) + if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin + // Undo initial product shift, remove the counted zeroes + norm_shamt = PRECISION_BITS + 2 + leading_zero_count; + normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift + // Subnormal result + end else begin + // Cap the shift distance to align mantissa with minimum exponent + norm_shamt = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q)); + normalized_exponent = 0; // subnormals encoded as 0 + end + // Addend-anchored case + end else begin + norm_shamt = addend_shamt_q; // Undo the initial shift + normalized_exponent = tentative_exponent_q; + end + end + + // Do the large normalization shift + assign sum_shifted = sum_q << norm_shamt; + + // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left + // or right of the (non-carry) MSB of the sum.
+ always_comb begin : small_norm + // Default assignment, discarding carry bit + {final_mantissa, sum_sticky_bits} = sum_shifted; + final_exponent = normalized_exponent; + + // The normalized sum has overflowed, align right and fix exponent + if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit + {final_mantissa, sum_sticky_bits} = sum_shifted >> 1; + final_exponent = normalized_exponent + 1; + // The normalized sum is normal, nothing to do + end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB + // do nothing + // The normalized sum is still denormal, align left - unless the result is already subnormal + end else if (normalized_exponent > 1) begin + {final_mantissa, sum_sticky_bits} = sum_shifted << 1; + final_exponent = normalized_exponent - 1; + // Otherwise we're denormal + end else begin + final_exponent = '0; + end + end + + // Update the sticky bit with the shifted-out bits + assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q; + + // ---------------------------- + // Rounding and classification + // ---------------------------- + logic pre_round_sign; + logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding + logic [1:0] round_sticky_bits; + + logic of_before_round, of_after_round; // overflow + logic uf_before_round, uf_after_round; // underflow + + logic [NUM_FORMATS-1:0][SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] fmt_pre_round_abs; // per format + logic [NUM_FORMATS-1:0][1:0] fmt_round_sticky_bits; + + logic [NUM_FORMATS-1:0] fmt_of_after_round; + logic [NUM_FORMATS-1:0] fmt_uf_after_round; + + logic rounded_sign; + logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding + logic result_zero; + + // Classification before round. RISC-V mandates checking underflow AFTER rounding!
+ assign of_before_round = final_exponent >= 2**(fpnew_pkg::exp_bits(dst_fmt_q2))-1; // infinity exponent is all ones + assign uf_before_round = final_exponent == 0; // exponent for subnormals capped to 0 + + // Pack exponent and mantissa into proper rounding form + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble + // Set up some constants + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + logic [EXP_BITS-1:0] pre_round_exponent; + logic [MAN_BITS-1:0] pre_round_mantissa; + + if (FpFmtConfig[fmt]) begin : active_format + + assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0]; + assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[SUPER_MAN_BITS-:MAN_BITS]; + // Assemble result before rounding. In case of overflow, the largest normal value is set. + assign fmt_pre_round_abs[fmt] = {pre_round_exponent, pre_round_mantissa}; // 0-extend + + // Round bit is after mantissa (1 in case of overflow for rounding) + assign fmt_round_sticky_bits[fmt][1] = final_mantissa[SUPER_MAN_BITS-MAN_BITS] | + of_before_round; + + // remaining bits in mantissa to sticky (1 in case of overflow for rounding) + if (MAN_BITS < SUPER_MAN_BITS) begin : narrow_sticky + assign fmt_round_sticky_bits[fmt][0] = (| final_mantissa[SUPER_MAN_BITS-MAN_BITS-1:0]) | + sticky_after_norm | of_before_round; + end else begin : normal_sticky + assign fmt_round_sticky_bits[fmt][0] = sticky_after_norm | of_before_round; + end + end else begin : inactive_format + assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_round_sticky_bits[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Assemble result before rounding. In case of overflow, the largest normal value is set.
+ assign pre_round_sign = final_sign_q; + assign pre_round_abs = fmt_pre_round_abs[dst_fmt_q2]; + + // In case of overflow, the round and sticky bits are set for proper rounding + assign round_sticky_bits = fmt_round_sticky_bits[dst_fmt_q2]; + + // Perform the rounding + fpnew_rounding #( + .AbsWidth ( SUPER_EXP_BITS + SUPER_MAN_BITS ) + ) i_fpnew_rounding ( + .abs_value_i ( pre_round_abs ), + .sign_i ( pre_round_sign ), + .round_sticky_bits_i ( round_sticky_bits ), + .rnd_mode_i ( rnd_mode_q ), + .effective_subtraction_i ( effective_subtraction_q ), + .abs_rounded_o ( rounded_abs ), + .sign_o ( rounded_sign ), + .exact_zero_o ( result_zero ) + ); + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result; + + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : post_process + // detect of / uf + fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal + fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. + + // Assemble regular result, nan box short ones.
+ fmt_result[fmt] = '1; + fmt_result[fmt][FP_WIDTH-1:0] = {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]}; + end + end else begin : inactive_format + assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Classification after rounding select by destination format + assign uf_after_round = fmt_uf_after_round[dst_fmt_q2]; + assign of_after_round = fmt_of_after_round[dst_fmt_q2]; + + + // ----------------- + // Result selection + // ----------------- + logic [WIDTH-1:0] regular_result; + fpnew_pkg::status_t regular_status; + + // Assemble regular result + assign regular_result = fmt_result[dst_fmt_q2]; + assign regular_status.NV = 1'b0; // only valid cases are handled in regular path + assign regular_status.DZ = 1'b0; // no divisions + assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow + assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF + assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round; + + // Final results for output pipeline + logic [WIDTH-1:0] result_d; + fpnew_pkg::status_t status_d; + + // Select output depending on special case detection + assign result_d = result_is_special_q ? special_result_q : regular_result; + assign status_d = result_is_special_q ?
special_status_q : regular_status; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2.
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); +endmodule diff --git a/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv b/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv new file mode 100644 index 0000000000..9e485f9e9d --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv @@ -0,0 +1,404 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach
+
+`include "common_cells/registers.svh"
+// Non-computational FP unit for one format: sign injection, min/max, comparisons and classification.
+module fpnew_noncomp #(
+  parameter fpnew_pkg::fp_format_e   FpFormat    = fpnew_pkg::fp_format_e'(0),
+  parameter int unsigned             NumPipeRegs = 0, // total register stages, split per PipeConfig
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic, // passed through the pipeline unchanged
+  parameter type                     AuxType     = logic, // passed through the pipeline unchanged
+
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
+) (
+  input logic                  clk_i,
+  input logic                  rst_ni,
+  // Input signals
+  input logic [1:0][WIDTH-1:0]     operands_i, // 2 operands
+  input logic [1:0]                is_boxed_i, // 2 operands
+  input fpnew_pkg::roundmode_e     rnd_mode_i, // selects the sub-operation, see the sections below
+  input fpnew_pkg::operation_e     op_i,       // SGNJ, MINMAX, CMP or CLASSIFY
+  input logic                      op_mod_i,   // SGNJ: sign-extend result, CMP: invert result
+  input TagType                    tag_i,
+  input AuxType                    aux_i,
+  // Input Handshake
+  input  logic                     in_valid_i,
+  output logic                     in_ready_o,
+  input  logic                     flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]         result_o,
+  output fpnew_pkg::status_t       status_o,
+  output logic                     extension_bit_o,
+  output fpnew_pkg::classmask_e    class_mask_o, // class of operand a, computed for every operation
+  output logic                     is_class_o,   // set when the operation was CLASSIFY
+  output TagType                   tag_o,
+  output AuxType                   aux_o,
+  // Output handshake
+  output logic                     out_valid_o,
+  input  logic                     out_ready_i,
+  // Indication of valid data in flight
+  output logic                     busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+  // Pipelines: only input and output stages exist here, so INSIDE registers are placed at the input
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ----------------
+  // Type definition
+  // ----------------
+  typedef struct packed {
+    logic                sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
+  logic                  [0:NUM_INP_REGS][1:0]            inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                 inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                 inp_pipe_op_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_op_mod_q;
+  TagType                [0:NUM_INP_REGS]                 inp_pipe_tag_q;
+  AuxType                [0:NUM_INP_REGS]                 inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  fpnew_pkg::fp_info_t [1:0] info_q;
+
+  // Classify input
+  fpnew_classifier #(
+    .FpFormat    ( FpFormat ),
+    .NumOperands ( 2        )
+  ) i_class_a (
+    .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
+    .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
+    .info_o     ( info_q                            )
+  );
+
+  fp_t                 operand_a, operand_b;
+  fpnew_pkg::fp_info_t info_a,    info_b;
+
+  // Packing-order-agnostic assignments
+  assign operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
+  assign operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
+  assign info_a    = info_q[0];
+  assign info_b    = info_q[1];
+
+  logic any_operand_inf;
+  logic any_operand_nan;
+  logic signalling_nan;
+
+  // Reduction for special case handling
+  assign any_operand_inf = (| {info_a.is_inf,        info_b.is_inf}); // not referenced elsewhere in this module
+  assign any_operand_nan = (| {info_a.is_nan,        info_b.is_nan});
+  assign signalling_nan  = (| {info_a.is_signalling, info_b.is_signalling});
+
+  logic operands_equal, operand_a_smaller;
+
+  // Equality checks for zeroes too
+  assign operands_equal    = (operand_a == operand_b) || (info_a.is_zero && info_b.is_zero);
+  // Bit patterns are compared as unsigned values; the outcome is inverted if either sign bit is set
+  assign operand_a_smaller = (operand_a < operand_b) ^ (operand_a.sign || operand_b.sign);
+
+  // ---------------
+  // Sign Injection
+  // ---------------
+  fp_t                sgnj_result;
+  fpnew_pkg::status_t sgnj_status;
+  logic               sgnj_extension_bit;
+
+  // Sign Injection - operation is encoded in rnd_mode_q:
+  // RNE = SGNJ, RTZ = SGNJN, RDN = SGNJX, RUP = Passthrough (no NaN-box check)
+  always_comb begin : sign_injections
+    logic sign_a, sign_b; // internal signs
+    // Default assignment
+    sgnj_result = operand_a; // result based on operand a
+
+    // NaN-boxing check will treat invalid inputs as canonical NaNs
+    if (!info_a.is_boxed) sgnj_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)};
+
+    // Internal signs are treated as positive in case of non-NaN-boxed values
+    sign_a = operand_a.sign & info_a.is_boxed;
+    sign_b = operand_b.sign & info_b.is_boxed;
+
+    // Do the sign injection based on rm field
+    unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+      fpnew_pkg::RNE: sgnj_result.sign = sign_b;          // SGNJ
+      fpnew_pkg::RTZ: sgnj_result.sign = ~sign_b;         // SGNJN
+      fpnew_pkg::RDN: sgnj_result.sign = sign_a ^ sign_b; // SGNJX
+      fpnew_pkg::RUP: sgnj_result      = operand_a;       // passthrough, overrides the canonical NaN above
+      default: sgnj_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+    endcase
+  end
+
+  assign sgnj_status = '0;        // sign injections never raise exceptions
+
+  // op_mod_q enables integer sign-extension of result (for storing to integer regfile)
+  assign sgnj_extension_bit = inp_pipe_op_mod_q[NUM_INP_REGS] ? sgnj_result.sign : 1'b1;
+
+  // ------------------
+  // Minimum / Maximum
+  // ------------------
+  fp_t                minmax_result;
+  fpnew_pkg::status_t minmax_status;
+  logic               minmax_extension_bit;
+
+  // Minimum/Maximum - operation is encoded in rnd_mode_q:
+  // RNE = MIN, RTZ = MAX
+  always_comb begin : min_max
+    // Default assignment
+    minmax_status = '0;
+
+    // Min/Max use quiet comparisons - only sNaN are invalid
+    minmax_status.NV = signalling_nan;
+
+    // Both NaN inputs cause a NaN output
+    if (info_a.is_nan && info_b.is_nan)
+      minmax_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN
+    // If one operand is NaN, the non-NaN operand is returned
+    else if (info_a.is_nan) minmax_result = operand_b;
+    else if (info_b.is_nan) minmax_result = operand_a;
+    // Otherwise decide according to the operation
+    else begin
+      unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+        fpnew_pkg::RNE: minmax_result = operand_a_smaller ? operand_a : operand_b; // MIN
+        fpnew_pkg::RTZ: minmax_result = operand_a_smaller ? operand_b : operand_a; // MAX
+        default: minmax_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+      endcase
+    end
+  end
+
+  assign minmax_extension_bit = 1'b1; // NaN-box as result is always a float value
+
+  // ------------
+  // Comparisons
+  // ------------
+  fp_t                cmp_result;
+  fpnew_pkg::status_t cmp_status;
+  logic               cmp_extension_bit;
+
+  // Comparisons - operation is encoded in rnd_mode_q:
+  // RNE = LE, RTZ = LT, RDN = EQ
+  // op_mod_q inverts boolean outputs
+  always_comb begin : comparisons
+    // Default assignment
+    cmp_result = '0; // false
+    cmp_status = '0; // no flags
+
+    // Signalling NaNs always compare as false and are illegal
+    if (signalling_nan) cmp_status.NV = 1'b1; // invalid operation
+    // Otherwise do comparisons
+    else begin
+      unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+        fpnew_pkg::RNE: begin // Less than or equal
+          if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
+          else cmp_result = (operand_a_smaller | operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        fpnew_pkg::RTZ: begin // Less than
+          if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
+          else cmp_result = (operand_a_smaller & ~operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        fpnew_pkg::RDN: begin // Equal
+          if (any_operand_nan) cmp_result = inp_pipe_op_mod_q[NUM_INP_REGS]; // NaN always not equal
+          else cmp_result = operands_equal ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        default: cmp_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+      endcase
+    end
+  end
+
+  assign cmp_extension_bit = 1'b0; // Comparisons always produce booleans in integer registers
+
+  // ---------------
+  // Classification
+  // ---------------
+  fpnew_pkg::status_t    class_status;
+  logic                  class_extension_bit;
+  fpnew_pkg::classmask_e class_mask_d; // the result is actually here
+
+  // Classification - always return the classification mask on the dedicated port (operand a only)
+  always_comb begin : classify
+    if (info_a.is_normal) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGNORM    : fpnew_pkg::POSNORM;
+    end else if (info_a.is_subnormal) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGSUBNORM : fpnew_pkg::POSSUBNORM;
+    end else if (info_a.is_zero) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGZERO    : fpnew_pkg::POSZERO;
+    end else if (info_a.is_inf) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGINF     : fpnew_pkg::POSINF;
+    end else if (info_a.is_nan) begin
+      class_mask_d = info_a.is_signalling ? fpnew_pkg::SNAN       : fpnew_pkg::QNAN;
+    end else begin
+      class_mask_d = fpnew_pkg::QNAN; // default value
+    end
+  end
+
+  assign class_status        = '0;   // classification does not set flags
+  assign class_extension_bit = 1'b0; // classification always produces results in integer registers
+
+  // -----------------
+  // Result selection
+  // -----------------
+  fp_t                   result_d;
+  fpnew_pkg::status_t    status_d;
+  logic                  extension_bit_d;
+  logic                  is_class_d;
+
+  // Select result of the operation that reached the end of the input pipeline
+  always_comb begin : select_result
+    unique case (inp_pipe_op_q[NUM_INP_REGS])
+      fpnew_pkg::SGNJ: begin
+        result_d        = sgnj_result;
+        status_d        = sgnj_status;
+        extension_bit_d = sgnj_extension_bit;
+      end
+      fpnew_pkg::MINMAX: begin
+        result_d        = minmax_result;
+        status_d        = minmax_status;
+        extension_bit_d = minmax_extension_bit;
+      end
+      fpnew_pkg::CMP: begin
+        result_d        = cmp_result;
+        status_d        = cmp_status;
+        extension_bit_d = cmp_extension_bit;
+      end
+      fpnew_pkg::CLASSIFY: begin
+        result_d        = '{default: fpnew_pkg::DONT_CARE}; // unused, the mask travels on class_mask_o
+        status_d        = class_status;
+        extension_bit_d = class_extension_bit;
+      end
+      default: begin
+        result_d        = '{default: fpnew_pkg::DONT_CARE}; // dont care
+        status_d        = '{default: fpnew_pkg::DONT_CARE}; // dont care
+        extension_bit_d = fpnew_pkg::DONT_CARE;             // dont care
+      end
+    endcase
+  end
+
+  assign is_class_d = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::CLASSIFY);
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  fp_t                   [0:NUM_OUT_REGS] out_pipe_result_q;
+  fpnew_pkg::status_t    [0:NUM_OUT_REGS] out_pipe_status_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_extension_bit_q;
+  fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_is_class_q;
+  TagType                [0:NUM_OUT_REGS] out_pipe_tag_q;
+  AuxType                [0:NUM_OUT_REGS] out_pipe_aux_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0]        = result_d;
+  assign out_pipe_status_q[0]        = status_d;
+  assign out_pipe_extension_bit_q[0] = extension_bit_d;
+  assign out_pipe_class_mask_q[0]    = class_mask_d;
+  assign out_pipe_is_class_q[0]      = is_class_d;
+  assign out_pipe_tag_q[0]           = inp_pipe_tag_q[NUM_INP_REGS];
+  assign out_pipe_aux_q[0]           = inp_pipe_aux_q[NUM_INP_REGS];
+  assign out_pipe_valid_q[0]         = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1],        out_pipe_result_q[i],        reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1],        out_pipe_status_q[i],        reg_ena, '0)
+    `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0)
+    `FFL(out_pipe_class_mask_q[i+1],    out_pipe_class_mask_q[i],    reg_ena, fpnew_pkg::QNAN)
+    `FFL(out_pipe_is_class_q[i+1],      out_pipe_is_class_q[i],      reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],           out_pipe_tag_q[i],           reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],           out_pipe_aux_q[i],           reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS];
+  assign class_mask_o    = out_pipe_class_mask_q[NUM_OUT_REGS];
+  assign is_class_o      = out_pipe_is_class_q[NUM_OUT_REGS];
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, out_pipe_valid_q});
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
new file mode 100644
index 0000000000..637e85f614
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
@@ -0,0
+1,230 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach
+// Operation-group block: per-format PARALLEL slices plus one shared merged slice, results arbitrated.
+module fpnew_opgroup_block #(
+  parameter fpnew_pkg::opgroup_e     OpGroup       = fpnew_pkg::ADDMUL,
+  // FPU configuration
+  parameter int unsigned             Width         = 32,
+  parameter logic                    EnableVectors = 1'b1,
+  parameter fpnew_pkg::fmt_logic_t   FpFmtMask     = '1, // formats for which hardware is generated
+  parameter fpnew_pkg::ifmt_logic_t  IntFmtMask    = '1, // only forwarded to the merged slice
+  parameter fpnew_pkg::fmt_unsigned_t FmtPipeRegs  = '{default: 0},
+  parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes = '{default: fpnew_pkg::PARALLEL},
+  parameter fpnew_pkg::pipe_config_t PipeConfig    = fpnew_pkg::BEFORE,
+  parameter type                     TagType       = logic,
+  // Do not change
+  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS,
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup)
+) (
+  input logic                                     clk_i,
+  input logic                                     rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0]       operands_i,
+  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e                    rnd_mode_i,
+  input fpnew_pkg::operation_e                    op_i,
+  input logic                                     op_mod_i,
+  input fpnew_pkg::fp_format_e                    src_fmt_i, // only forwarded to the merged slice
+  input fpnew_pkg::fp_format_e                    dst_fmt_i, // selects the slice that gets the operation
+  input fpnew_pkg::int_format_e                   int_fmt_i, // only forwarded to the merged slice
+  input logic                                     vectorial_op_i,
+  input TagType                                   tag_i,
+  // Input Handshake
+  input  logic                                    in_valid_i,
+  output logic                                    in_ready_o,
+  input  logic                                    flush_i,
+  // Output signals
+  output logic [Width-1:0]                        result_o,
+  output fpnew_pkg::status_t                      status_o,
+  output logic                                    extension_bit_o,
+  output TagType                                  tag_o,
+  // Output handshake
+  output logic                                    out_valid_o,
+  input  logic                                    out_ready_i,
+  // Indication of valid data in flight
+  output logic                                    busy_o
+);
+
+  // ----------------
+  // Type Definition
+  // ----------------
+  typedef struct packed {
+    logic [Width-1:0]   result;
+    fpnew_pkg::status_t status;
+    logic               ext_bit;
+    TagType             tag;
+  } output_t;
+
+  // Handshake signals for the slices
+  logic [NUM_FORMATS-1:0] fmt_in_ready, fmt_out_valid, fmt_out_ready, fmt_busy;
+  output_t [NUM_FORMATS-1:0] fmt_outputs;
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o = in_valid_i & fmt_in_ready[dst_fmt_i]; // Ready of selected format, only while in_valid_i is set
+
+  // -------------------------
+  // Generate Parallel Slices
+  // -------------------------
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_parallel_slices
+    // Some constants for this format
+    localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask);
+    localparam logic IS_FIRST_MERGED =
+        fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes, FpFmtMask);
+
+    // Generate slice only if format enabled
+    if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::PARALLEL)) begin : active_format
+
+      logic in_valid;
+
+      assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format
+
+      fpnew_opgroup_fmt_slice #(
+        .OpGroup       ( OpGroup                      ),
+        .FpFormat      ( fpnew_pkg::fp_format_e'(fmt) ),
+        .Width         ( Width                        ),
+        .EnableVectors ( EnableVectors                ),
+        .NumPipeRegs   ( FmtPipeRegs[fmt]             ),
+        .PipeConfig    ( PipeConfig                   ),
+        .TagType       ( TagType                      )
+      ) i_fmt_slice (
+        .clk_i,
+        .rst_ni,
+        .operands_i     ( operands_i               ),
+        .is_boxed_i     ( is_boxed_i[fmt]          ),
+        .rnd_mode_i,
+        .op_i,
+        .op_mod_i,
+        .vectorial_op_i,
+        .tag_i,
+        .in_valid_i     ( in_valid                 ),
+        .in_ready_o     ( fmt_in_ready[fmt]        ),
+        .flush_i,
+        .result_o       ( fmt_outputs[fmt].result  ),
+        .status_o       ( fmt_outputs[fmt].status  ),
+        .extension_bit_o( fmt_outputs[fmt].ext_bit ),
+        .tag_o          ( fmt_outputs[fmt].tag     ),
+        .out_valid_o    ( fmt_out_valid[fmt]       ),
+        .out_ready_i    ( fmt_out_ready[fmt]       ),
+        .busy_o         ( fmt_busy[fmt]            )
+      );
+    // If the format wants to use merged ops, tie off the dangling ones not used here
+    end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused
+
+      // Ready mirrors the first merged format, whose signals the merged slice below drives
+      assign fmt_in_ready[fmt] = fmt_in_ready[fpnew_pkg::get_first_enabled_multi(FmtUnitTypes,
+                                                                                 FpFmtMask)];
+
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt]      = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag     = TagType'(fpnew_pkg::DONT_CARE);
+
+    // Tie off disabled formats
+    end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt
+      assign fmt_in_ready[fmt]  = 1'b0; // don't accept operations
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt]      = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag     = TagType'(fpnew_pkg::DONT_CARE);
+    end
+  end
+
+  // ----------------------
+  // Generate Merged Slice
+  // ----------------------
+  if (fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask)) begin : gen_merged_slice
+
+    localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
+    localparam REG = fpnew_pkg::get_num_regs_multi(FmtPipeRegs, FmtUnitTypes, FpFmtMask);
+
+    logic in_valid;
+
+    assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED);
+
+    fpnew_opgroup_multifmt_slice #(
+      .OpGroup       ( OpGroup          ),
+      .Width         ( Width            ),
+      .FpFmtConfig   ( FpFmtMask        ),
+      .IntFmtConfig  ( IntFmtMask       ),
+      .EnableVectors ( EnableVectors    ),
+      .NumPipeRegs   ( REG              ),
+      .PipeConfig    ( PipeConfig       ),
+      .TagType       ( TagType          )
+    ) i_multifmt_slice (
+      .clk_i,
+      .rst_ni,
+      .operands_i,
+      .is_boxed_i,
+      .rnd_mode_i,
+      .op_i,
+      .op_mod_i,
+      .src_fmt_i,
+      .dst_fmt_i,
+      .int_fmt_i,
+      .vectorial_op_i,
+      .tag_i,
+      .in_valid_i      ( in_valid                 ),
+      .in_ready_o      ( fmt_in_ready[FMT]        ),
+      .flush_i,
+      .result_o        ( fmt_outputs[FMT].result  ),
+      .status_o        ( fmt_outputs[FMT].status  ),
+      .extension_bit_o ( fmt_outputs[FMT].ext_bit ),
+      .tag_o           ( fmt_outputs[FMT].tag     ),
+      .out_valid_o     ( fmt_out_valid[FMT]       ),
+      .out_ready_i     ( fmt_out_ready[FMT]       ),
+      .busy_o          ( fmt_busy[FMT]            )
+    );
+
+  end
+
+  // ------------------
+  // Arbitrate Outputs
+  // ------------------
+  output_t arbiter_output;
+
+  // Round-Robin arbiter to decide which result to use
+  rr_arb_tree #(
+    .NumIn     ( NUM_FORMATS ),
+    .DataType  ( output_t    ),
+    .AxiVldRdy ( 1'b1        )
+  ) i_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i,
+    .rr_i   ( '0             ),
+    .req_i  ( fmt_out_valid  ),
+    .gnt_o  ( fmt_out_ready  ),
+    .data_i ( fmt_outputs    ),
+    .gnt_i  ( out_ready_i    ),
+    .req_o  ( out_valid_o    ),
+    .data_o ( arbiter_output ),
+    .idx_o  ( /* unused */   )
+  );
+
+  // Unpack output
+  assign result_o        = arbiter_output.result;
+  assign status_o        = arbiter_output.status;
+  assign extension_bit_o = arbiter_output.ext_bit;
+  assign tag_o           = arbiter_output.tag;
+
+  assign busy_o = (| fmt_busy);
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
new file mode 100644
index 0000000000..fda2a57f38
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
@@ -0,0 +1,276 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach
+// Single-format slice of an operation group: one unit per lane, lane results packed into result_o.
+module fpnew_opgroup_fmt_slice #(
+  parameter fpnew_pkg::opgroup_e     OpGroup       = fpnew_pkg::ADDMUL,
+  parameter fpnew_pkg::fp_format_e   FpFormat      = fpnew_pkg::fp_format_e'(0),
+  // FPU configuration
+  parameter int unsigned             Width         = 32,
+  parameter logic                    EnableVectors = 1'b1,
+  parameter int unsigned             NumPipeRegs   = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig    = fpnew_pkg::BEFORE,
+  parameter type                     TagType       = logic,
+  // Do not change
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup)
+) (
+  input logic                               clk_i,
+  input logic                               rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
+  input logic [NUM_OPERANDS-1:0]            is_boxed_i,
+  input fpnew_pkg::roundmode_e              rnd_mode_i,
+  input fpnew_pkg::operation_e              op_i,
+  input logic                               op_mod_i,
+  input logic                               vectorial_op_i,
+  input TagType                             tag_i,
+  // Input Handshake
+  input  logic                              in_valid_i,
+  output logic                              in_ready_o,
+  input  logic                              flush_i,
+  // Output signals
+  output logic [Width-1:0]                  result_o,
+  output fpnew_pkg::status_t                status_o,
+  output logic                              extension_bit_o,
+  output TagType                            tag_o,
+  // Output handshake
+  output logic                              out_valid_o,
+  input  logic                              out_ready_i,
+  // Indication of valid data in flight
+  output logic                              busy_o
+);
+
+  localparam int unsigned FP_WIDTH  = fpnew_pkg::fp_width(FpFormat);
+  localparam int unsigned NUM_LANES = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors);
+
+
+  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
+  logic                 vectorial_op;
+
+  logic [NUM_LANES*FP_WIDTH-1:0] slice_result;
+  logic [Width-1:0]              slice_regular_result, slice_class_result, slice_vec_class_result;
+
+  fpnew_pkg::status_t    [NUM_LANES-1:0] lane_status;
+  logic                  [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used
+  fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask;
+  TagType                [NUM_LANES-1:0] lane_tags; // only the first one is actually used
+  logic                  [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // ditto
+
+  logic result_is_vector, result_is_class;
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
+  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
+
+  // ---------------
+  // Generate Lanes
+  // ---------------
+  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
+    logic [FP_WIDTH-1:0] local_result; // lane-local results
+    logic                local_sign;
+
+    // Generate instances only if needed, lane 0 always generated
+    if ((lane == 0) || EnableVectors) begin : active_lane
+      logic in_valid, out_valid, out_ready; // lane-local handshake
+
+      logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands
+      logic [FP_WIDTH-1:0]                   op_result;      // lane-local results
+      fpnew_pkg::status_t                    op_status;
+
+      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+      // Slice out the operands for this lane
+      always_comb begin : prepare_input
+        for (int i = 0; i < int'(NUM_OPERANDS); i++) begin
+          local_operands[i] = operands_i[i][(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH];
+        end
+      end
+
+      // Instantiate the operation from the selected opgroup
+      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
+        fpnew_fma #(
+          .FpFormat    ( FpFormat    ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig  ( PipeConfig  ),
+          .TagType     ( TagType     ),
+          .AuxType     ( logic       )
+        ) i_fma (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands               ),
+          .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .aux_i           ( vectorial_op                 ), // Remember whether operation was vectorial
+          .in_valid_i      ( in_valid                     ),
+          .in_ready_o      ( lane_in_ready[lane]          ),
+          .flush_i,
+          .result_o        ( op_result                    ),
+          .status_o        ( op_status                    ),
+          .extension_bit_o ( lane_ext_bit[lane]           ),
+          .tag_o           ( lane_tags[lane]              ),
+          .aux_o           ( lane_vectorial[lane]         ),
+          .out_valid_o     ( out_valid                    ),
+          .out_ready_i     ( out_ready                    ),
+          .busy_o          ( lane_busy[lane]              )
+        );
+        assign lane_is_class[lane]   = 1'b0;
+        assign lane_class_mask[lane] = fpnew_pkg::NEGINF; // placeholder: class result is never selected while lane_is_class is 0
+      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance // NOTE(review): instance is commented out, so this branch drives none of the lane signals
+        // fpnew_divsqrt #(
+        //   .FpFormat   (FpFormat),
+        //   .NumPipeRegs(NumPipeRegs),
+        //   .PipeConfig (PipeConfig),
+        //   .TagType    (TagType),
+        //   .AuxType    (logic)
+        // ) i_divsqrt (
+        //   .clk_i,
+        //   .rst_ni,
+        //   .operands_i      ( local_operands               ),
+        //   .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
+        //   .rnd_mode_i,
+        //   .op_i,
+        //   .op_mod_i,
+        //   .tag_i,
+        //   .aux_i           ( vectorial_op                 ), // Remember whether operation was vectorial
+        //   .in_valid_i      ( in_valid                     ),
+        //   .in_ready_o      ( lane_in_ready[lane]          ),
+        //   .flush_i,
+        //   .result_o        ( op_result                    ),
+        //   .status_o        ( op_status                    ),
+        //   .extension_bit_o ( lane_ext_bit[lane]           ),
+        //   .tag_o           ( lane_tags[lane]              ),
+        //   .aux_o           ( lane_vectorial[lane]         ),
+        //   .out_valid_o     ( out_valid                    ),
+        //   .out_ready_i     ( out_ready                    ),
+        //   .busy_o          ( lane_busy[lane]              )
+        // );
+        // assign lane_is_class[lane] = 1'b0;
+      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
+        fpnew_noncomp #(
+          .FpFormat   (FpFormat),
+          .NumPipeRegs(NumPipeRegs),
+          .PipeConfig (PipeConfig),
+          .TagType    (TagType),
+          .AuxType    (logic)
+        ) i_noncomp (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands               ),
+          .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .aux_i           ( vectorial_op                 ), // Remember whether operation was vectorial
+          .in_valid_i      ( in_valid                     ),
+          .in_ready_o      ( lane_in_ready[lane]          ),
+          .flush_i,
+          .result_o        ( op_result                    ),
+          .status_o        ( op_status                    ),
+          .extension_bit_o ( lane_ext_bit[lane]           ),
+          .class_mask_o    ( lane_class_mask[lane]        ),
+          .is_class_o      ( lane_is_class[lane]          ),
+          .tag_o           ( lane_tags[lane]              ),
+          .aux_o           ( lane_vectorial[lane]         ),
+          .out_valid_o     ( out_valid                    ),
+          .out_ready_i     ( out_ready                    ),
+          .busy_o          ( lane_busy[lane]              )
+        );
+      end // ADD OTHER OPTIONS HERE
+
+      // Handshakes are only done if the lane is actually used
+      assign out_ready            = out_ready_i & ((lane == 0) | result_is_vector);
+      assign lane_out_valid[lane] = out_valid   & ((lane == 0) | result_is_vector);
+
+      // Properly NaN-box or sign-extend the slice result if not in use
+      assign local_result      = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
+      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
+
+    // Otherwise generate constant sign-extension
+    end else begin
+      assign lane_out_valid[lane] = 1'b0; // unused lane
+      assign lane_in_ready[lane]  = 1'b0; // unused lane
+      assign local_result         = '{default: lane_ext_bit[0]}; // sign-extend/nan box
+      assign lane_status[lane]    = '0;
+      assign lane_busy[lane]      = 1'b0;
+      assign lane_is_class[lane]  = 1'b0;
+    end
+
+    // Insert lane result into slice result
+    assign slice_result[(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH] = local_result;
+
+    // Create Classification results
+    if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size
+      assign local_sign = (lane_class_mask[lane] == fpnew_pkg::NEGINF ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGZERO);
+      // Write the current block segment
+      assign slice_vec_class_result[(lane+1)*8-1:lane*8] = {
+        local_sign,  // BIT 7
+        ~local_sign, // BIT 6
+        lane_class_mask[lane] == fpnew_pkg::QNAN, // BIT 5
+        lane_class_mask[lane] == fpnew_pkg::SNAN, // BIT 4
+        lane_class_mask[lane] == fpnew_pkg::POSZERO
+        || lane_class_mask[lane] == fpnew_pkg::NEGZERO, // BIT 3
+        lane_class_mask[lane] == fpnew_pkg::POSSUBNORM
+        || lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM, // BIT 2
+        lane_class_mask[lane] == fpnew_pkg::POSNORM
+        || lane_class_mask[lane] == fpnew_pkg::NEGNORM, // BIT 1
+        lane_class_mask[lane] == fpnew_pkg::POSINF
+        || lane_class_mask[lane] == fpnew_pkg::NEGINF // BIT 0
+      };
+    end
+  end
+
+  // ------------
+  // Output Side
+  // ------------
+  assign result_is_vector = lane_vectorial[0];
+  assign result_is_class  = lane_is_class[0];
+
+  assign slice_regular_result = $signed({extension_bit_o, slice_result}); // extend to Width with the extension bit
+
+  localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 8 * (Width / 8) : NUM_LANES*8;
+
+  // Pad out unused vec_class bits
+  if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class
+    assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0;
+  end
+
+  // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1;
+
+  assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0];
+
+  // Select the proper result
+  assign result_o = result_is_class ? slice_class_result : slice_regular_result;
+
+  assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused
+  assign tag_o           = lane_tags[0];    // upper lanes unused
+  assign busy_o          = (| lane_busy);
+  assign out_valid_o     = lane_out_valid[0]; // upper lanes unused
+
+
+  // Collapse the lane status
+  always_comb begin : output_processing
+    // OR the status flags of all lanes together (inactive lanes contribute '0)
+    automatic fpnew_pkg::status_t temp_status;
+    temp_status = '0;
+    for (int i = 0; i < int'(NUM_LANES); i++)
+      temp_status |= lane_status[i];
+    status_o = temp_status;
+  end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
new file mode 100644
index 0000000000..14c1b4538f
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
@@ -0,0 +1,414 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach
+// Multi-format slice of one operation group: every lane instantiates one multi-format unit of the selected opgroup.
+`include "common_cells/registers.svh"
+// Lane 0 supplies the handshake, tag, extension bit and aux outputs; upper lanes only take part in vectorial operations.
+module fpnew_opgroup_multifmt_slice #(
+  parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV, // selects which unit is instantiated in each lane
+  parameter int unsigned Width = 64, // width in bits of each operand and of the result
+  // FPU configuration
+  parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, // mask of enabled FP formats
+  parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, // mask of enabled INT formats (used for CONV lanes)
+  parameter logic EnableVectors = 1'b1, // when 0 only lane 0 is instantiated and vectorial_op_i is ignored
+  parameter int unsigned NumPipeRegs = 0, // forwarded to the units; also the depth of the CONV bypass pipeline
+  parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, // forwarded to the units
+  parameter type TagType = logic, // type of the tag passed through alongside the operation
+  // Do not change
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
+  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e rnd_mode_i,
+  input fpnew_pkg::operation_e op_i,
+  input logic op_mod_i,
+  input fpnew_pkg::fp_format_e src_fmt_i,
+  input fpnew_pkg::fp_format_e dst_fmt_i,
+  input fpnew_pkg::int_format_e int_fmt_i,
+  input logic vectorial_op_i,
+  input TagType tag_i,
+  // Input Handshake
+  input logic in_valid_i,
+  output logic in_ready_o,
+  input logic flush_i,
+  // Output signals
+  output logic [Width-1:0] result_o,
+  output fpnew_pkg::status_t status_o,
+  output logic extension_bit_o,
+  output TagType tag_o,
+  // Output handshake
+  output logic out_valid_o,
+  input logic out_ready_i,
+  // Indication of valid data in flight
+  output logic busy_o
+);
+
+  localparam int unsigned MAX_FP_WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig);
+  localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig);
+  localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1);
+  localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
+  // We will send the format information along with the data
+  localparam int unsigned FMT_BITS = fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS));
+  localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags
+
+  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
+  logic vectorial_op;
+  logic [FMT_BITS-1:0] dst_fmt; // destination format to pass along with operation
+  logic [AUX_BITS-1:0] aux_data;
+
+  // additional flags for CONV
+  logic dst_fmt_is_int, dst_is_cpk;
+  logic [1:0] dst_vec_op; // info for vectorial results (for packing)
+  logic [2:0] target_aux_d, target_aux_q;
+  logic is_up_cast, is_down_cast;
+
+  logic [NUM_FORMATS-1:0][Width-1:0] fmt_slice_result;
+  logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result;
+  logic [Width-1:0] conv_slice_result;
+
+
+  logic [Width-1:0] conv_target_d, conv_target_q; // vectorial conversions update a register
+
+  fpnew_pkg::status_t [NUM_LANES-1:0] lane_status;
+  logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used
+  TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used
+  logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used
+  logic [NUM_LANES-1:0] lane_busy; // ditto
+
+  logic result_is_vector;
+  logic [FMT_BITS-1:0] result_fmt;
+  logic result_fmt_is_int, result_is_cpk;
+  logic [1:0] result_vec_op; // info for vectorial results (for packing)
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane
+  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
+
+  // Cast-and-Pack ops are encoded in operation and modifier; all three flags are forced low outside CONV
+  assign dst_fmt_is_int = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::F2I);
+  assign dst_is_cpk = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::CPKAB ||
+                                                    op_i == fpnew_pkg::CPKCD);
+  assign dst_vec_op = {2{(OpGroup == fpnew_pkg::CONV)}} & {(op_i == fpnew_pkg::CPKCD), op_mod_i}; // replicate the gate: a 1-bit operand would be zero-extended and clear bit 1
+
+  assign is_up_cast = (fpnew_pkg::fp_width(dst_fmt_i) > fpnew_pkg::fp_width(src_fmt_i));
+  assign is_down_cast = (fpnew_pkg::fp_width(dst_fmt_i) < fpnew_pkg::fp_width(src_fmt_i));
+
+  // The destination format is the int format for F2I casts
+  assign dst_fmt = dst_fmt_is_int ? int_fmt_i : dst_fmt_i;
+
+  // The data sent along consists of the integer flag, the vectorial flag and the format bits
+  assign aux_data = {dst_fmt_is_int, vectorial_op, dst_fmt};
+  assign target_aux_d = {dst_vec_op, dst_is_cpk};
+
+  // CONV passes one operand for assembly after the unit: opC for cpk, opB for others
+  if (OpGroup == fpnew_pkg::CONV) begin : conv_target
+    assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1];
+  end
+
+  // For 1- and 2-operand units, prepare boxing info (per format: bit 0, respectively bits 1:0, of is_boxed_i)
+  logic [NUM_FORMATS-1:0] is_boxed_1op;
+  logic [NUM_FORMATS-1:0][1:0] is_boxed_2op;
+
+  always_comb begin : boxed_2op
+    for (int fmt = 0; fmt < NUM_FORMATS; fmt++) begin
+      is_boxed_1op[fmt] = is_boxed_i[fmt][0];
+      is_boxed_2op[fmt] = is_boxed_i[fmt][1:0];
+    end
+  end
+
+  // ---------------
+  // Generate Lanes
+  // ---------------
+  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
+    localparam int unsigned LANE = unsigned'(lane); // unsigned to please the linter
+    // Get a mask of active formats for this lane
+    localparam fpnew_pkg::fmt_logic_t ACTIVE_FORMATS =
+        fpnew_pkg::get_lane_formats(Width, FpFmtConfig, LANE);
+    localparam fpnew_pkg::ifmt_logic_t ACTIVE_INT_FORMATS =
+        fpnew_pkg::get_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
+    localparam int unsigned MAX_WIDTH = fpnew_pkg::max_fp_width(ACTIVE_FORMATS);
+
+    // Cast-specific parameters
+    localparam fpnew_pkg::fmt_logic_t CONV_FORMATS =
+        fpnew_pkg::get_conv_lane_formats(Width, FpFmtConfig, LANE);
+    localparam fpnew_pkg::ifmt_logic_t CONV_INT_FORMATS =
+        fpnew_pkg::get_conv_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
+    localparam int unsigned CONV_WIDTH = fpnew_pkg::max_fp_width(CONV_FORMATS);
+
+    // Lane parameters from Opgroup
+    localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV)
+                                                     ? CONV_FORMATS : ACTIVE_FORMATS;
+    localparam int unsigned LANE_WIDTH = (OpGroup == fpnew_pkg::CONV) ? CONV_WIDTH : MAX_WIDTH;
+
+    logic [LANE_WIDTH-1:0] local_result; // lane-local results
+
+    // Generate instances only if needed, lane 0 always generated
+    if ((lane == 0) || EnableVectors) begin : active_lane
+      logic in_valid, out_valid, out_ready; // lane-local handshake
+
+      logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local operands
+      logic [LANE_WIDTH-1:0] op_result; // lane-local results
+      fpnew_pkg::status_t op_status;
+
+      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+
+      // Slice out the operands for this lane, upper bits are ignored in the unit
+      always_comb begin : prepare_input
+        for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin
+          local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i);
+        end
+
+        // override operand 0 for some conversions
+        if (OpGroup == fpnew_pkg::CONV) begin
+          // Source is an integer
+          if (op_i == fpnew_pkg::I2F) begin
+            local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::int_width(int_fmt_i);
+          // vectorial F2F up casts
+          end else if (op_i == fpnew_pkg::F2F) begin
+            if (vectorial_op && op_mod_i && is_up_cast) begin // up cast with upper half
+              local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::fp_width(src_fmt_i) +
+                                                   MAX_FP_WIDTH/2;
+            end
+          // CPK
+          end else if (dst_is_cpk) begin
+            if (lane == 1) begin
+              local_operands[0] = operands_i[1][LANE_WIDTH-1:0]; // using opB as second argument
+            end
+          end
+        end
+      end
+
+      // Instantiate the operation from the selected opgroup
+      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
+        fpnew_fma_multi #(
+          .FpFmtConfig ( LANE_FORMATS ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig ( PipeConfig ),
+          .TagType ( TagType ),
+          .AuxType ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_fma_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i ( local_operands ),
+          .is_boxed_i,
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .src_fmt_i,
+          .dst_fmt_i,
+          .tag_i,
+          .aux_i ( aux_data ),
+          .in_valid_i ( in_valid ),
+          .in_ready_o ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o ( op_result ),
+          .status_o ( op_status ),
+          .extension_bit_o ( lane_ext_bit[lane] ),
+          .tag_o ( lane_tags[lane] ),
+          .aux_o ( lane_aux[lane] ),
+          .out_valid_o ( out_valid ),
+          .out_ready_i ( out_ready ),
+          .busy_o ( lane_busy[lane] )
+        );
+
+      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
+        fpnew_divsqrt_multi #(
+          .FpFmtConfig ( LANE_FORMATS ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig ( PipeConfig ),
+          .TagType ( TagType ),
+          .AuxType ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_divsqrt_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i ( local_operands[1:0] ), // 2 operands
+          .is_boxed_i ( is_boxed_2op ), // 2 operands
+          .rnd_mode_i,
+          .op_i,
+          .dst_fmt_i,
+          .tag_i,
+          .aux_i ( aux_data ),
+          .in_valid_i ( in_valid ),
+          .in_ready_o ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o ( op_result ),
+          .status_o ( op_status ),
+          .extension_bit_o ( lane_ext_bit[lane] ),
+          .tag_o ( lane_tags[lane] ),
+          .aux_o ( lane_aux[lane] ),
+          .out_valid_o ( out_valid ),
+          .out_ready_i ( out_ready ),
+          .busy_o ( lane_busy[lane] )
+        );
+      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
+        // NOTE(review): no unit is instantiated for NONCOMP here, so this lane's unit outputs stay undriven
+      end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance
+        fpnew_cast_multi #(
+          .FpFmtConfig ( LANE_FORMATS ),
+          .IntFmtConfig ( CONV_INT_FORMATS ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig ( PipeConfig ),
+          .TagType ( TagType ),
+          .AuxType ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_cast_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i ( local_operands[0] ),
+          .is_boxed_i ( is_boxed_1op ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .src_fmt_i,
+          .dst_fmt_i,
+          .int_fmt_i,
+          .tag_i,
+          .aux_i ( aux_data ),
+          .in_valid_i ( in_valid ),
+          .in_ready_o ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o ( op_result ),
+          .status_o ( op_status ),
+          .extension_bit_o ( lane_ext_bit[lane] ),
+          .tag_o ( lane_tags[lane] ),
+          .aux_o ( lane_aux[lane] ),
+          .out_valid_o ( out_valid ),
+          .out_ready_i ( out_ready ),
+          .busy_o ( lane_busy[lane] )
+        );
+      end // ADD OTHER OPTIONS HERE
+
+      // Handshakes are only done if the lane is actually used
+      assign out_ready = out_ready_i & ((lane == 0) | result_is_vector);
+      assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector);
+
+      // Properly NaN-box or sign-extend the slice result if not in use
+      assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
+      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
+
+    // Otherwise generate constant sign-extension
+    end else begin : inactive_lane
+      assign lane_out_valid[lane] = 1'b0; // unused lane
+      assign lane_in_ready[lane] = 1'b0; // unused lane
+      assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box
+      assign lane_status[lane] = '0;
+      assign lane_busy[lane] = 1'b0;
+    end
+
+    // Generate result packing depending on float format
+    for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : pack_fp_result
+      // Set up some constants
+      localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+      // only for active formats within the lane
+      if (ACTIVE_FORMATS[fmt])
+        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
+            local_result[FP_WIDTH-1:0];
+    end
+
+    // Generate result packing depending on integer format
+    if (OpGroup == fpnew_pkg::CONV) begin : int_results_enabled
+      for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : pack_int_result
+        // Set up some constants
+        localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+        if (ACTIVE_INT_FORMATS[ifmt])
+          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] =
+              local_result[INT_WIDTH-1:0];
+      end
+    end
+  end
+
+  // Extend slice result if needed
+  for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : extend_fp_result
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    if (NUM_LANES*FP_WIDTH < Width)
+      assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]};
+  end
+
+  // Mute int results if unused
+  for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled
+    if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result
+      assign ifmt_slice_result[ifmt] = '0;
+    end
+  end
+
+  // Bypass lanes with target operand for vectorial casts
+  if (OpGroup == fpnew_pkg::CONV) begin : target_regs
+    // Bypass pipeline signals, index i holds signal after i register stages
+    logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q;
+    logic [0:NumPipeRegs][2:0] byp_pipe_aux_q;
+    logic [0:NumPipeRegs] byp_pipe_valid_q;
+    // Ready signal is combinatorial for all stages
+    logic [0:NumPipeRegs] byp_pipe_ready;
+
+    // Input stage: First element of pipeline is taken from inputs
+    assign byp_pipe_target_q[0] = conv_target_d;
+    assign byp_pipe_aux_q[0] = target_aux_d;
+    assign byp_pipe_valid_q[0] = in_valid_i & vectorial_op;
+    // Generate the register stages
+    for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline
+      // Internal register enable for this stage
+      logic reg_ena;
+      // Determine the ready signal of the current stage - advance the pipeline:
+      // 1. if the next stage is ready for our data
+      // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+      assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1];
+      // Valid: enabled by ready signal, synchronous clear with the flush signal
+      `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+      // Enable register if pipeline ready and a valid data item is present
+      assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i];
+      // Generate the pipeline registers within the stages, use enable-registers
+      `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0)
+      `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0)
+    end
+    // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+    assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector;
+    // Output stage: assign module outputs
+    assign conv_target_q = byp_pipe_target_q[NumPipeRegs];
+
+    // decode the aux data
+    assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs];
+  end else begin : no_conv
+    assign {result_vec_op, result_is_cpk} = '0;
+  end
+
+  // ------------
+  // Output Side
+  // ------------
+  assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0];
+
+  assign result_o = result_fmt_is_int
+                    ? ifmt_slice_result[result_fmt]
+                    : fmt_slice_result[result_fmt];
+
+  assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones
+  assign tag_o = lane_tags[0]; // don't care about upper ones
+  assign busy_o = (| lane_busy);
+
+  assign out_valid_o = lane_out_valid[0]; // don't care about upper ones
+
+  // Collapse the status
+  always_comb begin : output_processing
+    // OR together the status flags of all lanes (lanes without a valid output contribute '0)
+    automatic fpnew_pkg::status_t temp_status;
+    temp_status = '0;
+    for (int i = 0; i < int'(NUM_LANES); i++)
+      temp_status |= lane_status[i];
+    status_o = temp_status;
+  end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv b/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
new file mode 100644
index 0000000000..6065054fad
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
@@ -0,0 +1,484 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach
+// Shared types, configuration presets and constant helper functions referenced as fpnew_pkg:: by the fpnew modules.
+package fpnew_pkg;
+
+  // ---------
+  // FP TYPES
+  // ---------
+  // | Enumerator | Format           | Width  | EXP_BITS | MAN_BITS
+  // |:----------:|------------------|-------:|:--------:|:--------:
+  // | FP32       | IEEE binary32    | 32 bit | 8        | 23
+  // | FP64       | IEEE binary64    | 64 bit | 11       | 52
+  // | FP16       | IEEE binary16    | 16 bit | 5        | 10
+  // | FP8        | binary8          |  8 bit | 5        | 2
+  // | FP16ALT    | binary16alt      | 16 bit | 8        | 7
+  // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+
+  // Encoding for a format
+  typedef struct packed {
+    int unsigned exp_bits;
+    int unsigned man_bits;
+  } fp_encoding_t;
+
+  localparam int unsigned NUM_FP_FORMATS = 5; // change me to add formats
+  localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS);
+
+  // FP formats
+  typedef enum logic [FP_FORMAT_BITS-1:0] {
+    FP32 = 'd0,
+    FP64 = 'd1,
+    FP16 = 'd2,
+    FP8 = 'd3,
+    FP16ALT = 'd4
+    // add new formats here
+  } fp_format_e;
+
+  // Encodings for supported FP formats, as {exp_bits, man_bits}, indexed by fp_format_e value
+  localparam fp_encoding_t [0:NUM_FP_FORMATS-1] FP_ENCODINGS = '{
+    '{8, 23}, // IEEE binary32 (single)
+    '{11, 52}, // IEEE binary64 (double)
+    '{5, 10}, // IEEE binary16 (half)
+    '{5, 2}, // custom binary8
+    '{8, 7} // custom binary16alt
+    // add new formats here
+  };
+
+  typedef logic [0:NUM_FP_FORMATS-1] fmt_logic_t; // Logic indexed by FP format (for masks); range is [0:N-1], so the leftmost literal bit is format 0
+  typedef logic [0:NUM_FP_FORMATS-1][31:0] fmt_unsigned_t; // Unsigned indexed by FP format
+
+  localparam fmt_logic_t CPK_FORMATS = 5'b11000; // FP32 and FP64 can provide CPK only
+
+  // ---------
+  // INT TYPES
+  // ---------
+  // | Enumerator | Width  |
+  // |:----------:|-------:|
+  // | INT8       |  8 bit |
+  // | INT16      | 16 bit |
+  // | INT32      | 32 bit |
+  // | INT64      | 64 bit |
+  // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+
+  localparam int unsigned NUM_INT_FORMATS = 4; // change me to add formats
+  localparam int unsigned INT_FORMAT_BITS = $clog2(NUM_INT_FORMATS);
+
+  // Int formats
+  typedef enum logic [INT_FORMAT_BITS-1:0] {
+    INT8,
+    INT16,
+    INT32,
+    INT64
+    // add new formats here
+  } int_format_e;
+
+  // Returns the width of an INT format by index
+  function automatic int unsigned int_width(int_format_e ifmt);
+    unique case (ifmt) // no default branch: all four int_format_e enumerators are listed
+      INT8: return 8;
+      INT16: return 16;
+      INT32: return 32;
+      INT64: return 64;
+    endcase
+  endfunction
+
+  typedef logic [0:NUM_INT_FORMATS-1] ifmt_logic_t; // Logic indexed by INT format (for masks); leftmost literal bit is INT8
+
+  // --------------
+  // FP OPERATIONS
+  // --------------
+  localparam int unsigned NUM_OPGROUPS = 4;
+
+  // Each FP operation belongs to an operation group
+  typedef enum logic [1:0] {
+    ADDMUL, DIVSQRT, NONCOMP, CONV
+  } opgroup_e;
+
+  localparam int unsigned OP_BITS = 4;
+
+  typedef enum logic [OP_BITS-1:0] {
+    FMADD, FNMSUB, ADD, MUL, // ADDMUL operation group
+    DIV, SQRT, // DIVSQRT operation group
+    SGNJ, MINMAX, CMP, CLASSIFY, // NONCOMP operation group
+    F2F, F2I, I2F, CPKAB, CPKCD // CONV operation group
+  } operation_e;
+
+  // -------------------
+  // RISC-V FP-SPECIFIC
+  // -------------------
+  // Rounding modes
+  typedef enum logic [2:0] {
+    RNE = 3'b000,
+    RTZ = 3'b001,
+    RDN = 3'b010,
+    RUP = 3'b011,
+    RMM = 3'b100,
+    DYN = 3'b111
+  } roundmode_e;
+
+  // Status flags
+  typedef struct packed {
+    logic NV; // Invalid
+    logic DZ; // Divide by zero
+    logic OF; // Overflow
+    logic UF; // Underflow
+    logic NX; // Inexact
+  } status_t;
+
+  // Information about a floating point value
+  typedef struct packed {
+    logic is_normal; // is the value normal
+    logic is_subnormal; // is the value subnormal
+    logic is_zero; // is the value zero
+    logic is_inf; // is the value infinity
+    logic is_nan; // is the value NaN
+    logic is_signalling; // is the value a signalling NaN
+    logic is_quiet; // is the value a quiet NaN
+    logic is_boxed; // is the value properly NaN-boxed (RISC-V specific)
+  } fp_info_t;
+
+  // Classification mask (one-hot, one bit per class)
+  typedef enum logic [9:0] {
+    NEGINF = 10'b00_0000_0001,
+    NEGNORM = 10'b00_0000_0010,
+    NEGSUBNORM = 10'b00_0000_0100,
+    NEGZERO = 10'b00_0000_1000,
+    POSZERO = 10'b00_0001_0000,
+    POSSUBNORM = 10'b00_0010_0000,
+    POSNORM = 10'b00_0100_0000,
+    POSINF = 10'b00_1000_0000,
+    SNAN = 10'b01_0000_0000,
+    QNAN = 10'b10_0000_0000
+  } classmask_e;
+
+  // ------------------
+  // FPU configuration
+  // ------------------
+  // Pipelining registers can be inserted (at elaboration time) into operational units
+  typedef enum logic [1:0] {
+    BEFORE, // registers are inserted at the inputs of the unit
+    AFTER, // registers are inserted at the outputs of the unit
+    INSIDE, // registers are inserted at predetermined (suboptimal) locations in the unit
+    DISTRIBUTED // registers are evenly distributed, INSIDE >= AFTER >= BEFORE
+  } pipe_config_t;
+
+  // Arithmetic units can be arranged in parallel (per format), merged (multi-format) or not at all.
+  typedef enum logic [1:0] {
+    DISABLED, // arithmetic units are not generated
+    PARALLEL, // arithmetic units are generated in parallel slices, one for each format
+    MERGED // arithmetic units are contained within a merged unit holding multiple formats
+  } unit_type_t;
+
+  // Array of unit types indexed by format
+  typedef unit_type_t [0:NUM_FP_FORMATS-1] fmt_unit_types_t;
+
+  // Array of format-specific unit types by opgroup
+  typedef fmt_unit_types_t [0:NUM_OPGROUPS-1] opgrp_fmt_unit_types_t;
+  // same with unsigned
+  typedef fmt_unsigned_t [0:NUM_OPGROUPS-1] opgrp_fmt_unsigned_t;
+
+  // FPU configuration: features
+  typedef struct packed {
+    int unsigned Width;
+    logic EnableVectors;
+    logic EnableNanBox;
+    fmt_logic_t FpFmtMask;
+    ifmt_logic_t IntFmtMask;
+  } fpu_features_t;
+
+  localparam fpu_features_t RV64D = '{
+    Width: 64,
+    EnableVectors: 1'b0,
+    EnableNanBox: 1'b1,
+    FpFmtMask: 5'b11000,
+    IntFmtMask: 4'b0011
+  };
+
+  localparam fpu_features_t RV32D = '{
+    Width: 64,
+    EnableVectors: 1'b1,
+    EnableNanBox: 1'b1,
+    FpFmtMask: 5'b11000,
+    IntFmtMask: 4'b0010
+  };
+
+  localparam fpu_features_t RV32F = '{
+    Width: 32,
+    EnableVectors: 1'b0,
+    EnableNanBox: 1'b1,
+    FpFmtMask: 5'b10000,
+    IntFmtMask: 4'b0010
+  };
+
+  localparam fpu_features_t RV64D_Xsflt = '{
+    Width: 64,
+    EnableVectors: 1'b1,
+    EnableNanBox: 1'b1,
+    FpFmtMask: 5'b11111,
+    IntFmtMask: 4'b1111
+  };
+
+  localparam fpu_features_t RV32F_Xsflt = '{
+    Width: 32,
+    EnableVectors: 1'b1,
+    EnableNanBox: 1'b1,
+    FpFmtMask: 5'b10111,
+    IntFmtMask: 4'b1110
+  };
+
+  localparam fpu_features_t RV32F_Xf16alt_Xfvec = '{
+    Width: 32,
+    EnableVectors: 1'b1,
+    EnableNanBox: 1'b1,
+    FpFmtMask: 5'b10001,
+    IntFmtMask: 4'b0110
+  };
+
+
+  // FPU configuration: implementation
+  typedef struct packed {
+    opgrp_fmt_unsigned_t PipeRegs;
+    opgrp_fmt_unit_types_t UnitTypes;
+    pipe_config_t PipeConfig;
+  } fpu_implementation_t;
+
+  localparam fpu_implementation_t DEFAULT_NOREGS = '{
+    PipeRegs: '{default: 0},
+    UnitTypes: '{'{default: PARALLEL}, // ADDMUL
+                 '{default: MERGED}, // DIVSQRT
+                 '{default: PARALLEL}, // NONCOMP
+                 '{default: MERGED}}, // CONV
+    PipeConfig: BEFORE
+  };
+
+  localparam fpu_implementation_t DEFAULT_SNITCH = '{
+    PipeRegs: '{default: 1},
+    UnitTypes: '{'{default: PARALLEL}, // ADDMUL
+                 '{default: DISABLED}, // DIVSQRT
+                 '{default: PARALLEL}, // NONCOMP
+                 '{default: MERGED}}, // CONV
+    PipeConfig: BEFORE
+  };
+
+  // -----------------------
+  // Synthesis optimization
+  // -----------------------
+  localparam logic DONT_CARE = 1'b1; // the value to assign as don't care
+
+  // -------------------------
+  // General helper functions (operate on signed int)
+  // -------------------------
+  function automatic int minimum(int a, int b);
+    return (a < b) ? a : b;
+  endfunction
+
+  function automatic int maximum(int a, int b);
+    return (a > b) ? a : b;
+  endfunction
+
+  // -------------------------------------------
+  // Helper functions for FP formats and values
+  // -------------------------------------------
+  // Returns the width of a FP format (exponent bits + mantissa bits + 1 sign bit)
+  function automatic int unsigned fp_width(fp_format_e fmt);
+    return FP_ENCODINGS[fmt].exp_bits + FP_ENCODINGS[fmt].man_bits + 1;
+  endfunction
+
+  // Returns the widest FP format present (0 if no format is enabled in cfg)
+  function automatic int unsigned max_fp_width(fmt_logic_t cfg);
+    automatic int unsigned res = 0;
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++)
+      if (cfg[i])
+        res = unsigned'(maximum(res, fp_width(fp_format_e'(i))));
+    return res;
+  endfunction
+
+  // Returns the narrowest FP format present (0 if no format is enabled in cfg)
+  function automatic int unsigned min_fp_width(fmt_logic_t cfg);
+    automatic int unsigned res = max_fp_width(cfg);
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++)
+      if (cfg[i])
+        res = unsigned'(minimum(res, fp_width(fp_format_e'(i))));
+    return res;
+  endfunction
+
+  // Returns the number of exponent bits for a format
+  function automatic int unsigned exp_bits(fp_format_e fmt);
+    return FP_ENCODINGS[fmt].exp_bits;
+  endfunction
+
+  // Returns the number of mantissa bits for a format
+  function automatic int unsigned man_bits(fp_format_e fmt);
+    return FP_ENCODINGS[fmt].man_bits;
+  endfunction
+
+  // Returns the bias value for a given format (as per IEEE 754-2008)
+  function automatic int unsigned bias(fp_format_e fmt);
+    return unsigned'(2**(FP_ENCODINGS[fmt].exp_bits-1)-1); // symmetrical bias
+  endfunction
+
+  function automatic fp_encoding_t super_format(fmt_logic_t cfg); // field-wise maximum of exp_bits and man_bits over the formats enabled in cfg
+    automatic fp_encoding_t res;
+    res = '0;
+    for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++)
+      if (cfg[fmt]) begin // only active format
+        res.exp_bits = unsigned'(maximum(res.exp_bits, exp_bits(fp_format_e'(fmt))));
+        res.man_bits = unsigned'(maximum(res.man_bits, man_bits(fp_format_e'(fmt))));
+      end
+    return res;
+  endfunction
+
+  // -------------------------------------------
+  // Helper functions for INT formats and values
+  // -------------------------------------------
+  // Returns the widest INT format present (0 if no format is enabled in cfg)
+  function automatic int unsigned max_int_width(ifmt_logic_t cfg);
+    automatic int unsigned res = 0;
+    for (int ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin
+      if (cfg[ifmt]) res = maximum(res, int_width(int_format_e'(ifmt))); // NOTE(review): no unsigned'() cast here, unlike max_fp_width
+    end
+    return res;
+  endfunction
+
+  // --------------------------------------------------
+  // Helper functions for operations and FPU structure
+  // --------------------------------------------------
+  // Returns the operation group of the given operation
+  function automatic opgroup_e get_opgroup(operation_e op);
+    unique case (op)
+      FMADD, FNMSUB, ADD, MUL: return ADDMUL;
+      DIV, SQRT: return DIVSQRT;
+      SGNJ, MINMAX, CMP, CLASSIFY: return NONCOMP;
+      F2F, F2I, I2F, CPKAB, CPKCD: return CONV;
+      default: return NONCOMP; // only taken for values that are not named operation_e enumerators
+    endcase
+  endfunction
+
+  // Returns the number of operands by operation group
+  function automatic int unsigned num_operands(opgroup_e grp);
+    unique case (grp)
+      ADDMUL: return 3;
+      DIVSQRT: return 2;
+      NONCOMP: return 2;
+      CONV: return 3; // vectorial casts use 3 operands
+      default: return 0;
+    endcase
+  endfunction
+
+  // Returns the number of lanes according to width, format and vectors
+  function automatic int unsigned num_lanes(int unsigned width, fp_format_e fmt, logic vec);
+    return vec ? width / fp_width(fmt) : 1; // if no vectors, only one lane
+  endfunction
+
+  // Returns the maximum number of lanes in the FPU according to width, format config and vectors
+  function automatic int unsigned max_num_lanes(int unsigned width, fmt_logic_t cfg, logic vec);
+    return vec ? width / min_fp_width(cfg) : 1; // if no vectors, only one lane; NOTE(review): divisor is 0 when cfg enables no format
+  endfunction
+
+  // Returns a mask of active FP formats that are present in lane lane_no of a multiformat slice
+  function automatic fmt_logic_t get_lane_formats(int unsigned width,
+                                                  fmt_logic_t cfg,
+                                                  int unsigned lane_no);
+    automatic fmt_logic_t res;
+    for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++)
+      // Mask active formats with the number of lanes for that format
+      res[fmt] = cfg[fmt] & (width / fp_width(fp_format_e'(fmt)) > lane_no);
+    return res;
+  endfunction
+
+  // Returns a mask of active INT formats that are present in lane lane_no of a multiformat slice
+  function automatic ifmt_logic_t get_lane_int_formats(int unsigned width,
+                                                       fmt_logic_t cfg,
+                                                       ifmt_logic_t icfg,
+                                                       int unsigned lane_no);
+    automatic ifmt_logic_t res;
+    automatic fmt_logic_t lanefmts;
+    res = '0;
+    lanefmts = get_lane_formats(width, cfg, lane_no);
+
+    for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++)
+      for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++)
+        // Mask active int formats with the width of the float formats
+        if ((fp_width(fp_format_e'(fmt)) == int_width(int_format_e'(ifmt))))
+          res[ifmt] |= icfg[ifmt] && lanefmts[fmt];
+    return res;
+  endfunction
+
+  // Returns a mask of active FP formats that are present in lane lane_no of a CONV slice
+  function automatic fmt_logic_t get_conv_lane_formats(int unsigned width,
+                                                       fmt_logic_t cfg,
+                                                       int unsigned lane_no);
+    automatic fmt_logic_t res;
+    for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++)
+      // Mask active formats with the number of lanes for that format, CPK at least twice
+      res[fmt] = cfg[fmt] && ((width / fp_width(fp_format_e'(fmt)) > lane_no) ||
+                              (CPK_FORMATS[fmt] && (lane_no < 2)));
+    return res;
+  endfunction
+
+  // Returns a mask of active INT formats that are present in lane lane_no of a CONV slice
+  function automatic ifmt_logic_t get_conv_lane_int_formats(int unsigned width,
+                                                            fmt_logic_t cfg,
+                                                            ifmt_logic_t icfg,
+                                                            int unsigned lane_no);
+    automatic ifmt_logic_t res;
+    automatic fmt_logic_t lanefmts;
+    res = '0;
+    lanefmts = get_conv_lane_formats(width, cfg, lane_no);
+
+    for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++)
+      for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++)
+        // Mask active int formats with the width of the float formats
+        res[ifmt] |= icfg[ifmt] && lanefmts[fmt] &&
+                     (fp_width(fp_format_e'(fmt)) == int_width(int_format_e'(ifmt)));
+    return res;
+  endfunction
+
+  // Return whether any active format is set as MERGED
+  function automatic logic any_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++)
+      if (cfg[i] && types[i] == MERGED)
+        return 1'b1;
+      return 1'b0; // reached only when the loop finds no active MERGED format
+  endfunction
+
+  // Return whether the given format is the first active one set as MERGED
+  function automatic logic is_first_enabled_multi(fp_format_e fmt,
+                                                  fmt_unit_types_t types,
+                                                  fmt_logic_t cfg);
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
+      if (cfg[i] && types[i] == MERGED) return (fp_format_e'(i) == fmt);
+    end
+    return 1'b0;
+  endfunction
+
+  // Returns the first format that is active and is set as MERGED
+  function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++)
+      if (cfg[i] && types[i] == MERGED)
+        return fp_format_e'(i);
+      return fp_format_e'(0); // fallback when no active format is MERGED
+  endfunction
+
+  // Returns the largest number of regs that is active and is set as MERGED
+  function automatic int unsigned get_num_regs_multi(fmt_unsigned_t regs,
+                                                     fmt_unit_types_t types,
+                                                     fmt_logic_t cfg);
+    automatic int unsigned res = 0;
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
+      if (cfg[i] && types[i] == MERGED) res = maximum(res, regs[i]);
+    end
+    return res;
+  endfunction
+
+endpackage
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv b/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
new file mode 100644
index 0000000000..60f63bb702
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
@@ -0,0 +1,72 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+ +// Author: Stefan Mach + +module fpnew_rounding #( + parameter int unsigned AbsWidth=2 // Width of the absolute value, without sign bit +) ( + // Input value + input logic [AbsWidth-1:0] abs_value_i, // absolute value without sign + input logic sign_i, // sign of the input value + // Rounding information + input logic [1:0] round_sticky_bits_i, // round and sticky bits {RS} + input fpnew_pkg::roundmode_e rnd_mode_i, // rounding mode, see the table below + input logic effective_subtraction_i, // sign of inputs affects rounding of zeroes + // Output value + output logic [AbsWidth-1:0] abs_rounded_o, // absolute value without sign + output logic sign_o, // sign of the rounded result + // Output classification + output logic exact_zero_o // output is an exact zero +); + + logic round_up; // Rounding decision: when set, abs_value_i is incremented by one + + // Take the rounding decision according to RISC-V spec + // RoundMode | Mnemonic | Meaning + // :--------:|:--------:|:------- + // 000 | RNE | Round to Nearest, ties to Even + // 001 | RTZ | Round towards Zero + // 010 | RDN | Round Down (towards -\infty) + // 011 | RUP | Round Up (towards \infty) + // 100 | RMM | Round to Nearest, ties to Max Magnitude + // others | | *invalid* + always_comb begin : rounding_decision + unique case (rnd_mode_i) + fpnew_pkg::RNE: // Decide according to round/sticky bits + unique case (round_sticky_bits_i) + 2'b00, + 2'b01: round_up = 1'b0; // < ulp/2 away, round down + 2'b10: round_up = abs_value_i[0]; // = ulp/2 away, round towards even result + 2'b11: round_up = 1'b1; // > ulp/2 away, round up + default: round_up = fpnew_pkg::DONT_CARE; // all four binary values are covered above + endcase + fpnew_pkg::RTZ: round_up = 1'b0; // always round down + fpnew_pkg::RDN: round_up = (| round_sticky_bits_i) ? sign_i : 1'b0; // to 0 if +, away if - + fpnew_pkg::RUP: round_up = (| round_sticky_bits_i) ?
~sign_i : 1'b0; // to 0 if -, away if + + fpnew_pkg::RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up + default: round_up = fpnew_pkg::DONT_CARE; // propagate x + endcase + end + + // Perform the rounding; exponent change and overflow to inf happen automagically + assign abs_rounded_o = abs_value_i + round_up; + + // True zero result is a zero result without dirty round/sticky bits + assign exact_zero_o = (abs_value_i == '0) && (round_sticky_bits_i == '0); + + // In case of effective subtraction (thus signs of addition operands must have differed) and a + // true zero result, the result sign is '-' in case of RDN and '+' for other modes. + assign sign_o = (exact_zero_o && effective_subtraction_i) + ? (rnd_mode_i == fpnew_pkg::RDN) // 1 ('-') only for RDN + : sign_i; // otherwise the input sign is passed through + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/fpnew_top.sv b/vendor/pulp-platform/fpnew/src/fpnew_top.sv new file mode 100644 index 0000000000..581f25fbbf --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpnew_top.sv @@ -0,0 +1,172 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+ +// Author: Stefan Mach + +module fpnew_top #( + // FPU configuration + parameter fpnew_pkg::fpu_features_t Features = fpnew_pkg::RV64D_Xsflt, + parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS, + parameter type TagType = logic, // type of tag_i / tag_o + // Do not change: constants derived from the configuration above + localparam int unsigned WIDTH = Features.Width, + localparam int unsigned NUM_OPERANDS = 3 +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i, + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input fpnew_pkg::int_format_e int_fmt_i, + input logic vectorial_op_i, + input TagType tag_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output TagType tag_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + localparam int unsigned NUM_OPGROUPS = fpnew_pkg::NUM_OPGROUPS; + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS; + + // ---------------- + // Type Definition + // ---------------- + typedef struct packed { + logic [WIDTH-1:0] result; + fpnew_pkg::status_t status; + TagType tag; + } output_t; + + // Handshake signals for the blocks + logic [NUM_OPGROUPS-1:0] opgrp_in_ready, opgrp_out_valid, opgrp_out_ready, opgrp_ext, opgrp_busy; + output_t [NUM_OPGROUPS-1:0] opgrp_outputs; + + logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed; // NaN-boxing check result per format and operand + + // ----------- + // Input Side + // ----------- + assign in_ready_o = in_valid_i & opgrp_in_ready[fpnew_pkg::get_opgroup(op_i)]; // ready of the op group selected by op_i, gated by in_valid_i + + // NaN-boxing check, one flag per format and operand + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_nanbox_check + localparam int unsigned FP_WIDTH =
fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + // The check is only generated if NaN boxing is enabled and the format is narrower than WIDTH + if (Features.EnableNanBox && (FP_WIDTH < WIDTH)) begin : check + for (genvar op = 0; op < int'(NUM_OPERANDS); op++) begin : operands + assign is_boxed[fmt][op] = (!vectorial_op_i) + ? operands_i[op][WIDTH-1:FP_WIDTH] == '1 // scalar: all bits above the format width must be 1 + : 1'b1; // vectorial operands are always treated as boxed + end + end else begin : no_check + assign is_boxed[fmt] = '1; // no check generated: always treated as boxed + end + end + + // ------------------------- + // Generate Operation Blocks + // ------------------------- + for (genvar opgrp = 0; opgrp < int'(NUM_OPGROUPS); opgrp++) begin : gen_operation_groups + localparam int unsigned NUM_OPS = fpnew_pkg::num_operands(fpnew_pkg::opgroup_e'(opgrp)); + + logic in_valid; + logic [NUM_FORMATS-1:0][NUM_OPS-1:0] input_boxed; + + assign in_valid = in_valid_i & (fpnew_pkg::get_opgroup(op_i) == fpnew_pkg::opgroup_e'(opgrp)); // valid only for the op group matching op_i + + // slice out input boxing for the NUM_OPS operands used by this op group + always_comb begin : slice_inputs + for (int unsigned fmt = 0; fmt < NUM_FORMATS; fmt++) + input_boxed[fmt] = is_boxed[fmt][NUM_OPS-1:0]; + end + + fpnew_opgroup_block #( + .OpGroup ( fpnew_pkg::opgroup_e'(opgrp) ), + .Width ( WIDTH ), + .EnableVectors ( Features.EnableVectors ), + .FpFmtMask ( Features.FpFmtMask ), + .IntFmtMask ( Features.IntFmtMask ), + .FmtPipeRegs ( Implementation.PipeRegs[opgrp] ), + .FmtUnitTypes ( Implementation.UnitTypes[opgrp] ), + .PipeConfig ( Implementation.PipeConfig ), + .TagType ( TagType ) + ) i_opgroup_block ( + .clk_i, + .rst_ni, + .operands_i ( operands_i[NUM_OPS-1:0] ), + .is_boxed_i ( input_boxed ), + .rnd_mode_i, + .op_i, + .op_mod_i, + .src_fmt_i, + .dst_fmt_i, + .int_fmt_i, + .vectorial_op_i, + .tag_i, + .in_valid_i ( in_valid ), + .in_ready_o ( opgrp_in_ready[opgrp] ), + .flush_i, + .result_o ( opgrp_outputs[opgrp].result ), + .status_o ( opgrp_outputs[opgrp].status ), + .extension_bit_o ( opgrp_ext[opgrp] ), // not used further in this module + .tag_o ( opgrp_outputs[opgrp].tag ), + .out_valid_o ( opgrp_out_valid[opgrp] ), + .out_ready_i ( opgrp_out_ready[opgrp] ), +
.busy_o ( opgrp_busy[opgrp] ) + ); + end + + // ------------------ + // Arbitrate Outputs + // ------------------ + output_t arbiter_output; + + // Round-Robin arbiter to decide which op group's result is forwarded to the output + rr_arb_tree #( + .NumIn ( NUM_OPGROUPS ), + .DataType ( output_t ), + .AxiVldRdy ( 1'b1 ) + ) i_arbiter ( + .clk_i, + .rst_ni, + .flush_i, + .rr_i ( '0 ), + .req_i ( opgrp_out_valid ), + .gnt_o ( opgrp_out_ready ), + .data_i ( opgrp_outputs ), + .gnt_i ( out_ready_i ), + .req_o ( out_valid_o ), + .data_o ( arbiter_output ), + .idx_o ( /* unused */ ) + ); + + // Unpack output + assign result_o = arbiter_output.result; + assign status_o = arbiter_output.status; + assign tag_o = arbiter_output.tag; + + assign busy_o = (| opgrp_busy); // busy if any op group reports busy + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/LICENSE b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/LICENSE new file mode 100644 index 0000000000..18e4f67692 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/LICENSE @@ -0,0 +1,176 @@ +SOLDERPAD HARDWARE LICENSE version 0.51 + +This license is based closely on the Apache License Version 2.0, but is not +approved or endorsed by the Apache Foundation. A copy of the non-modified +Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0. + +As this license is not currently OSI or FSF approved, the Licensor permits any +Work licensed under this License, at the option of the Licensee, to be treated +as licensed under the Apache License Version 2.0 (which is so approved). + +This License is licensed under the terms of this License and in particular +clause 7 below (Disclaimer of Warranties) applies in relation to its use. + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document.
+ +"Licensor" shall mean the Rights owner or entity authorized by the Rights owner +that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Rights" means copyright and any similar right including design right (whether +registered or unregistered), semiconductor topography (mask) rights and +database rights (but excluding Patents and Trademarks). + +"Source" form shall mean the preferred form for making modifications, including +but not limited to source code, net lists, board layouts, CAD files, +documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object +code, generated documentation, the instantiation of a hardware design and +conversions to other media types, including intermediate forms such as +bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask +works). + +"Work" shall mean the work of authorship, whether in Source form or other +Object form, made available under the License, as indicated by a Rights notice +that is included in or attached to the work (an example is provided in the +Appendix below). 
+ +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) or physically connect to or interoperate with the interfaces of, the Work +and Derivative Works thereof. + +"Contribution" shall mean any design or work of authorship, including the +original version of the Work and any modifications or additions to that Work or +Derivative Works thereof, that is intentionally submitted to Licensor for +inclusion in the Work by the Rights owner or by an individual or Legal Entity +authorized to submit on behalf of the Rights owner. For the purposes of this +definition, "submitted" means any form of electronic, verbal, or written +communication sent to the Licensor or its representatives, including but not +limited to communication on electronic mailing lists, source code control +systems, and issue tracking systems that are managed by, or on behalf of, the +Licensor for the purpose of discussing and improving the Work, but excluding +communication that is conspicuously marked or otherwise designated in writing +by the Rights owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of License. 
Subject to the terms and conditions of this License, each +Contributor hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable license under the Rights to reproduce, +prepare Derivative Works of, publicly display, publicly perform, sublicense, +and distribute the Work and such Derivative Works in Source or Object form and +do anything in relation to the Work as if the Rights did not exist. + +3. Grant of Patent License. Subject to the terms and conditions of this +License, each Contributor hereby grants to You a perpetual, worldwide, +non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this +section) patent license to make, have made, use, offer to sell, sell, import, +and otherwise transfer the Work, where such license applies only to those +patent claims licensable by such Contributor that are necessarily infringed by +their Contribution(s) alone or by combination of their Contribution(s) with the +Work to which such Contribution(s) was submitted. If You institute patent +litigation against any entity (including a cross-claim or counterclaim in a +lawsuit) alleging that the Work or a Contribution incorporated within the Work +constitutes direct or contributory patent infringement, then any patent +licenses granted to You under this License for that Work shall terminate as of +the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the Work or +Derivative Works thereof in any medium, with or without modifications, and in +Source or Object form, provided that You meet the following conditions: + + You must give any other recipients of the Work or Derivative Works a copy + of this License; and + + You must cause any modified files to carry prominent notices stating that + You changed the files; and + + You must retain, in the Source form of any Derivative Works that You + distribute, all copyright, patent, trademark, and attribution notices from + the Source form of the Work, excluding those notices that do not pertain to + any part of the Derivative Works; and + + If the Work includes a "NOTICE" text file as part of its distribution, then + any Derivative Works that You distribute must include a readable copy of + the attribution notices contained within such NOTICE file, excluding those + notices that do not pertain to any part of the Derivative Works, in at + least one of the following places: within a NOTICE text file distributed as + part of the Derivative Works; within the Source form or documentation, if + provided along with the Derivative Works; or, within a display generated by + the Derivative Works, if and wherever such third-party notices normally + appear. The contents of the NOTICE file are for informational purposes only + and do not modify the License. You may add Your own attribution notices + within Derivative Works that You distribute, alongside or as an addendum to + the NOTICE text from the Work, provided that such additional attribution + notices cannot be construed as modifying the License. 
You may add Your own + copyright statement to Your modifications and may provide additional or + different license terms and conditions for use, reproduction, or + distribution of Your modifications, or for any such Derivative Works as a + whole, provided Your use, reproduction, and distribution of the Work + otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any +Contribution intentionally submitted for inclusion in the Work by You to the +Licensor shall be under the terms and conditions of this License, without any +additional terms or conditions. Notwithstanding the above, nothing herein shall +supersede or modify the terms of any separate license agreement you may have +executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, +trademarks, service marks, or product names of the Licensor, except as required +for reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in +writing, Licensor provides the Work (and each Contributor provides its +Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied, including, without limitation, any warranties +or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. You are solely responsible for determining the +appropriateness of using or redistributing the Work and assume any risks +associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in +tort (including negligence), contract, or otherwise, unless required by +applicable law (such as deliberate and grossly negligent acts) or agreed to in +writing, shall any Contributor be liable to You for damages, including any +direct, indirect, special, incidental, or consequential damages of any +character arising as a result of this License or out of the use or inability to +use the Work (including but not limited to damages for loss of goodwill, work +stoppage, computer failure or malfunction, or any and all other commercial +damages or losses), even if such Contributor has been advised of the +possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or +Derivative Works thereof, You may choose to offer, and charge a fee for, +acceptance of support, warranty, indemnity, or other liability obligations +and/or rights consistent with this License. However, in accepting such +obligations, You may act only on Your own behalf and on Your sole +responsibility, not on behalf of any other Contributor, and only if You agree +to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/.gitignore b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/.gitignore new file mode 100644 index 0000000000..5c405f7b58 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/.gitignore @@ -0,0 +1,2 @@ +*~ +*/*~ \ No newline at end of file diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv new file mode 100644 index 0000000000..bda9c01fb7 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv @@ -0,0 +1,3413 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 04/03/2018 // +// Design Name: FPU // +// Module Name: control_mvp.sv // +// Project Name: Private FPU // +// Language: SystemVerilog // +// // +// Description: the control logic of div and sqrt // +// // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan and add low power // +// control for special cases // +// Revision Date: 13/04/2018 // +// Lei Li // +// To fix some bug found in Control FSM // +// when Iteration_unit_num_S = 2'b10 // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module control_mvp + + (//Input + input logic Clk_CI, + input logic Rst_RBI, + input logic Div_start_SI , + input logic Sqrt_start_SI, + input logic Start_SI, + input logic Kill_SI, + input logic Special_case_SBI, + input logic Special_case_dly_SBI, + input logic [C_PC-1:0] Precision_ctl_SI, + input logic [1:0] Format_sel_SI, + input logic [C_MANT_FP64:0] Numerator_DI, + input logic [C_EXP_FP64:0] Exp_num_DI, + input logic [C_MANT_FP64:0] Denominator_DI, + input logic [C_EXP_FP64:0] Exp_den_DI, + + + output logic Div_start_dly_SO , + output logic Sqrt_start_dly_SO, + output logic Div_enable_SO, + output logic Sqrt_enable_SO, + + + //To next stage + output logic Full_precision_SO, + output logic FP32_SO, + output logic FP64_SO, + output logic FP16_SO, + output logic FP16ALT_SO, + + output logic Ready_SO, + output logic Done_SO, + + output logic [C_MANT_FP64+4:0] Mant_result_prenorm_DO, + // output logic [3:0] Round_bit_DO, + output logic [C_EXP_FP64+1:0] Exp_result_prenorm_DO + ); + + logic [C_MANT_FP64+1+4:0] Partial_remainder_DN,Partial_remainder_DP; //58bits,r=q+2 + logic 
[C_MANT_FP64+4:0] Quotient_DP; //57bits + ///////////////////////////////////////////////////////////////////////////// + // Assign Inputs // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64+1:0] Numerator_se_D; //sign extension and hidden bit + logic [C_MANT_FP64+1:0] Denominator_se_D; //signa extension and hidden bit + logic [C_MANT_FP64+1:0] Denominator_se_DB; //1's complement + + assign Numerator_se_D={1'b0,Numerator_DI}; + + assign Denominator_se_D={1'b0,Denominator_DI}; + + always_comb + begin + if(FP32_SO) + begin + Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} }; + end + else if(FP64_SO) begin + Denominator_se_DB=~Denominator_se_D; + end + else if(FP16_SO) begin + Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} }; + end + else begin + Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; + end + end + + + logic [C_MANT_FP64+1:0] Mant_D_sqrt_Norm; + + assign Mant_D_sqrt_Norm=Exp_num_DI[0]?{1'b0,Numerator_DI}:{Numerator_DI,1'b0}; //for sqrt + + ///////////////////////////////////////////////////////////////////////////// + // Format Selection // + ///////////////////////////////////////////////////////////////////////////// + logic [1:0] Format_sel_S; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Format_sel_S<='b0; + end + else if(Start_SI&&Ready_SO) + begin + Format_sel_S<=Format_sel_SI; + end + else + begin + Format_sel_S<=Format_sel_S; + end + end + + assign FP32_SO = (Format_sel_S==2'b00); + assign FP64_SO = (Format_sel_S==2'b01); + assign FP16_SO = (Format_sel_S==2'b10); + assign FP16ALT_SO = (Format_sel_S==2'b11); + + + + ///////////////////////////////////////////////////////////////////////////// + // Precision Control // + 
/////////////////////////////////////////////////////////////////////////////
+
+  logic [C_PC-1:0] Precision_ctl_S;
+
+  // The requested precision is captured together with the operation start
+  // (Start_SI && Ready_SO); the register keeps its value at all other times.
+  always_ff @(posedge Clk_CI or negedge Rst_RBI)
+    begin
+      if (~Rst_RBI)
+        Precision_ctl_S <= '0;
+      else if (Start_SI && Ready_SO)
+        Precision_ctl_S <= Precision_ctl_SI;
+    end
+
+  // A precision setting of zero selects the full-precision count below
+  assign Full_precision_SO = (Precision_ctl_S == 6'h00);
+
+
+
+  logic [5:0] State_ctl_S;                  // counter value at which the FSM reaches its final state
+  logic [5:0] State_Two_iteration_unit_S;
+  logic [5:0] State_Four_iteration_unit_S;
+
+  assign State_Two_iteration_unit_S  = Precision_ctl_S[C_PC-1:1];  //Two iteration units
+  assign State_Four_iteration_unit_S = Precision_ctl_S[C_PC-1:2];  //Four iteration units
+
+  // Final counter value, selected by the number of iteration units, the
+  // registered format and the registered precision setting.
+  always_comb
+    begin
+      case (Iteration_unit_num_S)
+
+        //---------------------------- one iteration unit ----------------------------
+        2'b00:
+          begin
+            case (Format_sel_S)
+              2'b00: //FP32: 24+4 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h1b;
+                else                   State_ctl_S = Precision_ctl_S;
+              2'b01: //FP64: 53+4 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h38;
+                else                   State_ctl_S = Precision_ctl_S;
+              2'b10: //FP16: 11+4 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h0e;
+                else                   State_ctl_S = Precision_ctl_S;
+              2'b11: //FP16ALT: 8+4 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h0b;
+                else                   State_ctl_S = Precision_ctl_S;
+            endcase
+          end
+
+        //---------------------------- two iteration units ---------------------------
+        2'b01:
+          begin
+            case (Format_sel_S)
+              2'b00: //FP32: 24+4 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h0d;
+                else                   State_ctl_S = State_Two_iteration_unit_S;
+              2'b01: //FP64: 53+3 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h1b;
+                else                   State_ctl_S = State_Two_iteration_unit_S;
+              2'b10: //FP16: 11+3 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h06;
+                else                   State_ctl_S = State_Two_iteration_unit_S;
+              2'b11: //FP16ALT: 8+4 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h05;
+                else                   State_ctl_S = State_Two_iteration_unit_S;
+            endcase
+          end
+
+        //--------------------------- three iteration units --------------------------
+        // Explicit lookup from the precision setting. A setting of 6'h00 (full
+        // precision) and every setting not listed select the default entry.
+        2'b10:
+          begin
+            case (Format_sel_S)
+              2'b00: //FP32
+                case (Precision_ctl_S)
+                  6'h06,6'h07,6'h08: State_ctl_S = 6'h02;
+                  6'h09,6'h0a,6'h0b: State_ctl_S = 6'h03;
+                  6'h0c,6'h0d,6'h0e: State_ctl_S = 6'h04;
+                  6'h0f,6'h10,6'h11: State_ctl_S = 6'h05;
+                  6'h12,6'h13,6'h14: State_ctl_S = 6'h06;
+                  6'h15,6'h16,6'h17: State_ctl_S = 6'h07;
+                  default:           State_ctl_S = 6'h08; //24+3 more iterations for rounding bits
+                endcase
+              2'b01: //FP64
+                case (Precision_ctl_S)
+                  6'h06,6'h07,6'h08: State_ctl_S = 6'h02;
+                  6'h09,6'h0a,6'h0b: State_ctl_S = 6'h03;
+                  6'h0c,6'h0d,6'h0e: State_ctl_S = 6'h04;
+                  6'h0f,6'h10,6'h11: State_ctl_S = 6'h05;
+                  6'h12,6'h13,6'h14: State_ctl_S = 6'h06;
+                  6'h15,6'h16,6'h17: State_ctl_S = 6'h07;
+                  6'h18,6'h19,6'h1a: State_ctl_S = 6'h08;
+                  6'h1b,6'h1c,6'h1d: State_ctl_S = 6'h09;
+                  6'h1e,6'h1f,6'h20: State_ctl_S = 6'h0a;
+                  6'h21,6'h22,6'h23: State_ctl_S = 6'h0b;
+                  6'h24,6'h25,6'h26: State_ctl_S = 6'h0c;
+                  6'h27,6'h28,6'h29: State_ctl_S = 6'h0d;
+                  6'h2a,6'h2b,6'h2c: State_ctl_S = 6'h0e;
+                  6'h2d,6'h2e,6'h2f: State_ctl_S = 6'h0f;
+                  6'h30,6'h31,6'h32: State_ctl_S = 6'h10;
+                  6'h33,6'h34,6'h35: State_ctl_S = 6'h11;
+                  default:           State_ctl_S = 6'h12; //53+4 more iterations for rounding bits
+                endcase
+              2'b10: //FP16
+                case (Precision_ctl_S)
+                  6'h06,6'h07,6'h08: State_ctl_S = 6'h02;
+                  6'h09,6'h0a,6'h0b: State_ctl_S = 6'h03;
+                  // NOTE(review): original note reads "12+3", while every other FP16
+                  // entry in this table counts 11 mantissa bits - confirm intent.
+                  default:           State_ctl_S = 6'h04; //12+3 more iterations for rounding bits
+                endcase
+              2'b11: //FP16ALT
+                case (Precision_ctl_S)
+                  6'h06,6'h07,6'h08: State_ctl_S = 6'h02;
+                  default:           State_ctl_S = 6'h03; //8+4 more iterations for rounding bits
+                endcase
+            endcase
+          end
+
+        //--------------------------- four iteration units ---------------------------
+        2'b11:
+          begin
+            case (Format_sel_S)
+              2'b00: //FP32: 24+4 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h06;
+                else                   State_ctl_S = State_Four_iteration_unit_S;
+              2'b01: //FP64: 53+3 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h0d;
+                else                   State_ctl_S = State_Four_iteration_unit_S;
+              // NOTE(review): 6'h03 gives four counter states; with four units that
+              // looks like 16 iterations rather than the 11+4 noted - confirm.
+              2'b10: //FP16: 11+4 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h03;
+                else                   State_ctl_S = State_Four_iteration_unit_S;
+              2'b11: //FP16ALT: 8+4 more iterations for rounding bits
+                if (Full_precision_SO) State_ctl_S = 6'h02;
+                else                   State_ctl_S = State_Four_iteration_unit_S;
+            endcase
+          end
+
+      endcase
+    end
+
+
+  /////////////////////////////////////////////////////////////////////////////
+  // control logic                                                           //
+  /////////////////////////////////////////////////////////////////////////////
+
+  logic Div_start_dly_S;
+
+  // Div_start_dly_S: set for the cycle after a division start is accepted
+  always_ff @(posedge Clk_CI or negedge Rst_RBI)
+    begin
+      if (~Rst_RBI)
+        Div_start_dly_S <= 1'b0;
+      else if (Div_start_SI && Ready_SO)
+        Div_start_dly_S <= 1'b1;
+      else
+        Div_start_dly_S <= 1'b0;
+    end
+
+  assign Div_start_dly_SO = Div_start_dly_S;
+
+  // Div_enable_SO: set when a division start is accepted, cleared by Done_SO.
+  // Kill_SI clears it synchronously and takes priority over a new start.
+  always_ff @(posedge Clk_CI or negedge Rst_RBI)
+    begin
+      if (~Rst_RBI)
+        Div_enable_SO <= 1'b0;
+      else if (Kill_SI)
+        Div_enable_SO <= 1'b0;
+      else if (Div_start_SI && Ready_SO)
+        Div_enable_SO <= 1'b1;
+      else if (Done_SO)
+        Div_enable_SO <= 1'b0;
+    end
+
+  logic Sqrt_start_dly_S;
+
+  // Sqrt_start_dly_S: set for the cycle after a square-root start is accepted
+  always_ff @(posedge Clk_CI or negedge Rst_RBI)
+    begin
+      if (~Rst_RBI)
+        Sqrt_start_dly_S <= 1'b0;
+      else if (Sqrt_start_SI && Ready_SO)
+        Sqrt_start_dly_S <= 1'b1;
+      else
+        Sqrt_start_dly_S <= 1'b0;
+    end
+
+  assign Sqrt_start_dly_SO = Sqrt_start_dly_S;
+
+  // Sqrt_enable_SO: same set/clear scheme as Div_enable_SO, for square root
+  always_ff @(posedge Clk_CI or negedge Rst_RBI)
+    begin
+      if (~Rst_RBI)
+        Sqrt_enable_SO <= 1'b0;
+      else if (Kill_SI)
+        Sqrt_enable_SO <= 1'b0;
+      else if (Sqrt_start_SI && Ready_SO)
+        Sqrt_enable_SO <= 1'b1;
+      else if (Done_SO)
+        Sqrt_enable_SO <= 1'b0;
+    end
+
+  logic [5:0] Crtl_cnt_S;
+  logic       Start_dly_S;
+
+  assign Start_dly_S = Div_start_dly_S |Sqrt_start_dly_S;
+
+  // The counter advances while an operation has just started or the counter is
+  // already non-zero, provided Kill_SI is low and Special_case_dly_SBI is high.
+  logic Fsm_enable_S;
+  assign Fsm_enable_S = ( (Start_dly_S | (| Crtl_cnt_S)) && (~Kill_SI) && Special_case_dly_SBI);
+
+  // The last counter state is reached when the counter equals State_ctl_S
+  logic Final_state_S;
+  assign Final_state_S = (Crtl_cnt_S==State_ctl_S);
+
+
+  // Iteration counter (control FSM): returns to zero on the final state, on
+  // Kill_SI, and whenever the FSM is not enabled.
+  always_ff @(posedge Clk_CI or negedge Rst_RBI)
+    begin
+      if (~Rst_RBI)
+        Crtl_cnt_S <= '0;
+      else if (Final_state_S | Kill_SI)
+        Crtl_cnt_S <= '0;
+      else if (Fsm_enable_S)
+        Crtl_cnt_S <= Crtl_cnt_S+1;
+      else
+        Crtl_cnt_S <= '0;
+    end
+
+
+
+  // Done_SO, shared by division and square root: raised in the cycle after a
+  // start is accepted with Special_case_SBI low, or in the cycle after the
+  // final counter state; low otherwise.
+  always_ff @(posedge Clk_CI or negedge Rst_RBI)
+    begin
+      if (~Rst_RBI)
+        Done_SO <= 1'b0;
+      else if (Start_SI && Ready_SO)
+        begin
+          if (~Special_case_SBI)
+            Done_SO <= 1'b1;
+          else
+            Done_SO <= 1'b0;
+        end
+      else if (Final_state_S)
+        Done_SO <= 1'b1;
+      else
+        Done_SO <= 1'b0;
+    end
+
+
+
+
+  // Ready_SO: high out of reset; drops when a start is accepted with
+  // Special_case_SBI high, and rises again on the final counter state or on
+  // Kill_SI. In all other cycles it keeps its value.
+  always_ff @(posedge Clk_CI or negedge Rst_RBI)
+    begin
+      if (~Rst_RBI)
+        Ready_SO <= 1'b1;
+      else if (Start_SI && Ready_SO)
+        begin
+          if (~Special_case_SBI)
+            Ready_SO <= 1'b1;
+          else
+            Ready_SO <= 1'b0;
+        end
+      else if (Final_state_S | Kill_SI)
+        Ready_SO <= 1'b1;
+    end
+
+
+  /////////////////////////////////////////////////////////////////////////////
+  // Declarations for square root when Iteration_unit_num_S = 2'b00, start   //
+  ////////////////////////////////////////////////////////////////////////////
+
+  logic        Qcnt_one_0;
+  logic        Qcnt_one_1;
+  logic [1:0]  Qcnt_one_2;
+  logic [2:0]  Qcnt_one_3;
+  logic [3:0]  Qcnt_one_4;
+  logic [4:0]  Qcnt_one_5;
+  logic [5:0]  Qcnt_one_6;
+  logic [6:0]  Qcnt_one_7;
+  logic [7:0]  Qcnt_one_8;
+  logic [8:0]  Qcnt_one_9;
+  logic [9:0]  Qcnt_one_10;
+  logic [10:0] Qcnt_one_11;
+  logic [11:0] Qcnt_one_12;
+  logic [12:0] Qcnt_one_13;
+  logic [13:0] Qcnt_one_14;
+  logic [14:0] Qcnt_one_15;
+  logic [15:0] Qcnt_one_16;
+  logic [16:0] Qcnt_one_17;
+  logic [17:0] Qcnt_one_18;
+  logic [18:0] Qcnt_one_19;
+  logic [19:0] Qcnt_one_20;
+  logic [20:0] Qcnt_one_21;
+  logic [21:0] Qcnt_one_22;
+  logic [22:0] Qcnt_one_23;
+  logic [23:0] Qcnt_one_24;
+  logic [24:0] Qcnt_one_25;
+  logic [25:0] Qcnt_one_26;
+  logic [26:0] Qcnt_one_27;
+  logic [27:0] Qcnt_one_28;
+  logic [28:0] Qcnt_one_29;
+  logic [29:0] Qcnt_one_30;
+  logic [30:0] Qcnt_one_31;
+  logic [31:0] Qcnt_one_32;
+  logic [32:0] Qcnt_one_33;
+  logic [33:0] Qcnt_one_34;
+  logic [34:0] Qcnt_one_35;
+  logic [35:0] Qcnt_one_36;
+  logic [36:0] Qcnt_one_37;
+  logic [37:0] Qcnt_one_38;
+  logic [38:0] Qcnt_one_39;
+  logic [39:0] Qcnt_one_40;
+  logic [40:0] Qcnt_one_41;
+  logic [41:0] Qcnt_one_42;
+  logic [42:0] Qcnt_one_43;
+  logic [43:0] Qcnt_one_44;
+  logic [44:0] Qcnt_one_45;
+  logic [45:0] Qcnt_one_46;
+  logic [46:0] Qcnt_one_47;
+  logic [47:0] Qcnt_one_48;
+  logic [48:0] Qcnt_one_49;
+  logic [49:0] Qcnt_one_50;
+  logic [50:0] Qcnt_one_51;
+  logic [51:0] Qcnt_one_52;
+  logic [52:0] Qcnt_one_53;
+  logic [53:0] Qcnt_one_54;
+  logic [54:0] Qcnt_one_55;
+  logic [55:0] Qcnt_one_56;
+  logic [56:0] Qcnt_one_57;
+  logic [57:0] Qcnt_one_58;
+  logic [58:0] Qcnt_one_59;
+  logic [59:0] Qcnt_one_60;
+
+  /////////////////////////////////////////////////////////////////////////////
+  // Declarations for square root when Iteration_unit_num_S = 2'b00, end     //
+  ////////////////////////////////////////////////////////////////////////////
+
+
+
+  /////////////////////////////////////////////////////////////////////////////
+  // Declarations for square root when Iteration_unit_num_S = 2'b01, start   //
+  ////////////////////////////////////////////////////////////////////////////
+  logic [1:0]  Qcnt_two_0;
+
logic [2:0] Qcnt_two_1; + logic [4:0] Qcnt_two_2; + logic [6:0] Qcnt_two_3; + logic [8:0] Qcnt_two_4; + logic [10:0] Qcnt_two_5; + logic [12:0] Qcnt_two_6; + logic [14:0] Qcnt_two_7; + logic [16:0] Qcnt_two_8; + logic [18:0] Qcnt_two_9; + logic [20:0] Qcnt_two_10; + logic [22:0] Qcnt_two_11; + logic [24:0] Qcnt_two_12; + logic [26:0] Qcnt_two_13; + logic [28:0] Qcnt_two_14; + logic [30:0] Qcnt_two_15; + logic [32:0] Qcnt_two_16; + logic [34:0] Qcnt_two_17; + logic [36:0] Qcnt_two_18; + logic [38:0] Qcnt_two_19; + logic [40:0] Qcnt_two_20; + logic [42:0] Qcnt_two_21; + logic [44:0] Qcnt_two_22; + logic [46:0] Qcnt_two_23; + logic [48:0] Qcnt_two_24; + logic [50:0] Qcnt_two_25; + logic [52:0] Qcnt_two_26; + logic [54:0] Qcnt_two_27; + logic [56:0] Qcnt_two_28; + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b01, end // + //////////////////////////////////////////////////////////////////////////// + + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b10, start // + //////////////////////////////////////////////////////////////////////////// + logic [2:0] Qcnt_three_0; + logic [4:0] Qcnt_three_1; + logic [7:0] Qcnt_three_2; + logic [10:0] Qcnt_three_3; + logic [13:0] Qcnt_three_4; + logic [16:0] Qcnt_three_5; + logic [19:0] Qcnt_three_6; + logic [22:0] Qcnt_three_7; + logic [25:0] Qcnt_three_8; + logic [28:0] Qcnt_three_9; + logic [31:0] Qcnt_three_10; + logic [34:0] Qcnt_three_11; + logic [37:0] Qcnt_three_12; + logic [40:0] Qcnt_three_13; + logic [43:0] Qcnt_three_14; + logic [46:0] Qcnt_three_15; + logic [49:0] Qcnt_three_16; + logic [52:0] Qcnt_three_17; + logic [55:0] Qcnt_three_18; + logic [58:0] Qcnt_three_19; + logic [61:0] Qcnt_three_20; + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when 
Iteration_unit_num_S = 2'b10, end // + //////////////////////////////////////////////////////////////////////////// + + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b11, start // + //////////////////////////////////////////////////////////////////////////// + logic [3:0] Qcnt_four_0; + logic [6:0] Qcnt_four_1; + logic [10:0] Qcnt_four_2; + logic [14:0] Qcnt_four_3; + logic [18:0] Qcnt_four_4; + logic [22:0] Qcnt_four_5; + logic [26:0] Qcnt_four_6; + logic [30:0] Qcnt_four_7; + logic [34:0] Qcnt_four_8; + logic [38:0] Qcnt_four_9; + logic [42:0] Qcnt_four_10; + logic [46:0] Qcnt_four_11; + logic [50:0] Qcnt_four_12; + logic [54:0] Qcnt_four_13; + logic [58:0] Qcnt_four_14; + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b11, end // + //////////////////////////////////////////////////////////////////////////// + + + + logic [C_MANT_FP64+1+4:0] Sqrt_R0,Sqrt_Q0,Q_sqrt0,Q_sqrt_com_0; + logic [C_MANT_FP64+1+4:0] Sqrt_R1,Sqrt_Q1,Q_sqrt1,Q_sqrt_com_1; + logic [C_MANT_FP64+1+4:0] Sqrt_R2,Sqrt_Q2,Q_sqrt2,Q_sqrt_com_2; + logic [C_MANT_FP64+1+4:0] Sqrt_R3,Sqrt_Q3,Q_sqrt3,Q_sqrt_com_3,Sqrt_R4; //Sqrt_Q4; + + + logic [1:0] Sqrt_DI [3:0]; + logic [1:0] Sqrt_DO [3:0]; + logic Sqrt_carry_DO; + + + logic [C_MANT_FP64+1+4:0] Iteration_cell_a_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_b_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_a_BMASK_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_b_BMASK_D [3:0]; + logic Iteration_cell_carry_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_sum_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_sum_AMASK_D [3:0]; + + + logic [3:0] Sqrt_quotinent_S; + + + always_comb + begin // + case (Format_sel_S) + 2'b00: + begin + Sqrt_quotinent_S = 
{(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP32+5])}; + Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt0[C_MANT_FP32+5:0] }; + Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt1[C_MANT_FP32+5:0] }; + Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt2[C_MANT_FP32+5:0] }; + Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt3[C_MANT_FP32+5:0] }; + end + 2'b01: + begin + Sqrt_quotinent_S = {Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2],Iteration_cell_carry_D[3]}; + Q_sqrt_com_0=~Q_sqrt0; + Q_sqrt_com_1=~Q_sqrt1; + Q_sqrt_com_2=~Q_sqrt2; + Q_sqrt_com_3=~Q_sqrt3; + end + 2'b10: + begin + Sqrt_quotinent_S = {(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP16+5])}; + Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt0[C_MANT_FP16+5:0] }; + Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt1[C_MANT_FP16+5:0] }; + Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt2[C_MANT_FP16+5:0] }; + Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt3[C_MANT_FP16+5:0] }; + end + 2'b11: + begin + Sqrt_quotinent_S = {(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP16ALT+5])}; + Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt0[C_MANT_FP16ALT+5:0] }; + Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt1[C_MANT_FP16ALT+5:0] }; + Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt2[C_MANT_FP16ALT+5:0] }; + Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt3[C_MANT_FP16ALT+5:0] }; + end + endcase + end + + + + assign Qcnt_one_0= {1'b0}; //qk for 
each feedback + assign Qcnt_one_1= {Quotient_DP[0]}; + assign Qcnt_one_2= {Quotient_DP[1:0]}; + assign Qcnt_one_3= {Quotient_DP[2:0]}; + assign Qcnt_one_4= {Quotient_DP[3:0]}; + assign Qcnt_one_5= {Quotient_DP[4:0]}; + assign Qcnt_one_6= {Quotient_DP[5:0]}; + assign Qcnt_one_7= {Quotient_DP[6:0]}; + assign Qcnt_one_8= {Quotient_DP[7:0]}; + assign Qcnt_one_9= {Quotient_DP[8:0]}; + assign Qcnt_one_10= {Quotient_DP[9:0]}; + assign Qcnt_one_11= {Quotient_DP[10:0]}; + assign Qcnt_one_12= {Quotient_DP[11:0]}; + assign Qcnt_one_13= {Quotient_DP[12:0]}; + assign Qcnt_one_14= {Quotient_DP[13:0]}; + assign Qcnt_one_15= {Quotient_DP[14:0]}; + assign Qcnt_one_16= {Quotient_DP[15:0]}; + assign Qcnt_one_17= {Quotient_DP[16:0]}; + assign Qcnt_one_18= {Quotient_DP[17:0]}; + assign Qcnt_one_19= {Quotient_DP[18:0]}; + assign Qcnt_one_20= {Quotient_DP[19:0]}; + assign Qcnt_one_21= {Quotient_DP[20:0]}; + assign Qcnt_one_22= {Quotient_DP[21:0]}; + assign Qcnt_one_23= {Quotient_DP[22:0]}; + assign Qcnt_one_24= {Quotient_DP[23:0]}; + assign Qcnt_one_25= {Quotient_DP[24:0]}; + assign Qcnt_one_26= {Quotient_DP[25:0]}; + assign Qcnt_one_27= {Quotient_DP[26:0]}; + assign Qcnt_one_28= {Quotient_DP[27:0]}; + assign Qcnt_one_29= {Quotient_DP[28:0]}; + assign Qcnt_one_30= {Quotient_DP[29:0]}; + assign Qcnt_one_31= {Quotient_DP[30:0]}; + assign Qcnt_one_32= {Quotient_DP[31:0]}; + assign Qcnt_one_33= {Quotient_DP[32:0]}; + assign Qcnt_one_34= {Quotient_DP[33:0]}; + assign Qcnt_one_35= {Quotient_DP[34:0]}; + assign Qcnt_one_36= {Quotient_DP[35:0]}; + assign Qcnt_one_37= {Quotient_DP[36:0]}; + assign Qcnt_one_38= {Quotient_DP[37:0]}; + assign Qcnt_one_39= {Quotient_DP[38:0]}; + assign Qcnt_one_40= {Quotient_DP[39:0]}; + assign Qcnt_one_41= {Quotient_DP[40:0]}; + assign Qcnt_one_42= {Quotient_DP[41:0]}; + assign Qcnt_one_43= {Quotient_DP[42:0]}; + assign Qcnt_one_44= {Quotient_DP[43:0]}; + assign Qcnt_one_45= {Quotient_DP[44:0]}; + assign Qcnt_one_46= {Quotient_DP[45:0]}; + assign Qcnt_one_47= 
{Quotient_DP[46:0]}; + assign Qcnt_one_48= {Quotient_DP[47:0]}; + assign Qcnt_one_49= {Quotient_DP[48:0]}; + assign Qcnt_one_50= {Quotient_DP[49:0]}; + assign Qcnt_one_51= {Quotient_DP[50:0]}; + assign Qcnt_one_52= {Quotient_DP[51:0]}; + assign Qcnt_one_53= {Quotient_DP[52:0]}; + assign Qcnt_one_54= {Quotient_DP[53:0]}; + assign Qcnt_one_55= {Quotient_DP[54:0]}; + assign Qcnt_one_56= {Quotient_DP[55:0]}; + assign Qcnt_one_57= {Quotient_DP[56:0]}; + + + assign Qcnt_two_0 = {1'b0, Sqrt_quotinent_S[3]}; //qk for each feedback + assign Qcnt_two_1 = {Quotient_DP[1:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_2 = {Quotient_DP[3:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_3 = {Quotient_DP[5:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_4 = {Quotient_DP[7:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_5 = {Quotient_DP[9:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_6 = {Quotient_DP[11:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_7 = {Quotient_DP[13:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_8 = {Quotient_DP[15:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_9 = {Quotient_DP[17:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_10 = {Quotient_DP[19:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_11 = {Quotient_DP[21:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_12 = {Quotient_DP[23:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_13 = {Quotient_DP[25:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_14 = {Quotient_DP[27:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_15 = {Quotient_DP[29:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_16 = {Quotient_DP[31:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_17 = {Quotient_DP[33:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_18 = {Quotient_DP[35:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_19 = {Quotient_DP[37:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_20 = {Quotient_DP[39:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_21 = {Quotient_DP[41:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_22 = {Quotient_DP[43:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_23 = {Quotient_DP[45:0],Sqrt_quotinent_S[3]}; + assign 
Qcnt_two_24 = {Quotient_DP[47:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_25 = {Quotient_DP[49:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_26 = {Quotient_DP[51:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_27 = {Quotient_DP[53:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_28 = {Quotient_DP[55:0],Sqrt_quotinent_S[3]}; + + + assign Qcnt_three_0 = {1'b0, Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; //qk for each feedback + assign Qcnt_three_1 = {Quotient_DP[2:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_2 = {Quotient_DP[5:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_3 = {Quotient_DP[8:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_4 = {Quotient_DP[11:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_5 = {Quotient_DP[14:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_6 = {Quotient_DP[17:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_7 = {Quotient_DP[20:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_8 = {Quotient_DP[23:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_9 = {Quotient_DP[26:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_10 = {Quotient_DP[29:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_11 = {Quotient_DP[32:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_12 = {Quotient_DP[35:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_13 = {Quotient_DP[38:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_14 = {Quotient_DP[41:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_15 = {Quotient_DP[44:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_16 = {Quotient_DP[47:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_17 = {Quotient_DP[50:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_18 = {Quotient_DP[53:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_19 = 
{Quotient_DP[56:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + + + assign Qcnt_four_0 = {1'b0, Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_1 = {Quotient_DP[3:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_2 = {Quotient_DP[7:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_3 = {Quotient_DP[11:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_4 = {Quotient_DP[15:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_5 = {Quotient_DP[19:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_6 = {Quotient_DP[23:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_7 = {Quotient_DP[27:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_8 = {Quotient_DP[31:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_9 = {Quotient_DP[35:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_10 = {Quotient_DP[39:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_11 = {Quotient_DP[43:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_12 = {Quotient_DP[47:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_13 = {Quotient_DP[51:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_14 = {Quotient_DP[55:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + + + + + always_comb begin // the intermediate operands for sqrt + + case(Iteration_unit_num_S) + 2'b00: + begin + + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b00, start // + ///////////////////////////////////////////////////////////////////////////// + + + + + case(Crtl_cnt_S) + + 
6'b000000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_0}; + Sqrt_Q0=Q_sqrt_com_0; + end + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_1}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_one_2}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + Q_sqrt0={{(C_MANT_FP64+3){1'b0}},Qcnt_one_3}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_one_4}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_one_5}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_one_6}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_one_7}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_one_8}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_one_9}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_one_10}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001011: + 
begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + Q_sqrt0={{(C_MANT_FP64-5){1'b0}},Qcnt_one_11}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_one_12}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_one_13}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_one_14}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt0={{(C_MANT_FP64-9){1'b0}},Qcnt_one_15}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_one_16}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_one_17}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_one_18}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_one_19}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_one_20}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_one_21}; + 
Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_one_22}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt0={{(C_MANT_FP64-17){1'b0}},Qcnt_one_23}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_one_24}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_one_25}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_one_26}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-21){1'b0}},Qcnt_one_27}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_one_28}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_one_29}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_one_30}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_one_31}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_one_32}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_one_33}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 
6'b100010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_one_34}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-29){1'b0}},Qcnt_one_35}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_one_36}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_one_37}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_one_38}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-33){1'b0}},Qcnt_one_39}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_one_40}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_one_41}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_one_42}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_one_43}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_one_44}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_one_45}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_one_46}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-41){1'b0}},Qcnt_one_47}; + 
Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_one_48}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_one_49}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_one_50}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-45){1'b0}},Qcnt_one_51}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_one_52}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_one_53}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_one_54}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_one_55}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b111000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_one_56}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + + default: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0='0; + Sqrt_Q0='0; + end + endcase + end + + + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b00, end // + ///////////////////////////////////////////////////////////////////////////// + + + 2'b01: + begin + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b01, start // + ///////////////////////////////////////////////////////////////////////////// + case(Crtl_cnt_S) + + 6'b000000: + begin + 
Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_two_1[2:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + Q_sqrt1={{(C_MANT_FP64+3){1'b0}},Qcnt_two_1[2:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_two_2[4:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_two_2[4:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_two_3[6:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt1={{(C_MANT_FP64-1){1'b0}},Qcnt_two_3[6:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_two_4[8:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt1={{(C_MANT_FP64-3){1'b0}},Qcnt_two_4[8:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_two_5[10:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + 
Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_two_5[10:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_two_6[12:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt1={{(C_MANT_FP64-7){1'b0}},Qcnt_two_6[12:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_two_7[14:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt1={{(C_MANT_FP64-9){1'b0}},Qcnt_two_7[14:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_two_8[16:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_two_8[16:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_two_9[18:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt1={{(C_MANT_FP64-13){1'b0}},Qcnt_two_9[18:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_two_10[20:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + Q_sqrt1={{(C_MANT_FP64-15){1'b0}},Qcnt_two_10[20:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001011: + begin + 
Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_two_11[22:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_two_11[22:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_two_12[24:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt1={{(C_MANT_FP64-19){1'b0}},Qcnt_two_12[24:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_two_13[26:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-21){1'b0}},Qcnt_two_13[26:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_two_14[28:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_two_14[28:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_two_15[30:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-25){1'b0}},Qcnt_two_15[30:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_two_16[32:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-27){1'b0}},Qcnt_two_16[32:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_two_17[34:1]}; + 
Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_two_17[34:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_two_18[36:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-31){1'b0}},Qcnt_two_18[36:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_two_19[38:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-33){1'b0}},Qcnt_two_19[38:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_two_20[40:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_two_20[40:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_two_21[42:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-37){1'b0}},Qcnt_two_21[42:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_two_22[44:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-39){1'b0}},Qcnt_two_22[44:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_two_23[46:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_two_23[46:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_two_24[48:1]}; + 
Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-43){1'b0}},Qcnt_two_24[48:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_two_25[50:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-45){1'b0}},Qcnt_two_25[50:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_two_26[52:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_two_26[52:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_two_27[54:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-49){1'b0}},Qcnt_two_27[54:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_two_28[56:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-51){1'b0}},Qcnt_two_28[56:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + default: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + endcase + end + + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b01, end // + ///////////////////////////////////////////////////////////////////////////// + + + 2'b10: + begin + ///////////////////////////////////////////////////////////////////////////// + // Operands for 
square root when Iteration_unit_num_S = 2'b10, start // + ///////////////////////////////////////////////////////////////////////////// + + case(Crtl_cnt_S) + 6'b000000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_three_1[4:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_three_1[4:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt2={{(C_MANT_FP64){1'b0}},Qcnt_three_1[4:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_three_2[7:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt1={{(C_MANT_FP64-2){1'b0}},Qcnt_three_2[7:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt2={{(C_MANT_FP64-3){1'b0}},Qcnt_three_2[7:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_three_3[10:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + 
Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_three_3[10:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + Q_sqrt2={{(C_MANT_FP64-6){1'b0}},Qcnt_three_3[10:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_three_4[13:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_three_4[13:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_three_4[13:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_three_5[16:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_three_5[16:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt2={{(C_MANT_FP64-12){1'b0}},Qcnt_three_5[16:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_three_6[19:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt1={{(C_MANT_FP64-14){1'b0}},Qcnt_three_6[19:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt2={{(C_MANT_FP64-15){1'b0}},Qcnt_three_6[19:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + 
Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_three_7[22:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_three_7[22:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt2={{(C_MANT_FP64-18){1'b0}},Qcnt_three_7[22:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_three_8[25:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_three_8[25:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_three_8[25:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_three_9[28:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_three_9[28:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-24){1'b0}},Qcnt_three_9[28:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_three_10[31:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-26){1'b0}},Qcnt_three_10[31:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-27){1'b0}},Qcnt_three_10[31:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_three_11[34:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + 
Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_three_11[34:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-30){1'b0}},Qcnt_three_11[34:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_three_12[37:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_three_12[37:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_three_12[37:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_three_13[40:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_three_13[40:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-36){1'b0}},Qcnt_three_13[40:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_three_14[43:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-38){1'b0}},Qcnt_three_14[43:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-39){1'b0}},Qcnt_three_14[43:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_three_15[46:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_three_15[46:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-42){1'b0}},Qcnt_three_15[46:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b010000: + begin + Sqrt_DI[0]=2'b00; + 
Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_three_16[49:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_three_16[49:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_three_16[49:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b010001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_three_17[52:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_three_17[52:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-48){1'b0}},Qcnt_three_17[52:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b010010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_three_18[55:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-50){1'b0}},Qcnt_three_18[55:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-51){1'b0}},Qcnt_three_18[55:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + default : + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + endcase + + end + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b10, end // + ///////////////////////////////////////////////////////////////////////////// + + + 2'b11: + begin + 
///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b11, start // + ///////////////////////////////////////////////////////////////////////////// + + case(Crtl_cnt_S) + + 6'b000000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_four_1[6:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt1={{(C_MANT_FP64){1'b0}},Qcnt_four_1[6:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt2={{(C_MANT_FP64-1){1'b0}},Qcnt_four_1[6:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt3={{(C_MANT_FP64-2){1'b0}},Qcnt_four_1[6:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_four_2[10:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt1={{(C_MANT_FP64-4){1'b0}},Qcnt_four_2[10:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + 
Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + Q_sqrt2={{(C_MANT_FP64-5){1'b0}},Qcnt_four_2[10:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + Q_sqrt3={{(C_MANT_FP64-6){1'b0}},Qcnt_four_2[10:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_four_3[14:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_four_3[14:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_four_3[14:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt3={{(C_MANT_FP64-10){1'b0}},Qcnt_four_3[14:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_four_4[18:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt1={{(C_MANT_FP64-12){1'b0}},Qcnt_four_4[18:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt2={{(C_MANT_FP64-13){1'b0}},Qcnt_four_4[18:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt3={{(C_MANT_FP64-14){1'b0}},Qcnt_four_4[18:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_four_5[22:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + 
Q_sqrt1={{(C_MANT_FP64-16){1'b0}},Qcnt_four_5[22:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt2={{(C_MANT_FP64-17){1'b0}},Qcnt_four_5[22:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt3={{(C_MANT_FP64-18){1'b0}},Qcnt_four_5[22:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_four_6[26:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_four_6[26:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_four_6[26:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-22){1'b0}},Qcnt_four_6[26:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_four_7[30:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-24){1'b0}},Qcnt_four_7[30:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-25){1'b0}},Qcnt_four_7[30:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-26){1'b0}},Qcnt_four_7[30:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_four_8[34:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-28){1'b0}},Qcnt_four_8[34:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-29){1'b0}},Qcnt_four_8[34:1]}; 
+ Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-30){1'b0}},Qcnt_four_8[34:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_four_9[38:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_four_9[38:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_four_9[38:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-34){1'b0}},Qcnt_four_9[38:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_four_10[42:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-36){1'b0}},Qcnt_four_10[42:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-37){1'b0}},Qcnt_four_10[42:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-38){1'b0}},Qcnt_four_10[42:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_four_11[46:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-40){1'b0}},Qcnt_four_11[46:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-41){1'b0}},Qcnt_four_11[46:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-42){1'b0}},Qcnt_four_11[46:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_four_12[50:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + 
Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_four_12[50:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_four_12[50:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-46){1'b0}},Qcnt_four_12[50:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_four_13[54:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-48){1'b0}},Qcnt_four_13[54:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-49){1'b0}},Qcnt_four_13[54:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-50){1'b0}},Qcnt_four_13[54:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + default: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + endcase + end + endcase + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b11, end // + ///////////////////////////////////////////////////////////////////////////// + end + + + + assign Sqrt_R0= ((Sqrt_start_dly_S)?'0:{Partial_remainder_DP[C_MANT_FP64+5:0]}); + assign Sqrt_R1= 
{Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+2:0],Sqrt_DO[0]} ; + assign Sqrt_R2= {Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+2:0],Sqrt_DO[1]}; + assign Sqrt_R3= {Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+2:0],Sqrt_DO[2]}; + assign Sqrt_R4= {Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+2:0],Sqrt_DO[3]}; + + logic [C_MANT_FP64+5:0] Denominator_se_format_DB; // + + assign Denominator_se_format_DB={Denominator_se_DB[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-1]}, + Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-1]}, + Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-1]}, + Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0} ; + // for iteration cell_U0 + logic [C_MANT_FP64+5:0] First_iteration_cell_div_a_D,First_iteration_cell_div_b_D; + logic Sel_b_for_first_S; + + + assign First_iteration_cell_div_a_D=(Div_start_dly_S)?{Numerator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-1]}, + Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16-1]}, + Numerator_se_D[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP32-1]}, + Numerator_se_D[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0} + :{Partial_remainder_DP[C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+2]}, + 
Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+2]}, + Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+2]}, + Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Quotient_DP[0],3'b0}; + assign Sel_b_for_first_S=(Div_start_dly_S)?1:Quotient_DP[0]; + assign First_iteration_cell_div_b_D=Sel_b_for_first_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[0]=Sqrt_enable_SO?Sqrt_R0:{First_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[0]=Sqrt_enable_SO?Sqrt_Q0:{First_iteration_cell_div_b_D}; + + + + // for iteration cell_U1 + logic [C_MANT_FP64+5:0] Sec_iteration_cell_div_a_D,Sec_iteration_cell_div_b_D; + logic Sel_b_for_sec_S; + generate + if(|Iteration_unit_num_S) + begin + assign Sel_b_for_sec_S=~Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5]; + assign Sec_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+2]}, + Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+2]}, + Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+2]}, + Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_sec_S,3'b0}; + assign Sec_iteration_cell_div_b_D=Sel_b_for_sec_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[1]=Sqrt_enable_SO?Sqrt_R1:{Sec_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[1]=Sqrt_enable_SO?Sqrt_Q1:{Sec_iteration_cell_div_b_D}; + end + endgenerate + + // for 
iteration cell_U2 + logic [C_MANT_FP64+5:0] Thi_iteration_cell_div_a_D,Thi_iteration_cell_div_b_D; + logic Sel_b_for_thi_S; + generate + if((Iteration_unit_num_S==2'b10) | (Iteration_unit_num_S==2'b11)) + begin + assign Sel_b_for_thi_S=~Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5]; + assign Thi_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+2]}, + Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+2]}, + Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+2]}, + Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_thi_S,3'b0}; + assign Thi_iteration_cell_div_b_D=Sel_b_for_thi_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[2]=Sqrt_enable_SO?Sqrt_R2:{Thi_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[2]=Sqrt_enable_SO?Sqrt_Q2:{Thi_iteration_cell_div_b_D}; + end + endgenerate + + // for iteration cell_U3 + logic [C_MANT_FP64+5:0] Fou_iteration_cell_div_a_D,Fou_iteration_cell_div_b_D; + logic Sel_b_for_fou_S; + + generate + if(Iteration_unit_num_S==2'b11) + begin + assign Sel_b_for_fou_S=~Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5]; + assign Fou_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+2]}, + Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+2]}, + 
Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+2]}, + Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_fou_S,3'b0}; + assign Fou_iteration_cell_div_b_D=Sel_b_for_fou_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[3]=Sqrt_enable_SO?Sqrt_R3:{Fou_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[3]=Sqrt_enable_SO?Sqrt_Q3:{Fou_iteration_cell_div_b_D}; + end + endgenerate + + ///////////////////////////////////////////////////////////////////////////// + // Masking Contrl // + ///////////////////////////////////////////////////////////////////////////// + + + logic [C_MANT_FP64+1+4:0] Mask_bits_ctl_S; //For extension + + assign Mask_bits_ctl_S =58'h3ff_ffff_ffff_ffff; //It is not needed. The corresponding process is handled the above codes + + ///////////////////////////////////////////////////////////////////////////// + // Iteration Instances with masking control // + ///////////////////////////////////////////////////////////////////////////// + + + logic Div_enable_SI [3:0]; + logic Div_start_dly_SI [3:0]; + logic Sqrt_enable_SI [3:0]; + generate + genvar i,j; + for (i=0; i <= Iteration_unit_num_S ; i++) + begin + for (j = 0; j <= C_MANT_FP64+5; j++) begin + assign Iteration_cell_a_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_a_BMASK_D[i][j]; + assign Iteration_cell_b_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_b_BMASK_D[i][j]; + assign Iteration_cell_sum_AMASK_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_sum_D[i][j]; + end + + assign Div_enable_SI[i] = Div_enable_SO; + assign Div_start_dly_SI[i] = Div_start_dly_S; + assign Sqrt_enable_SI[i] = Sqrt_enable_SO; + iteration_div_sqrt_mvp #(C_MANT_FP64+6) iteration_div_sqrt + ( + .A_DI (Iteration_cell_a_D[i] ), + .B_DI (Iteration_cell_b_D[i] ), + .Div_enable_SI (Div_enable_SI[i] ), + .Div_start_dly_SI 
(Div_start_dly_SI[i] ), + .Sqrt_enable_SI (Sqrt_enable_SI[i] ), + .D_DI (Sqrt_DI[i] ), + .D_DO (Sqrt_DO[i] ), + .Sum_DO (Iteration_cell_sum_D[i] ), + .Carry_out_DO (Iteration_cell_carry_D[i] ) + ); + + end + + endgenerate + + + + always_comb + begin + case (Iteration_unit_num_S) + 2'b00: + begin + if(Fsm_enable_S) + Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R1:Iteration_cell_sum_AMASK_D[0]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + 2'b01: + begin + if(Fsm_enable_S) + Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R2:Iteration_cell_sum_AMASK_D[1]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + 2'b10: + begin + if(Fsm_enable_S) + Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R3:Iteration_cell_sum_AMASK_D[2]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + 2'b11: + begin + if(Fsm_enable_S) + Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R4:Iteration_cell_sum_AMASK_D[3]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + endcase + end + + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) // partial_remainder + begin + if(~Rst_RBI) + begin + Partial_remainder_DP <= '0; + end + else + begin + Partial_remainder_DP <= Partial_remainder_DN; + end + end + + logic [C_MANT_FP64+4:0] Quotient_DN; + + always_comb // Can choosen the different carry-outs based on different operations + begin + case (Iteration_unit_num_S) + 2'b00: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+3:0],Sqrt_quotinent_S[3]} :{Quotient_DP[C_MANT_FP64+3:0],Iteration_cell_carry_D[0]}; + else + Quotient_DN= Quotient_DP; + end + 2'b01: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+2:0],Sqrt_quotinent_S[3:2]} :{Quotient_DP[C_MANT_FP64+2:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1]}; + else + Quotient_DN= Quotient_DP; + end + 2'b10: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ? 
{Quotient_DP[C_MANT_FP64+1:0],Sqrt_quotinent_S[3:1]} : {Quotient_DP[C_MANT_FP64+1:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2]}; + else + Quotient_DN= Quotient_DP; + end + 2'b11: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64:0],Sqrt_quotinent_S } : {Quotient_DP[C_MANT_FP64:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2],Iteration_cell_carry_D[3]}; + else + Quotient_DN= Quotient_DP; + end + endcase + end + + always_ff @(posedge Clk_CI, negedge Rst_RBI) // Quotient + begin + if(~Rst_RBI) + begin + Quotient_DP <= '0; + end + else + Quotient_DP <= Quotient_DN; + end + + + ///////////////////////////////////////////////////////////////////////////// + // Precision Control for outputs // + ///////////////////////////////////////////////////////////////////////////// + + +//////////////////////one iteration unit, start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b00) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + 6'h17: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-1:0],{(C_MANT_FP64-C_MANT_FP32+4+1){1'b0}}}; //Precision_ctl_S+1 + end + 6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h12: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP32-5:0],{(C_MANT_FP64-C_MANT_FP32+4+5){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-7:0],{(C_MANT_FP64-C_MANT_FP32+4+7){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-11:0],{(C_MANT_FP64-C_MANT_FP32+4+11){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-13:0],{(C_MANT_FP64-C_MANT_FP32+4+13){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + endcase + end + + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + 6'h34: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP64:0],{(4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h33: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}}}; //Precision_ctl_S+1 + end + 6'h32: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h31: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}}}; //Precision_ctl_S+1 + end + 6'h30: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-4:0],{(4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-6:0],{(4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-10:0],{(4+10){1'b0}}}; //Precision_ctl_S+1 + end + 6'h29: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}}}; //Precision_ctl_S+1 + end + 6'h28: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-12:0],{(4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h27: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}}}; //Precision_ctl_S+1 + end + 6'h26: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}}}; //Precision_ctl_S+1 + end + 6'h25: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}}}; //Precision_ctl_S+1 + end + 6'h24: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-16:0],{(4+16){1'b0}}}; //Precision_ctl_S+1 + end + 6'h23: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}}}; //Precision_ctl_S+1 + end + 6'h22: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-18:0],{(4+18){1'b0}}}; //Precision_ctl_S+1 + end + 6'h21: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}}}; //Precision_ctl_S+1 + end + 6'h20: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-22:0],{(4+22){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-24:0],{(4+24){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}}}; //Precision_ctl_S+1 + end + 6'h19: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}}}; //Precision_ctl_S+1 + end + 6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-28:0],{(4+28){1'b0}}}; //Precision_ctl_S+1 + end + 6'h17: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}}}; //Precision_ctl_S+1 + end + 6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-30:0],{(4+30){1'b0}}}; //Precision_ctl_S+1 + end + 6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}}}; //Precision_ctl_S+1 + end + 6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}}}; //Precision_ctl_S+1 + end + 6'h12: + begin + 
Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-34:0],{(4+34){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}}}; //Precision_ctl_S+1 + end + 6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-36:0],{(4+36){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-40:0],{(4+40){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-42:0],{(4+42){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16:0],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}}}; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4 + end + endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}}}; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////one iteration unit, end////////////////////////////////////////// + +//////////////////////two iteration units, start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b01) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + 6'h17,6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13,6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f,6'h0e: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b,6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + endcase + end + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3 + end + 6'h34: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h33,6'h32: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}} }; //Precision_ctl_S+1 + end + 6'h31,6'h30: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2f,6'h2e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2d,6'h2c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2b,6'h2a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}} }; //Precision_ctl_S+1 + end + 6'h29,6'h28: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1 + end + 6'h27,6'h26: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}} }; //Precision_ctl_S+1 + end + 6'h25,6'h24: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}} }; //Precision_ctl_S+1 + end + 6'h23,6'h22: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1 + end + 6'h21,6'h20: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1f,6'h1e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1d,6'h1c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1b,6'h1a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}} }; //Precision_ctl_S+1 + end + 6'h19,6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}} }; //Precision_ctl_S+1 + end + 6'h17,6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1 + end + 6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}} }; //Precision_ctl_S+1 + end + 6'h13,6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}} }; //Precision_ctl_S+1 + end + 6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0f,6'h0e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0b,6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1 + end + 6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}} }; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+3:0],{(C_MANT_FP64-C_MANT_FP16+1){1'b0}} }; //+3 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4 + end + endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////two iteration units, end////////////////////////////////////////// + +//////////////////////three iteration units, start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b10) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3 + end + 6'h17,6'h16,6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h14,6'h13,6'h12: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11,6'h10,6'h0f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08,6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3 + end + endcase + end + + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + 6'h34,6'h33: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h32,6'h31,6'h30: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2f,6'h2e,6'h2d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2c,6'h2b,6'h2a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}} }; //Precision_ctl_S+1 + end + 6'h29,6'h28,6'h27: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1 + end + 6'h26,6'h25,6'h24: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}} }; //Precision_ctl_S+1 + end + 6'h23,6'h22,6'h21: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1 + end + 6'h20,6'h1f,6'h1e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}} }; 
//Precision_ctl_S+1 + end + 6'h1d,6'h1c,6'h1b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1a,6'h19,6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}} }; //Precision_ctl_S+1 + end + 6'h17,6'h16,6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1 + end + 6'h14,6'h13,6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}} }; //Precision_ctl_S+1 + end + 6'h11,6'h10,6'h0f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1 + end + 6'h08,6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}} }; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4 + end + 6'h0a,6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h08,6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4 + end + endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + 6'h07,6'h06: + begin + 
Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+1:1],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////three iteration units, end////////////////////////////////////////// + +//////////////////////four iteration units, start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b11) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + 6'h17,6'h16,6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13,6'h12,6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f,6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + endcase + end + + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3 + end + 6'h34: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}} }; //Precision_ctl_S+1 + end + 6'h33,6'h32,6'h31,6'h30: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP64-1:0],{(5){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2f,6'h2e,6'h2d,6'h2c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(9){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2b,6'h2a,6'h29,6'h28: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(13){1'b0}} }; //Precision_ctl_S+1 + end + 6'h27,6'h26,6'h25,6'h24: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(17){1'b0}} }; //Precision_ctl_S+1 + end + 6'h23,6'h22,6'h21,6'h20: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(21){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1f,6'h1e,6'h1d,6'h1c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(25){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1b,6'h1a,6'h19,6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(29){1'b0}} }; //Precision_ctl_S+1 + end + 6'h17,6'h16,6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(33){1'b0}} }; //Precision_ctl_S+1 + end + 6'h13,6'h12,6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(37){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0f,6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(41){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(45){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(49){1'b0}} }; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5 + end + 6'h0a,6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin 
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1-4:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5 + end + endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////four iteration units, end/////////////////////////////////////// + + + + + +// resultant exponent + logic [C_EXP_FP64+1:0] Exp_result_prenorm_DN,Exp_result_prenorm_DP; + + logic [C_EXP_FP64+1:0] Exp_add_a_D; + logic [C_EXP_FP64+1:0] Exp_add_b_D; + logic [C_EXP_FP64+1:0] Exp_add_c_D; + + integer C_BIAS_AONE, C_HALF_BIAS; + always_comb + begin // + case (Format_sel_S) + 2'b00: + begin + C_BIAS_AONE =C_BIAS_AONE_FP32; + C_HALF_BIAS =C_HALF_BIAS_FP32; + end + 2'b01: + begin + C_BIAS_AONE =C_BIAS_AONE_FP64; + C_HALF_BIAS =C_HALF_BIAS_FP64; + end + 2'b10: + begin + C_BIAS_AONE =C_BIAS_AONE_FP16; + C_HALF_BIAS =C_HALF_BIAS_FP16; + end + 2'b11: + begin + C_BIAS_AONE =C_BIAS_AONE_FP16ALT; + C_HALF_BIAS =C_HALF_BIAS_FP16ALT; + end + endcase + end + +//For division, exponent=(Exp_a_D-LZ1)-(Exp_b_D-LZ2)+BIAS +//For square root, exponent=(Exp_a_D-LZ1)/2+(Exp_a_D-LZ1)%2+C_HALF_BIAS +//For exponent, in preprorces module, (Exp_a_D-LZ1) and (Exp_b_D-LZ2) have been processed with the corresponding process for denormal numbers. 
+ + assign Exp_add_a_D = {Sqrt_start_dly_S?{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64:1]}:{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI}}; + assign Exp_add_b_D = {Sqrt_start_dly_S?{1'b0,{C_EXP_ZERO_FP64},Exp_num_DI[0]}:{~Exp_den_DI[C_EXP_FP64],~Exp_den_DI[C_EXP_FP64],~Exp_den_DI}}; + assign Exp_add_c_D = {Div_start_dly_S?{{C_BIAS_AONE}}:{{C_HALF_BIAS}}}; + assign Exp_result_prenorm_DN = (Start_dly_S)?{Exp_add_a_D + Exp_add_b_D + Exp_add_c_D}:Exp_result_prenorm_DP; + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Exp_result_prenorm_DP <= '0; + end + else + begin + Exp_result_prenorm_DP<= Exp_result_prenorm_DN; + end + end + + assign Exp_result_prenorm_DO = Exp_result_prenorm_DP; + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv new file mode 100644 index 0000000000..b3f41fec61 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv @@ -0,0 +1,83 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// This file contains all div_sqrt_top_mvp parameters +// Authors : Lei Li (lile@iis.ee.ethz.ch) + +package defs_div_sqrt_mvp; + + // op command + localparam C_RM = 3; + localparam C_RM_NEAREST = 3'h0; + localparam C_RM_TRUNC = 3'h1; + localparam C_RM_PLUSINF = 3'h2; + localparam C_RM_MINUSINF = 3'h3; + localparam C_PC = 6; // Precision Control + localparam C_FS = 2; // Format Selection + localparam C_IUNC = 2; // Iteration Unit Number Control + localparam Iteration_unit_num_S = 2'b10; + + // FP64 + localparam C_OP_FP64 = 64; + localparam C_MANT_FP64 = 52; + localparam C_EXP_FP64 = 11; + localparam C_BIAS_FP64 = 1023; + localparam C_BIAS_AONE_FP64 = 11'h400; + localparam C_HALF_BIAS_FP64 = 511; + localparam C_EXP_ZERO_FP64 = 11'h000; + localparam C_EXP_ONE_FP64 = 13'h001; // Bit width is in agreement with in norm + localparam C_EXP_INF_FP64 = 11'h7FF; + localparam C_MANT_ZERO_FP64 = 52'h0; + localparam C_MANT_NAN_FP64 = 52'h8_0000_0000_0000; + localparam C_PZERO_FP64 = 64'h0000_0000_0000_0000; + localparam C_MZERO_FP64 = 64'h8000_0000_0000_0000; + localparam C_QNAN_FP64 = 64'h7FF8_0000_0000_0000; + + // FP32 + localparam C_OP_FP32 = 32; + localparam C_MANT_FP32 = 23; + localparam C_EXP_FP32 = 8; + localparam C_BIAS_FP32 = 127; + localparam C_BIAS_AONE_FP32 = 8'h80; + localparam C_HALF_BIAS_FP32 = 63; + localparam C_EXP_ZERO_FP32 = 8'h00; + localparam C_EXP_INF_FP32 = 8'hFF; + localparam C_MANT_ZERO_FP32 = 23'h0; + localparam C_PZERO_FP32 = 32'h0000_0000; + localparam C_MZERO_FP32 = 32'h8000_0000; + localparam C_QNAN_FP32 = 32'h7FC0_0000; + + // FP16 + localparam C_OP_FP16 = 16; + localparam C_MANT_FP16 = 10; + localparam C_EXP_FP16 = 5; + localparam C_BIAS_FP16 = 15; + localparam C_BIAS_AONE_FP16 = 5'h10; + localparam C_HALF_BIAS_FP16 = 7; + localparam C_EXP_ZERO_FP16 = 5'h00; + localparam C_EXP_INF_FP16 = 5'h1F; + localparam C_MANT_ZERO_FP16 = 10'h0; + localparam C_PZERO_FP16 = 16'h0000; + localparam C_MZERO_FP16 = 16'h8000; + localparam C_QNAN_FP16 = 
16'h7E00; + + // FP16alt + localparam C_OP_FP16ALT = 16; + localparam C_MANT_FP16ALT = 7; + localparam C_EXP_FP16ALT = 8; + localparam C_BIAS_FP16ALT = 127; + localparam C_BIAS_AONE_FP16ALT = 8'h80; + localparam C_HALF_BIAS_FP16ALT = 63; + localparam C_EXP_ZERO_FP16ALT = 8'h00; + localparam C_EXP_INF_FP16ALT = 8'hFF; + localparam C_MANT_ZERO_FP16ALT = 7'h0; + localparam C_QNAN_FP16ALT = 16'h7FC0; + +endpackage : defs_div_sqrt_mvp diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv new file mode 100644 index 0000000000..051bcc3ade --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv @@ -0,0 +1,232 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li -- lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 20/04/2018 // +// Design Name: FPU // +// Module Name: div_sqrt_mvp_wrapper.sv // +// Project Name: The shared divisor and square root // +// Language: SystemVerilog // +// // +// Description: The wrapper of div_sqrt_top_mvp // +// // +// // +// // +// // +// // +// // +// // +// // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module div_sqrt_mvp_wrapper +#( + parameter PrePipeline_depth_S = 0, // If you want to add a flip/flop stage before preprocess, set it to 1. + parameter PostPipeline_depth_S = 2 // The output delay stages +) + (//Input + input logic Clk_CI, + input logic Rst_RBI, + input logic Div_start_SI, + input logic Sqrt_start_SI, + + //Input Operands + input logic [C_OP_FP64-1:0] Operand_a_DI, + input logic [C_OP_FP64-1:0] Operand_b_DI, + + // Input Control + input logic [C_RM-1:0] RM_SI, //Rounding Mode + input logic [C_PC-1:0] Precision_ctl_SI, // Precision Control + input logic [C_FS-1:0] Format_sel_SI, // Format Selection, + input logic Kill_SI, + + //Output Result + output logic [C_OP_FP64-1:0] Result_DO, + + //Output-Flags + output logic [4:0] Fflags_SO, + output logic Ready_SO, + output logic Done_SO + ); + + + logic Div_start_S_S,Sqrt_start_S_S; + logic [C_OP_FP64-1:0] Operand_a_S_D; + logic [C_OP_FP64-1:0] Operand_b_S_D; + + // Input Control + logic [C_RM-1:0] RM_S_S; //Rounding Mode + logic [C_PC-1:0] Precision_ctl_S_S; // Precision Control + logic [C_FS-1:0] Format_sel_S_S; // Format Selection, + logic Kill_S_S; + + + logic [C_OP_FP64-1:0] Result_D; + logic Ready_S; + logic Done_S; + logic [4:0] Fflags_S; + + + generate + if(PrePipeline_depth_S==1) + begin + + div_sqrt_top_mvp 
div_top_U0 //for RTL + + (//Input + .Clk_CI (Clk_CI), + .Rst_RBI (Rst_RBI), + .Div_start_SI (Div_start_S_S), + .Sqrt_start_SI (Sqrt_start_S_S), + //Input Operands + .Operand_a_DI (Operand_a_S_D), + .Operand_b_DI (Operand_b_S_D), + .RM_SI (RM_S_S), //Rounding Mode + .Precision_ctl_SI (Precision_ctl_S_S), + .Format_sel_SI (Format_sel_S_S), + .Kill_SI (Kill_S_S), + .Result_DO (Result_D), + .Fflags_SO (Fflags_S), + .Ready_SO (Ready_S), + .Done_SO (Done_S) + ); + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Div_start_S_S<='0; + Sqrt_start_S_S<=1'b0; + Operand_a_S_D<='0; + Operand_b_S_D<='0; + RM_S_S <=1'b0; + Precision_ctl_S_S<='0; + Format_sel_S_S<='0; + Kill_S_S<='0; + end + else + begin + Div_start_S_S<=Div_start_SI; + Sqrt_start_S_S<=Sqrt_start_SI; + Operand_a_S_D<=Operand_a_DI; + Operand_b_S_D<=Operand_b_DI; + RM_S_S <=RM_SI; + Precision_ctl_S_S<=Precision_ctl_SI; + Format_sel_S_S<=Format_sel_SI; + Kill_S_S<=Kill_SI; + end + end + end + + else + begin + div_sqrt_top_mvp div_top_U0 //for RTL + (//Input + .Clk_CI (Clk_CI), + .Rst_RBI (Rst_RBI), + .Div_start_SI (Div_start_SI), + .Sqrt_start_SI (Sqrt_start_SI), + //Input Operands + .Operand_a_DI (Operand_a_DI), + .Operand_b_DI (Operand_b_DI), + .RM_SI (RM_SI), //Rounding Mode + .Precision_ctl_SI (Precision_ctl_SI), + .Format_sel_SI (Format_sel_SI), + .Kill_SI (Kill_SI), + .Result_DO (Result_D), + .Fflags_SO (Fflags_S), + .Ready_SO (Ready_S), + .Done_SO (Done_S) + ); + end + endgenerate + + ///////////////////////////////////////////////////////////////////////////// + // First Stage of Outputs + ///////////////////////////////////////////////////////////////////////////// + logic [C_OP_FP64-1:0] Result_dly_S_D; + logic Ready_dly_S_S; + logic Done_dly_S_S; + logic [4:0] Fflags_dly_S_S; + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Result_dly_S_D<='0; + Ready_dly_S_S<=1'b0; + Done_dly_S_S<=1'b0; + Fflags_dly_S_S<=1'b0; + end + else + begin + 
Result_dly_S_D<=Result_D; + Ready_dly_S_S<=Ready_S; + Done_dly_S_S<=Done_S; + Fflags_dly_S_S<=Fflags_S; + end + end + + ///////////////////////////////////////////////////////////////////////////// + // Second Stage of Outputs + ///////////////////////////////////////////////////////////////////////////// + + logic [C_OP_FP64-1:0] Result_dly_D_D; + logic Ready_dly_D_S; + logic Done_dly_D_S; + logic [4:0] Fflags_dly_D_S; + generate + if(PostPipeline_depth_S==2) + begin + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Result_dly_D_D<='0; + Ready_dly_D_S<=1'b0; + Done_dly_D_S<=1'b0; + Fflags_dly_D_S<=1'b0; + end + else + begin + Result_dly_D_D<=Result_dly_S_D; + Ready_dly_D_S<=Ready_dly_S_S; + Done_dly_D_S<=Done_dly_S_S; + Fflags_dly_D_S<=Fflags_dly_S_S; + end + end + assign Result_DO = Result_dly_D_D; + assign Ready_SO = Ready_dly_D_S; + assign Done_SO = Done_dly_D_S; + assign Fflags_SO=Fflags_dly_D_S; + end + + else + begin + assign Result_DO = Result_dly_S_D; + assign Ready_SO = Ready_dly_S_S; + assign Done_SO = Done_dly_S_S; + assign Fflags_SO = Fflags_dly_S_S; + end + + endgenerate + +endmodule // diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv new file mode 100644 index 0000000000..3af6081b7f --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv @@ -0,0 +1,180 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li -- lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 03/03/2018 // +// Design Name: div_sqrt_top_mvp // +// Module Name: div_sqrt_top_mvp.sv // +// Project Name: The shared divisor and square root // +// Language: SystemVerilog // +// // +// Description: The top of div and sqrt // +// // +// // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan and add low power // +// control for special cases // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module div_sqrt_top_mvp + + (//Input + input logic Clk_CI, + input logic Rst_RBI, + input logic Div_start_SI, + input logic Sqrt_start_SI, + + //Input Operands + input logic [C_OP_FP64-1:0] Operand_a_DI, + input logic [C_OP_FP64-1:0] Operand_b_DI, + + // Input Control + input logic [C_RM-1:0] RM_SI, //Rounding Mode + input logic [C_PC-1:0] Precision_ctl_SI, // Precision Control + input logic [C_FS-1:0] Format_sel_SI, // Format Selection, + input logic Kill_SI, + + //Output Result + output logic [C_OP_FP64-1:0] Result_DO, + + //Output-Flags + output logic [4:0] Fflags_SO, + output logic Ready_SO, + output logic Done_SO + ); + + + + + + //Operand components + logic [C_EXP_FP64:0] Exp_a_D; + logic [C_EXP_FP64:0] Exp_b_D; + logic [C_MANT_FP64:0] Mant_a_D; + logic [C_MANT_FP64:0] Mant_b_D; + + logic [C_EXP_FP64+1:0] Exp_z_D; + logic [C_MANT_FP64+4:0] Mant_z_D; 
+ logic Sign_z_D; + logic Start_S; + logic [C_RM-1:0] RM_dly_S; + logic Div_enable_S; + logic Sqrt_enable_S; + logic Inf_a_S; + logic Inf_b_S; + logic Zero_a_S; + logic Zero_b_S; + logic NaN_a_S; + logic NaN_b_S; + logic SNaN_S; + logic Special_case_SB,Special_case_dly_SB; + + logic Full_precision_S; + logic FP32_S; + logic FP64_S; + logic FP16_S; + logic FP16ALT_S; + + + preprocess_mvp preprocess_U0 + ( + .Clk_CI (Clk_CI ), + .Rst_RBI (Rst_RBI ), + .Div_start_SI (Div_start_SI ), + .Sqrt_start_SI (Sqrt_start_SI ), + .Ready_SI (Ready_SO ), + .Operand_a_DI (Operand_a_DI ), + .Operand_b_DI (Operand_b_DI ), + .RM_SI (RM_SI ), + .Format_sel_SI (Format_sel_SI ), + .Start_SO (Start_S ), + .Exp_a_DO_norm (Exp_a_D ), + .Exp_b_DO_norm (Exp_b_D ), + .Mant_a_DO_norm (Mant_a_D ), + .Mant_b_DO_norm (Mant_b_D ), + .RM_dly_SO (RM_dly_S ), + .Sign_z_DO (Sign_z_D ), + .Inf_a_SO (Inf_a_S ), + .Inf_b_SO (Inf_b_S ), + .Zero_a_SO (Zero_a_S ), + .Zero_b_SO (Zero_b_S ), + .NaN_a_SO (NaN_a_S ), + .NaN_b_SO (NaN_b_S ), + .SNaN_SO (SNaN_S ), + .Special_case_SBO (Special_case_SB ), + .Special_case_dly_SBO (Special_case_dly_SB) + ); + + nrbd_nrsc_mvp nrbd_nrsc_U0 + ( + .Clk_CI (Clk_CI ), + .Rst_RBI (Rst_RBI ), + .Div_start_SI (Div_start_SI ) , + .Sqrt_start_SI (Sqrt_start_SI ), + .Start_SI (Start_S ), + .Kill_SI (Kill_SI ), + .Special_case_SBI (Special_case_SB ), + .Special_case_dly_SBI (Special_case_dly_SB), + .Div_enable_SO (Div_enable_S ), + .Sqrt_enable_SO (Sqrt_enable_S ), + .Precision_ctl_SI (Precision_ctl_SI ), + .Format_sel_SI (Format_sel_SI ), + .Exp_a_DI (Exp_a_D ), + .Exp_b_DI (Exp_b_D ), + .Mant_a_DI (Mant_a_D ), + .Mant_b_DI (Mant_b_D ), + .Full_precision_SO (Full_precision_S ), + .FP32_SO (FP32_S ), + .FP64_SO (FP64_S ), + .FP16_SO (FP16_S ), + .FP16ALT_SO (FP16ALT_S ), + .Ready_SO (Ready_SO ), + .Done_SO (Done_SO ), + .Exp_z_DO (Exp_z_D ), + .Mant_z_DO (Mant_z_D ) + ); + + + norm_div_sqrt_mvp fpu_norm_U0 + ( + .Mant_in_DI (Mant_z_D ), + .Exp_in_DI (Exp_z_D ), + .Sign_in_DI 
(Sign_z_D ), + .Div_enable_SI (Div_enable_S ), + .Sqrt_enable_SI (Sqrt_enable_S ), + .Inf_a_SI (Inf_a_S ), + .Inf_b_SI (Inf_b_S ), + .Zero_a_SI (Zero_a_S ), + .Zero_b_SI (Zero_b_S ), + .NaN_a_SI (NaN_a_S ), + .NaN_b_SI (NaN_b_S ), + .SNaN_SI (SNaN_S ), + .RM_SI (RM_dly_S ), + .Full_precision_SI (Full_precision_S ), + .FP32_SI (FP32_S ), + .FP64_SI (FP64_S ), + .FP16_SI (FP16_S ), + .FP16ALT_SI (FP16ALT_S ), + .Result_DO (Result_DO ), + .Fflags_SO (Fflags_SO ) //{NV,DZ,OF,UF,NX} + ); + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv new file mode 100644 index 0000000000..0c645e6ebe --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv @@ -0,0 +1,61 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 12/01/2017 // +// Design Name: FPU // +// Module Name: iteration_div_sqrt_mvp // +// Project Name: Private FPU // +// Language: SystemVerilog // +// // +// Description: iteration unit for div and sqrt // +// // +// // +// Revision: 03/14/2018 // +// For div_sqrt_mvp // +//////////////////////////////////////////////////////////////////////////////// + +module iteration_div_sqrt_mvp +#( + parameter WIDTH=25 +) + (//Input + + input logic [WIDTH-1:0] A_DI, + input logic [WIDTH-1:0] B_DI, + input logic Div_enable_SI, + input logic Div_start_dly_SI, + input logic Sqrt_enable_SI, + input logic [1:0] D_DI, + + output logic [1:0] D_DO, + output logic [WIDTH-1:0] Sum_DO, + output logic Carry_out_DO + ); + + logic D_carry_D; + logic Sqrt_cin_D; + logic Cin_D; + + assign D_DO[0]=~D_DI[0]; + assign D_DO[1]=~(D_DI[1] ^ D_DI[0]); + assign D_carry_D=D_DI[1] | D_DI[0]; + assign Sqrt_cin_D=Sqrt_enable_SI&&D_carry_D; + assign Cin_D=Div_enable_SI?1'b0:Sqrt_cin_D; + assign {Carry_out_DO,Sum_DO}=A_DI+B_DI+Cin_D; + +endmodule diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv new file mode 100644 index 0000000000..29ef52a24a --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv @@ -0,0 +1,484 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 09/03/2018 // +// Design Name: FPU // +// Module Name: norm_div_sqrt_mvp.sv // +// Project Name: // +// Language: SystemVerilog // +// // +// Description: Floating point Normalizer/Rounding unit // +// Since this module is design as a combinatinal logic, it can// +// be added arbinary register stages for different frequency // +// in the wrapper module. // +// // +// // +// // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan // +// // +// // +// // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module norm_div_sqrt_mvp + (//Inputs + input logic [C_MANT_FP64+4:0] Mant_in_DI, // Include the needed 4-bit for rounding and hidden bit + input logic signed [C_EXP_FP64+1:0] Exp_in_DI, + input logic Sign_in_DI, + input logic Div_enable_SI, + input logic Sqrt_enable_SI, + input logic Inf_a_SI, + input logic Inf_b_SI, + input logic Zero_a_SI, + input logic Zero_b_SI, + input logic NaN_a_SI, + input logic NaN_b_SI, + input logic SNaN_SI, + input logic [C_RM-1:0] RM_SI, + input logic Full_precision_SI, + input logic FP32_SI, + input logic FP64_SI, + input logic FP16_SI, + input logic FP16ALT_SI, + //Outputs + output logic [C_EXP_FP64+C_MANT_FP64:0] Result_DO, + output logic [4:0] Fflags_SO //{NV,DZ,OF,UF,NX} + ); + + + logic 
Sign_res_D; + + logic NV_OP_S; + logic Exp_OF_S; + logic Exp_UF_S; + logic Div_Zero_S; + logic In_Exact_S; + + ///////////////////////////////////////////////////////////////////////////// + // Normalization // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64:0] Mant_res_norm_D; + logic [C_EXP_FP64-1:0] Exp_res_norm_D; + + ///////////////////////////////////////////////////////////////////////////// + // Right shift operations for negtive exponents // + ///////////////////////////////////////////////////////////////////////////// + + logic [C_EXP_FP64+1:0] Exp_Max_RS_FP64_D; + logic [C_EXP_FP32+1:0] Exp_Max_RS_FP32_D; + logic [C_EXP_FP16+1:0] Exp_Max_RS_FP16_D; + logic [C_EXP_FP16ALT+1:0] Exp_Max_RS_FP16ALT_D; + // + assign Exp_Max_RS_FP64_D=Exp_in_DI[C_EXP_FP64:0]+C_MANT_FP64+1; // to check exponent after (C_MANT_FP64+1)-bit >> when Exp_in_DI is negative + assign Exp_Max_RS_FP32_D=Exp_in_DI[C_EXP_FP32:0]+C_MANT_FP32+1; // to check exponent after (C_MANT_FP32+1)-bit >> when Exp_in_DI is negative + assign Exp_Max_RS_FP16_D=Exp_in_DI[C_EXP_FP16:0]+C_MANT_FP16+1; // to check exponent after (C_MANT_FP16+1)-bit >> when Exp_in_DI is negative + assign Exp_Max_RS_FP16ALT_D=Exp_in_DI[C_EXP_FP16ALT:0]+C_MANT_FP16ALT+1; // to check exponent after (C_MANT_FP16ALT+1)-bit >> when Exp_in_DI is negative + logic [C_EXP_FP64+1:0] Num_RS_D; + assign Num_RS_D=~Exp_in_DI+1+1; // How many right shifts(RS) are needed to generate a denormal number? 
>> is need only when Exp_in_DI is negative + logic [C_MANT_FP64:0] Mant_RS_D; + logic [C_MANT_FP64+4:0] Mant_forsticky_D; + assign {Mant_RS_D,Mant_forsticky_D} ={Mant_in_DI,{(C_MANT_FP64+1){1'b0}} } >>(Num_RS_D); // +// + logic [C_EXP_FP64+1:0] Exp_subOne_D; + assign Exp_subOne_D = Exp_in_DI -1; + + //normalization + logic [1:0] Mant_lower_D; + logic Mant_sticky_bit_D; + logic [C_MANT_FP64+4:0] Mant_forround_D; + + always_comb + begin + + if(NaN_a_SI) // if a is NaN, return NaN + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = SNaN_SI; + end + + else if(NaN_b_SI) //if b is NaN, return NaN + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = SNaN_SI; + end + + else if(Inf_a_SI) + begin + if(Div_enable_SI&&Inf_b_SI) //Inf/Inf, retrurn NaN + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end + else if (Sqrt_enable_SI && Sign_in_DI) begin // catch sqrt(-inf) + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end else begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if(Div_enable_SI&&Inf_b_SI) + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='0; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if(Zero_a_SI) + begin + if(Div_enable_SI&&Zero_b_SI) + begin + Div_Zero_S=1'b1; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + 
Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end + else + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='0; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if(Div_enable_SI&&(Zero_b_SI)) //div Zero + begin + Div_Zero_S=1'b1; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if(Sign_in_DI&&Sqrt_enable_SI) //sqrt(-a) + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end + + else if((Exp_in_DI[C_EXP_FP64:0]=='0)) + begin + if(Mant_in_DI!='0) //Exp=0, Mant!=0, it is denormal + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b1; + Mant_res_norm_D={1'b0,Mant_in_DI[C_MANT_FP64+4:5]}; + Exp_res_norm_D='0; + Mant_forround_D={Mant_in_DI[4:0],{(C_MANT_FP64){1'b0}} }; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + else // Zero + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='0; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if((Exp_in_DI[C_EXP_FP64:0]==C_EXP_ONE_FP64)&&(~Mant_in_DI[C_MANT_FP64+4])) //denormal + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b1; + Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+4:4]; + Exp_res_norm_D='0; + Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if(Exp_in_DI[C_EXP_FP64+1]) //minus //consider format + begin + if(((~Exp_Max_RS_FP32_D[C_EXP_FP32+1])&&FP32_SI) | ((~Exp_Max_RS_FP64_D[C_EXP_FP64+1])&&FP64_SI) | ((~Exp_Max_RS_FP16_D[C_EXP_FP16+1])&&FP16_SI) | ((~Exp_Max_RS_FP16ALT_D[C_EXP_FP16ALT+1])&&FP16ALT_SI) ) //OF EXP<0 after RS + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; 
+ Exp_res_norm_D='0; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + else //denormal + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b1; + Mant_res_norm_D={Mant_RS_D[C_MANT_FP64:0]}; + Exp_res_norm_D='0; + Mant_forround_D={Mant_forsticky_D[C_MANT_FP64+4:0]}; //?? + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if( (Exp_in_DI[C_EXP_FP32]&&FP32_SI) | (Exp_in_DI[C_EXP_FP64]&&FP64_SI) | (Exp_in_DI[C_EXP_FP16]&&FP16_SI) | (Exp_in_DI[C_EXP_FP16ALT]&&FP16ALT_SI) ) //OF + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if( ((Exp_in_DI[C_EXP_FP32-1:0]=='1)&&FP32_SI) | ((Exp_in_DI[C_EXP_FP64-1:0]=='1)&&FP64_SI) | ((Exp_in_DI[C_EXP_FP16-1:0]=='1)&&FP16_SI) | ((Exp_in_DI[C_EXP_FP16ALT-1:0]=='1)&&FP16ALT_SI) )//255 + begin + if(~Mant_in_DI[C_MANT_FP64+4]) // MSB=0 + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3]; + Exp_res_norm_D=Exp_subOne_D; + Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + else if(Mant_in_DI!='0) //NaN + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + else //infinity + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if(Mant_in_DI[C_MANT_FP64+4]) //normal numbers with 1.XXX + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D= Mant_in_DI[C_MANT_FP64+4:4]; + Exp_res_norm_D=Exp_in_DI[C_EXP_FP64-1:0]; + Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else //normal numbers with 0.1XX + begin + Div_Zero_S=1'b0; + 
Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3]; + Exp_res_norm_D=Exp_subOne_D; + Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + end + + ///////////////////////////////////////////////////////////////////////////// + // Rounding enable only for full precision (Full_precision_SI==1'b1) // + ///////////////////////////////////////////////////////////////////////////// + + logic [C_MANT_FP64:0] Mant_upper_D; + logic [C_MANT_FP64+1:0] Mant_upperRounded_D; + logic Mant_roundUp_S; + logic Mant_rounded_S; + + always_comb //determine which bits for Mant_lower_D and Mant_sticky_bit_D + begin + if(FP32_SI) + begin + Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} }; + Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-1:C_MANT_FP64-C_MANT_FP32-2]; + Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-3:0]; + end + else if(FP64_SI) + begin + Mant_upper_D = Mant_res_norm_D[C_MANT_FP64:0]; + Mant_lower_D = Mant_forround_D[C_MANT_FP64+4:C_MANT_FP64+3]; + Mant_sticky_bit_D = | Mant_forround_D[C_MANT_FP64+3:0]; + end + else if(FP16_SI) + begin + Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} }; + Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-1:C_MANT_FP64-C_MANT_FP16-2]; + Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-3:30]; + end + else //FP16ALT + begin + Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; + Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-1:C_MANT_FP64-C_MANT_FP16ALT-2]; + Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-3:30]; + end + end + + assign Mant_rounded_S = (|(Mant_lower_D))| Mant_sticky_bit_D; + + + + + always_comb //determine whether to round up or not + begin + Mant_roundUp_S = 1'b0; + case (RM_SI) + C_RM_NEAREST : + 
Mant_roundUp_S = Mant_lower_D[1] && ((Mant_lower_D[0] | Mant_sticky_bit_D )| ( (FP32_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP32]) | (FP64_SI&&Mant_upper_D[0]) | (FP16_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16]) | (FP16ALT_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16ALT]) ) ); + C_RM_TRUNC : + Mant_roundUp_S = 0; + C_RM_PLUSINF : + Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI; + C_RM_MINUSINF: + Mant_roundUp_S = Mant_rounded_S & Sign_in_DI; + default : + Mant_roundUp_S = 0; + endcase // case (RM_DI) + end // always_comb begin + + logic Mant_renorm_S; + logic [C_MANT_FP64:0] Mant_roundUp_Vector_S; // for all the formats + + assign Mant_roundUp_Vector_S={7'h0,(FP16ALT_SI&&Mant_roundUp_S),2'h0,(FP16_SI&&Mant_roundUp_S),12'h0,(FP32_SI&&Mant_roundUp_S),28'h0,(FP64_SI&&Mant_roundUp_S)}; + + + assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_Vector_S; + assign Mant_renorm_S = Mant_upperRounded_D[C_MANT_FP64+1]; + + ///////////////////////////////////////////////////////////////////////////// + // Renormalization for Rounding // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64-1:0] Mant_res_round_D; + logic [C_EXP_FP64-1:0] Exp_res_round_D; + + + assign Mant_res_round_D = (Mant_renorm_S)?Mant_upperRounded_D[C_MANT_FP64:1]:Mant_upperRounded_D[C_MANT_FP64-1:0]; // including the process of the hidden bit + assign Exp_res_round_D = Exp_res_norm_D+Mant_renorm_S; + + ///////////////////////////////////////////////////////////////////////////// + // Output Assignments // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64-1:0] Mant_before_format_ctl_D; + logic [C_EXP_FP64-1:0] Exp_before_format_ctl_D; + assign Mant_before_format_ctl_D = Full_precision_SI ? Mant_res_round_D : Mant_res_norm_D; + assign Exp_before_format_ctl_D = Full_precision_SI ? 
Exp_res_round_D : Exp_res_norm_D; + + always_comb //NaN Boxing + begin // + if(FP32_SI) + begin + Result_DO ={32'hffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP32-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP32]}; + end + else if(FP64_SI) + begin + Result_DO ={Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP64-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:0]}; + end + else if(FP16_SI) + begin + Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16]}; + end + else + begin + Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16ALT-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16ALT]}; + end + end + +assign In_Exact_S = (~Full_precision_SI) | Mant_rounded_S; +assign Fflags_SO = {NV_OP_S,Div_Zero_S,Exp_OF_S,Exp_UF_S,In_Exact_S}; //{NV,DZ,OF,UF,NX} + +endmodule // norm_div_sqrt_mvp diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv new file mode 100644 index 0000000000..62bd147f61 --- /dev/null +++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv @@ -0,0 +1,104 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li  lile@iis.ee.ethz.ch                                //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    10/04/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    nrbd_nrsc_mvp.sv                                           //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    non-restoring binary divider / square root                 //
+//                 (wrapper only: every port is wired to control_mvp)         //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module nrbd_nrsc_mvp
+  // No logic of its own: all ports below are passed straight to control_U0.
+  (//Input
+   input logic                                 Clk_CI,
+   input logic                                 Rst_RBI,
+   input logic                                 Div_start_SI,
+   input logic                                 Sqrt_start_SI,
+   input logic                                 Start_SI,
+   input logic                                 Kill_SI,
+   input logic                                 Special_case_SBI,
+   input logic                                 Special_case_dly_SBI,
+   input logic [C_PC-1:0]                      Precision_ctl_SI,
+   input logic [1:0]                           Format_sel_SI,
+   input logic [C_MANT_FP64:0]                 Mant_a_DI, // forwarded as Numerator_DI
+   input logic [C_MANT_FP64:0]                 Mant_b_DI, // forwarded as Denominator_DI
+   input logic [C_EXP_FP64:0]                  Exp_a_DI,  // forwarded as Exp_num_DI
+   input logic [C_EXP_FP64:0]                  Exp_b_DI,  // forwarded as Exp_den_DI
+  //output
+   output logic                                Div_enable_SO,
+   output logic                                Sqrt_enable_SO,
+
+   output logic                                Full_precision_SO,
+   output logic                                FP32_SO,
+   output logic                                FP64_SO,
+   output logic                                FP16_SO,
+   output logic                                FP16ALT_SO,
+   output logic                                Ready_SO,
+   output logic                                Done_SO,
+   output logic [C_MANT_FP64+4:0]              Mant_z_DO, // from Mant_result_prenorm_DO
+   output logic [C_EXP_FP64+1:0]               Exp_z_DO   // from Exp_result_prenorm_DO
+   );
+
+  // Driven by control_U0's *_start_dly_SO outputs; not read anywhere in this
+  // module, so they only serve to terminate those output ports.
+  logic                                        Div_start_dly_S,Sqrt_start_dly_S;
+
+// Operand "a" feeds the numerator ports and operand "b" the denominator ports.
+control_mvp control_U0
+(  .Clk_CI                                   (Clk_CI                     ),
+   .Rst_RBI                                  (Rst_RBI                    ),
+   .Div_start_SI                             (Div_start_SI               ),
+   .Sqrt_start_SI                            (Sqrt_start_SI              ),
+   .Start_SI                                 (Start_SI                   ),
+   .Kill_SI                                  (Kill_SI                    ),
+   .Special_case_SBI                         (Special_case_SBI           ),
+   .Special_case_dly_SBI                     (Special_case_dly_SBI       ),
+   .Precision_ctl_SI                         (Precision_ctl_SI           ),
+   .Format_sel_SI                            (Format_sel_SI              ),
+   .Numerator_DI                             (Mant_a_DI                  ),
+   .Exp_num_DI                               (Exp_a_DI                   ),
+   .Denominator_DI                           (Mant_b_DI                  ),
+   .Exp_den_DI                               (Exp_b_DI                   ),
+   .Div_start_dly_SO                         (Div_start_dly_S            ),
+   .Sqrt_start_dly_SO                        (Sqrt_start_dly_S           ),
+   .Div_enable_SO                            (Div_enable_SO              ),
+   .Sqrt_enable_SO                           (Sqrt_enable_SO             ),
+   .Full_precision_SO                        (Full_precision_SO          ),
+   .FP32_SO                                  (FP32_SO                    ),
+   .FP64_SO                                  (FP64_SO                    ),
+   .FP16_SO                                  (FP16_SO                    ),
+   .FP16ALT_SO                               (FP16ALT_SO                 ),
+   .Ready_SO                                 (Ready_SO                   ),
+   .Done_SO                                  (Done_SO                    ),
+   .Mant_result_prenorm_DO                   (Mant_z_DO                  ),
+   .Exp_result_prenorm_DO                    (Exp_z_DO                   )
+);
+
+
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
new file mode 100644
index 0000000000..9e0d25f38f
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
@@ -0,0 +1,425 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li  //lile@iis.ee.ethz.ch                              //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    01/03/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    preprocess_mvp.sv                                          //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    decode and data preparation                                //
+//                 Unpacks both operands, flags zero/Inf/NaN, and registers   //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+//                                                                            //
+// All registers: posedge Clk_CI, asynchronous active-low reset Rst_RBI.      //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module preprocess_mvp
+  (
+   input logic                   Clk_CI,
+   input logic                   Rst_RBI,
+   input logic                   Div_start_SI,
+   input logic                   Sqrt_start_SI,
+   input logic                   Ready_SI,      // with a start input, enables capture of new operands
+   //Input Operands
+   input logic [C_OP_FP64-1:0]   Operand_a_DI,
+   input logic [C_OP_FP64-1:0]   Operand_b_DI,
+   input logic [C_RM-1:0]        RM_SI,    //Rounding Mode
+   input logic [C_FS-1:0]        Format_sel_SI,  // Format Selection
+
+   // to control
+   output logic                  Start_SO,       // Div_start_SI | Sqrt_start_SI, not gated by Ready_SI
+   output logic [C_EXP_FP64:0]   Exp_a_DO_norm,
+   output logic [C_EXP_FP64:0]   Exp_b_DO_norm,
+   output logic [C_MANT_FP64:0]  Mant_a_DO_norm,
+   output logic [C_MANT_FP64:0]  Mant_b_DO_norm,
+
+   output logic [C_RM-1:0]       RM_dly_SO,      // registered copy of RM_SI
+
+   output logic                  Sign_z_DO,      // registered result sign
+   output logic                  Inf_a_SO,
+   output logic                  Inf_b_SO,
+   output logic                  Zero_a_SO,
+   output logic                  Zero_b_SO,
+   output logic                  NaN_a_SO,
+   output logic                  NaN_b_SO,
+   output logic                  SNaN_SO,
+   output logic                  Special_case_SBO,     // combinational, see "Low power control"
+   output logic                  Special_case_dly_SBO  // registered copy of Special_case_SBO
+   );
+
+   //Hidden Bits
+   logic                         Hb_a_D;
+   logic                         Hb_b_D;
+
+   logic [C_EXP_FP64-1:0]        Exp_a_D;
+   logic [C_EXP_FP64-1:0]        Exp_b_D;
+   logic [C_MANT_FP64-1:0]       Mant_a_NonH_D;  // fraction bits without the hidden bit
+   logic [C_MANT_FP64-1:0]       Mant_b_NonH_D;
+   logic [C_MANT_FP64:0]         Mant_a_D;       // {hidden bit, fraction}
+   logic [C_MANT_FP64:0]         Mant_b_D;
+
+ /////////////////////////////////////////////////////////////////////////////
+ // Disassemble operands
+ /////////////////////////////////////////////////////////////////////////////
+   logic                         Sign_a_D,Sign_b_D;
+   logic                         Start_S;
+   // Exponents are zero-extended and fractions left-aligned to the FP64 widths.
+   always_comb
+     begin
+       case(Format_sel_SI)
+         2'b00: // operands sliced with the C_*_FP32 constants
+           begin
+             Sign_a_D = Operand_a_DI[C_OP_FP32-1];
+             Sign_b_D = Operand_b_DI[C_OP_FP32-1];
+             Exp_a_D = {3'h0, Operand_a_DI[C_OP_FP32-2:C_MANT_FP32]};
+             Exp_b_D = {3'h0, Operand_b_DI[C_OP_FP32-2:C_MANT_FP32]};
+             Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP32-1:0],29'h0};
+             Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP32-1:0],29'h0};
+           end
+         2'b01: // operands sliced with the C_*_FP64 constants (no padding needed)
+           begin
+             Sign_a_D = Operand_a_DI[C_OP_FP64-1];
+             Sign_b_D = Operand_b_DI[C_OP_FP64-1];
+             Exp_a_D = Operand_a_DI[C_OP_FP64-2:C_MANT_FP64];
+             Exp_b_D = Operand_b_DI[C_OP_FP64-2:C_MANT_FP64];
+             Mant_a_NonH_D = Operand_a_DI[C_MANT_FP64-1:0];
+             Mant_b_NonH_D = Operand_b_DI[C_MANT_FP64-1:0];
+           end
+         2'b10: // operands sliced with the C_*_FP16 constants
+           begin
+             Sign_a_D = Operand_a_DI[C_OP_FP16-1];
+             Sign_b_D = Operand_b_DI[C_OP_FP16-1];
+             Exp_a_D = {6'h00, Operand_a_DI[C_OP_FP16-2:C_MANT_FP16]};
+             Exp_b_D = {6'h00, Operand_b_DI[C_OP_FP16-2:C_MANT_FP16]};
+             Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16-1:0],42'h0};
+             Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16-1:0],42'h0};
+           end
+         2'b11: // operands sliced with the C_*_FP16ALT constants
+           begin
+             Sign_a_D = Operand_a_DI[C_OP_FP16ALT-1];
+             Sign_b_D = Operand_b_DI[C_OP_FP16ALT-1];
+             Exp_a_D = {3'h0, Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
+             Exp_b_D = {3'h0, Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
+             Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16ALT-1:0],45'h0};
+             Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16ALT-1:0],45'h0};
+           end
+       endcase
+     end
+
+   // The hidden bit is 1 whenever the exponent field is non-zero, 0 otherwise.
+   assign Mant_a_D = {Hb_a_D,Mant_a_NonH_D};
+   assign Mant_b_D = {Hb_b_D,Mant_b_NonH_D};
+
+   assign Hb_a_D = | Exp_a_D; // hidden bit
+   assign Hb_b_D = | Exp_b_D; // hidden bit
+
+   assign Start_S= Div_start_SI | Sqrt_start_SI;
+
+
+
+ /////////////////////////////////////////////////////////////////////////////
+ // preliminary checks for infinite/zero/NaN operands                       //
+ /////////////////////////////////////////////////////////////////////////////
+
+   logic                         Mant_a_prenorm_zero_S;
+   logic                         Mant_b_prenorm_zero_S;
+
+   logic                         Exp_a_prenorm_zero_S;
+   logic                         Exp_b_prenorm_zero_S;
+   assign Exp_a_prenorm_zero_S = ~Hb_a_D;
+   assign Exp_b_prenorm_zero_S = ~Hb_b_D;
+
+   logic                         Exp_a_prenorm_Inf_NaN_S;
+   logic                         Exp_b_prenorm_Inf_NaN_S;
+
+   logic                         Mant_a_prenorm_QNaN_S;
+   logic                         Mant_a_prenorm_SNaN_S;
+   logic                         Mant_b_prenorm_QNaN_S;
+   logic                         Mant_b_prenorm_SNaN_S;
+   // *_QNaN_S are computed but not read elsewhere in this module; only *_SNaN_S feed SNaN_SN.
+   assign Mant_a_prenorm_QNaN_S=Mant_a_NonH_D[C_MANT_FP64-1]&&(~(|Mant_a_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_a_prenorm_SNaN_S=(~Mant_a_NonH_D[C_MANT_FP64-1])&&((|Mant_a_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_b_prenorm_QNaN_S=Mant_b_NonH_D[C_MANT_FP64-1]&&(~(|Mant_b_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_b_prenorm_SNaN_S=(~Mant_b_NonH_D[C_MANT_FP64-1])&&((|Mant_b_NonH_D[C_MANT_FP64-2:0]));
+   // Fraction-is-zero and exponent-equals-C_EXP_INF_* tests, on the native field widths.
+   always_comb
+     begin
+       case(Format_sel_SI)
+         2'b00:
+           begin
+             Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
+             Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
+             Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
+             Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
+           end
+         2'b01:
+           begin
+             Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
+             Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
+             Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
+             Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
+           end
+         2'b10:
+           begin
+             Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
+             Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
+             Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
+             Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
+           end
+         2'b11:
+           begin
+             Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
+             Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
+             Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
+             Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
+           end
+       endcase
+     end
+
+
+   // *_SN is the next value, *_SP the registered value. A flag is recomputed
+   // only when Start_S && Ready_SI; otherwise it keeps its registered value.
+
+   logic                         Zero_a_SN,Zero_a_SP;
+   logic                         Zero_b_SN,Zero_b_SP;
+   logic                         Inf_a_SN,Inf_a_SP;
+   logic                         Inf_b_SN,Inf_b_SP;
+   logic                         NaN_a_SN,NaN_a_SP;
+   logic                         NaN_b_SN,NaN_b_SP;
+   logic                         SNaN_SN,SNaN_SP;
+   // zero: exponent and fraction both zero; Inf: max exponent, zero fraction; NaN: max exponent, non-zero fraction
+   assign Zero_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_zero_S&&Mant_a_prenorm_zero_S):Zero_a_SP;
+   assign Zero_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_zero_S&&Mant_b_prenorm_zero_S):Zero_b_SP;
+   assign Inf_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&Mant_a_prenorm_zero_S):Inf_a_SP;
+   assign Inf_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&Mant_b_prenorm_zero_S):Inf_b_SP;
+   assign NaN_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&(~Mant_a_prenorm_zero_S)):NaN_a_SP;
+   assign NaN_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&(~Mant_b_prenorm_zero_S)):NaN_b_SP;
+   assign SNaN_SN = (Start_S&&Ready_SI) ? ((Mant_a_prenorm_SNaN_S&&NaN_a_SN) | (Mant_b_prenorm_SNaN_S&&NaN_b_SN)) : SNaN_SP;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+         begin
+           Zero_a_SP <='0;
+           Zero_b_SP <='0;
+           Inf_a_SP <='0;
+           Inf_b_SP <='0;
+           NaN_a_SP <='0;
+           NaN_b_SP <='0;
+           SNaN_SP <= '0;
+         end
+       else
+         begin
+           Inf_a_SP <=Inf_a_SN;
+           Inf_b_SP <=Inf_b_SN;
+           Zero_a_SP <=Zero_a_SN;
+           Zero_b_SP <=Zero_b_SN;
+           NaN_a_SP <=NaN_a_SN;
+           NaN_b_SP <=NaN_b_SN;
+           SNaN_SP <= SNaN_SN;
+         end
+     end
+
+ /////////////////////////////////////////////////////////////////////////////
+ // Low power control
+ /////////////////////////////////////////////////////////////////////////////
+   // High only while Start_S && Ready_SI and no special condition holds: any zero/Inf/NaN operand when Div_start_SI is set, else a zero/Inf/NaN or Sign_a_D.
+   assign Special_case_SBO=(~{(Div_start_SI)?(Zero_a_SN | Zero_b_SN | Inf_a_SN | Inf_b_SN | NaN_a_SN | NaN_b_SN): (Zero_a_SN | Inf_a_SN | NaN_a_SN | Sign_a_D) })&&(Start_S&&Ready_SI);
+
+   // Captured when Start_S && Ready_SI; the last two branches together just hold the value.
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+         begin
+           Special_case_dly_SBO <= '0;
+         end
+       else if((Start_S&&Ready_SI))
+         begin
+           Special_case_dly_SBO <= Special_case_SBO;
+         end
+       else if(Special_case_dly_SBO)
+         begin
+           Special_case_dly_SBO <= 1'b1;
+         end
+       else
+         begin
+           Special_case_dly_SBO <= '0;
+         end
+     end
+
+ /////////////////////////////////////////////////////////////////////////////
+ // Delay sign for normalization and round                                  //
+ /////////////////////////////////////////////////////////////////////////////
+
+   logic                         Sign_z_DN;
+   logic                         Sign_z_DP;
+   // Division: XOR of the operand signs. Square root: sign of operand a. Otherwise hold.
+   always_comb
+     begin
+       if(Div_start_SI&&Ready_SI)
+         Sign_z_DN = Sign_a_D ^ Sign_b_D;
+       else if(Sqrt_start_SI&&Ready_SI)
+         Sign_z_DN = Sign_a_D;
+       else
+         Sign_z_DN = Sign_z_DP;
+     end
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+         begin
+           Sign_z_DP <= '0;
+         end
+       else
+         begin
+           Sign_z_DP <= Sign_z_DN;
+         end
+     end
+
+   logic [C_RM-1:0]              RM_DN;
+   logic [C_RM-1:0]              RM_DP;
+   // Rounding mode is sampled when Start_S && Ready_SI and held otherwise.
+   always_comb
+     begin
+       if(Start_S&&Ready_SI)
+         RM_DN = RM_SI;
+       else
+         RM_DN = RM_DP;
+     end
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+         begin
+           RM_DP <= '0;
+         end
+       else
+         begin
+           RM_DP <= RM_DN;
+         end
+     end
+   assign RM_dly_SO = RM_DP;
+
+   logic [5:0]                   Mant_leadingOne_a, Mant_leadingOne_b;
+   logic                         Mant_zero_S_a,Mant_zero_S_b; // lzc empty_o; not read elsewhere in this module
+   // NOTE(review): MODE=1 presumably makes lzc count leading zeros, giving the leading-one position - confirm in lzc.sv.
+   lzc #(
+     .WIDTH ( C_MANT_FP64+1 ),
+     .MODE  ( 1             )
+   ) LOD_Ua (
+     .in_i    ( Mant_a_D          ),
+     .cnt_o   ( Mant_leadingOne_a ),
+     .empty_o ( Mant_zero_S_a     )
+   );
+
+   logic [C_MANT_FP64:0]         Mant_a_norm_DN,Mant_a_norm_DP;
+   // Shift the mantissa left by the lzc count (presumably so its leading one lands in the MSB).
+   assign Mant_a_norm_DN = ((Start_S&&Ready_SI))?(Mant_a_D<<(Mant_leadingOne_a)):Mant_a_norm_DP;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+         begin
+           Mant_a_norm_DP <= '0;
+         end
+       else
+         begin
+           Mant_a_norm_DP<=Mant_a_norm_DN;
+         end
+     end
+
+   logic [C_EXP_FP64:0]          Exp_a_norm_DN,Exp_a_norm_DP;
+   assign Exp_a_norm_DN = ((Start_S&&Ready_SI))?(Exp_a_D-Mant_leadingOne_a+(|Mant_leadingOne_a)):Exp_a_norm_DP; //Covering the process of denormal numbers
+   // i.e. unchanged when the count is 0, otherwise Exp_a_D - count + 1.
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+         begin
+           Exp_a_norm_DP <= '0;
+         end
+       else
+         begin
+           Exp_a_norm_DP<=Exp_a_norm_DN;
+         end
+     end
+   // Operand b goes through the same normalization as operand a.
+   lzc #(
+     .WIDTH ( C_MANT_FP64+1 ),
+     .MODE  ( 1             )
+   ) LOD_Ub (
+     .in_i    ( Mant_b_D          ),
+     .cnt_o   ( Mant_leadingOne_b ),
+     .empty_o ( Mant_zero_S_b     )
+   );
+
+
+   logic [C_MANT_FP64:0]         Mant_b_norm_DN,Mant_b_norm_DP;
+
+   assign Mant_b_norm_DN = ((Start_S&&Ready_SI))?(Mant_b_D<<(Mant_leadingOne_b)):Mant_b_norm_DP;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+         begin
+           Mant_b_norm_DP <= '0;
+         end
+       else
+         begin
+           Mant_b_norm_DP<=Mant_b_norm_DN;
+         end
+     end
+
+   logic [C_EXP_FP64:0]          Exp_b_norm_DN,Exp_b_norm_DP;
+   assign Exp_b_norm_DN = ((Start_S&&Ready_SI))?(Exp_b_D-Mant_leadingOne_b+(|Mant_leadingOne_b)):Exp_b_norm_DP; //Covering the process of denormal numbers
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+         begin
+           Exp_b_norm_DP <= '0;
+         end
+       else
+         begin
+           Exp_b_norm_DP<=Exp_b_norm_DN;
+         end
+     end
+
+ /////////////////////////////////////////////////////////////////////////////
+ // Output assignments                                                      //
+ /////////////////////////////////////////////////////////////////////////////
+   // Start_SO is combinational; every other output below is a registered (*_DP / *_SP) value.
+   assign Start_SO=Start_S;
+   assign Exp_a_DO_norm=Exp_a_norm_DP;
+   assign Exp_b_DO_norm=Exp_b_norm_DP;
+   assign Mant_a_DO_norm=Mant_a_norm_DP;
+   assign Mant_b_DO_norm=Mant_b_norm_DP;
+   assign Sign_z_DO=Sign_z_DP;
+   assign Inf_a_SO=Inf_a_SP;
+   assign Inf_b_SO=Inf_b_SP;
+   assign Zero_a_SO=Zero_a_SP;
+   assign Zero_b_SO=Zero_b_SP;
+   assign NaN_a_SO=NaN_a_SP;
+   assign NaN_b_SO=NaN_b_SP;
+   assign SNaN_SO=SNaN_SP;
+
+endmodule
diff --git a/vendor/pulp-platform_common_cells.lock.hjson b/vendor/pulp-platform_common_cells.lock.hjson
new file mode 100644
index 0000000000..d50d829b6e
--- /dev/null
+++ b/vendor/pulp-platform_common_cells.lock.hjson
@@ -0,0 +1,14 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// This file is generated by the util/vendor script. Please do not modify it
+// manually.
+
+{
+  upstream:
+  {
+    url: https://github.com/pulp-platform/common_cells.git
+    rev: dc555643226419b7a602f0aa39d449545ea4c1f2
+  }
+}
diff --git a/vendor/pulp-platform_common_cells.vendor.hjson b/vendor/pulp-platform_common_cells.vendor.hjson
new file mode 100644
index 0000000000..38ed41bff5
--- /dev/null
+++ b/vendor/pulp-platform_common_cells.vendor.hjson
@@ -0,0 +1,40 @@
+// -*- coding: utf-8 -*-
+// Copyright (C) 2022 Thales DIS France SAS
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com)
+
+{
+  // Name of the project
+  name: "pulp_common_cells",
+
+  // Target directory: relative to the location of this script.
+ target_dir: "pulp-platform/common_cells", + + // Upstream repository + upstream: { + // URL + url: "https://github.com/pulp-platform/common_cells.git", + // revision + rev: "v1.23.0", + } + + //Patch dir for local changes + patch_dir: "patches/pulp-platform/common_cells", + + // Exclusions from upstream content + exclude_from_upstream: [ + ".github", + ".gitlab-ci.yml", + ".travis.yml", + "Bender.yml", + "ci", + "common_cells.core", + "formal", + "ips_list.yml", + "lint", + "Makefile", + "src_files.yml", + "test", + ] +} + diff --git a/vendor/pulp-platform_common_cells_fpu.lock.hjson b/vendor/pulp-platform_common_cells_fpu.lock.hjson new file mode 100644 index 0000000000..423b594804 --- /dev/null +++ b/vendor/pulp-platform_common_cells_fpu.lock.hjson @@ -0,0 +1,14 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// This file is generated by the util/vendor script. Please do not modify it +// manually. + +{ + upstream: + { + url: https://github.com/pulp-platform/common_cells.git + rev: 790f2385c01c83022474eede55809666209216e3 + } +} diff --git a/vendor/pulp-platform_common_cells_fpu.vendor.hjson b/vendor/pulp-platform_common_cells_fpu.vendor.hjson new file mode 100644 index 0000000000..0744a98985 --- /dev/null +++ b/vendor/pulp-platform_common_cells_fpu.vendor.hjson @@ -0,0 +1,41 @@ +// -*- coding: utf-8 -*- +// Copyright (C) 2022 Thales DIS France SAS +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com) + +{ + // Name of the project + name: "pulp_common_cells_fpu", + + // Target directory: relative to the location of this script.
+ target_dir: "pulp-platform/fpnew/src/common_cells", + + // Upstream repository + upstream: { + // URL + url: "https://github.com/pulp-platform/common_cells.git", + // revision + rev: "v1.13.1", + } + + //Patch dir for local changes + patch_dir: "patches/pulp-platform/common_cells_fpu", + + // Exclusions from upstream content + exclude_from_upstream: [ + ".git", + ".github", + ".gitlab-ci.yml", + ".travis.yml", + "Bender.yml", + "ci", + "common_cells.core", + "formal", + "ips_list.yml", + "lint", + "Makefile", + "src_files.yml", + "test", + ] +} + diff --git a/vendor/pulp-platform_fpnew.lock.hjson b/vendor/pulp-platform_fpnew.lock.hjson new file mode 100644 index 0000000000..f112f6bac2 --- /dev/null +++ b/vendor/pulp-platform_fpnew.lock.hjson @@ -0,0 +1,14 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// This file is generated by the util/vendor script. Please do not modify it +// manually. + +{ + upstream: + { + url: https://github.com/pulp-platform/fpnew.git + rev: 79f75e0a0fdab6ebc3840a14077c39f4934321fe + } +} diff --git a/vendor/pulp-platform_fpnew.vendor.hjson b/vendor/pulp-platform_fpnew.vendor.hjson new file mode 100644 index 0000000000..374768c064 --- /dev/null +++ b/vendor/pulp-platform_fpnew.vendor.hjson @@ -0,0 +1,34 @@ +// -*- coding: utf-8 -*- +// Copyright (C) 2022 Thales DIS France SAS +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com) + +{ + // Name of the project + name: "pulp_fpnew", + + // Target directory: relative to the location of this script.
+ target_dir: "pulp-platform/fpnew", + + // Upstream repository + upstream: { + // URL + url: "https://github.com/pulp-platform/fpnew.git", + // revision + rev: "v0.6.2", + } + + // Patch dir for local changes + patch_dir: "patches/pulp-platform/fpnew", + + // Exclusions from upstream content + exclude_from_upstream: [ + ".gitmodules", + "Bender.yml", + "docs", + "ips_list.yml", + "src_files.yml", + "tb", + ] +} + diff --git a/vendor/pulp-platform_fpu_div_sqrt_mvp.lock.hjson b/vendor/pulp-platform_fpu_div_sqrt_mvp.lock.hjson new file mode 100644 index 0000000000..4a9de30727 --- /dev/null +++ b/vendor/pulp-platform_fpu_div_sqrt_mvp.lock.hjson @@ -0,0 +1,14 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// This file is generated by the util/vendor script. Please do not modify it +// manually. + +{ + upstream: + { + url: https://github.com/pulp-platform/fpu_div_sqrt_mvp.git + rev: 83a601f97934ed5e06d737b9c80d98b08867c5fa + } +} diff --git a/vendor/pulp-platform_fpu_div_sqrt_mvp.vendor.hjson b/vendor/pulp-platform_fpu_div_sqrt_mvp.vendor.hjson new file mode 100644 index 0000000000..ecdc904c47 --- /dev/null +++ b/vendor/pulp-platform_fpu_div_sqrt_mvp.vendor.hjson @@ -0,0 +1,32 @@ +// -*- coding: utf-8 -*- +// Copyright (C) 2022 Thales DIS France SAS +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com) + +{ + // Name of the project + name: "pulp_fpu_div_sqrt_mvp", + + // Target directory: relative to the location of this script.
+ target_dir: "pulp-platform/fpnew/src/fpu_div_sqrt_mvp", + + // Upstream repository + upstream: { + // URL + url: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", + // revision + rev: "v1.0.3", + } + + //Patch dir for local changes + patch_dir: "patches/pulp-platform/fpu_div_sqrt_mvp", + + // Exclusions from upstream content + exclude_from_upstream: [ + ".git", + "Bender.yml", + "document", + "src_files.yml", + ] +} +