diff --git a/Bender.yml b/Bender.yml
index f61dfb1970..a3e30c312e 100644
--- a/Bender.yml
+++ b/Bender.yml
@@ -8,7 +8,7 @@ package:
 # WT_DCACHE
 
 export_include_dirs:
-  - common/submodules/common_cells/include/
+  - vendor/pulp-platform/common_cells/include/
   - corev_apu/axi/include/
 
 sources:
@@ -28,8 +28,8 @@ sources:
     - corev_apu/tb/ariane_axi_soc_pkg.sv
     - core/include/ariane_axi_pkg.sv
     - core/include/std_cache_pkg.sv
-    - core/fpu/src/fpnew_pkg.sv
-    - core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
+    - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
     # Stand-alone source files
     - core/ariane.sv
     - core/serdiv.sv
@@ -68,24 +68,24 @@ sources:
     - core/issue_read_operands.sv
     - core/pmp/src/pmp_entry.sv
     - core/pmp/src/pmp.sv
-    - core/fpu/src/fpnew_fma.sv
-    - core/fpu/src/fpnew_opgroup_fmt_slice.sv
-    - core/fpu/src/fpnew_divsqrt_multi.sv
-    - core/fpu/src/fpnew_fma_multi.sv
-    - core/fpu/src/fpnew_opgroup_multifmt_slice.sv
-    - core/fpu/src/fpnew_classifier.sv
-    - core/fpu/src/fpnew_noncomp.sv
-    - core/fpu/src/fpnew_cast_multi.sv
-    - core/fpu/src/fpnew_opgroup_block.sv
-    - core/fpu/src/fpnew_rounding.sv
-    - core/fpu/src/fpnew_top.sv
-    - core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
-    - core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
-    - core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
-    - core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
-    - core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
-    - core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
-    - core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_fma.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_classifier.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
+    - vendor/pulp-platform/fpnew/src/fpnew_top.sv
+    - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
+    - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
+    - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
+    - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
+    - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
+    - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
+    - vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
     - core/frontend/frontend.sv
     - core/frontend/instr_scan.sv
     - core/frontend/instr_queue.sv
@@ -143,16 +143,16 @@ sources:
     - corev_apu/riscv-dbg/debug_rom/debug_rom.sv
     - corev_apu/register_interface/src/apb_to_reg.sv
     - corev_apu/axi/src/axi_multicut.sv
-    - common/submodules/common_cells/src/cf_math_pkg.sv
-    - common/submodules/common_cells/src/deprecated/generic_fifo.sv
-    - common/submodules/common_cells/src/deprecated/pulp_sync.sv
-    - common/submodules/common_cells/src/deprecated/find_first_one.sv
-    - common/submodules/common_cells/src/rstgen_bypass.sv
-    - common/submodules/common_cells/src/rstgen.sv
-    - common/submodules/common_cells/src/stream_mux.sv
-    - common/submodules/common_cells/src/stream_demux.sv
-    - common/submodules/common_cells/src/stream_arbiter.sv
-    - common/submodules/common_cells/src/stream_arbiter_flushable.sv
+    - vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
+    - vendor/pulp-platform/common_cells/src/deprecated/generic_fifo.sv
+    - vendor/pulp-platform/common_cells/src/deprecated/pulp_sync.sv
+    - vendor/pulp-platform/common_cells/src/deprecated/find_first_one.sv
+    - vendor/pulp-platform/common_cells/src/rstgen_bypass.sv
+    - vendor/pulp-platform/common_cells/src/rstgen.sv
+    - vendor/pulp-platform/common_cells/src/stream_mux.sv
+    - vendor/pulp-platform/common_cells/src/stream_demux.sv
+    - vendor/pulp-platform/common_cells/src/stream_arbiter.sv
+    - vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv
     - corev_apu/axi/src/axi_cut.sv
     - corev_apu/axi/src/axi_join.sv
     - corev_apu/axi/src/axi_delayer.sv
@@ -164,28 +164,28 @@ sources:
     - corev_apu/axi/src/axi_demux.sv
     - corev_apu/axi/src/axi_xbar.sv
     - common/local/techlib/fpga/rtl/SyncSpRamBeNx64.sv
-    - common/submodules/common_cells/src/sync.sv
-    - common/submodules/common_cells/src/popcount.sv
-    - common/submodules/common_cells/src/unread.sv
-    - common/submodules/common_cells/src/cdc_2phase.sv
-    - common/submodules/common_cells/src/spill_register_flushable.sv
-    - common/submodules/common_cells/src/spill_register.sv
-    - common/submodules/common_cells/src/edge_detect.sv
-    - common/submodules/common_cells/src/fifo_v3.sv
-    - common/submodules/common_cells/src/deprecated/fifo_v2.sv
-    - common/submodules/common_cells/src/deprecated/fifo_v1.sv
-    - common/submodules/common_cells/src/lzc.sv
-    - common/submodules/common_cells/src/rr_arb_tree.sv
-    - common/submodules/common_cells/src/deprecated/rrarbiter.sv
-    - common/submodules/common_cells/src/stream_delay.sv
-    - common/submodules/common_cells/src/lfsr.sv
-    - common/submodules/common_cells/src/lfsr_8bit.sv
-    - common/submodules/common_cells/src/lfsr_16bit.sv
-    - common/submodules/common_cells/src/counter.sv
-    - common/submodules/common_cells/src/shift_reg.sv
-    - common/submodules/common_cells/src/exp_backoff.sv
-    - common/submodules/common_cells/src/addr_decode.sv
-    - common/submodules/common_cells/src/stream_register.sv
+    - vendor/pulp-platform/common_cells/src/sync.sv
+    - vendor/pulp-platform/common_cells/src/popcount.sv
+    - vendor/pulp-platform/common_cells/src/unread.sv
+    - vendor/pulp-platform/common_cells/src/cdc_2phase.sv
+    - vendor/pulp-platform/common_cells/src/spill_register_flushable.sv
+    - vendor/pulp-platform/common_cells/src/spill_register.sv
+    - vendor/pulp-platform/common_cells/src/edge_detect.sv
+    - vendor/pulp-platform/common_cells/src/fifo_v3.sv
+    - vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv
+    - vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv
+    - vendor/pulp-platform/common_cells/src/lzc.sv
+    - vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
+    - vendor/pulp-platform/common_cells/src/deprecated/rrarbiter.sv
+    - vendor/pulp-platform/common_cells/src/stream_delay.sv
+    - vendor/pulp-platform/common_cells/src/lfsr.sv
+    - vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
+    - vendor/pulp-platform/common_cells/src/lfsr_16bit.sv
+    - vendor/pulp-platform/common_cells/src/counter.sv
+    - vendor/pulp-platform/common_cells/src/shift_reg.sv
+    - vendor/pulp-platform/common_cells/src/exp_backoff.sv
+    - vendor/pulp-platform/common_cells/src/addr_decode.sv
+    - vendor/pulp-platform/common_cells/src/stream_register.sv
     - corev_apu/src/tech_cells_generic/src/cluster_clock_inverter.sv
     - corev_apu/src/tech_cells_generic/src/pulp_clock_mux2.sv
     - target: not(cv32a6)
diff --git a/Flist.ariane b/Flist.ariane
index f7573966f4..3085215bc0 100644
--- a/Flist.ariane
+++ b/Flist.ariane
@@ -15,7 +15,7 @@
 // Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
 // Date: 15.08.2018
 // Description: File list for OpenPiton flow
-+incdir+common/submodules/common_cells/include/
++incdir+vendor/pulp-platform/common_cells/include/
 +incdir+common/local/util/
 +incdir+corev_apu/register_interface/include/
 
@@ -28,27 +28,27 @@ corev_apu/axi/src/axi_pkg.sv
 core/include/ariane_axi_pkg.sv
 core/include/wt_cache_pkg.sv
 core/include/axi_intf.sv
-core/fpu/src/fpnew_pkg.sv
+vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
 core/include/cvxif_pkg.sv
-common/submodules/common_cells/src/cf_math_pkg.sv
+vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
 core/include/instr_tracer_pkg.sv
 core/cvxif_example/include/cvxif_instr_pkg.sv
 corev_apu/rv_plic/rtl/rv_plic_reg_pkg.sv
 common/local/util/sram.sv
-common/submodules/common_cells/src/deprecated/rrarbiter.sv
-common/submodules/common_cells/src/deprecated/fifo_v1.sv
-common/submodules/common_cells/src/deprecated/fifo_v2.sv
-common/submodules/common_cells/src/fifo_v3.sv
-common/submodules/common_cells/src/shift_reg.sv
-common/submodules/common_cells/src/lfsr_8bit.sv
-common/submodules/common_cells/src/lfsr.sv
-common/submodules/common_cells/src/lzc.sv
-common/submodules/common_cells/src/exp_backoff.sv
-common/submodules/common_cells/src/rr_arb_tree.sv
-common/submodules/common_cells/src/rstgen_bypass.sv
-common/submodules/common_cells/src/cdc_2phase.sv
-common/submodules/common_cells/src/unread.sv
-common/submodules/common_cells/src/popcount.sv
+vendor/pulp-platform/common_cells/src/deprecated/rrarbiter.sv
+vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv
+vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv
+vendor/pulp-platform/common_cells/src/fifo_v3.sv
+vendor/pulp-platform/common_cells/src/shift_reg.sv
+vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
+vendor/pulp-platform/common_cells/src/lfsr.sv
+vendor/pulp-platform/common_cells/src/lzc.sv
+vendor/pulp-platform/common_cells/src/exp_backoff.sv
+vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
+vendor/pulp-platform/common_cells/src/rstgen_bypass.sv
+vendor/pulp-platform/common_cells/src/cdc_2phase.sv
+vendor/pulp-platform/common_cells/src/unread.sv
+vendor/pulp-platform/common_cells/src/popcount.sv
 corev_apu/axi_mem_if/src/axi2mem.sv
 corev_apu/src/tech_cells_generic/src/deprecated/cluster_clk_cells.sv
 corev_apu/src/tech_cells_generic/src/deprecated/pulp_clk_cells.sv
@@ -132,31 +132,31 @@ corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv
 corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv
 corev_apu/register_interface/src/apb_to_reg.sv
 corev_apu/register_interface/src/reg_intf.sv
-core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
-core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
-core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
-core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
-core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
-core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
-core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
-core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
-core/fpu/src/fpnew_cast_multi.sv
-core/fpu/src/fpnew_classifier.sv
-core/fpu/src/fpnew_divsqrt_multi.sv
-core/fpu/src/fpnew_fma_multi.sv
-core/fpu/src/fpnew_fma.sv
-core/fpu/src/fpnew_noncomp.sv
-core/fpu/src/fpnew_opgroup_block.sv
-core/fpu/src/fpnew_opgroup_fmt_slice.sv
-core/fpu/src/fpnew_opgroup_multifmt_slice.sv
-core/fpu/src/fpnew_rounding.sv
-core/fpu/src/fpnew_top.sv
+vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
+vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
+vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
+vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
+vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
+vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
+vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
+vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
+vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv
+vendor/pulp-platform/fpnew/src/fpnew_classifier.sv
+vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
+vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv
+vendor/pulp-platform/fpnew/src/fpnew_fma.sv
+vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv
+vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
+vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
+vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
+vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
+vendor/pulp-platform/fpnew/src/fpnew_top.sv
 core/pmp/src/pmp.sv
 core/pmp/src/pmp_entry.sv
 common/local/util/instr_tracer.sv
 common/local/util/instr_tracer_if.sv
 core/cvxif_example/cvxif_example_coprocessor.sv
 core/cvxif_example/instr_decoder.sv
-common/submodules/common_cells/src/counter.sv
-common/submodules/common_cells/src/delta_counter.sv
+vendor/pulp-platform/common_cells/src/counter.sv
+vendor/pulp-platform/common_cells/src/delta_counter.sv
 core/cvxif_fu.sv
diff --git a/Makefile b/Makefile
index 570fe74af0..9536db0e29 100644
--- a/Makefile
+++ b/Makefile
@@ -169,13 +169,13 @@ src :=  core/axi_adapter.sv
         corev_apu/riscv-dbg/debug_rom/debug_rom.sv                                   \
         corev_apu/register_interface/src/apb_to_reg.sv                               \
         corev_apu/axi/src/axi_multicut.sv                                            \
-        common/submodules/common_cells/src/rstgen_bypass.sv                          \
-        common/submodules/common_cells/src/rstgen.sv                                 \
-        common/submodules/common_cells/src/stream_mux.sv                             \
-        common/submodules/common_cells/src/stream_demux.sv                           \
-        common/submodules/common_cells/src/exp_backoff.sv                            \
-        common/submodules/common_cells/src/addr_decode.sv                            \
-        common/submodules/common_cells/src/stream_register.sv                        \
+        vendor/pulp-platform/common_cells/src/rstgen_bypass.sv                       \
+        vendor/pulp-platform/common_cells/src/rstgen.sv                              \
+        vendor/pulp-platform/common_cells/src/stream_mux.sv                          \
+        vendor/pulp-platform/common_cells/src/stream_demux.sv                        \
+        vendor/pulp-platform/common_cells/src/exp_backoff.sv                         \
+        vendor/pulp-platform/common_cells/src/addr_decode.sv                         \
+        vendor/pulp-platform/common_cells/src/stream_register.sv                     \
         corev_apu/axi/src/axi_cut.sv                                                 \
         corev_apu/axi/src/axi_join.sv                                                \
         corev_apu/axi/src/axi_delayer.sv                                             \
@@ -186,15 +186,15 @@ src :=  core/axi_adapter.sv
         corev_apu/axi/src/axi_mux.sv                                                 \
         corev_apu/axi/src/axi_demux.sv                                               \
         corev_apu/axi/src/axi_xbar.sv                                                \
-        common/submodules/common_cells/src/cdc_2phase.sv                             \
-        common/submodules/common_cells/src/spill_register_flushable.sv               \
-        common/submodules/common_cells/src/spill_register.sv                         \
-        common/submodules/common_cells/src/stream_arbiter.sv                         \
-        common/submodules/common_cells/src/stream_arbiter_flushable.sv               \
-        common/submodules/common_cells/src/deprecated/fifo_v1.sv                     \
-        common/submodules/common_cells/src/deprecated/fifo_v2.sv                     \
-        common/submodules/common_cells/src/stream_delay.sv                           \
-        common/submodules/common_cells/src/lfsr_16bit.sv                             \
+        vendor/pulp-platform/common_cells/src/cdc_2phase.sv                          \
+        vendor/pulp-platform/common_cells/src/spill_register_flushable.sv            \
+        vendor/pulp-platform/common_cells/src/spill_register.sv                      \
+        vendor/pulp-platform/common_cells/src/stream_arbiter.sv                      \
+        vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv            \
+        vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv                  \
+        vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv                  \
+        vendor/pulp-platform/common_cells/src/stream_delay.sv                        \
+        vendor/pulp-platform/common_cells/src/lfsr_16bit.sv                          \
         corev_apu/src/tech_cells_generic/src/deprecated/cluster_clk_cells.sv         \
         corev_apu/src/tech_cells_generic/src/deprecated/pulp_clk_cells.sv            \
         corev_apu/src/tech_cells_generic/src/rtl/tc_clk.sv                           \
@@ -244,7 +244,7 @@ riscv-fp-tests            := $(shell xargs printf '\n%s' < $(riscv-fp-tests-list
 riscv-benchmarks          := $(shell xargs printf '\n%s' < $(riscv-benchmarks-list) | cut -b 1-)
 
 # Search here for include files (e.g.: non-standalone components)
-incdir := common/submodules/common_cells/include/ corev_apu/axi/include/ corev_apu/register_interface/include/
+incdir := vendor/pulp-platform/common_cells/include/ corev_apu/axi/include/ corev_apu/register_interface/include/
 
 # Compile and sim flags
 compile_flag     += +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive +define+$(defines)
@@ -293,7 +293,7 @@ vcs_build: $(dpi-library)/ariane_dpi.so
 	vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog +define+$(defines) -f ../core/Flist.$(target) &&\
 	vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog +define+$(defines) $(filter %.sv,$(ariane_pkg)) +incdir+core/include/+$(VCS_HOME)/etc/uvm-1.2/dpi &&\
 	vhdlan $(if $(VERDI), -kdb,) -full64 -nc $(filter %.vhd,$(uart_src)) &&\
-	vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog -assert svaext +define+$(defines) $(filter %.sv,$(src)) +incdir+../common/submodules/common_cells/include/+../corev_apu/axi/include/+../corev_apu/register_interface/include/ &&\
+	vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog -assert svaext +define+$(defines) $(filter %.sv,$(src)) +incdir+../vendor/pulp-platform/common_cells/include/+../corev_apu/axi/include/+../corev_apu/register_interface/include/ &&\
 	vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog -ntb_opts uvm-1.2 &&\
 	vlogan $(if $(VERDI), -kdb,) -full64 -nc -sverilog -ntb_opts uvm-1.2 $(tbs) +define+$(defines) +incdir+../corev_apu/axi/include/ &&\
 	vcs $(if $(VERDI), -kdb -debug_access+all -lca,) -full64 -timescale=1ns/1ns -ntb_opts uvm-1.2 work.ariane_tb
diff --git a/core/Flist.cv32a60x b/core/Flist.cv32a60x
index e8490a057c..f88c51db33 100644
--- a/core/Flist.cv32a60x
+++ b/core/Flist.cv32a60x
@@ -27,8 +27,8 @@
 
 +define+WT_DCACHE
 
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/
 +incdir+${CVA6_REPO_DIR}/common/local/util/
 
 ${CVA6_REPO_DIR}/core/include/cv32a60x_config_pkg.sv
@@ -58,40 +58,40 @@ ${CVA6_REPO_DIR}/core/cvxif_example/cvxif_example_coprocessor.sv
 ${CVA6_REPO_DIR}/core/cvxif_example/instr_decoder.sv
 
 // Common Cells
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv
 
 // Common Cells for example coprocessor
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/counter.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/delta_counter.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/counter.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/delta_counter.sv
 
 // Floating point unit
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
 
 // Top-level source files (not necessarily instantiated at the top of the cva6).
 ${CVA6_REPO_DIR}/core/ariane.sv
diff --git a/core/Flist.cv32a6_ima_sv32_fpga b/core/Flist.cv32a6_ima_sv32_fpga
index 310cc070df..dbaa746c05 100644
--- a/core/Flist.cv32a6_ima_sv32_fpga
+++ b/core/Flist.cv32a6_ima_sv32_fpga
@@ -27,8 +27,8 @@
 
 +define+WT_DCACHE
 
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/
 +incdir+${CVA6_REPO_DIR}/common/local/util/
 
 ${CVA6_REPO_DIR}/core/include/cv32a6_imac_sv0_config_pkg.sv
@@ -55,36 +55,36 @@ ${CVA6_REPO_DIR}/core/include/cvxif_pkg.sv
 ${CVA6_REPO_DIR}/core/cvxif_fu.sv
 
 // Common Cells
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv
 
 // Floating point unit
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
 
 // Top-level source files (not necessarily instantiated at the top of the cva6).
 ${CVA6_REPO_DIR}/core/ariane.sv
diff --git a/core/Flist.cv32a6_imac_sv0 b/core/Flist.cv32a6_imac_sv0
index 310cc070df..dbaa746c05 100644
--- a/core/Flist.cv32a6_imac_sv0
+++ b/core/Flist.cv32a6_imac_sv0
@@ -27,8 +27,8 @@
 
 +define+WT_DCACHE
 
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/
 +incdir+${CVA6_REPO_DIR}/common/local/util/
 
 ${CVA6_REPO_DIR}/core/include/cv32a6_imac_sv0_config_pkg.sv
@@ -55,36 +55,36 @@ ${CVA6_REPO_DIR}/core/include/cvxif_pkg.sv
 ${CVA6_REPO_DIR}/core/cvxif_fu.sv
 
 // Common Cells
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv
 
 // Floating point unit
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
 
 // Top-level source files (not necessarily instantiated at the top of the cva6).
 ${CVA6_REPO_DIR}/core/ariane.sv
diff --git a/core/Flist.cv32a6_imac_sv32 b/core/Flist.cv32a6_imac_sv32
index 310cc070df..dbaa746c05 100644
--- a/core/Flist.cv32a6_imac_sv32
+++ b/core/Flist.cv32a6_imac_sv32
@@ -27,8 +27,8 @@
 
 +define+WT_DCACHE
 
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/
 +incdir+${CVA6_REPO_DIR}/common/local/util/
 
 ${CVA6_REPO_DIR}/core/include/cv32a6_imac_sv0_config_pkg.sv
@@ -55,36 +55,36 @@ ${CVA6_REPO_DIR}/core/include/cvxif_pkg.sv
 ${CVA6_REPO_DIR}/core/cvxif_fu.sv
 
 // Common Cells
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv
 
 // Floating point unit
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
 
 // Top-level source files (not necessarily instantiated at the top of the cva6).
 ${CVA6_REPO_DIR}/core/ariane.sv
diff --git a/core/Flist.cv32a6_imafc_sv32 b/core/Flist.cv32a6_imafc_sv32
index 310cc070df..dbaa746c05 100644
--- a/core/Flist.cv32a6_imafc_sv32
+++ b/core/Flist.cv32a6_imafc_sv32
@@ -27,8 +27,8 @@
 
 +define+WT_DCACHE
 
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/
 +incdir+${CVA6_REPO_DIR}/common/local/util/
 
 ${CVA6_REPO_DIR}/core/include/cv32a6_imac_sv0_config_pkg.sv
@@ -55,36 +55,36 @@ ${CVA6_REPO_DIR}/core/include/cvxif_pkg.sv
 ${CVA6_REPO_DIR}/core/cvxif_fu.sv
 
 // Common Cells
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv
 
 // Floating point unit
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
 
 // Top-level source files (not necessarily instantiated at the top of the cva6).
 ${CVA6_REPO_DIR}/core/ariane.sv
diff --git a/core/Flist.cv64a6_imafdc_sv39 b/core/Flist.cv64a6_imafdc_sv39
index 72a038bc14..1fe09b4934 100644
--- a/core/Flist.cv64a6_imafdc_sv39
+++ b/core/Flist.cv64a6_imafdc_sv39
@@ -27,8 +27,8 @@
 
 +define+WT_DCACHE
 
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/include/
-+incdir+${CVA6_REPO_DIR}/common/submodules/common_cells/src/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/include/
++incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/
 +incdir+${CVA6_REPO_DIR}/common/local/util/
 
 ${CVA6_REPO_DIR}/core/include/cv64a6_imafdc_sv39_config_pkg.sv
@@ -58,40 +58,40 @@ ${CVA6_REPO_DIR}/core/cvxif_example/cvxif_example_coprocessor.sv
 ${CVA6_REPO_DIR}/core/cvxif_example/instr_decoder.sv
 
 // Common Cells
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/cf_math_pkg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/fifo_v3.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lfsr.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/lzc.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/rr_arb_tree.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/shift_reg.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/unread.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/popcount.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/exp_backoff.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/fifo_v3.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lfsr.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/lzc.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/shift_reg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/unread.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/popcount.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv
 
 // Common Cells for example coprocessor
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/counter.sv
-${CVA6_REPO_DIR}/common/submodules/common_cells/src/delta_counter.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/counter.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/delta_counter.sv
 
 // Floating point unit
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_pkg.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_cast_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_classifier.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_divsqrt_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma_multi.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_fma.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_noncomp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_block.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_fmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_opgroup_multifmt_slice.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_rounding.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpnew_top.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
-${CVA6_REPO_DIR}/core/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_fma.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpnew_top.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
+${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
 
 // Top-level source files (not necessarily instantiated at the top of the cva6).
 ${CVA6_REPO_DIR}/core/ariane.sv
diff --git a/corev_apu/fpga/scripts/run.tcl b/corev_apu/fpga/scripts/run.tcl
index 6d32d95d6a..f075219831 100644
--- a/corev_apu/fpga/scripts/run.tcl
+++ b/corev_apu/fpga/scripts/run.tcl
@@ -38,24 +38,24 @@ read_ip { \
 }
 # read_ip xilinx/xlnx_protocol_checker/ip/xlnx_protocol_checker.xci
 
-set_property include_dirs { "src/axi_sd_bridge/include" "../../common/submodules/common_cells/include" "../axi/include" "../register_interface/include"} [current_fileset]
+set_property include_dirs { "src/axi_sd_bridge/include" "../../vendor/pulp-platform/common_cells/include" "../axi/include" "../register_interface/include"} [current_fileset]
 
 source scripts/add_sources.tcl
 
 set_property top ${project}_xilinx [current_fileset]
 
 if {$::env(BOARD) eq "genesys2"} {
-    read_verilog -sv {src/genesysii.svh ../../common/submodules/common_cells/include/common_cells/registers.svh}
+    read_verilog -sv {src/genesysii.svh ../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh}
     set file "src/genesysii.svh"
-    set registers "../../common/submodules/common_cells/include/common_cells/registers.svh"
+    set registers "../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh"
 } elseif {$::env(BOARD) eq "kc705"} {
-      read_verilog -sv {src/kc705.svh ../../common/submodules/common_cells/include/common_cells/registers.svh}
+      read_verilog -sv {src/kc705.svh ../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh}
       set file "src/kc705.svh"
-      set registers "../../common/submodules/common_cells/include/common_cells/registers.svh"
+      set registers "../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh"
 } elseif {$::env(BOARD) eq "vc707"} {
-      read_verilog -sv {src/vc707.svh ../../common/submodules/common_cells/include/common_cells/registers.svh}
+      read_verilog -sv {src/vc707.svh ../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh}
       set file "src/vc707.svh"
-      set registers "../../common/submodules/common_cells/include/common_cells/registers.svh"
+      set registers "../../vendor/pulp-platform/common_cells/include/common_cells/registers.svh"
 } else {
     exit 1
 }
diff --git a/corev_apu/tb/tb_cva6_icache/tb.list b/corev_apu/tb/tb_cva6_icache/tb.list
index db576dac4b..60f3c41aba 100644
--- a/corev_apu/tb/tb_cva6_icache/tb.list
+++ b/corev_apu/tb/tb_cva6_icache/tb.list
@@ -3,12 +3,12 @@
 ../../riscv-dbg/src/dm_pkg.sv
 ../../../core/include/ariane_pkg.sv
 ../../../core/include/wt_cache_pkg.sv
-../../../common/submodules/common_cells/src/cf_math_pkg.sv
+../../../vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
 ../../../common/local/techlib/fpga/rtl/SyncSpRamBeNx64.sv
 ../../../core/cache_subsystem/cva6_icache.sv
-../../../common/submodules/common_cells/src/lfsr.sv
-../../../common/submodules/common_cells/src/fifo_v3.sv
-../../../common/submodules/common_cells/src/lzc.sv
+../../../vendor/pulp-platform/common_cells/src/lfsr.sv
+../../../vendor/pulp-platform/common_cells/src/fifo_v3.sv
+../../../vendor/pulp-platform/common_cells/src/lzc.sv
 ../../../common/local/util/sram.sv
 hdl/mem_emul.sv
 hdl/tlb_emul.sv
diff --git a/corev_apu/tb/tb_wb_dcache/tb.list b/corev_apu/tb/tb_wb_dcache/tb.list
index f427364770..05180e4ee1 100644
--- a/corev_apu/tb/tb_wb_dcache/tb.list
+++ b/corev_apu/tb/tb_wb_dcache/tb.list
@@ -23,16 +23,16 @@
 ../../../core/cache_subsystem/std_nbdcache.sv
 ../../../core/cache_subsystem/amo_alu.sv
 ../../../core/cache_subsystem/tag_cmp.sv
-../../../common/submodules/common_cells/src/cf_math_pkg.sv
-../../../common/submodules/common_cells/src/lfsr_8bit.sv
-../../../common/submodules/common_cells/src/fifo_v3.sv
-../../../common/submodules/common_cells/src/lzc.sv
-../../../common/submodules/common_cells/src/rr_arb_tree.sv
-../../../common/submodules/common_cells/src/exp_backoff.sv
-../../../common/submodules/common_cells/src/stream_arbiter.sv
-../../../common/submodules/common_cells/src/stream_arbiter_flushable.sv
-../../../common/submodules/common_cells/src/stream_mux.sv
-../../../common/submodules/common_cells/src/stream_demux.sv
+../../../vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
+../../../vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
+../../../vendor/pulp-platform/common_cells/src/fifo_v3.sv
+../../../vendor/pulp-platform/common_cells/src/lzc.sv
+../../../vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
+../../../vendor/pulp-platform/common_cells/src/exp_backoff.sv
+../../../vendor/pulp-platform/common_cells/src/stream_arbiter.sv
+../../../vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv
+../../../vendor/pulp-platform/common_cells/src/stream_mux.sv
+../../../vendor/pulp-platform/common_cells/src/stream_demux.sv
 ../../../core/axi_adapter.sv
 ../../../common/local/util/sram.sv
 ../../src/axi_riscv_atomics/src/axi_res_tbl.sv
diff --git a/corev_apu/tb/tb_wt_axi_dcache/tb.list b/corev_apu/tb/tb_wt_axi_dcache/tb.list
index a12da819cd..1c744fcc5f 100644
--- a/corev_apu/tb/tb_wt_axi_dcache/tb.list
+++ b/corev_apu/tb/tb_wt_axi_dcache/tb.list
@@ -12,7 +12,7 @@
 ../../axi/src/axi_pkg.sv
 ../../axi/src/axi_intf.sv
 ../../axi/src/axi_test.sv
-../../../core/fpu/src/fpnew_pkg.sv
+../../../vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
 ../../../core/include/ariane_pkg.sv
 ../ariane_soc_pkg.sv
 ../ariane_axi_soc_pkg.sv
@@ -28,15 +28,15 @@
 ../../../core/axi_shim.sv
 ../../../core/cache_subsystem/wt_axi_adapter.sv
 ../../../core/cache_subsystem/wt_cache_subsystem.sv
-../../../common/submodules/common_cells/src/cf_math_pkg.sv
-../../../common/submodules/common_cells/src/lfsr.sv
-../../../common/submodules/common_cells/src/fifo_v3.sv
-../../../common/submodules/common_cells/src/lzc.sv
-../../../common/submodules/common_cells/src/rr_arb_tree.sv
-../../../common/submodules/common_cells/src/exp_backoff.sv
-../../../common/submodules/common_cells/src/stream_arbiter.sv
-../../../common/submodules/common_cells/src/stream_arbiter_flushable.sv
-../../../common/submodules/common_cells/src/stream_mux.sv
+../../../vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
+../../../vendor/pulp-platform/common_cells/src/lfsr.sv
+../../../vendor/pulp-platform/common_cells/src/fifo_v3.sv
+../../../vendor/pulp-platform/common_cells/src/lzc.sv
+../../../vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
+../../../vendor/pulp-platform/common_cells/src/exp_backoff.sv
+../../../vendor/pulp-platform/common_cells/src/stream_arbiter.sv
+../../../vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv
+../../../vendor/pulp-platform/common_cells/src/stream_mux.sv
 ../../src/tech_cells_generic/src/rtl/tc_sram.sv
 ../../../common/local/util/tc_sram_wrapper.sv
 ../../../common/local/util/sram.sv
diff --git a/corev_apu/tb/tb_wt_dcache/tb.list b/corev_apu/tb/tb_wt_dcache/tb.list
index 617eff1eda..a03beda249 100644
--- a/corev_apu/tb/tb_wt_dcache/tb.list
+++ b/corev_apu/tb/tb_wt_dcache/tb.list
@@ -16,12 +16,12 @@
 ../../../core/cache_subsystem/wt_dcache_missunit.sv
 ../../../core/cache_subsystem/wt_dcache_wbuffer.sv
 ../../../core/cache_subsystem/wt_dcache.sv
-../../../common/submodules/common_cells/src/cf_math_pkg.sv
-../../../common/submodules/common_cells/src/lfsr.sv
-../../../common/submodules/common_cells/src/fifo_v3.sv
-../../../common/submodules/common_cells/src/lzc.sv
-../../../common/submodules/common_cells/src/rr_arb_tree.sv
-../../../common/submodules/common_cells/src/exp_backoff.sv
+../../../vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
+../../../vendor/pulp-platform/common_cells/src/lfsr.sv
+../../../vendor/pulp-platform/common_cells/src/fifo_v3.sv
+../../../vendor/pulp-platform/common_cells/src/lzc.sv
+../../../vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
+../../../vendor/pulp-platform/common_cells/src/exp_backoff.sv
 ../../src/tech_cells_generic/src/rtl/tc_sram.sv
 ../../../common/local/util/tc_sram_wrapper.sv
 ../../../common/local/util/sram.sv
diff --git a/pd/synth/cva6_synth.tcl b/pd/synth/cva6_synth.tcl
index c3a82efa7d..334557f2bc 100644
--- a/pd/synth/cva6_synth.tcl
+++ b/pd/synth/cva6_synth.tcl
@@ -17,7 +17,7 @@ set clk_period $PERIOD
 set input_delay $INPUT_DELAY
 set output_delay $OUTPUT_DELAY
 
-set_app_var search_path "../../core/fpu/src/common_cells/include/ $search_path"
+set_app_var search_path "../../vendor/pulp-platform/fpnew/src/common_cells/include/ $search_path"
 
 sh rm -rf work
 sh mkdir work
diff --git a/util/README.md b/util/README.md
new file mode 100644
index 0000000000..9ed2109698
--- /dev/null
+++ b/util/README.md
@@ -0,0 +1,5 @@
+Content:
+
+* vendor.py
+  - vendorization script
+  - copied from https://github.com/openhwgroup/cv32e40p/blob/master/util/vendor.py, commit 69e839e
diff --git a/util/vendor.py b/util/vendor.py
new file mode 100644
index 0000000000..8c677f9288
--- /dev/null
+++ b/util/vendor.py
@@ -0,0 +1,782 @@
+#!/usr/bin/env python3
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+'''A tool to copy source code from upstream into this repository.
+
+For an introduction to using this tool, see doc/ug/vendor_hw.md in this
+repository (on the internet at https://docs.opentitan.org/doc/ug/vendor_hw/).
+
+For full documentation, see doc/rm/vendor_in_tool.md (on the internet at
+https://docs.opentitan.org/doc/rm/vendor_in_tool).
+
+'''
+
+import argparse
+import fnmatch
+import logging as log
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import textwrap
+from pathlib import Path
+
+import hjson
+
+EXCLUDE_ALWAYS = ['.git']
+
+LOCK_FILE_HEADER = """// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// This file is generated by the util/vendor script. Please do not modify it
+// manually.
+
+"""
+
+# Keys in the description (configuration) file which can be overridden through
+# the command line.
+OVERRIDABLE_DESC_KEYS = [
+    'patch_repo.url',
+    'patch_repo.rev_base',
+    'patch_repo.rev_patched',
+    'upstream.url',
+    'upstream.ref',
+]
+
+verbose = False
+
+
+def git_is_clean_workdir(git_workdir):
+    """Return True if `git status` reports no staged or unstaged changes.
+
+    Untracked files are ignored; a git failure raises CalledProcessError."""
+    status = subprocess.run(
+        ['git', 'status', '--untracked-files=no', '--porcelain'],
+        cwd=str(git_workdir), check=True,
+        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    return not status.stdout.strip()
+
+
+def github_qualify_references(log, repo_userorg, repo_name):
+    """ Replace "unqualified" GitHub references with "fully qualified" ones
+
+    GitHub automatically links issues and pull requests if they have a specific
+    format. Links can be qualified with the user/org name and the repository
+    name, or unqualified, if they only contain the issue or pull request number.
+
+    Returns the lines of `log` with all unqualified references qualified.
+    See https://help.github.com/en/articles/autolinked-references-and-urls#issues-and-pull-requests
+    for documentation of all supported formats.
+    """
+    ref_re = re.compile(r"(^|[^\w])(?:#|[gG][hH]-)(\d+)\b")
+    # \g<1> rather than \1: a user/org name starting with a digit would
+    # otherwise be parsed as part of the group number (e.g. '\10rg').
+    repl_str = r'\g<1>%s/%s#\g<2>' % (repo_userorg, repo_name)
+    return [ref_re.sub(repl_str, line) for line in log]
+
+
+def test_github_qualify_references():
+    """Check github_qualify_references on unqualified, qualified and invalid
+    references, with lowRISC/ibex as the repository to qualify against."""
+    org, repo = 'lowRISC', 'ibex'
+
+    def qualify(lines):
+        return github_qualify_references(lines, org, repo)
+
+    # Unqualified references, should be replaced: (input, expected) pairs
+    replaced = [
+        ('#28',
+         'lowRISC/ibex#28'),
+        ('GH-27',
+         'lowRISC/ibex#27'),
+        ('klaus #27',
+         'klaus lowRISC/ibex#27'),
+        ('Fixes #27',
+         'Fixes lowRISC/ibex#27'),
+        ('Fixes #27 and #28',
+         'Fixes lowRISC/ibex#27 and lowRISC/ibex#28'),
+        ('(#27)',
+         '(lowRISC/ibex#27)'),
+        ('something (#27) done',
+         'something (lowRISC/ibex#27) done'),
+        ('#27 and (GH-38)',
+         'lowRISC/ibex#27 and (lowRISC/ibex#38)'),
+    ]
+    assert qualify([src for src, _ in replaced]) == [
+        dst for _, dst in replaced]
+
+    # Qualified references, should stay as they are
+    items_qualified = [
+        'Fixes lowrisc/ibex#27',
+        'lowrisc/ibex#2',
+    ]
+    assert qualify(items_qualified) == items_qualified
+
+    # Invalid references, should stay as they are
+    items_invalid = [
+        'something#27',
+        'lowrisc/ibex#',
+    ]
+    assert qualify(items_invalid) == items_invalid
+
+
+def test_github_parse_url():
+    """Non-GitHub URLs give None; HTTPS and SSH GitHub URLs give (org, repo)."""
+    assert github_parse_url('https://example.com/something/asdf.git') is None
+    ibex = ('lowRISC', 'ibex')
+    for url in ('https://github.com/lowRISC/ibex.git',
+                'https://github.com/lowRISC/ibex',
+                'git@github.com:lowRISC/ibex.git'):
+        assert github_parse_url(url) == ibex
+
+
+def github_parse_url(github_repo_url):
+    """Parse a GitHub repository URL (HTTPS or SSH form) into its parts.
+
+    Return a tuple (userorg, name), or None if the parsing failed.
+    """
+    # Repo names may contain '_' and '.'; the lazy match excludes a final '.git'.
+    regex = r"(?:@github\.com\:|\/github\.com\/)([a-zA-Z\d-]+)\/([a-zA-Z\d_.-]+?)(?:\.git)?$"
+    m = re.search(regex, github_repo_url)
+    if m is None:
+        return None
+    return (m.group(1), m.group(2))
+
+
+def produce_shortlog(clone_dir, mapping, old_rev, new_rev):
+    """ Produce a list of changes between two revisions, one revision per line
+
+    Merges are excluded. An empty list is returned if `git log` fails."""
+
+    # If mapping is None, we want to list all changes below clone_dir.
+    # Otherwise, we want to list changes in each 'source' in the mapping. Since
+    # these strings are paths relative to clone_dir, we can just pass them all
+    # to git and let it figure out what to do.
+    subdirs = (['.'] if mapping is None
+               else [str(m.from_path) for m in mapping.items])
+
+    # git runs with cwd=clone_dir; adding '-C clone_dir' as well would resolve
+    # a relative clone_dir twice. '--' keeps paths from being read as revisions.
+    cmd = (['git', 'log', '--pretty=format:%s (%aN)', '--no-merges',
+            old_rev + '..' + new_rev, '--'] + subdirs)
+    try:
+        proc = subprocess.run(cmd,
+                              cwd=str(clone_dir),
+                              check=True,
+                              stdout=subprocess.PIPE,
+                              stderr=subprocess.PIPE,
+                              universal_newlines=True)
+        return proc.stdout.splitlines()
+    except subprocess.CalledProcessError as e:
+        log.error("Unable to capture shortlog: %s", e.stderr)
+        return []
+
+
+def format_list_to_str(list, width=70):
+    """ Render a list of strings as Markdown bullets wrapped to `width`
+    (items start with '* '; continuation lines are indented two spaces) """
+    bullets = (textwrap.fill(item, width=width, initial_indent="* ",
+                             subsequent_indent="  ") for item in list)
+    return '\n'.join(bullets)
+
+
+class JsonError(Exception):
+    '''Raised when data in the source HJSON is bad; `path` and `msg` are kept
+    as attributes and combined into the message returned by str().'''
+    def __init__(self, path, msg):
+        self.path, self.msg = path, msg
+
+    def __str__(self):
+        return 'In hjson at {}, {}'.format(self.path, self.msg)
+
+
+def get_field(path, where, data, name, expected_type=dict, optional=False, constructor=None):
+    '''Fetch data[name], check its type and optionally pass it to constructor.
+    A missing (or None) field gives None if optional, else raises JsonError;
+    a value that is not an expected_type instance also raises JsonError.'''
+    value = data.get(name)
+    if value is None and optional:
+        return None
+    if value is None:
+        raise JsonError(path, '{}, missing {!r} field.'.format(where, name))
+    if not isinstance(value, expected_type):
+        msg = '{}, the {!r} field is {!r}, but should be of type {!r}.'
+        raise JsonError(path, msg.format(where, name, value, expected_type.__name__))
+    return value if constructor is None else constructor(value)
+
+
+class Upstream:
+    '''Holds the 'upstream' field of a config or lock file'''
+    def __init__(self, path, data):
+        # All fields are strings; only 'only_subdir' may be left out.
+        def read(name, optional=False):
+            return get_field(path, 'in upstream dict', data, name, str,
+                             optional=optional)
+        self.url, self.rev = read('url'), read('rev')
+        self.only_subdir = read('only_subdir', optional=True)
+
+    def as_dict(self):
+        '''Return the fields as a dict, leaving out an unset only_subdir.'''
+        fields = (('url', self.url), ('rev', self.rev))
+        subdir = (('only_subdir', self.only_subdir),)
+        return dict(fields if self.only_subdir is None else fields + subdir)
+
+
+class PatchRepo:
+    '''Holds the 'patch_repo' field of a config file'''
+    def __init__(self, path, data):
+        # 'url', 'rev_base' and 'rev_patched' are all required string fields.
+        def read(name):
+            return get_field(path, 'in patch_repo dict', data, name, str)
+        self.url, self.rev_base = read('url'), read('rev_base')
+        self.rev_patched = read('rev_patched')
+
+
+class Mapping1:
+    '''A class to represent a single item in the 'mapping' field in a config file'''
+    def __init__(self, from_path, to_path, patch_dir):
+        '''Store the copy source, destination and (possibly None) patch dir.'''
+        self.from_path, self.to_path = from_path, to_path
+        self.patch_dir = patch_dir
+
+    @staticmethod
+    def make(path, idx, data):
+        '''Build a Mapping1 from entry idx of the 'mapping' list (idx + 1 in
+        messages). Raises JsonError for a missing 'from'/'to', a non-string
+        field, or a path that is absolute or climbs out of its base directory.'''
+        assert isinstance(data, dict)
+
+        def get_path(name, optional=False):
+            val = get_field(path, 'in mapping entry {}'.format(idx + 1),
+                            data, name, expected_type=str, optional=optional)
+            if val is None:
+                return None
+
+            # Check that the paths aren't evil ('../../../foo' or '/etc/passwd'
+            # are *not* ok!) A name such as '..foo' is harmless and accepted.
+            val = os.path.normpath(val)
+            escapes = val == os.pardir or val.startswith(os.pardir + os.sep)
+            if os.path.isabs(val) or val.startswith(os.sep) or escapes:
+                raise JsonError(path,
+                                'Mapping entry {} has a bad path for {!r} '
+                                '(must be a relative path that doesn\'t '
+                                'escape the directory)'
+                                .format(idx + 1, name))
+            return Path(val)
+
+        return Mapping1(get_path('from'), get_path('to'),
+                        get_path('patch_dir', optional=True))
+
+    @staticmethod
+    def make_default(have_patch_dir):
+        '''Make a default mapping1, which copies everything straight through'''
+        return Mapping1(Path('.'), Path('.'),
+                        Path('.') if have_patch_dir else None)
+
+    @staticmethod
+    def apply_patch(basedir, patchfile):
+        cmd = ['git', 'apply', '--directory', str(basedir), '-p1',
+               str(patchfile)]
+        if verbose:
+            cmd += ['--verbose']
+        subprocess.run(cmd, check=True)
+
+    def import_from_upstream(self, upstream_path,
+                             target_path, exclude_files, patch_dir):
+        '''Copy from the upstream checkout to target_path'''
+        from_path = upstream_path / self.from_path
+        to_path = target_path / self.to_path
+
+        # Make sure the target directory actually exists
+        to_path.parent.mkdir(exist_ok=True, parents=True)
+
+        # Copy src to dst recursively. For directories, we can use
+        # shutil.copytree. This doesn't support files, though, so we have to
+        # check for them first.
+        if from_path.is_file():
+            shutil.copy(str(from_path), str(to_path))
+        else:
+            ignore = ignore_patterns(str(upstream_path), *exclude_files)
+            shutil.copytree(str(from_path), str(to_path), ignore=ignore)
+
+        # Apply any patches to the copied files. If self.patch_dir is None,
+        # there are none to apply. Otherwise, resolve it relative to patch_dir.
+        if self.patch_dir is not None:
+            patches = (patch_dir / self.patch_dir).glob('*.patch')
+            for patch in sorted(patches):
+                log.info("Applying patch {} at {}".format(patch, to_path))
+                Mapping1.apply_patch(to_path, patch)
+
+
+class Mapping:
+    '''A class representing the 'mapping' field in a config file
+
+    This should be a list of dicts.
+    '''
+    def __init__(self, items):
+        self.items = items
+
+    @staticmethod
+    def make(path, data):
+        items = []
+        assert isinstance(data, list)
+        for idx, elt in enumerate(data):
+            if not isinstance(elt, dict):
+                raise JsonError(path, 'Mapping element {!r} is not a dict.'.format(elt))
+            items.append(Mapping1.make(path, idx, elt))
+
+        return Mapping(items)
+
+    def has_patch_dir(self):
+        '''Check whether at least one item defines a patch dir'''
+        for item in self.items:
+            if item.patch_dir is not None:
+                return True
+        return False
+
+
+class LockDesc:
+    '''A class representing the contents of a lock file'''
+    def __init__(self, handle):
+        data = hjson.loads(handle.read(), use_decimal=True)
+        self.upstream = get_field(handle.name, 'at top-level', data, 'upstream',
+                                  constructor=lambda data: Upstream(handle.name, data))
+
+
+class Desc:
+    '''A class representing the configuration file'''
+
+    def __init__(self, handle, desc_overrides):
+
+        # Ensure description file matches our naming rules (otherwise we don't
+        # know the name for the lockfile). This regex checks that we have the
+        # right suffix and a nonempty name.
+        if not re.match(r'.+\.vendor\.hjson', handle.name):
+            raise ValueError("Description file names must have a .vendor.hjson suffix.")
+
+        data = hjson.loads(handle.read(), use_decimal=True)
+        where = 'at top-level'
+
+        self.apply_overrides(data, desc_overrides)
+
+        path = Path(handle.name)
+
+        def take_path(p):
+            return path.parent / p
+
+        self.path = path
+        self.name = get_field(path, where, data, 'name', expected_type=str)
+        self.target_dir = get_field(path, where, data, 'target_dir',
+                                    expected_type=str, constructor=take_path)
+        self.upstream = get_field(path, where, data, 'upstream',
+                                  constructor=lambda data: Upstream(path, data))
+        self.patch_dir = get_field(path, where, data, 'patch_dir',
+                                   optional=True, expected_type=str, constructor=take_path)
+        self.patch_repo = get_field(path, where, data, 'patch_repo',
+                                    optional=True,
+                                    constructor=lambda data: PatchRepo(path, data))
+        self.exclude_from_upstream = (get_field(path, where, data, 'exclude_from_upstream',
+                                                optional=True, expected_type=list) or
+                                      [])
+        self.mapping = get_field(path, where, data, 'mapping', optional=True,
+                                 expected_type=list,
+                                 constructor=lambda data: Mapping.make(path, data))
+
+        # Add default exclusions
+        self.exclude_from_upstream += EXCLUDE_ALWAYS
+
+        # It doesn't make sense to define a patch_repo, but not a patch_dir
+        # (where should we put the patches that we get?)
+        if self.patch_repo is not None and self.patch_dir is None:
+            raise JsonError(path, 'Has patch_repo but not patch_dir.')
+
+        # We don't currently support a patch_repo and a mapping (just because
+        # we haven't written the code to generate the patches across subdirs
+        # yet). Tracked in issue #2317.
+        if self.patch_repo is not None and self.mapping is not None:
+            raise JsonError(path,
+                            "vendor.py doesn't currently support patch_repo "
+                            "and mapping at the same time (see issue #2317).")
+
+        # If a patch_dir is defined and there is no mapping, we will look in
+        # that directory for patches and apply them in (the only) directory
+        # that we copy stuff into.
+        #
+        # If there is a mapping check that there is a patch_dir if and only if
+        # least one mapping entry uses it.
+        if self.mapping is not None:
+            if self.patch_dir is not None:
+                if not self.mapping.has_patch_dir():
+                    raise JsonError(path, 'Has patch_dir, but no mapping item uses it.')
+            else:
+                if self.mapping.has_patch_dir():
+                    raise JsonError(path,
+                                    'Has a mapping item with a patch directory, '
+                                    'but there is no global patch_dir key.')
+
+        # Check that exclude_from_upstream really is a list of strings. Most of
+        # this type-checking is in the constructors for field types, but we
+        # don't have a "ExcludeList" class, so have to do it explicitly here.
+        for efu in self.exclude_from_upstream:
+            if not isinstance(efu, str):
+                raise JsonError(path,
+                                'exclude_from_upstream has entry {}, which is not a string.'
+                                .format(efu))
+
+    def apply_overrides(self, desc_data, desc_overrides):
+        """ Apply overrides from command line to configuration file data
+
+        Updates are applied to the desc_data reference."""
+
+        for key, value in desc_overrides:
+            log.info("Overriding description key {!r} with value {!r}".format(
+                key, value))
+            ref = desc_data
+            split_keys = key.split('.')
+            for key_part in split_keys[:-1]:
+                if key_part not in ref:
+                    ref[key_part] = {}
+                ref = ref[key_part]
+            ref[split_keys[-1]] = value
+
+    def lock_file_path(self):
+        desc_file_stem = self.path.name.rsplit('.', 2)[0]
+        return self.path.with_name(desc_file_stem + '.lock.hjson')
+
+    def import_from_upstream(self, upstream_path):
+        log.info('Copying upstream sources to {}'.format(self.target_dir))
+
+        # Remove existing directories before importing them again
+        shutil.rmtree(str(self.target_dir), ignore_errors=True)
+
+        items = (self.mapping.items if self.mapping is not None
+                 else [Mapping1.make_default(self.patch_dir is not None)])
+        for map1 in items:
+            map1.import_from_upstream(upstream_path,
+                                      self.target_dir,
+                                      self.exclude_from_upstream,
+                                      self.patch_dir)
+
+
+def refresh_patches(desc):
+    if desc.patch_repo is None:
+        log.fatal('Unable to refresh patches, patch_repo not set in config.')
+        sys.exit(1)
+
+    log.info('Refreshing patches in {}'.format(desc.patch_dir))
+
+    # remove existing patches
+    for patch in desc.patch_dir.glob('*.patch'):
+        os.unlink(str(patch))
+
+    # get current patches
+    _export_patches(desc.patch_repo.url, desc.patch_dir,
+                    desc.patch_repo.rev_base,
+                    desc.patch_repo.rev_patched)
+
+
+def _export_patches(patchrepo_clone_url, target_patch_dir, upstream_rev,
+                    patched_rev):
+    with tempfile.TemporaryDirectory() as clone_dir:
+        clone_git_repo(patchrepo_clone_url, clone_dir, patched_rev)
+        rev_range = 'origin/' + upstream_rev + '..' + 'origin/' + patched_rev
+        cmd = [
+            'git',
+            'format-patch',
+            '--no-signature',
+            '--no-stat',
+            '-o',
+            str(target_patch_dir.resolve()),
+            rev_range
+        ]
+        if not verbose:
+            cmd += ['-q']
+        subprocess.run(cmd, cwd=str(clone_dir), check=True)
+
+
+def ignore_patterns(base_dir, *patterns):
+    """Similar to shutil.ignore_patterns, but with support for directory excludes."""
+    def _rel_to_base(path, name):
+        return os.path.relpath(os.path.join(path, name), base_dir)
+
+    def _ignore_patterns(path, names):
+        ignored_names = []
+        for pattern in patterns:
+            pattern_matches = [
+                n for n in names
+                if fnmatch.fnmatch(_rel_to_base(path, n), pattern)
+            ]
+            ignored_names.extend(pattern_matches)
+        return set(ignored_names)
+
+    return _ignore_patterns
+
+
+def clone_git_repo(repo_url, clone_dir, rev='master'):
+    log.info('Cloning upstream repository %s @ %s', repo_url, rev)
+
+    # Clone the whole repository
+    cmd = ['git', 'clone', '--no-single-branch']
+    if not verbose:
+        cmd += ['-q']
+    cmd += [repo_url, str(clone_dir)]
+    subprocess.run(cmd, check=True)
+
+    # Check out exactly the revision requested
+    cmd = ['git', '-C', str(clone_dir), 'checkout', '--force', rev]
+    if not verbose:
+        cmd += ['-q']
+    subprocess.run(cmd, check=True)
+
+    # Get revision information
+    cmd = ['git', '-C', str(clone_dir), 'rev-parse', 'HEAD']
+    rev = subprocess.run(cmd,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE,
+                         check=True,
+                         universal_newlines=True).stdout.strip()
+    log.info('Cloned at revision %s', rev)
+    return rev
+
+
+def git_get_short_rev(clone_dir, rev):
+    """ Get the shortened SHA-1 hash for a revision """
+    cmd = ['git', '-C', str(clone_dir), 'rev-parse', '--short', rev]
+    short_rev = subprocess.run(cmd,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE,
+                               check=True,
+                               universal_newlines=True).stdout.strip()
+    return short_rev
+
+
+def git_add_commit(paths, commit_msg):
+    """ Stage and commit all changes in paths"""
+
+    assert paths
+    base_dir = paths[0].parent
+
+    # Stage all changes
+    #
+    # Rather than figuring out GIT_DIR properly, we cheat and use "git -C" to
+    # pretend that we're running in base_dir. Of course, the elements of paths
+    # are relative to our actual working directory. Rather than do anything
+    # clever, we just resolve them to absolute paths as we go.
+    abs_paths = [p.resolve() for p in paths]
+    subprocess.run(['git', '-C', base_dir, 'add'] + abs_paths, check=True)
+
+    cmd_commit = ['git', '-C', base_dir, 'commit', '-s', '-F', '-']
+    try:
+        subprocess.run(cmd_commit,
+                       check=True,
+                       universal_newlines=True,
+                       input=commit_msg)
+    except subprocess.CalledProcessError:
+        log.warning("Unable to create commit. Are there no changes?")
+
+
+def define_arg_type(arg):
+    """Sanity-check and return a config file override argument"""
+    try:
+        (key, value) = [v.strip() for v in arg.split('=', 2)]
+    except Exception:
+        raise argparse.ArgumentTypeError(
+            'unable to parse {!r}: configuration overrides must be in the form key=value'
+            .format(arg))
+
+    if key not in OVERRIDABLE_DESC_KEYS:
+        raise argparse.ArgumentTypeError(
+            'invalid configuration override: key {!r} cannot be overwritten'
+            .format(key))
+    return (key, value)
+
+
+def main(argv):
+    parser = argparse.ArgumentParser(prog="vendor", description=__doc__)
+    parser.add_argument(
+        '--update',
+        '-U',
+        dest='update',
+        action='store_true',
+        help='Update locked version of repository with upstream changes')
+    parser.add_argument('--refresh-patches',
+                        action='store_true',
+                        help='Refresh the patches from the patch repository')
+    parser.add_argument('--commit',
+                        '-c',
+                        action='store_true',
+                        help='Commit the changes')
+    parser.add_argument('--desc-override',
+                        '-D',
+                        dest="desc_overrides",
+                        action="append",
+                        type=define_arg_type,
+                        default=[],
+                        help='Override a setting in the description file. '
+                             'Format: -Dsome.key=value. '
+                             'Can be used multiple times.')
+    parser.add_argument('desc_file',
+                        metavar='file',
+                        type=argparse.FileType('r', encoding='UTF-8'),
+                        help='vendoring description file (*.vendor.hjson)')
+    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose')
+    args = parser.parse_args()
+
+    global verbose
+    verbose = args.verbose
+    if (verbose):
+        log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
+    else:
+        log.basicConfig(format="%(levelname)s: %(message)s")
+
+    # Load input files (desc file; lock file) and check syntax etc.
+    try:
+        # Load description file
+        desc = Desc(args.desc_file, args.desc_overrides)
+        lock_file_path = desc.lock_file_path()
+
+        # Try to load lock file (which might not exist)
+        try:
+            with open(str(lock_file_path), 'r') as lock_file:
+                lock = LockDesc(lock_file)
+        except FileNotFoundError:
+            lock = None
+    except (JsonError, ValueError) as err:
+        log.fatal(str(err))
+        raise SystemExit(1)
+
+    # Check for a clean working directory when commit is requested
+    if args.commit:
+        if not git_is_clean_workdir(desc.path.parent):
+            log.fatal("A clean git working directory is required for "
+                      "--commit/-c. git stash your changes and try again.")
+            raise SystemExit(1)
+
+    if lock is None and not args.update:
+        log.warning("No lock file at {}, so will update upstream repo."
+                    .format(str(desc.lock_file_path())))
+        args.update = True
+
+    # If we have a lock file and we're not in update mode, override desc's
+    # upstream field with the one from the lock file. Keep track of whether the
+    # URL differs (in which case, we can't get a shortlog)
+    changed_url = False
+    if lock is not None:
+        changed_url = desc.upstream.url != lock.upstream.url
+        if not args.update:
+            desc.upstream = lock.upstream
+
+    if args.refresh_patches:
+        refresh_patches(desc)
+
+    with tempfile.TemporaryDirectory() as clone_dir:
+        # clone upstream repository
+        upstream_new_rev = clone_git_repo(desc.upstream.url, clone_dir, rev=desc.upstream.rev)
+
+        if not args.update:
+            if upstream_new_rev != lock.upstream.rev:
+                log.fatal(
+                    "Revision mismatch. Unable to re-clone locked version of repository."
+                )
+                log.fatal("Attempted revision: %s", desc.upstream.rev)
+                log.fatal("Re-cloned revision: %s", upstream_new_rev)
+                raise SystemExit(1)
+
+        clone_subdir = Path(clone_dir)
+        if desc.upstream.only_subdir is not None:
+            clone_subdir = clone_subdir / desc.upstream.only_subdir
+            if not clone_subdir.is_dir():
+                log.fatal("subdir '{}' does not exist in repo"
+                          .format(desc.upstream.only_subdir))
+                raise SystemExit(1)
+
+        # copy selected files from upstream repo and apply patches as necessary
+        desc.import_from_upstream(clone_subdir)
+
+        # get shortlog
+        get_shortlog = args.update
+        if args.update:
+            if lock is None:
+                get_shortlog = False
+                log.warning("No lock file %s: unable to summarize changes.", str(lock_file_path))
+            elif changed_url:
+                get_shortlog = False
+                log.warning("The repository URL changed since the last run. "
+                            "Unable to get log of changes.")
+
+        shortlog = None
+        if get_shortlog:
+            shortlog = produce_shortlog(clone_subdir, desc.mapping,
+                                        lock.upstream.rev, upstream_new_rev)
+
+            # Ensure fully-qualified issue/PR references for GitHub repos
+            gh_repo_info = github_parse_url(desc.upstream.url)
+            if gh_repo_info:
+                shortlog = github_qualify_references(shortlog, gh_repo_info[0],
+                                                     gh_repo_info[1])
+
+            log.info("Changes since the last import:\n" +
+                     format_list_to_str(shortlog))
+
+        # write lock file
+        if args.update:
+            lock_data = {}
+            lock_data['upstream'] = desc.upstream.as_dict()
+            lock_data['upstream']['rev'] = upstream_new_rev
+            with open(str(lock_file_path), 'w', encoding='UTF-8') as f:
+                f.write(LOCK_FILE_HEADER)
+                hjson.dump(lock_data, f)
+                f.write("\n")
+                log.info("Wrote lock file %s", str(lock_file_path))
+
+        # Commit changes
+        if args.commit:
+            sha_short = git_get_short_rev(clone_subdir, upstream_new_rev)
+
+            repo_info = github_parse_url(desc.upstream.url)
+            if repo_info is not None:
+                sha_short = "%s/%s@%s" % (repo_info[0], repo_info[1],
+                                          sha_short)
+
+            commit_msg_subject = 'Update %s to %s' % (desc.name, sha_short)
+            intro = ('Update code from {}upstream repository {} to revision {}'
+                     .format(('' if desc.upstream.only_subdir is None else
+                              'subdir {} in '.format(desc.upstream.only_subdir)),
+                             desc.upstream.url,
+                             upstream_new_rev))
+            commit_msg_body = textwrap.fill(intro, width=70)
+
+            if shortlog:
+                commit_msg_body += "\n\n"
+                commit_msg_body += format_list_to_str(shortlog, width=70)
+
+            commit_msg = commit_msg_subject + "\n\n" + commit_msg_body
+
+            commit_paths = []
+            commit_paths.append(desc.target_dir)
+            if args.refresh_patches:
+                commit_paths.append(desc.patch_dir)
+            commit_paths.append(lock_file_path)
+
+            git_add_commit(commit_paths, commit_msg)
+
+    log.info('Import finished')
+
+
+if __name__ == '__main__':
+    try:
+        main(sys.argv)
+    except subprocess.CalledProcessError as e:
+        log.fatal("Called program '%s' returned with %d.\n"
+                  "STDOUT:\n%s\n"
+                  "STDERR:\n%s\n" %
+                  (" ".join(e.cmd), e.returncode, e.stdout, e.stderr))
+        raise
+    except KeyboardInterrupt:
+        log.info("Aborting operation on user request.")
+        sys.exit(1)
diff --git a/vendor/pulp-platform/common_cells/.gitignore b/vendor/pulp-platform/common_cells/.gitignore
new file mode 100644
index 0000000000..2d00b390c2
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/.gitignore
@@ -0,0 +1,14 @@
+.*
+!.travis.yml
+!.git*
+*.out
+*~
+/Bender.lock
+/Bender.local
+build
+formal/fifo_v3
+formal/counter
+formal/fall_through_register
+*.check
+*.vcd
+obj_dir/
diff --git a/vendor/pulp-platform/common_cells/CHANGELOG.md b/vendor/pulp-platform/common_cells/CHANGELOG.md
new file mode 100644
index 0000000000..8513988e40
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/CHANGELOG.md
@@ -0,0 +1,342 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
+and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
+
+## Unreleased
+
+## 1.23.0 - 2021-09-05
+### Added
+- Add `cc_onehot`
+- `isochronous_4phase_handshake`: Isochronous clock domain crossing cutting all paths using a 4-phase handshake.
+- Changed `isochronous_spill_register_tb` to `isochronous_crossing_tb` also covering the `isochronous_4phase_handshake`
+  module.
+- Make reset value of `sync` module parameterizable.
+
+### Changed
+- `id_queue`: Allow simultaneous input and output requests in `FULL_BW` mode
+
+## 1.22.1 - 2021-06-14
+### Fixed
+- Remove breaking change of `spill_register`
+
+## 1.22.0 - 2021-06-09
+### Added
+- Add `spill_register_flushable`
+
+### Changed
+- `registers.svh`: Merge explicit and implicit register variants into `` `FF `` and `` `FFL `` macros
+- `rr_arb_tree`: Allow flushing locked decision
+- Improved `verific` compatibility
+
+## 1.21.0 - 2021-01-28
+### Changed
+- Remove `timeprecision/timeunit` arguments
+- Update `common_verification` to `0.2.0`
+- Update `tech_cells_generic` to `0.2.3`
+
+## 1.20.1 - 2021-01-21
+### Changed
+- `id_queue`: Replace default or reset value of signals that were assigned `'x` with `'0`.
+- `id_queue`: Use `cf_math_pkg::idx_width()` for computation of localparams.
+
+### Fixed
+- Add `XSIM` define guard for statements incompatible with `xsim`.
+
+## 1.20.0 - 2020-11-04
+### Added
+- assertions: Assertion include header with macros (from lowrisc)
+
+### Changed
+- `sram.sv`: Deprecated as it has been moved to `tech_cells_generic`
+
+### Fixed
+- `stream_register`: Fix `DATA_WIDTH` of instantiated FIFO.
+- `stream_xbar`: Add missing argument in assertion error string.
+- Lint style fixes
+- `stream_omega`: Fix parse issue with verible.
+- `src_files.yml`: Fix compile order and missing modules.
+
+## 1.19.0 - 2020-05-25
+### Added
+- stream_to_mem: Allows memories with flow control (req/gnt) for requests but
+  without flow control for output data to be used in streams.
+- isochronous_spill_register: Isochronous clock domain crossing cutting all paths.
+- `rr_arb_tree_tb`: Systemverilog testbench for `rr_arb_tree`, which checks for fair throughput.
+- `cf_math_pkg::idx_width`: Constant function for defining the binary representation width
+  of an index signal.
+
+### Changed
+- `addr_decode`: Use `cf_math_pkg::idx_width` for computing the index width, inline documentation.
+- `lzc`: Use `cf_math_pkg::idx_width` for computing the index width, inline documentation.
+- `Bender`: Change levels of modules affected by depending on `cf_math_pkg::idx_width()`.
+- `stream_xbar`: Fully connected stream-based interconnect with variable number of inputs and outputs.
+- `stream_xbar`: Fully connected stream-based interconnect with a variable number of inputs and outputs.
+- `stream_omega_net`: Stream-based network implementing an omega topology. Variable number of inputs,
+  outputs and radix. Topology is isomorphic to a butterfly network.
+
+### Fixed
+- Improve tool compatibility.
+- `rr_arb_tree`: Properly degenerate `rr_i` and `idx_o` signals.
+- `rr_arb_tree`: Add parameter `FairArb` to distribute throughput of input requests evenly when
+  not all inputs have requests active.
+- `stream_demux`: Properly degenerate `inp_sel_i` signal.
+
+## 1.18.0 - 2020-04-15
+### Added
+- stream_fork_dynamic: Wrapper around `stream_fork` for partial forking.
+- stream_join: Join multiple Ready/Valid handshakes to one common handshake.
+- SECDED (Single Error Correction, Double Error Detection) encoder and decoder
+- SECDED Verilator-based testbench
+- Travis build for SECDED module
+
+## 1.17.0 - 2020-04-09
+### Added
+- stream_fifo: Ready/Valid handshake wrapper around `fifo_v3`
+
+## 1.16.4 - 2020-03-02
+### Fixed
+- id_queue: Fix generation of `head_tail_q` registers
+
+## 1.16.3 - 2020-02-11
+### Fixed
+- Handle degenerated `addr_decode` with `NoIndices == 1`, change default parameters to `32'd0`
+
+## 1.16.2 - 2020-02-04
+### Fixed
+- Fix author section in Bender.yml
+
+## 1.16.1 - 2020-02-03
+### Fixed
+- `rr_arb_tree`: Add guard SVA statement for Verilator
+- Added missing sources in `Bender.yml` and `src_files.yml`
+
+## 1.16.0 - 2020-01-13
+### Fixed
+- Handle degenerated `onehot_to_bin` with `ONEHOT_WIDTH == 1`
+- Handle degenerated `id_queue` with `CAPACITY == 1` or `HT_CAPACITY == 1`
+- Fix `cdc_fifo_gray` to be a safe clock domain crossing (CDC)
+
+## 1.15.0 - 2019-12-09
+### Added
+- Added address map decoder module
+
+### Fixed
+- Handle degenerated `lzc` with `WIDTH == 1`
+
+## 1.14.0 - 2019-10-08
+
+### Added
+- Added substitution-permutation hash function module
+- Added counting-bloom-filter module
+- `spill_register`: Added Bypass parameter
+- `counter`: Added sticky overflow
+- Added counter with variable delta
+- Added counter that tracks its maximum value
+
+### Changed
+- Added formal testbench for `fifo` and `fall_through_register`
+
+## 1.13.1 - 2019-06-01
+
+### Changed
+
+- Fix path in `src_files.yml` for `stream_arbiter` and `stream_arbiter_flushable`
+
+## 1.13.0 - 2019-05-29
+
+### Added
+
+- Added exponential backoff window module
+- Added parametric Galois LFSR module with optional whitening feature
+- Added `cf_math_pkg`: Constant Function implementations of mathematical functions for HDL elaboration
+
+### Changed
+- Parametric payload data type for `rr_arb_tree`
+
+### Deprecated
+- The following arbiter implementations are deprecated and superseded by `rr_arb_tree`:
+- Priority arbiter `prioarbiter`
+- Round-robin arbiter `rrarbiter`
+
+### Fixed
+
+## 1.12.0 - 2019-04-09
+
+### Added
+- Add priority arbiter
+- Add Pseudo Least Recently Used tree
+- Add round robin arbiter mux tree
+
+### Changed
+- Add selectable arbiter implementation for `stream_arbiter` and `stream_arbiter_flushable`. One can choose between priority (`prio`) and round-robin arbitration (`rr`).
+- Add `$onehot0` assertion in one-hot to bin
+- Rework `rrarbiter` unit (uses `rr_arb_tree` implementation underneath)
+
+## 1.11.0 - 2019-03-20
+
+### Added
+- Add stream fork
+- Add fall-through register
+- Add stream filter
+- Add ID queue
+
+### Changed
+- `sync_wedge` use existing synchronizer. This defines a single place where a tech-specific synchronizer can be defined.
+
+### Fixed
+- Fix FIFO push and pop signals in `stream_register` to observe interface prerequisites.
+- In `fifo_v3`, fix data output when pushing into empty fall-through FIFO. Previously, the data
+  output of an empty fall-through FIFO with data at its input (and `push_i=1`) depended on
+  `pop_i`: When `pop_i=0`, old, invalid data were visible at the output (even though `empty_o=0`,
+  indicating that the data output is valid). Only when `pop_i=1`, the data from the input fell
+  through. One consequence of this bug was that `data_o` of the `fall_through_register` could change
+  while `valid_o=1`, violating the basic stream specification.
+
+## 1.10.0 - 2018-12-18
+
+### Added
+- Add `fifo_v3` with generic fill count
+- Add 16 bit LFSR
+- Add stream delayer
+- Add stream arbiter
+- Add register macros for RTL
+- Add shift register
+
+### Changed
+- Make number of registers of `rstgen_bypass` a parameter.
+
+### Fixed
+- Fix `valid_i` and `grant_i` guarantees in `generic_fifo` for backward compatibility.
+- LZC: Synthesis of streaming operators in ternary operators
+- Add missing entry for `popcount` to `Bender.yml`.
+- Add default values for parameters to improve compatibility with Synopsys DC and Vivado.
+
+## 1.9.0 - 2018-11-02
+
+### Added
+- Add popcount circuit `popcount`
+
+## 1.8.0 - 2018-10-15
+
+### Added
+- Add lock feature to the rrarbiter. This prevents the arbiter from changing its decision while pending requests remain unacknowledged for several cycles.
+- Add deglitching circuit
+- Add generic clock divider
+- Add edge detector as alias to sync_wedge (name is more expressive)
+- Add generic counter
+- Add moving deglitcher
+
+## 1.7.6 - 2018-09-27
+
+### Added
+- Add reset synchronizer with explicit reset bypass in testmode
+
+## 1.7.5 - 2018-09-06
+### Fixed
+- Fix incompatibility with verilator
+- Fix dependency to open-source repo
+
+## 1.7.4 - 2018-09-06
+- Fix assertions in `fifo_v2` (write on full / read on empty did not trigger properly)
+
+## 1.7.3 - 2018-08-27
+### Fixed
+- Use proper `fifo_v2` in `generic_fifo` module.
+
+## 1.7.2 - 2018-08-27
+### Added
+- Almost full/empty flags to FIFO, as `fifo_v2`.
+
+### Changed
+- FIFO moved to `fifo_v1` and instantiates `fifo_v2`.
+
+## 1.7.1 - 2018-08-27
+### Fixed
+- Revert breaking changes to `fifo`.
+
+## 1.7.0 - 2018-08-24
+### Added
+- Add stream register (`stream_register`).
+- Add stream multiplexer and demultiplexer (`stream_mux`, `stream_demux`).
+- Add round robin arbiter (`rrarbiter`).
+- Add leading zero counter (`lzc`).
+
+### Changed
+- Deprecate `find_first_one` in favor of `lzc`.
+
+## 1.6.0 - 2018-04-03
+### Added
+- Add binary to Gray code converter.
+- Add Gray code to binary converter.
+- Add Gray code testbench.
+- Add CDC FIFO based on Gray counters. This is a faster alternative to the 2-phase FIFO which also works if a domain's clock has stopped.
+
+### Changed
+- Rename `cdc_fifo` to `cdc_fifo_2phase`.
+- Adjust CDC FIFO testbench to cover both implementations.
+
+## 1.5.4 - 2018-03-31
+### Changed
+- Replace explicit clock gate in `fifo` with implicit one.
+
+## 1.5.3 - 2018-03-16
+### Changed
+- Remove duplicate deprecated modules.
+
+## 1.5.2 - 2018-03-16
+### Changed
+- Remove deprecated `rstgen` and fix interface.
+
+## 1.5.1 - 2018-03-16
+### Changed
+- Remove deprecated `onehot_to_bin`.
+
+## 1.5.0 - 2018-03-14
+### Added
+- Add behavioural SRAM model
+
+## 1.4.0 - 2018-03-14
+### Added
+- Clock domain crossing FIFO
+
+### Changed
+- Re-name new sync modules to resolve namespace collisions
+
+## 1.3.0 - 2018-03-12
+### Added
+- 2-phase clock domain crossing
+- Add old common cells as deprecated legacy modules
+
+## 1.2.3 - 2018-03-09
+### Added
+- Backwards compatibility wrapper for `generic_LFSR_8bit`
+
+## 1.2.2 - 2018-03-09
+### Added
+- Backwards compatibility wrapper for `generic_fifo`
+
+## 1.2.1 - 2018-03-09
+### Fixed
+- Fix an issue in the spill register which causes transactions to be lost
+
+## 1.2.0 - 2018-03-09
+### Added
+- Add spill register
+
+## 1.1.0 - 2018-03-06
+### Added
+- Find first zero
+
+## 1.0.0 - 2018-03-02
+### Added
+- Re-implementation of the generic FIFO supporting all kinds of use-cases
+- Testbench for FIFO
+
+### Changed
+- Re-formatting and artistic code clean-up
+
+## 0.1.0 - 2018-02-23
+### Added
+- Fork of PULP common cells repository
diff --git a/vendor/pulp-platform/common_cells/LICENSE b/vendor/pulp-platform/common_cells/LICENSE
new file mode 100644
index 0000000000..18e4f67692
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/LICENSE
@@ -0,0 +1,176 @@
+SOLDERPAD HARDWARE LICENSE version 0.51
+
+This license is based closely on the Apache License Version 2.0, but is not
+approved or endorsed by the Apache Foundation. A copy of the non-modified
+Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0.
+
+As this license is not currently OSI or FSF approved, the Licensor permits any
+Work licensed under this License, at the option of the Licensee, to be treated
+as licensed under the Apache License Version 2.0 (which is so approved).
+
+This License is licensed under the terms of this License and in particular
+clause 7 below (Disclaimer of Warranties) applies in relation to its use.
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the Rights owner or entity authorized by the Rights owner
+that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other entities
+that control, are controlled by, or are under common control with that entity.
+For the purposes of this definition, "control" means (i) the power, direct or
+indirect, to cause the direction or management of such entity, whether by
+contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+"Rights" means copyright and any similar right including design right (whether
+registered or unregistered), semiconductor topography (mask) rights and
+database rights (but excluding Patents and Trademarks).
+
+"Source" form shall mean the preferred form for making modifications, including
+but not limited to source code, net lists, board layouts, CAD files,
+documentation source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical transformation or
+translation of a Source form, including but not limited to compiled object
+code, generated documentation, the instantiation of a hardware design and
+conversions to other media types, including intermediate forms such as
+bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask
+works).
+
+"Work" shall mean the work of authorship, whether in Source form or other
+Object form, made available under the License, as indicated by a Rights notice
+that is included in or attached to the work (an example is provided in the
+Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form, that
+is based on (or derived from) the Work and for which the editorial revisions,
+annotations, elaborations, or other modifications represent, as a whole, an
+original work of authorship. For the purposes of this License, Derivative Works
+shall not include works that remain separable from, or merely link (or bind by
+name) or physically connect to or interoperate with the interfaces of, the Work
+and Derivative Works thereof.
+
+"Contribution" shall mean any design or work of authorship, including the
+original version of the Work and any modifications or additions to that Work or
+Derivative Works thereof, that is intentionally submitted to Licensor for
+inclusion in the Work by the Rights owner or by an individual or Legal Entity
+authorized to submit on behalf of the Rights owner. For the purposes of this
+definition, "submitted" means any form of electronic, verbal, or written
+communication sent to the Licensor or its representatives, including but not
+limited to communication on electronic mailing lists, source code control
+systems, and issue tracking systems that are managed by, or on behalf of, the
+Licensor for the purpose of discussing and improving the Work, but excluding
+communication that is conspicuously marked or otherwise designated in writing
+by the Rights owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
+of whom a Contribution has been received by Licensor and subsequently
+incorporated within the Work.
+
+2. Grant of License. Subject to the terms and conditions of this License, each
+Contributor hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable license under the Rights to reproduce,
+prepare Derivative Works of, publicly display, publicly perform, sublicense,
+and distribute the Work and such Derivative Works in Source or Object form and
+do anything in relation to the Work as if the Rights did not exist.
+
+3. Grant of Patent License. Subject to the terms and conditions of this
+License, each Contributor hereby grants to You a perpetual, worldwide,
+non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this
+section) patent license to make, have made, use, offer to sell, sell, import,
+and otherwise transfer the Work, where such license applies only to those
+patent claims licensable by such Contributor that are necessarily infringed by
+their Contribution(s) alone or by combination of their Contribution(s) with the
+Work to which such Contribution(s) was submitted. If You institute patent
+litigation against any entity (including a cross-claim or counterclaim in a
+lawsuit) alleging that the Work or a Contribution incorporated within the Work
+constitutes direct or contributory patent infringement, then any patent
+licenses granted to You under this License for that Work shall terminate as of
+the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the Work or
+Derivative Works thereof in any medium, with or without modifications, and in
+Source or Object form, provided that You meet the following conditions:
+
+    You must give any other recipients of the Work or Derivative Works a copy
+    of this License; and
+
+    You must cause any modified files to carry prominent notices stating that
+    You changed the files; and
+
+    You must retain, in the Source form of any Derivative Works that You
+    distribute, all copyright, patent, trademark, and attribution notices from
+    the Source form of the Work, excluding those notices that do not pertain to
+    any part of the Derivative Works; and
+
+    If the Work includes a "NOTICE" text file as part of its distribution, then
+    any Derivative Works that You distribute must include a readable copy of
+    the attribution notices contained within such NOTICE file, excluding those
+    notices that do not pertain to any part of the Derivative Works, in at
+    least one of the following places: within a NOTICE text file distributed as
+    part of the Derivative Works; within the Source form or documentation, if
+    provided along with the Derivative Works; or, within a display generated by
+    the Derivative Works, if and wherever such third-party notices normally
+    appear. The contents of the NOTICE file are for informational purposes only
+    and do not modify the License. You may add Your own attribution notices
+    within Derivative Works that You distribute, alongside or as an addendum to
+    the NOTICE text from the Work, provided that such additional attribution
+    notices cannot be construed as modifying the License. You may add Your own
+    copyright statement to Your modifications and may provide additional or
+    different license terms and conditions for use, reproduction, or
+    distribution of Your modifications, or for any such Derivative Works as a
+    whole, provided Your use, reproduction, and distribution of the Work
+    otherwise complies with the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise, any
+Contribution intentionally submitted for inclusion in the Work by You to the
+Licensor shall be under the terms and conditions of this License, without any
+additional terms or conditions. Notwithstanding the above, nothing herein shall
+supersede or modify the terms of any separate license agreement you may have
+executed with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade names,
+trademarks, service marks, or product names of the Licensor, except as required
+for reasonable and customary use in describing the origin of the Work and
+reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or agreed to in
+writing, Licensor provides the Work (and each Contributor provides its
+Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied, including, without limitation, any warranties
+or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any risks
+associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory, whether in
+tort (including negligence), contract, or otherwise, unless required by
+applicable law (such as deliberate and grossly negligent acts) or agreed to in
+writing, shall any Contributor be liable to You for damages, including any
+direct, indirect, special, incidental, or consequential damages of any
+character arising as a result of this License or out of the use or inability to
+use the Work (including but not limited to damages for loss of goodwill, work
+stoppage, computer failure or malfunction, or any and all other commercial
+damages or losses), even if such Contributor has been advised of the
+possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing the Work or
+Derivative Works thereof, You may choose to offer, and charge a fee for,
+acceptance of support, warranty, indemnity, or other liability obligations
+and/or rights consistent with this License. However, in accepting such
+obligations, You may act only on Your own behalf and on Your sole
+responsibility, not on behalf of any other Contributor, and only if You agree
+to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
diff --git a/vendor/pulp-platform/common_cells/README.md b/vendor/pulp-platform/common_cells/README.md
new file mode 100644
index 0000000000..e147638071
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/README.md
@@ -0,0 +1,181 @@
+[![Build Status](https://travis-ci.com/pulp-platform/common_cells.svg?branch=master)](https://travis-ci.com/pulp-platform/common_cells)
+[![GitHub tag (latest SemVer)](https://img.shields.io/github/v/tag/pulp-platform/common_cells?color=blue&label=current&sort=semver)](CHANGELOG.md)
+[![SHL-0.51 license](https://img.shields.io/badge/license-SHL--0.51-green)](LICENSE)
+
+# Common Cells Repository
+
+Maintainer: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+This repository contains commonly used cells and headers for use in various projects.
+
+## Cell Contents
+
+This repository currently contains the following cells, ordered by categories.
+Please note that cells with status *deprecated* are not to be used for new designs and only serve to provide compatibility with old code.
+
+### Clocks and Resets
+
+| Name                    | Description                                         | Status       | Superseded By |
+| ----------------------- | --------------------------------------------------- | ------------ | ------------- |
+| `clk_div`               | Clock divider with integer divisor                  | active       |               |
+| `clock_divider`         | Clock divider with configuration registers          | *deprecated* | `clk_div`     |
+| `clock_divider_counter` | Clock divider using a counter                       | *deprecated* | `clk_div`     |
+| `rstgen`                | Reset synchronizer                                  | active       |               |
+| `rstgen_bypass`         | Reset synchronizer with dedicated test reset bypass | active       |               |
+
+### Clock Domains and Asynchronous Crossings
+
+| Name                           | Description                                                                      | Status       | Superseded By |
+|--------------------------------|----------------------------------------------------------------------------------|--------------|---------------|
+| `cdc_2phase`                   | Clock domain crossing using two-phase handshake, with ready/valid interface      | active       |               |
+| `cdc_fifo_2phase`              | Clock domain crossing FIFO using two-phase handshake, with ready/valid interface | active       |               |
+| `cdc_fifo_gray`                | Clock domain crossing FIFO using a gray-counter, with ready/valid interface      | active       |               |
+| `edge_detect`                  | Rising/falling edge detector                                                     | active       |               |
+| `edge_propagator`              | **ANTONIO ADD DESCRIPTION**                                                      | active       |               |
+| `edge_propagator_rx`           | **ANTONIO ADD DESCRIPTION**                                                      | active       |               |
+| `edge_propagator_tx`           | **ANTONIO ADD DESCRIPTION**                                                      | active       |               |
+| `isochronous_spill_register`   | Isochronous clock domain crossing and full handshake (like `spill_register`)     | active       |               |
+| `isochronous_4phase_handshake` | Isochronous four-phase handshake.                                                | active       |               |
+| `pulp_sync`                    | Serial line synchronizer                                                         | *deprecated* | `sync`        |
+| `pulp_sync_wedge`              | Serial line synchronizer with edge detector                                      | *deprecated* | `sync_wedge`  |
+| `serial_deglitch`              | Serial line deglitcher                                                           | active       |               |
+| `sync`                         | Serial line synchronizer                                                         | active       |               |
+| `sync_wedge`                   | Serial line synchronizer with edge detector                                      | active       |               |
+
+### Counters and Shift Registers
+
+| Name                | Description                                                       | Status       | Superseded By |
+| ------------------- | ----------------------------------------------------------------- | ------------ | ------------- |
+| `counter`           | Generic up/down counter with overflow detection                   | active       |               |
+| `delta_counter`     | Up/down counter with variable delta and overflow detection        | active       |               |
+| `generic_LFSR_8bit` | 8-bit linear feedback shift register (LFSR)                       | *deprecated* | `lfsr_8bit`   |
+| `lfsr_8bit`         | 8-bit linear feedback shift register (LFSR)                       | active       |               |
+| `lfsr_16bit`        | 16-bit linear feedback shift register (LFSR)                      | active       |               |
+| `lfsr`              | 4...64-bit parametric Galois LFSR with optional whitening feature | active       |               |
+| `max_counter`       | Up/down counter with variable delta that tracks its maximum value | active       |               |
+| `mv_filter`         | **ZARUBAF ADD DESCRIPTION**                                       | active       |               |
+
+### Data Path Elements
+
+| Name                       | Description                                                                                               | Status       | Superseded By |
+| -------------------------- | --------------------------------------------------------------------------------------------------------- | ------------ | ------------- |
+| `addr_decode`              | Address map decoder                                                                                       | active       |               |
+| `ecc_decode`               | SECDED Decoder (Single Error Correction, Double Error Detection)                                          | active       |               |
+| `ecc_encode`               | SECDED Encoder (Single Error Correction, Double Error Detection)                                          | active       |               |
+| `binary_to_gray`           | Binary to gray code converter                                                                             | active       |               |
+| `find_first_one`           | Leading-one finder / leading-zero counter                                                                 | *deprecated* | `lzc`         |
+| `gray_to_binary`           | Gray code to binary converter                                                                             | active       |               |
+| `lzc`                      | Leading/trailing-zero counter                                                                             | active       |               |
+| `onehot_to_bin`            | One-hot to binary converter                                                                               | active       |               |
+| `shift_reg`                | Shift register for arbitrary types                                                                        | active       |               |
+| `rr_arb_tree`              | Round-robin arbiter for req/gnt and vld/rdy interfaces with optional priority                             | active       |               |
+| `rrarbiter`                | Round-robin arbiter for req/ack interface with look-ahead                                                 | *deprecated* | `rr_arb_tree` |
+| `prioarbiter`              | Priority arbiter for req/ack interface with look-ahead                                                    | *deprecated* | `rr_arb_tree` |
+| `fall_through_register`    | Fall-through register with ready/valid interface                                                          | active       |               |
+| `spill_register_flushable` | Register with ready/valid interface to cut all combinational interface paths and additional flush signal. | active       |               |
+| `spill_register`           | Register with ready/valid interface to cut all combinational interface paths                              | active       |               |
+| `stream_arbiter`           | Round-robin arbiter for ready/valid stream interface                                                      | active       |               |
+| `stream_arbiter_flushable` | Round-robin arbiter for ready/valid stream interface and flush functionality                              | active       |               |
+| `stream_demux`             | Ready/valid interface demultiplexer                                                                       | active       |               |
+| `stream_join`              | Ready/valid handshake join multiple to one common                                                         | active       |               |
+| `stream_mux`               | Ready/valid interface multiplexer                                                                         | active       |               |
+| `stream_register`          | Register with ready/valid interface                                                                       | active       |               |
+| `stream_fork`              | Ready/valid fork                                                                                          | active       |               |
+| `stream_fork_dynamic`      | Ready/valid fork, with selection mask for partial forking                                                 | active       |               |
+| `stream_filter`            | Ready/valid filter                                                                                        | active       |               |
+| `stream_delay`             | Randomize or delay ready/valid interface                                                                  | active       |               |
+| `stream_to_mem`            | Use memories without flow control for output data in streams.                                             | active       |               |
+| `stream_xbar`              | Fully connected crossbar with ready/valid interface.                                                      | active       |               |
+| `stream_omega_net`         | One-way stream omega-net with ready/valid interface. Isomorphic to a butterfly.                           | active       |               |
+| `sub_per_hash`             | Substitution-permutation hash function                                                                    | active       |               |
+| `popcount`                 | Combinatorial popcount (hamming weight)                                                                   | active       |               |
+
+### Data Structures
+
+| Name               | Description                                     | Status       | Superseded By |
+| ------------------ | ----------------------------------------------- | ------------ | ------------- |
+| `cb_filter`        | Counting-Bloom-Filter with combinational lookup | active       |               |
+| `fifo`             | FIFO register with upper threshold              | *deprecated* | `fifo_v3`     |
+| `fifo_v2`          | FIFO register with upper and lower threshold    | *deprecated* | `fifo_v3`     |
+| `fifo_v3`          | FIFO register with generic fill counts          | active       |               |
+| `stream_fifo`      | FIFO register with ready/valid interface        | active       |               |
+| `generic_fifo`     | FIFO register without thresholds                | *deprecated* | `fifo_v3`     |
+| `generic_fifo_adv` | FIFO register without thresholds                | *deprecated* | `fifo_v3`     |
+| `sram`             | SRAM behavioral model                           | active       |               |
+| `plru_tree`        | Pseudo least recently used tree                 | active       |               |
+| `unread`           | Empty module to sink unconnected outputs into   | active       |               |
+
+
+## Header Contents
+
+This repository currently contains the following header files.
+
+### RTL Register Macros
+
+The header file `registers.svh` contains macros that expand to descriptions of registers.
+To avoid misuse of `always_ff` blocks, only the following macros shall be used to describe sequential behavior.
+The use of linter rules that flag explicit uses of `always_ff` in source code is encouraged.
+
+|    Macro     |                             Arguments                             |                                Description                                |
+| ------------ | ----------------------------------------------------------------- | ------------------------------------------------------------------------- |
+| `` `FF``     | `q_sig`, `d_sig`, `rst_val`, (`clk_sig`, `arstn_sig`)             | Flip-flop with asynchronous active-low reset                              |
+| `` `FFAR``   | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `arst_sig`                | Flip-flop with asynchronous active-high reset                             |
+| `` `FFARN``  | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `arstn_sig`               | *deprecated* Flip-flop with asynchronous active-low reset                 |
+| `` `FFSR``   | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `rst_sig`                 | Flip-flop with synchronous active-high reset                              |
+| `` `FFSRN``  | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `rstn_sig`                | Flip-flop with synchronous active-low reset                               |
+| `` `FFNR``   | `q_sig`, `d_sig`, `clk_sig`                                       | Flip-flop without reset                                                   |
+|              |                                                                   |                                                                           |
+| `` `FFL``    | `q_sig`, `d_sig`, `load_ena`, `rst_val`, (`clk_sig`, `arstn_sig`) | Flip-flop with load-enable and asynchronous active-low reset              |
+| `` `FFLAR``  | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `arst_sig`    | Flip-flop with load-enable and asynchronous active-high reset             |
+| `` `FFLARN`` | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `arstn_sig`   | *deprecated* Flip-flop with load-enable and asynchronous active-low reset |
+| `` `FFLSR``  | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `rst_sig`     | Flip-flop with load-enable and synchronous active-high reset              |
+| `` `FFLSRN`` | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `rstn_sig`    | Flip-flop with load-enable and synchronous active-low reset               |
+| `` `FFLNR``  | `q_sig`, `d_sig`, `load_ena`, `clk_sig`                           | Flip-flop with load-enable without reset                                  |
+- *The name of the clock and reset signals for implicit variants is `clk_i` and `rst_ni`, respectively.*
+- *Argument suffix `_sig` indicates signal names for present and next state as well as clocks and resets.*
+- *Argument `rst_val` specifies the value literal to be assigned upon reset.*
+- *Argument `load_ena` specifies the boolean expression that forms the load enable of the register.*
+
+### SystemVerilog Assertion Macros
+
+The header file `assertions.svh` contains macros that expand to assertion blocks.
+These macros should reduce the effort in writing many assertions and make it
+easier to use them. They are identical to the macros used by [lowRISC](https://github.com/lowRISC/opentitan/blob/master/hw/ip/prim/rtl/prim_assert.sv)
+and just re-implemented here for the sake of easier use in PULP projects (the same include guard is used so they should not clash).
+
+#### Simple Assertion and Cover Macros
+| Macro                                                       | Arguments                                                                  | Description |
+| ----------------------------------------------------------- | -------------------------------------------------------------------------- | ----------- |
+| `` `ASSERT_I``     | `__name`, `__prop`                     | Immediate assertion                                                        |
+| `` `ASSERT_INIT``  | `__name`, `__prop`                     | Assertion in initial block. Can be used for things like parameter checking |
+| `` `ASSERT_FINAL`` | `__name`, `__prop`                     | Assertion in final block                                                   |
+| `` `ASSERT``       | `__name`, `__prop`, (`__clk`, `__rst`) | Assert a concurrent property directly                                      |
+| `` `ASSERT_NEVER`` | `__name`, `__prop`, (`__clk`, `__rst`) | Assert a concurrent property NEVER happens                                 |
+| `` `ASSERT_KNOWN`` | `__name`, `__sig`, (`__clk`, `__rst`)  | Concurrent clocked assertion with custom error message                     |
+| `` `COVER``        | `__name`, `__prop`, (`__clk`, `__rst`) | Cover a concurrent property                                                |
+- *The name of the clock and reset signals for implicit variants is `clk_i` and `rst_ni`, respectively.*
+
+#### Complex Assertion Macros
+| Macro                                                                      | Arguments                                                                                         | Description |
+| -------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ----------- |
+| `` `ASSERT_PULSE``    | `__name`, `__sig`, (`__clk`, `__rst`)              | Assert that signal is an active-high pulse with pulse length of 1 clock cycle                     |
+| `` `ASSERT_IF``       | `__name`, `__prop`, `__enable`, (`__clk`, `__rst`) | Assert that a property is true only when an enable signal is set                                  |
+| `` `ASSERT_KNOWN_IF`` | `__name`, `__sig`, `__enable`, (`__clk`, `__rst`)  | Assert that signal has a known value (each bit is either '0' or '1') after reset if enable is set |
+- *The name of the clock and reset signals for implicit variants is `clk_i` and `rst_ni`, respectively.*
+
+#### Assumption Macros
+
+| Macro                                                   | Arguments                    | Description |
+| ------------------------------------------------------- | ---------------------------- | ----------- |
+| `` `ASSUME``   | `__name`, `__prop`, (`__clk`, `__rst`) | Assume a concurrent property |
+| `` `ASSUME_I`` | `__name`, `__prop`                     | Assume an immediate property |
+- *The name of the clock and reset signals for implicit variants is `clk_i` and `rst_ni`, respectively.*
+
+#### Formal Verification Macros
+
+| Macro                                                       | Arguments                                                    | Description |
+| ----------------------------------------------------------- | ------------------------------------------------------------ | ----------- |
+| `` `ASSUME_FPV``   | `__name`, `__prop`, (`__clk`, `__rst`) | Assume a concurrent property during formal verification only |
+| `` `ASSUME_I_FPV`` | `__name`, `__prop`                     | Assume an immediate property during formal verification only |
+| `` `COVER_FPV``    | `__name`, `__prop`, (`__clk`, `__rst`) | Cover a concurrent property during formal verification       |
+- *The name of the clock and reset signals for implicit variants is `clk_i` and `rst_ni`, respectively.*
diff --git a/vendor/pulp-platform/common_cells/include/common_cells/assertions.svh b/vendor/pulp-platform/common_cells/include/common_cells/assertions.svh
new file mode 100644
index 0000000000..b6b4b73782
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/include/common_cells/assertions.svh
@@ -0,0 +1,201 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Macros and helper code for using assertions.
+//  - Provides default clk and rst options to simplify code
+//  - Provides boiler plate template for common assertions
+
+`ifndef PRIM_ASSERT_SV
+`define PRIM_ASSERT_SV
+
+`ifdef UVM
+  // report assertion error with UVM if compiled
+  package assert_rpt_pkg;
+    import uvm_pkg::*;
+    `include "uvm_macros.svh"
+    function void assert_rpt(string msg);
+      `uvm_error("ASSERT FAILED", msg)
+    endfunction
+  endpackage
+`endif
+
+///////////////////
+// Helper macros //
+///////////////////
+
+// Helper macro tested by the assertion macros below; set unless VERILATOR, SYNTHESIS or XSIM is defined
+`ifndef VERILATOR
+`ifndef SYNTHESIS
+`ifndef XSIM
+`define INC_ASSERT
+`endif   
+`endif
+`endif
+
+// Converts an arbitrary block of code into a Verilog string
+`define PRIM_STRINGIFY(__x) `"__x`"
+
+// ASSERT_RPT is available to change the reporting mechanism when an assert fails
+`define ASSERT_RPT(__name)                                                  \
+`ifdef UVM                                                                  \
+  assert_rpt_pkg::assert_rpt($sformatf("[%m] %s (%s:%0d)",                  \
+                             __name, `__FILE__, `__LINE__));                \
+`else                                                                       \
+  $error("[ASSERT FAILED] [%m] %s (%s:%0d)", __name, `__FILE__, `__LINE__); \
+`endif
+
+///////////////////////////////////////
+// Simple assertion and cover macros //
+///////////////////////////////////////
+
+// Default clk and reset signals used by assertion macros below.
+`define ASSERT_DEFAULT_CLK clk_i
+`define ASSERT_DEFAULT_RST !rst_ni
+
+// Immediate assertion
+// Note that immediate assertions are sensitive to simulation glitches.
+`define ASSERT_I(__name, __prop)           \
+`ifdef INC_ASSERT                          \
+  __name: assert (__prop)                  \
+    else begin                             \
+      `ASSERT_RPT(`PRIM_STRINGIFY(__name)) \
+    end                                    \
+`endif
+
+// Assertion in initial block. Can be used for things like parameter checking.
+`define ASSERT_INIT(__name, __prop)          \
+`ifdef INC_ASSERT                            \
+  initial begin                              \
+    __name: assert (__prop)                  \
+      else begin                             \
+        `ASSERT_RPT(`PRIM_STRINGIFY(__name)) \
+      end                                    \
+  end                                        \
+`endif
+
+// Assertion in final block. Can be used for things like queues being empty
+// at end of sim, all credits returned at end of sim, state machines in idle
+// at end of sim.
+`define ASSERT_FINAL(__name, __prop)                                         \
+`ifdef INC_ASSERT                                                            \
+  final begin                                                                \
+    __name: assert (__prop || $test$plusargs("disable_assert_final_checks")) \
+      else begin                                                             \
+        `ASSERT_RPT(`PRIM_STRINGIFY(__name))                                 \
+      end                                                                    \
+  end                                                                        \
+`endif
+
+// Assert a concurrent property directly.
+// It can be called as a module (or interface) body item.
+`define ASSERT(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \
+`ifdef INC_ASSERT                                                                        \
+  __name: assert property (@(posedge __clk) disable iff ((__rst) !== '0) (__prop))       \
+    else begin                                                                           \
+      `ASSERT_RPT(`PRIM_STRINGIFY(__name))                                               \
+    end                                                                                  \
+`endif
+// Note: Above we use (__rst !== '0) in the disable iff statements instead of
+// (__rst == '1).  This properly disables the assertion in cases when reset is X at
+// the beginning of a simulation. For that case, (reset == '1) does not disable the
+// assertion.
+
+// Assert a concurrent property NEVER happens
+`define ASSERT_NEVER(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \
+`ifdef INC_ASSERT                                                                              \
+  __name: assert property (@(posedge __clk) disable iff ((__rst) !== '0) not (__prop))         \
+    else begin                                                                                 \
+      `ASSERT_RPT(`PRIM_STRINGIFY(__name))                                                     \
+    end                                                                                        \
+`endif
+
+// Assert that signal has a known value (each bit is either '0' or '1') after reset.
+// It can be called as a module (or interface) body item.
+`define ASSERT_KNOWN(__name, __sig, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \
+`ifdef INC_ASSERT                                                                             \
+  `ASSERT(__name, !$isunknown(__sig), __clk, __rst)                                           \
+`endif
+
+//  Cover a concurrent property
+`define COVER(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \
+`ifdef INC_ASSERT                                                                       \
+  __name: cover property (@(posedge __clk) disable iff ((__rst) !== '0) (__prop));      \
+`endif
+
+//////////////////////////////
+// Complex assertion macros //
+//////////////////////////////
+
+// Assert that signal is an active-high pulse with pulse length of 1 clock cycle
+`define ASSERT_PULSE(__name, __sig, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \
+`ifdef INC_ASSERT                                                                             \
+  `ASSERT(__name, $rose(__sig) |=> !(__sig), __clk, __rst)                                    \
+`endif
+
+// Assert that a property is true only when an enable signal is set.  It can be called as a module
+// (or interface) body item.
+`define ASSERT_IF(__name, __prop, __enable, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \
+`ifdef INC_ASSERT                                                                                     \
+  `ASSERT(__name, (__enable) |-> (__prop), __clk, __rst)                                              \
+`endif
+
+// Assert that __enable itself is known and that __sig has a known value (each bit is either '0'
+// or '1') after reset if __enable is set.  It can be called as a module (or interface) body item.
+`define ASSERT_KNOWN_IF(__name, __sig, __enable, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \
+`ifdef INC_ASSERT                                                                                          \
+  `ASSERT_KNOWN(__name``KnownEnable, __enable, __clk, __rst)                                               \
+  `ASSERT_IF(__name, !$isunknown(__sig), __enable, __clk, __rst)                                           \
+`endif
+
+///////////////////////
+// Assumption macros //
+///////////////////////
+
+// Assume a concurrent property
+`define ASSUME(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \
+`ifdef INC_ASSERT                                                                        \
+  __name: assume property (@(posedge __clk) disable iff ((__rst) !== '0) (__prop))       \
+    else begin                                                                           \
+      `ASSERT_RPT(`PRIM_STRINGIFY(__name))                                               \
+    end                                                                                  \
+`endif
+
+// Assume an immediate property
+`define ASSUME_I(__name, __prop)           \
+`ifdef INC_ASSERT                          \
+  __name: assume (__prop)                  \
+    else begin                             \
+      `ASSERT_RPT(`PRIM_STRINGIFY(__name)) \
+    end                                    \
+`endif
+
+//////////////////////////////////
+// For formal verification only //
+//////////////////////////////////
+
+// Note that the existing set of ASSERT macros specified above shall be used for FPV,
+// thereby ensuring that the assertions are evaluated during DV simulations as well.
+
+// ASSUME_FPV
+// Assume a concurrent property during formal verification only.
+`define ASSUME_FPV(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \
+`ifdef FPV_ON                                                                                \
+   `ASSUME(__name, __prop, __clk, __rst)                                                     \
+`endif
+
+// ASSUME_I_FPV
+// Assume an immediate property during formal verification only.
+`define ASSUME_I_FPV(__name, __prop) \
+`ifdef FPV_ON                        \
+   `ASSUME_I(__name, __prop)         \
+`endif
+
+// COVER_FPV
+// Cover a concurrent property during formal verification
+`define COVER_FPV(__name, __prop, __clk = `ASSERT_DEFAULT_CLK, __rst = `ASSERT_DEFAULT_RST) \
+`ifdef FPV_ON                                                                               \
+   `COVER(__name, __prop, __clk, __rst)                                                     \
+`endif
+
+`endif // PRIM_ASSERT_SV
diff --git a/vendor/pulp-platform/common_cells/include/common_cells/registers.svh b/vendor/pulp-platform/common_cells/include/common_cells/registers.svh
new file mode 100644
index 0000000000..b64f31a013
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/include/common_cells/registers.svh
@@ -0,0 +1,221 @@
+// Copyright 2018, 2021 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+// Description: Common register defines for RTL designs
+
+`ifndef COMMON_CELLS_REGISTERS_SVH_
+`define COMMON_CELLS_REGISTERS_SVH_
+
+// Abridged Summary of available FF macros:
+// `FF:      asynchronous active-low reset
+// `FFAR:    asynchronous active-high reset
+// `FFARN:   [deprecated] asynchronous active-low reset
+// `FFSR:    synchronous active-high reset
+// `FFSRN:   synchronous active-low reset
+// `FFNR:    without reset
+// `FFL:     load-enable and asynchronous active-low reset
+// `FFLAR:   load-enable and asynchronous active-high reset
+// `FFLARN:  [deprecated] load-enable and asynchronous active-low reset
+// `FFLARNC: load-enable and asynchronous active-low reset and synchronous active-high clear
+// `FFLSR:   load-enable and synchronous active-high reset
+// `FFLSRN:  load-enable and synchronous active-low reset
+// `FFLNR:   load-enable without reset
+
+`ifdef VERILATOR
+`define NO_SYNOPSYS_FF 1
+`endif
+
+`define REG_DFLT_CLK clk_i
+`define REG_DFLT_RST rst_ni
+
+// Flip-Flop with asynchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// (__clk: clock input, optional, defaults to REG_DFLT_CLK)
+// (__arst_n: asynchronous reset, active-low, optional, defaults to REG_DFLT_RST)
+`define FF(__q, __d, __reset_value, __clk = `REG_DFLT_CLK, __arst_n = `REG_DFLT_RST) \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin                           \
+    if (!(__arst_n)) begin                                                           \
+      __q <= (__reset_value);                                                        \
+    end else begin                                                                   \
+      __q <= (__d);                                                                  \
+    end                                                                              \
+  end
+
+// Flip-Flop with asynchronous active-high reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst: asynchronous reset, active-high
+`define FFAR(__q, __d, __reset_value, __clk, __arst)     \
+  always_ff @(posedge (__clk) or posedge (__arst)) begin \
+    if (__arst) begin                                    \
+      __q <= (__reset_value);                            \
+    end else begin                                       \
+      __q <= (__d);                                      \
+    end                                                  \
+  end
+
+// DEPRECATED - use `FF instead
+// Flip-Flop with asynchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst_n: asynchronous reset, active-low
+`define FFARN(__q, __d, __reset_value, __clk, __arst_n) \
+  `FF(__q, __d, __reset_value, __clk, __arst_n)
+
+// Flip-Flop with synchronous active-high reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_clk: reset input, active-high
+`define FFSR(__q, __d, __reset_value, __clk, __reset_clk) \
+  `ifndef NO_SYNOPSYS_FF                       \
+  /``* synopsys sync_set_reset `"__reset_clk`" *``/       \
+    `endif                        \
+  always_ff @(posedge (__clk)) begin                      \
+    __q <= (__reset_clk) ? (__reset_value) : (__d);       \
+  end
+
+// Flip-Flop with synchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_n_clk: reset input, active-low, sampled on the rising edge of __clk
+`define FFSRN(__q, __d, __reset_value, __clk, __reset_n_clk) \
+    `ifndef NO_SYNOPSYS_FF                                   \
+  /``* synopsys sync_set_reset `"__reset_n_clk`" *``/        \
+    `endif                                                   \
+  always_ff @(posedge (__clk)) begin                         \
+    __q <= (!(__reset_n_clk)) ? (__reset_value) : (__d);     \
+  end
+
+// Always-enable Flip-Flop without reset
+// __q: Q output of FF
+// __d: D input of FF
+// __clk: clock input
+`define FFNR(__q, __d, __clk)        \
+  always_ff @(posedge (__clk)) begin \
+    __q <= (__d);                    \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-low reset (implicit clock and reset)
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF, otherwise the FF keeps its value
+// __reset_value: value assigned upon reset
+// (__clk: clock input, optional, defaults to REG_DFLT_CLK)
+// (__arst_n: asynchronous reset, active-low, optional, defaults to REG_DFLT_RST)
+`define FFL(__q, __d, __load, __reset_value, __clk = `REG_DFLT_CLK, __arst_n = `REG_DFLT_RST) \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin                                    \
+    if (!(__arst_n)) begin                                                                    \
+      __q <= (__reset_value);                                                                 \
+    end else begin                                                                            \
+      __q <= (__load) ? (__d) : (__q);                                                        \
+    end                                                                                       \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-high reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst: asynchronous reset, active-high
+`define FFLAR(__q, __d, __load, __reset_value, __clk, __arst) \
+  always_ff @(posedge (__clk) or posedge (__arst)) begin      \
+    if (__arst) begin                                         \
+      __q <= (__reset_value);                                 \
+    end else begin                                            \
+      __q <= (__load) ? (__d) : (__q);                        \
+    end                                                       \
+  end
+
+// DEPRECATED - use `FFL instead
+// Flip-Flop with load-enable and asynchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst_n: asynchronous reset, active-low
+`define FFLARN(__q, __d, __load, __reset_value, __clk, __arst_n) \
+  `FFL(__q, __d, __load, __reset_value, __clk, __arst_n)
+
+// Flip-Flop with load-enable and synchronous active-high reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_clk: reset input, active-high
+`define FFLSR(__q, __d, __load, __reset_value, __clk, __reset_clk)       \
+    `ifndef NO_SYNOPSYS_FF                                               \
+  /``* synopsys sync_set_reset `"__reset_clk`" *``/                      \
+    `endif                                                               \
+  always_ff @(posedge (__clk)) begin                                     \
+    __q <= (__reset_clk) ? (__reset_value) : ((__load) ? (__d) : (__q)); \
+  end
+
+// Flip-Flop with load-enable and synchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF (ignored while the reset is asserted)
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_n_clk: reset input, active-low, sampled on the rising edge of __clk
+`define FFLSRN(__q, __d, __load, __reset_value, __clk, __reset_n_clk)         \
+    `ifndef NO_SYNOPSYS_FF                                                    \
+  /``* synopsys sync_set_reset `"__reset_n_clk`" *``/                         \
+    `endif                                                                    \
+  always_ff @(posedge (__clk)) begin                                          \
+    __q <= (!(__reset_n_clk)) ? (__reset_value) : ((__load) ? (__d) : (__q)); \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-low reset and synchronous clear
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __clear: synchronously assign reset value into FF, takes precedence over __load
+// __reset_value: value assigned upon reset and upon clear
+// __clk: clock input
+// __arst_n: asynchronous reset, active-low
+`define FFLARNC(__q, __d, __load, __clear, __reset_value, __clk, __arst_n) \
+    `ifndef NO_SYNOPSYS_FF                                                 \
+  /``* synopsys sync_set_reset `"__clear`" *``/                            \
+    `endif                                                                 \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin                 \
+    if (!(__arst_n)) begin                                                 \
+      __q <= (__reset_value);                                              \
+    end else begin                                                         \
+      __q <= (__clear) ? (__reset_value) : (__load) ? (__d) : (__q);       \
+    end                                                                    \
+  end
+
+// Load-enable Flip-Flop without reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __clk: clock input
+`define FFLNR(__q, __d, __load, __clk) \
+  always_ff @(posedge (__clk)) begin   \
+    __q <= (__load) ? (__d) : (__q);   \
+  end
+
+`endif
diff --git a/vendor/pulp-platform/common_cells/src/addr_decode.sv b/vendor/pulp-platform/common_cells/src/addr_decode.sv
new file mode 100644
index 0000000000..90a43a0da2
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/addr_decode.sv
@@ -0,0 +1,161 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Wolfgang Roenninger <wroennin@ethz.ch>
+
+/// Address Decoder: Maps the input address combinatorially to an index.
+/// The address map `addr_map_i` is a packed array of rule_t structs.
+/// The ranges of any two rules may overlap. If so, the rule at the higher (more significant)
+/// position in `addr_map_i` prevails.
+///
+/// There can be an arbitrary number of address rules. There can be multiple
+/// ranges defined for the same index. The start address has to be less than the end address.
+///
+/// There is the possibility to add a default mapping:
+/// `en_default_idx_i`: Driving this port to `1'b1` maps all input addresses
+/// for which no rule in `addr_map_i` exists to the default index specified by
+/// `default_idx_i`. In this case, `dec_error_o` is always `1'b0`.
+///
+/// Assertions: The module checks every time there is a change in the address mapping
+/// if the resulting map is valid. It fatals if `start_addr` is not lower than `end_addr`
+/// or if a mapping targets an index that is outside the number of allowed indices.
+/// It issues warnings if the address regions of any two mappings overlap.
+module addr_decode #(
+  /// Number of indices; the `idx` of every rule has to be smaller than this value.
+  parameter int unsigned NoIndices = 32'd0,
+  /// Total number of rules.
+  parameter int unsigned NoRules   = 32'd0,
+  /// Address type inside the rules and to decode.
+  parameter type         addr_t    = logic,
+  /// Rule packed struct type.
+  /// The address decoder expects three fields in `rule_t`:
+  ///
+  /// typedef struct packed {
+  ///   int unsigned idx;
+  ///   addr_t       start_addr;
+  ///   addr_t       end_addr;
+  /// } rule_t;
+  ///
+  ///  - `idx`:        index of the rule, has to be < `NoIndices`
+  ///  - `start_addr`: start address of the range the rule describes, value is included in range
+  ///  - `end_addr`:   end address of the range the rule describes, value is NOT included in range
+  parameter type         rule_t    = logic,
+  /// Dependent parameter, do **not** overwrite!
+  ///
+  /// Width of the `idx_o` output port.
+  parameter int unsigned IdxWidth  = cf_math_pkg::idx_width(NoIndices),
+  /// Dependent parameter, do **not** overwrite!
+  ///
+  /// Type of the `idx_o` output port.
+  parameter type         idx_t     = logic [IdxWidth-1:0]
+) (
+  /// Address to decode.
+  input  addr_t               addr_i,
+  /// Address map: rule with the highest array position wins on collision
+  input  rule_t [NoRules-1:0] addr_map_i,
+  /// Decoded index.
+  output idx_t                idx_o,
+  /// Decode is valid: at least one rule matches `addr_i` (not set by the default mapping).
+  output logic                dec_valid_o,
+  /// Decode is not valid: no matching rule found and the default mapping is not enabled.
+  output logic                dec_error_o,
+  /// Enable default port mapping.
+  ///
+  /// When not used, tie to `0`.
+  input  logic                en_default_idx_i,
+  /// Default port index.
+  ///
+  /// When `en_default_idx_i` is `1`, this will be the index when no rule matches.
+  ///
+  /// When not used, tie to `0`.
+  input  idx_t                default_idx_i
+);
+
+  logic [NoRules-1:0] matched_rules; // purely for address map debugging
+
+  always_comb begin
+    // default assignments: no rule matched, fall back to the default index if it is enabled
+    matched_rules = '0;
+    dec_valid_o   = 1'b0;
+    dec_error_o   = (en_default_idx_i) ? 1'b0 : 1'b1;
+    idx_o         = (en_default_idx_i) ? default_idx_i : '0;
+
+    // match the rules in ascending order, so a later (higher position) match overrides earlier ones
+    for (int unsigned i = 0; i < NoRules; i++) begin
+      if ((addr_i >= addr_map_i[i].start_addr) && (addr_i < addr_map_i[i].end_addr)) begin
+        matched_rules[i] = 1'b1;
+        dec_valid_o      = 1'b1;
+        dec_error_o      = 1'b0;
+        idx_o            = idx_t'(addr_map_i[i].idx);
+      end
+    end
+  end
+
+  // Assumptions and assertions
+  `ifndef VERILATOR
+  `ifndef XSIM
+  // pragma translate_off
+  initial begin : proc_check_parameters
+    assume ($bits(addr_i) == $bits(addr_map_i[0].start_addr)) else
+      $warning($sformatf("Input address has %d bits and address map has %d bits.",
+        $bits(addr_i), $bits(addr_map_i[0].start_addr)));
+    assume (NoRules > 0) else
+      $fatal(1, $sformatf("At least one rule needed"));
+    assume (NoIndices > 0) else
+      $fatal(1, $sformatf("At least one index needed"));
+  end
+
+  assert final ($onehot0(matched_rules)) else
+    $warning("More than one bit set in the one-hot signal, matched_rules");
+
+  // The following assumptions check the validity of the address map.
+  // They are re-evaluated whenever `addr_map_i` changes and are skipped while
+  // any bit of `addr_map_i` is unknown. `check_start` and `check_idx` run once
+  // for every rule, `check_overlap` runs once for every distinct pair of
+  // rules (i < j).
+  // check_start:        Enforces a smaller start than end address.
+  // check_idx:          Enforces a valid index in the rule.
+  // check_overlap:      Warns if there are overlapping address regions.
+  always @(addr_map_i) #0 begin : proc_check_addr_map
+    if (!$isunknown(addr_map_i)) begin
+      for (int unsigned i = 0; i < NoRules; i++) begin
+        check_start : assume (addr_map_i[i].start_addr < addr_map_i[i].end_addr) else
+          $fatal(1, $sformatf("This rule has a higher start than end address!!!\n\
+              Violating rule %d.\n\
+              Rule> IDX: %h START: %h END: %h\n\
+              #####################################################",
+              i ,addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr));
+        // check that the index of the rule is below NoIndices
+        check_idx : assume (addr_map_i[i].idx < NoIndices) else
+            $fatal(1, $sformatf("This rule has a IDX that is not allowed!!!\n\
+            Violating rule %d.\n\
+            Rule> IDX: %h START: %h END: %h\n\
+            Rule> MAX_IDX: %h\n\
+            #####################################################",
+            i, addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr,
+            (NoIndices-1)));
+        for (int unsigned j = i + 1; j < NoRules; j++) begin
+          // overlap check: rule j must not intersect the half-open range of rule i
+          check_overlap : assume (!((addr_map_i[j].start_addr < addr_map_i[i].end_addr) &&
+                                    (addr_map_i[j].end_addr > addr_map_i[i].start_addr)))   else
+               $warning($sformatf("Overlapping address region found!!!\n\
+              Rule %d: IDX: %h START: %h END: %h\n\
+              Rule %d: IDX: %h START: %h END: %h\n\
+              #####################################################",
+              i, addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr,
+              j, addr_map_i[j].idx, addr_map_i[j].start_addr, addr_map_i[j].end_addr));
+        end
+      end
+    end
+  end
+  // pragma translate_on
+  `endif
+  `endif
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/binary_to_gray.sv b/vendor/pulp-platform/common_cells/src/binary_to_gray.sv
new file mode 100644
index 0000000000..f4e4efd3b4
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/binary_to_gray.sv
@@ -0,0 +1,22 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+/// A combinational binary to Gray code converter: `Z` is `A` XORed with `A` shifted right by one.
+module binary_to_gray #(
+    parameter int N = -1  // bit width of `A` and `Z`
+)(
+    input  logic [N-1:0] A,  // binary-coded input
+    output logic [N-1:0] Z   // Gray-coded output
+);
+    assign Z = A ^ (A >> 1);  // the shift fills with zero, so the MSB of A passes through unchanged
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/cb_filter.sv b/vendor/pulp-platform/common_cells/src/cb_filter.sv
new file mode 100644
index 0000000000..9fbc5269ed
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cb_filter.sv
@@ -0,0 +1,246 @@
+// Copyright (c) 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Wolfgang Roenninger <wroennin@ethz.ch>
+
+// `cb_filter`: This module implements a counting bloom filter with parameterizable hash functions.
+//
+// Functionality: A counting bloom filter is a data structure to efficiently implement
+//                set lookups. It does so by hashing its data inputs onto multiple pointers
+//                which serve as indicators for an array of buckets. Lookups can yield
+//                false positives, but never false negatives.
+// - Seeding:     The pseudo random generators need seeds at elaboration time to generate
+//                different hashes. In principle any combination of seeds can be used.
+//                But one should look that the hash outputs give sufficient different patterns,
+//                such that the resulting collision rate is low. The package `cb_filter_pkg`
+//                contains the struct for seeding the PRG's in the hash functions.
+// - Lookup:
+//   - Ports:       `look_data_i`, `look_valid_o`
+//   - Description: Lookup combinational, `look_valid_o` is high, when `look_data_i` was
+//                  previously put into the filter.
+// - Increment:
+//   - Ports:       `incr_data_i`, `incr_valid_i`
+//   - Description: Put data into the counting bloom filter, when valid is high.
+// - Decrement:
+//   - Ports:       `decr_data_i`, `decr_valid_i`
+//   - Description: Remove data from the counting bloom filter. Only remove data that was
+//                  previously put in, otherwise will go in a wrong state.
+// - Status:
+//   - `filter_clear_i`:  Clears the filter and sets all counters to 0.
+//   - `filter_usage_o`:  How many data items are currently in the filter.
+//   - `filter_full_o`:   Filter is full, can no longer hold more items.
+//   - `filter_empty_o`:  Filter is empty.
+//   - `filter_error_o`:  One of the internal counters or buckets overflowed.
+
+/// Counting bloom filter built from 2**HashWidth up/down counters ("buckets").
+module cb_filter #(
+  parameter int unsigned KHashes     =  32'd3,  // Number of hash functions
+  parameter int unsigned HashWidth   =  32'd4,  // Number of counters is 2**HashWidth
+  parameter int unsigned HashRounds  =  32'd1,  // Number of permutation substitution rounds
+  parameter int unsigned InpWidth    =  32'd32, // Input data width
+  parameter int unsigned BucketWidth =  32'd4,  // Width of Bucket counters
+  // the seeds used for seeding the PRG's inside each hash, one `cb_seed_t` per hash function.
+  parameter cb_filter_pkg::cb_seed_t [KHashes-1:0] Seeds = cb_filter_pkg::EgSeeds
+) (
+  input  logic                 clk_i,   // Clock
+  input  logic                 rst_ni,  // Active low reset
+  // data lookup
+  input  logic [InpWidth-1:0]  look_data_i,   // data to test for membership
+  output logic                 look_valid_o,  // every bucket hit by `look_data_i` is non-zero
+  // data increment
+  input  logic [InpWidth-1:0]  incr_data_i,   // data to add to the filter
+  input  logic                 incr_valid_i,  // qualifies `incr_data_i`
+  // data decrement
+  input  logic [InpWidth-1:0]  decr_data_i,   // data to remove from the filter
+  input  logic                 decr_valid_i,  // qualifies `decr_data_i`
+  // status signals
+  input  logic                 filter_clear_i,  // drives `clear_i` of every counter
+  output logic [HashWidth-1:0] filter_usage_o,  // value of the total item counter
+  output logic                 filter_full_o,   // at least one bucket is all ones or overflowed
+  output logic                 filter_empty_o,  // every bucket reads zero
+  output logic                 filter_error_o   // a bucket or the total counter reports overflow
+);
+
+  localparam int unsigned NoCounters  = 2**HashWidth;
+
+  // signal declarations
+  logic [NoCounters-1:0] look_ind; // buckets selected by the lookup hashes
+  logic [NoCounters-1:0] incr_ind; // buckets selected by the increment hashes
+  logic [NoCounters-1:0] decr_ind; // buckets selected by the decrement hashes
+  // bucket (counter signals)
+  logic [NoCounters-1:0] bucket_en;        // per-bucket count enable
+  logic [NoCounters-1:0] bucket_down;      // per-bucket `down_i` (set for buckets being decremented)
+  logic [NoCounters-1:0] bucket_occupied;  // bucket content is non-zero
+  logic [NoCounters-1:0] bucket_overflow;  // `overflow_o` of the bucket counter
+  logic [NoCounters-1:0] bucket_full;      // bucket overflowed or its content is all ones
+  logic [NoCounters-1:0] bucket_empty;     // bucket content is zero
+  // membership lookup signals
+  logic [NoCounters-1:0] data_in_bucket;   // looked-up buckets that are occupied
+  // tot count signals (filter usage)
+  logic cnt_en;
+  logic cnt_down;
+  logic cnt_overflow;
+
+  // -----------------------------------------
+  // Lookup Hash - Membership Detection
+  // -----------------------------------------
+  hash_block #(
+    .NoHashes     ( KHashes      ),
+    .InpWidth     ( InpWidth     ),
+    .HashWidth    ( HashWidth    ),
+    .NoRounds     ( HashRounds   ),
+    .Seeds        ( Seeds        )
+  ) i_look_hashes (
+    .data_i       ( look_data_i  ),
+    .indicator_o  ( look_ind     )
+  );
+  assign data_in_bucket = look_ind & bucket_occupied;
+  assign look_valid_o   = (data_in_bucket == look_ind) ? 1'b1 : 1'b0;  // all selected buckets occupied
+
+  // -----------------------------------------
+  // Increment Hash - Add Member to Set
+  // -----------------------------------------
+  hash_block #(
+    .NoHashes     ( KHashes      ),
+    .InpWidth     ( InpWidth     ),
+    .HashWidth    ( HashWidth    ),
+    .NoRounds     ( HashRounds   ),
+    .Seeds        ( Seeds        )
+  ) i_incr_hashes (
+    .data_i       ( incr_data_i  ),
+    .indicator_o  ( incr_ind     )
+  );
+
+  // -----------------------------------------
+  // Decrement Hash - Remove Member from Set
+  // -----------------------------------------
+  hash_block #(
+    .NoHashes     ( KHashes      ),
+    .InpWidth     ( InpWidth     ),
+    .HashWidth    ( HashWidth    ),
+    .NoRounds     ( HashRounds   ),
+    .Seeds        ( Seeds        )
+  ) i_decr_hashes (
+    .data_i       ( decr_data_i  ),
+    .indicator_o  ( decr_ind     )
+  );
+
+  // -----------------------------------------
+  // Control the incr/decr of buckets
+  // -----------------------------------------
+  assign bucket_down = decr_valid_i ? decr_ind : '0;
+
+  always_comb begin : proc_bucket_control
+    case ({incr_valid_i, decr_valid_i})
+      2'b00 : bucket_en = '0;                   // idle
+      2'b10 : bucket_en = incr_ind;             // insert only
+      2'b01 : bucket_en = decr_ind;             // remove only
+      2'b11 : bucket_en = incr_ind ^ decr_ind;  // buckets hit by both requests stay unchanged
+      default: bucket_en = '0; // unreachable
+    endcase
+  end
+
+  // -----------------------------------------
+  // Counters
+  // -----------------------------------------
+  for (genvar i = 0; i < NoCounters; i++) begin : gen_buckets
+    logic [BucketWidth-1:0] bucket_content;
+    counter #(
+      .WIDTH( BucketWidth )
+    ) i_bucket (
+      .clk_i      ( clk_i             ),
+      .rst_ni     ( rst_ni            ),
+      .clear_i    ( filter_clear_i    ),
+      .en_i       ( bucket_en[i]      ),
+      .load_i     ( '0                ),  // load port tied off
+      .down_i     ( bucket_down[i]    ),
+      .d_i        ( '0                ),  // load data tied off
+      .q_o        ( bucket_content    ),
+      .overflow_o ( bucket_overflow[i])
+    );
+    assign bucket_full[i]     =  bucket_overflow[i] | (&bucket_content);
+    assign bucket_occupied[i] = |bucket_content;
+    assign bucket_empty[i]    = ~bucket_occupied[i];
+  end
+
+  // -----------------------------------------
+  // Filter tot item counter
+  // -----------------------------------------
+  assign cnt_en   = incr_valid_i ^ decr_valid_i;  // a simultaneous insert and remove cancel out
+  assign cnt_down = decr_valid_i;
+  counter #(
+    .WIDTH ( HashWidth )
+  ) i_tot_count (
+    .clk_i     ( clk_i          ),
+    .rst_ni    ( rst_ni         ),
+    .clear_i   ( filter_clear_i ),
+    .en_i      ( cnt_en         ),
+    .load_i    ( '0             ),  // load port tied off
+    .down_i    ( cnt_down       ),
+    .d_i       ( '0             ),  // load data tied off
+    .q_o       ( filter_usage_o ),
+    .overflow_o( cnt_overflow   )
+  );
+
+  // -----------------------------------------
+  // Filter Output Flags
+  // -----------------------------------------
+  assign filter_full_o  = |bucket_full;
+  assign filter_empty_o = &bucket_empty;
+  assign filter_error_o = |bucket_overflow | cnt_overflow;
+endmodule
+
+// Computes all hashes of `data_i` and ORs their one-hot encodings into one indicator vector.
+module hash_block #(
+  parameter int unsigned NoHashes                         = 32'd3,   // Number of hash functions
+  parameter int unsigned InpWidth                         = 32'd11,  // Width of `data_i`
+  parameter int unsigned HashWidth                        = 32'd5,   // Indicator is 2**HashWidth wide
+  parameter int unsigned NoRounds                         = 32'd1,   // Forwarded to `sub_per_hash`
+  parameter cb_filter_pkg::cb_seed_t [NoHashes-1:0] Seeds = cb_filter_pkg::EgSeeds
+) (
+  input  logic [InpWidth-1:0]     data_i,       // Data to hash
+  output logic [2**HashWidth-1:0] indicator_o   // Bit i is set if any hash selects position i
+);
+
+  logic [NoHashes-1:0][2**HashWidth-1:0] hashes;  // one-hot output of each hash function
+
+  for (genvar i = 0; i < NoHashes; i++) begin : gen_hashes
+    sub_per_hash #(
+      .InpWidth   ( InpWidth             ),
+      .HashWidth  ( HashWidth            ),
+      .NoRounds   ( NoRounds             ),
+      .PermuteKey ( Seeds[i].PermuteSeed ),
+      .XorKey     ( Seeds[i].XorSeed     )
+    ) i_hash (
+      .data_i        ( data_i    ),
+      .hash_o        (           ), // not used, because we want the onehot
+      .hash_onehot_o ( hashes[i] )
+    );
+  end
+
+  // output assignment: bitwise OR over all hash functions
+  always_comb begin : proc_hash_or
+    indicator_o = '0;
+    for (int unsigned i = 0; i < (2**HashWidth); i++) begin
+      for (int unsigned j = 0; j < NoHashes; j++) begin
+        indicator_o[i] = indicator_o[i] | hashes[j][i];
+      end
+    end
+  end
+
+  // assertions (the integer parameters are printed with %0d; %s would render them as characters)
+  // pragma translate_off
+  initial begin
+    hash_conf: assume (InpWidth > HashWidth) else
+      $fatal(1, "%m:\nA Hash Function reduces the width of the input.\nInpWidth: %0d\nHashWidth: %0d",
+          InpWidth, HashWidth);
+  end
+  // pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/cb_filter_pkg.sv b/vendor/pulp-platform/common_cells/src/cb_filter_pkg.sv
new file mode 100644
index 0000000000..97334475e8
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cb_filter_pkg.sv
@@ -0,0 +1,26 @@
+// Copyright (c) 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Wolfgang Roenninger <wroennin@ethz.ch>
+
+/// Package with the seed struct consumed by `cb_filter`/`hash_block`, plus an example seed set.
+package cb_filter_pkg;
+  typedef struct packed {
+    int unsigned PermuteSeed;  // wired to the `PermuteKey` parameter of `sub_per_hash`
+    int unsigned XorSeed;      // wired to the `XorKey` parameter of `sub_per_hash`
+  } cb_seed_t;
+
+  // example seeding struct: three entries, used as the default `Seeds` of `cb_filter`/`hash_block`
+  localparam cb_seed_t [2:0] EgSeeds = '{
+    '{PermuteSeed: 32'd299034753, XorSeed: 32'd4094834  },
+    '{PermuteSeed: 32'd19921030,  XorSeed: 32'd995713   },
+    '{PermuteSeed: 32'd294388,    XorSeed: 32'd65146511 }
+  };
+endpackage
diff --git a/vendor/pulp-platform/common_cells/src/cc_onehot.sv b/vendor/pulp-platform/common_cells/src/cc_onehot.sv
new file mode 100644
index 0000000000..69cdf86756
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cc_onehot.sv
@@ -0,0 +1,50 @@
+// Copyright 2021 ETH Zurich.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Hardware implementation of SystemVerilog's `$onehot()` function.
+/// It uses a tree of half adders and a separate
+/// or reduction tree for the carry.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+// Author: Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+module cc_onehot #(  // Flags whether exactly one bit of `d_i` is set
+  parameter int unsigned Width = 4  // Number of input bits
+) (
+  input  logic [Width-1:0] d_i,  // Vector to check
+  output logic is_onehot_o       // High when the parity is odd and no adder level produced a carry
+);
+  // trivial base case
+  if (Width == 1) begin : gen_degenerated_onehot
+    assign is_onehot_o = d_i;
+  end else begin : gen_onehot
+    localparam int LVLS = $clog2(Width) + 1;  // tree levels, counting the input level
+
+    logic [LVLS-1:0][2**(LVLS-1)-1:0] sum, carry;  // per-level half-adder outputs
+    logic [LVLS-2:0] carry_array;                  // one "any carry" flag per adder level
+
+    // Extend to a power of two.
+    assign sum[0] = $unsigned(d_i);
+
+    // generate half adders for each lvl
+    // lvl 0 is the input level
+    for (genvar i = 1; i < LVLS; i++) begin : gen_lvl
+      localparam int unsigned LVLWidth = 2**LVLS / 2**i;  // number of inputs consumed at level i
+      for (genvar j = 0; j < LVLWidth; j+=2) begin : gen_width
+        assign sum[i][j/2] = sum[i-1][j] ^ sum[i-1][j+1];    // half-adder sum of a neighbouring pair
+        assign carry[i][j/2] = sum[i-1][j] & sum[i-1][j+1];  // both inputs of the pair are set
+      end
+      // generate carry tree
+      assign carry_array[i-1] = |carry[i][LVLWidth/2-1:0];
+    end
+    assign is_onehot_o = sum[LVLS-1][0] & ~|carry_array;  // odd parity and no carry at any level
+  end
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/cdc_2phase.sv b/vendor/pulp-platform/common_cells/src/cdc_2phase.sv
new file mode 100644
index 0000000000..8e770abfa1
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cdc_2phase.sv
@@ -0,0 +1,175 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+/// A two-phase clock domain crossing, composed of `cdc_2phase_src` and `cdc_2phase_dst`.
+///
+/// CONSTRAINT: Requires max_delay of min_period(src_clk_i, dst_clk_i) through
+/// the paths async_req, async_ack, async_data.
+/* verilator lint_off DECLFILENAME */
+module cdc_2phase #(
+  parameter type T = logic  // Payload type carried across the crossing
+)(
+  input  logic src_rst_ni,   // Reset of the source half, active low
+  input  logic src_clk_i,    // Clock of the source half
+  input  T     src_data_i,   // Payload offered by the source
+  input  logic src_valid_i,  // Source offers `src_data_i`
+  output logic src_ready_o,  // Source half can accept a new item
+
+  input  logic dst_rst_ni,   // Reset of the destination half, active low
+  input  logic dst_clk_i,    // Clock of the destination half
+  output T     dst_data_o,   // Payload presented to the destination
+  output logic dst_valid_o,  // `dst_data_o` holds an item not yet accepted
+  input  logic dst_ready_i   // Destination accepts `dst_data_o`
+);
+
+  // Asynchronous handshake signals.
+  (* dont_touch = "true" *) logic async_req;
+  (* dont_touch = "true" *) logic async_ack;
+  (* dont_touch = "true" *) T async_data;
+
+  // The sender in the source domain.
+  cdc_2phase_src #(.T(T)) i_src (
+    .rst_ni       ( src_rst_ni  ),
+    .clk_i        ( src_clk_i   ),
+    .data_i       ( src_data_i  ),
+    .valid_i      ( src_valid_i ),
+    .ready_o      ( src_ready_o ),
+    .async_req_o  ( async_req   ),
+    .async_ack_i  ( async_ack   ),
+    .async_data_o ( async_data  )
+  );
+
+  // The receiver in the destination domain.
+  cdc_2phase_dst #(.T(T)) i_dst (
+    .rst_ni       ( dst_rst_ni  ),
+    .clk_i        ( dst_clk_i   ),
+    .data_o       ( dst_data_o  ),
+    .valid_o      ( dst_valid_o ),
+    .ready_i      ( dst_ready_i ),
+    .async_req_i  ( async_req   ),
+    .async_ack_o  ( async_ack   ),
+    .async_data_i ( async_data  )
+  );
+
+endmodule
+
+
+/// Source-domain half of the two-phase crossing: latches data and toggles the request line.
+module cdc_2phase_src #(
+  parameter type T = logic  // Payload type
+)(
+  input  logic rst_ni,        // Asynchronous reset, active low
+  input  logic clk_i,         // Source-domain clock
+  input  T     data_i,        // Payload to send
+  input  logic valid_i,       // `data_i` is valid
+  output logic ready_o,       // High while the last request has been acknowledged
+  output logic async_req_o,   // Request level; toggles once per accepted item
+  input  logic async_ack_i,   // Acknowledge level from the other half; synchronized below
+  output T     async_data_o   // Registered payload; changes only when an item is accepted
+);
+
+  (* dont_touch = "true" *)
+  logic req_src_q, ack_src_q, ack_q;
+  (* dont_touch = "true" *)
+  T data_src_q;
+
+  // The req_src and data_src registers change when a new data item is accepted.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      req_src_q  <= 0;
+      data_src_q <= '0;
+    end else if (valid_i && ready_o) begin
+      req_src_q  <= ~req_src_q;  // a level change (not a pulse) signals the new item
+      data_src_q <= data_i;
+    end
+  end
+
+  // The ack_src and ack registers act as synchronization stages.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ack_src_q <= 0;
+      ack_q     <= 0;
+    end else begin
+      ack_src_q <= async_ack_i;  // first stage samples the asynchronous input
+      ack_q     <= ack_src_q;    // second stage feeds the ready logic
+    end
+  end
+
+  // Output assignments.
+  assign ready_o = (req_src_q == ack_q);  // equal levels: no request outstanding
+  assign async_req_o = req_src_q;
+  assign async_data_o = data_src_q;
+
+endmodule
+
+
+/// Destination-domain half of the two-phase crossing: synchronizes the request line,
+/// captures the payload and toggles the acknowledge line once the item is accepted.
+module cdc_2phase_dst #(
+  parameter type T = logic  // Payload type
+)(
+  input  logic rst_ni,        // Asynchronous reset, active low
+  input  logic clk_i,         // Destination-domain clock
+  output T     data_o,        // Captured payload
+  output logic valid_o,       // High while a received item has not been accepted yet
+  input  logic ready_i,       // Consumer accepts `data_o`
+  input  logic async_req_i,   // Request level from the other half; synchronized below
+  output logic async_ack_o,   // Acknowledge level; toggles once per accepted item
+  input  T     async_data_i   // Payload from the other half, sampled without a synchronizer
+);
+
+  (* dont_touch = "true" *)
+  (* async_reg = "true" *)
+  logic req_dst_q, req_q0, req_q1, ack_dst_q;
+  (* dont_touch = "true" *)
+  T data_dst_q;
+
+  // The ack_dst register changes when a new data item is accepted.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ack_dst_q  <= 0;
+    end else if (valid_o && ready_i) begin
+      ack_dst_q  <= ~ack_dst_q;
+    end
+  end
+
+  // The data_dst register changes when a new data item is presented. This is
+  // indicated by the async_req line changing levels.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      data_dst_q <= '0;
+    end else if (req_q0 != req_q1 && !valid_o) begin  // edge seen one cycle before valid_o rises
+      data_dst_q <= async_data_i;
+    end
+  end
+
+  // The req_dst and req registers act as synchronization stages.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      req_dst_q <= 0;
+      req_q0    <= 0;
+      req_q1    <= 0;
+    end else begin
+      req_dst_q <= async_req_i;  // first stage samples the asynchronous input
+      req_q0    <= req_dst_q;
+      req_q1    <= req_q0;       // delayed copy; compared with req_q0 to detect a level change
+    end
+  end
+
+  // Output assignments.
+  assign valid_o = (ack_dst_q != req_q1);  // differing levels: an item is pending
+  assign data_o = data_dst_q;
+  assign async_ack_o = ack_dst_q;
+
+endmodule
+/* verilator lint_on DECLFILENAME */
diff --git a/vendor/pulp-platform/common_cells/src/cdc_fifo_2phase.sv b/vendor/pulp-platform/common_cells/src/cdc_fifo_2phase.sv
new file mode 100644
index 0000000000..acbb7b0a70
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cdc_fifo_2phase.sv
@@ -0,0 +1,134 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+/// A clock domain crossing FIFO, using 2-phase hand shakes.
+///
+/// This FIFO has its push and pop ports in two separate clock domains. Its size
+/// can only be powers of two, which is why its depth is given as 2**LOG_DEPTH.
+/// LOG_DEPTH must be at least 1. The read and write pointers are exchanged between
+/// the two domains through two `cdc_2phase` instances.
+/// CONSTRAINT: See the constraints for `cdc_2phase`. An additional maximum
+/// delay path needs to be specified from fifo_data_q to dst_data_o.
+module cdc_fifo_2phase #(
+  /// The data type of the payload transported by the FIFO.
+  parameter type T = logic,
+  /// The FIFO's depth given as 2**LOG_DEPTH.
+  parameter int LOG_DEPTH = 3
+)(
+  input  logic src_rst_ni,   // Source-domain reset, active low
+  input  logic src_clk_i,    // Source-domain (push) clock
+  input  T     src_data_i,   // Item to push
+  input  logic src_valid_i,  // Push request
+  output logic src_ready_o,  // Low while the FIFO looks full from the source side
+
+  input  logic dst_rst_ni,   // Destination-domain reset, active low
+  input  logic dst_clk_i,    // Destination-domain (pop) clock
+  output T     dst_data_o,   // Item at the read pointer
+  output logic dst_valid_o,  // Low while the FIFO looks empty from the destination side
+  input  logic dst_ready_i   // Pop request
+);
+
+  // Check the invariants.
+  //pragma translate_off
+  initial begin
+    assert(LOG_DEPTH > 0);
+  end
+  //pragma translate_on
+
+  localparam int PtrWidth = LOG_DEPTH+1;    // one wrap bit on top of the index bits
+  typedef logic [PtrWidth-1:0] pointer_t;
+  typedef logic [LOG_DEPTH-1:0] index_t;
+
+  localparam pointer_t PtrFull  = (1 << LOG_DEPTH);  // pointers differ only in the wrap bit
+  localparam pointer_t PtrEmpty = '0;                // pointers are identical
+
+  // Allocate the registers for the FIFO memory with its separate write and read
+  // ports. The FIFO has the following ports:
+  //
+  // - write: fifo_widx, fifo_wdata, fifo_write, src_clk_i
+  // - read: fifo_ridx, fifo_rdata
+  index_t fifo_widx, fifo_ridx;
+  logic fifo_write;
+  T fifo_wdata, fifo_rdata;
+  T fifo_data_q [2**LOG_DEPTH];
+
+  assign fifo_rdata = fifo_data_q[fifo_ridx];  // combinational read
+
+  for (genvar i = 0; i < 2**LOG_DEPTH; i++) begin : g_word
+    always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+      if (!src_rst_ni)
+        fifo_data_q[i] <= '0;
+      else if (fifo_write && fifo_widx == i)
+        fifo_data_q[i] <= fifo_wdata;
+    end
+  end
+
+  // Allocate the read and write pointers in the source and destination domain.
+  pointer_t src_wptr_q, dst_wptr, src_rptr, dst_rptr_q;
+
+  always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+    if (!src_rst_ni)
+      src_wptr_q <= 0;
+    else if (src_valid_i && src_ready_o)  // push accepted
+      src_wptr_q <= src_wptr_q + 1;
+  end
+
+  always_ff @(posedge dst_clk_i, negedge dst_rst_ni) begin
+    if (!dst_rst_ni)
+      dst_rptr_q <= 0;
+    else if (dst_valid_o && dst_ready_i)  // pop accepted
+      dst_rptr_q <= dst_rptr_q + 1;
+  end
+
+  // The pointers into the FIFO are one bit wider than the actual address into
+  // the FIFO. This makes detecting critical states very simple: if all but the
+  // topmost bit of rptr and wptr agree, the FIFO is in a critical state. If the
+  // topmost bit is equal, the FIFO is empty, otherwise it is full.
+  assign src_ready_o = ((src_wptr_q ^ src_rptr) != PtrFull);
+  assign dst_valid_o = ((dst_rptr_q ^ dst_wptr) != PtrEmpty);
+
+  // Transport the read and write pointers across the clock domain boundary.
+  cdc_2phase #( .T(pointer_t) ) i_cdc_wptr (
+    .src_rst_ni  ( src_rst_ni ),
+    .src_clk_i   ( src_clk_i  ),
+    .src_data_i  ( src_wptr_q ),
+    .src_valid_i ( 1'b1       ),  // always offering the current pointer value
+    .src_ready_o (            ),
+    .dst_rst_ni  ( dst_rst_ni ),
+    .dst_clk_i   ( dst_clk_i  ),
+    .dst_data_o  ( dst_wptr   ),
+    .dst_valid_o (            ),
+    .dst_ready_i ( 1'b1       )   // always accepting the received pointer value
+  );
+
+  cdc_2phase #( .T(pointer_t) ) i_cdc_rptr (
+    .src_rst_ni  ( dst_rst_ni ),
+    .src_clk_i   ( dst_clk_i  ),
+    .src_data_i  ( dst_rptr_q ),
+    .src_valid_i ( 1'b1       ),  // always offering the current pointer value
+    .src_ready_o (            ),
+    .dst_rst_ni  ( src_rst_ni ),
+    .dst_clk_i   ( src_clk_i  ),
+    .dst_data_o  ( src_rptr   ),
+    .dst_valid_o (            ),
+    .dst_ready_i ( 1'b1       )   // always accepting the received pointer value
+  );
+
+  // Drive the FIFO write and read ports.
+  assign fifo_widx  = src_wptr_q;  // assignment to index_t drops the wrap bit
+  assign fifo_wdata = src_data_i;
+  assign fifo_write = src_valid_i && src_ready_o;
+  assign fifo_ridx  = dst_rptr_q;  // assignment to index_t drops the wrap bit
+  assign dst_data_o = fifo_rdata;
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/cdc_fifo_gray.sv b/vendor/pulp-platform/common_cells/src/cdc_fifo_gray.sv
new file mode 100644
index 0000000000..802f29545b
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cdc_fifo_gray.sv
@@ -0,0 +1,269 @@
+// Copyright 2018-2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+/// A clock domain crossing FIFO, using gray counters.
+///
+/// # Architecture
+///
+/// The design is split into two parts, each one being clocked and reset
+/// separately.
+/// 1. The data to be transferred over the clock domain boundary
+///    is stored in a FIFO. The corresponding write pointer is managed
+///    (incremented) in the source clock domain.
+/// 2. The entire FIFO content is exposed over the `async_data` port.
+///    The destination clock domain increments its read pointer
+///    in its destination clock domain.
+///
+/// Read and write pointers are then gray coded, communicated
+/// and synchronized using a classic multi-stage FF synchronizer
+/// in the other clock domain. The gray coding ensures that only
+/// one bit changes at each pointer increment, preventing the
+/// synchronizer from accidentally latching an inconsistent state
+/// on a multi-bit bus.
+///
+/// The not full signal e.g. `src_ready_o` (on the sending side)
+/// is generated using the local write pointer and the pessimistic
+/// read pointer from the destination clock domain (pessimistic
+/// because it is delayed at least two cycles because of the synchronizer
+/// stages). This prevents the FIFO from overflowing.
+///
+/// The not empty signal e.g. `dst_valid_o` is generated using
+/// the pessimistic write pointer and the local read pointer in
+/// the destination clock domain. This means the FIFO content
+/// does not need to be synchronized as we are sure we are reading
+/// data which has been written at least two cycles earlier.
+/// Furthermore, the read select logic into the FIFO is completely
+/// clocked by the destination clock domain which avoids
+/// inefficient data synchronization.
+///
+/// The FIFO size must be powers of two, which is why its depth is
+/// given as 2**LOG_DEPTH. LOG_DEPTH must be at least 1.
+///
+/// # Constraints
+///
+/// We need to make sure that the propagation delay of the
+/// data, read and write pointer is bound to the minimum of
+/// either the sending or receiving clock period to prevent
+/// an inconsistent state from being latched (if, for example, one
+/// bit of the read/write pointer has an excessive delay).
+/// Furthermore, we should deactivate setup and hold checks on
+/// the asynchronous signals.
+///
+/// ```
+/// set_ungroup [get_designs cdc_fifo_gray*] false
+/// set_boundary_optimization [get_designs cdc_fifo_gray*] false
+/// set_max_delay min(T_src, T_dst) \
+///     -through [get_pins -hierarchical -filter async] \
+///     -through [get_pins -hierarchical -filter async]
+/// set_false_path -hold \
+///     -through [get_pins -hierarchical -filter async] \
+///     -through [get_pins -hierarchical -filter async]
+/// ```
+
+`include "common_cells/registers.svh"
+
+(* no_ungroup *)
+(* no_boundary_optimization *)
+module cdc_fifo_gray #(  // Wrapper connecting the source and destination halves of the FIFO
+  /// The width of the default logic type.
+  parameter int unsigned WIDTH = 1,
+  /// The data type of the payload transported by the FIFO.
+  parameter type T = logic [WIDTH-1:0],
+  /// The FIFO's depth given as 2**LOG_DEPTH.
+  parameter int LOG_DEPTH = 3,
+  /// The number of synchronization registers to insert on the async pointers.
+  parameter int SYNC_STAGES = 2
+) (
+  input  logic src_rst_ni,   // Source-domain reset, active low
+  input  logic src_clk_i,    // Source-domain (push) clock
+  input  T     src_data_i,   // Item to push
+  input  logic src_valid_i,  // Push request
+  output logic src_ready_o,  // FIFO is not full as seen from the source domain
+
+  input  logic dst_rst_ni,   // Destination-domain reset, active low
+  input  logic dst_clk_i,    // Destination-domain (pop) clock
+  output T     dst_data_o,   // Popped item
+  output logic dst_valid_o,  // `dst_data_o` is valid
+  input  logic dst_ready_i   // Pop request
+);
+
+  T [2**LOG_DEPTH-1:0] async_data;  // entire FIFO storage, exposed by the source half
+  logic [LOG_DEPTH:0]  async_wptr;  // write pointer register of the source half
+  logic [LOG_DEPTH:0]  async_rptr;  // read pointer register of the destination half
+
+  // SYNC_STAGES is forwarded so that overriding it here changes the pointer synchronizers.
+  cdc_fifo_gray_src #(
+    .T           ( T           ),
+    .LOG_DEPTH   ( LOG_DEPTH   ),
+    .SYNC_STAGES ( SYNC_STAGES )
+  ) i_src (
+    .src_rst_ni,
+    .src_clk_i,
+    .src_data_i,
+    .src_valid_i,
+    .src_ready_o,
+    (* async *) .async_data_o ( async_data ),
+    (* async *) .async_wptr_o ( async_wptr ),
+    (* async *) .async_rptr_i ( async_rptr )
+  );
+
+  cdc_fifo_gray_dst #(
+    .T           ( T           ),
+    .LOG_DEPTH   ( LOG_DEPTH   ),
+    .SYNC_STAGES ( SYNC_STAGES )
+  ) i_dst (
+    .dst_rst_ni,
+    .dst_clk_i,
+    .dst_data_o,
+    .dst_valid_o,
+    .dst_ready_i,
+    (* async *) .async_data_i ( async_data ),
+    (* async *) .async_wptr_i ( async_wptr ),
+    (* async *) .async_rptr_o ( async_rptr )
+  );
+
+  // Check the invariants.
+  // pragma translate_off
+  `ifndef VERILATOR
+  initial assert(LOG_DEPTH > 0);
+  initial assert(SYNC_STAGES >= 2);
+  `endif
+  // pragma translate_on
+
+endmodule
+
+
+(* no_ungroup *)
+(* no_boundary_optimization *)
+module cdc_fifo_gray_src #(  // Source half: FIFO storage, gray write pointer, full detection
+  parameter type T = logic,       // Payload type
+  parameter int LOG_DEPTH = 3,    // FIFO holds 2**LOG_DEPTH words
+  parameter int SYNC_STAGES = 2   // STAGES of each read-pointer bit synchronizer
+)(
+  input  logic src_rst_ni,   // Reset, active low
+  input  logic src_clk_i,    // Source-domain clock
+  input  T     src_data_i,   // Item to push
+  input  logic src_valid_i,  // Push request
+  output logic src_ready_o,  // Low while the pointers indicate a full FIFO
+
+  output T [2**LOG_DEPTH-1:0] async_data_o,  // entire FIFO storage
+  output logic [LOG_DEPTH:0]  async_wptr_o,  // write pointer register (gray coded)
+  input  logic [LOG_DEPTH:0]  async_rptr_i   // read pointer from the other half (gray coded)
+);
+
+  localparam int PtrWidth = LOG_DEPTH+1;                       // index bits plus one wrap bit
+  localparam logic [PtrWidth-1:0] PtrFull = (1 << LOG_DEPTH);  // pointers differ only in the wrap bit
+
+  T [2**LOG_DEPTH-1:0] data_q;
+  logic [PtrWidth-1:0] wptr_q, wptr_d, wptr_bin, wptr_next, rptr, rptr_bin;
+
+  // Data FIFO. No reset signal is passed to the storage registers (see `FFLNR` in registers.svh).
+  assign async_data_o = data_q;
+  for (genvar i = 0; i < 2**LOG_DEPTH; i++) begin : gen_word
+    `FFLNR(data_q[i], src_data_i,
+          src_valid_i & src_ready_o & (wptr_bin[LOG_DEPTH-1:0] == i), src_clk_i)
+  end
+
+  // Read pointer: each bit is synchronized separately, then the word is converted to binary.
+  for (genvar i = 0; i < PtrWidth; i++) begin : gen_sync
+    sync #(.STAGES(SYNC_STAGES)) i_sync (
+      .clk_i    ( src_clk_i       ),
+      .rst_ni   ( src_rst_ni      ),
+      .serial_i ( async_rptr_i[i] ),
+      .serial_o ( rptr[i]         )
+    );
+  end
+  gray_to_binary #(PtrWidth) i_rptr_g2b (.A(rptr), .Z(rptr_bin));
+
+  // Write pointer: stored gray coded, incremented in binary and converted back.
+  assign wptr_next = wptr_bin+1;
+  gray_to_binary #(PtrWidth) i_wptr_g2b (.A(wptr_q), .Z(wptr_bin));
+  binary_to_gray #(PtrWidth) i_wptr_b2g (.A(wptr_next), .Z(wptr_d));
+  `FFLARN(wptr_q, wptr_d, src_valid_i & src_ready_o, '0, src_clk_i, src_rst_ni)
+  assign async_wptr_o = wptr_q;
+
+  // The pointers into the FIFO are one bit wider than the actual address into
+  // the FIFO. This makes detecting critical states very simple: if all but the
+  // topmost bit of rptr and wptr agree, the FIFO is in a critical state. If the
+  // topmost bit is equal, the FIFO is empty, otherwise it is full.
+  assign src_ready_o = ((wptr_bin ^ rptr_bin) != PtrFull);
+
+endmodule
+
+
+(* no_ungroup *)
+(* no_boundary_optimization *)
+module cdc_fifo_gray_dst #(  // Destination half: gray read pointer, empty detection, output stage
+  parameter type T = logic,       // Payload type
+  parameter int LOG_DEPTH = 3,    // FIFO holds 2**LOG_DEPTH words
+  parameter int SYNC_STAGES = 2   // STAGES of each write-pointer bit synchronizer
+)(
+  input  logic dst_rst_ni,   // Reset, active low
+  input  logic dst_clk_i,    // Destination-domain clock
+  output T     dst_data_o,   // Output of the spill register
+  output logic dst_valid_o,  // Valid of the spill register
+  input  logic dst_ready_i,  // Ready into the spill register
+
+  input  T [2**LOG_DEPTH-1:0] async_data_i,  // entire FIFO storage of the other half
+  input  logic [LOG_DEPTH:0]  async_wptr_i,  // write pointer from the other half (gray coded)
+  output logic [LOG_DEPTH:0]  async_rptr_o   // read pointer register (gray coded)
+);
+
+  localparam int PtrWidth = LOG_DEPTH+1;         // index bits plus one wrap bit
+  localparam logic [PtrWidth-1:0] PtrEmpty = '0; // pointers are identical
+
+  T dst_data;
+  logic [PtrWidth-1:0] rptr_q, rptr_d, rptr_bin, rptr_bin_d, rptr_next, wptr, wptr_bin;
+  logic dst_valid, dst_ready;  // handshake between the FIFO read side and the spill register
+  // Data selector: pick the FIFO word addressed by the index bits of the binary read pointer.
+  assign dst_data = async_data_i[rptr_bin[LOG_DEPTH-1:0]];
+
+  // Read pointer: stored gray coded, incremented in binary and converted back.
+  assign rptr_next = rptr_bin+1;
+  gray_to_binary #(PtrWidth) i_rptr_g2b (.A(rptr_q), .Z(rptr_bin));
+  binary_to_gray #(PtrWidth) i_rptr_b2g (.A(rptr_next), .Z(rptr_d));
+  `FFLARN(rptr_q, rptr_d, dst_valid & dst_ready, '0, dst_clk_i, dst_rst_ni)
+  assign async_rptr_o = rptr_q;
+
+  // Write pointer: each bit is synchronized separately, then the word is converted to binary.
+  for (genvar i = 0; i < PtrWidth; i++) begin : gen_sync
+    sync #(.STAGES(SYNC_STAGES)) i_sync (
+      .clk_i    ( dst_clk_i       ),
+      .rst_ni   ( dst_rst_ni      ),
+      .serial_i ( async_wptr_i[i] ),
+      .serial_o ( wptr[i]         )
+    );
+  end
+  gray_to_binary #(PtrWidth) i_wptr_g2b (.A(wptr), .Z(wptr_bin));
+
+  // The pointers into the FIFO are one bit wider than the actual address into
+  // the FIFO. This makes detecting critical states very simple: if all but the
+  // topmost bit of rptr and wptr agree, the FIFO is in a critical state. If the
+  // topmost bit is equal, the FIFO is empty, otherwise it is full.
+  assign dst_valid = ((wptr_bin ^ rptr_bin) != PtrEmpty);
+
+  // Cut the combinatorial path with a spill register.
+  spill_register #(
+    .T       ( T           )
+  ) i_spill_register (
+    .clk_i   ( dst_clk_i   ),
+    .rst_ni  ( dst_rst_ni  ),
+    .valid_i ( dst_valid   ),
+    .ready_o ( dst_ready   ),
+    .data_i  ( dst_data    ),
+    .valid_o ( dst_valid_o ),
+    .ready_i ( dst_ready_i ),
+    .data_o  ( dst_data_o  )
+  );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv b/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
new file mode 100644
index 0000000000..9f35a44e98
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv
@@ -0,0 +1,61 @@
+// Copyright 2016 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// cf_math_pkg: Constant Function Implementations of Mathematical Functions for HDL Elaboration
+///
+/// This package contains a collection of mathematical functions that are commonly used when defining
+/// the value of constants in HDL code.  These functions are implemented as Verilog constant
+/// functions.  Introduced in Verilog 2001 (IEEE Std 1364-2001), a constant function (§ 10.3.5) is a
+/// function whose value can be evaluated at compile time or during elaboration.  A constant function
+/// must be called with arguments that are constants.
+package cf_math_pkg;
+
+    /// Ceiled Division of Two Natural Numbers
+    ///
+    /// Returns the quotient of two natural numbers, rounded towards plus infinity.
+    function automatic integer ceil_div (input longint dividend, input longint divisor);
+        // Quotient is formed at the full 64-bit argument width and only narrowed to the
+        // 32-bit `integer` result when it is returned.
+        automatic longint quotient;
+
+        // Argument checks.  They are compiled out for synthesis (translate_off) and for
+        // Verilator, so the computation below must still terminate when they are absent.
+        // pragma translate_off
+        `ifndef VERILATOR
+        if (dividend < 0) begin
+            $fatal(1, "Dividend %0d is not a natural number!", dividend);
+        end
+
+        if (divisor < 0) begin
+            $fatal(1, "Divisor %0d is not a natural number!", divisor);
+        end
+
+        if (divisor == 0) begin
+            $fatal(1, "Division by zero!");
+        end
+        `endif
+        // pragma translate_on
+
+        // Closed form instead of repeated subtraction: the cost no longer grows with the
+        // quotient, and a zero divisor cannot stall elaboration when the checks above are
+        // compiled out.  Non-positive dividends yield 0.
+        quotient = 0;
+        if (dividend > 0) begin
+            quotient = dividend / divisor;
+            // Any non-zero remainder rounds the result up.  Testing the remainder, rather
+            // than adding `divisor - 1` first, avoids overflow for very large dividends.
+            if ((dividend % divisor) != 0) begin
+                quotient = quotient + 1;
+            end
+        end
+        return integer'(quotient);
+    endfunction
+
+    /// Index width required to be able to represent up to `num_idx` indices as a binary
+    /// encoded signal.
+    /// Ensures that the minimum width of an index signal is `1`, regardless of parametrization.
+    ///
+    /// Sample usage in type definition:
+    /// As parameter:
+    ///   `parameter type idx_t = logic[cf_math_pkg::idx_width(NumIdx)-1:0]`
+    /// As typedef:
+    ///   `typedef logic [cf_math_pkg::idx_width(NumIdx)-1:0] idx_t`
+    function automatic integer unsigned idx_width (input integer unsigned num_idx);
+        // Zero or one index still needs a one-bit signal, so the width never drops below 1.
+        if (!(num_idx > 32'd1)) begin
+            return 32'd1;
+        end
+        // Otherwise the width is the number of bits needed to encode `num_idx - 1`.
+        return unsigned'($clog2(num_idx));
+    endfunction
+
+endpackage
diff --git a/vendor/pulp-platform/common_cells/src/clk_div.sv b/vendor/pulp-platform/common_cells/src/clk_div.sv
new file mode 100644
index 0000000000..b1df809f74
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/clk_div.sv
@@ -0,0 +1,42 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Divides the clock by an integer factor
+// Integer clock divider.
+//
+// While `en_i` is high, `clk_o` carries a pulse that is one `clk_i` cycle wide once every
+// `RATIO` enabled `clk_i` cycles (the duty cycle is therefore 1/RATIO, not 50%).  While
+// `en_i` is low the output is held low and the cycle count is frozen.  In test mode the
+// divider is bypassed and `clk_o` follows `clk_i` directly.
+//
+// Parameters:
+//   RATIO - division factor, expected to be >= 1.  With RATIO == 1 the registered output
+//           is simply high on every enabled cycle.
+module clk_div #(
+    parameter int unsigned RATIO = 4
+)(
+    input  logic clk_i,      // Clock
+    input  logic rst_ni,     // Asynchronous reset active low
+    input  logic testmode_i, // testmode
+    input  logic en_i,       // enable clock divider
+    output logic clk_o       // divided clock out
+);
+    // The counter only has to hold 0 .. RATIO-1, so size it with $clog2 (at least one bit)
+    // instead of using RATIO bits.  This also removes the `RATIO[RATIO-1:0]` part-select,
+    // which is out of range for RATIO > 32.
+    localparam int unsigned CntWidth = (RATIO > 1) ? $clog2(RATIO) : 1;
+    localparam logic [CntWidth-1:0] CntLast = CntWidth'(RATIO - 1);
+
+    logic [CntWidth-1:0] counter_q;
+    logic clk_q;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            clk_q     <= 1'b0;
+            counter_q <= '0;
+        end else begin
+            // Default: output low; only the terminal count raises it for one cycle.
+            clk_q <= 1'b0;
+            if (en_i) begin
+                if (counter_q == CntLast) begin
+                    clk_q     <= 1'b1;
+                    // Restart the period.  Without this wrap the counter would stay at
+                    // its terminal value and the output would remain high on every
+                    // enabled cycle instead of dividing the clock.
+                    counter_q <= '0;
+                end else begin
+                    counter_q <= counter_q + 1;
+                end
+            end
+        end
+    end
+    // output assignment - bypass in testmode
+    assign clk_o = testmode_i ? clk_i : clk_q;
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/counter.sv b/vendor/pulp-platform/common_cells/src/counter.sv
new file mode 100644
index 0000000000..43392e4bfd
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/counter.sv
@@ -0,0 +1,43 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Generic up/down counter
+
+// Up/down counter with a fixed step of one.
+//
+// Thin wrapper that instantiates `delta_counter` with its `delta_i` input tied to the
+// constant 1; every other port and both parameters are passed straight through.
+module counter #(
+    parameter int unsigned WIDTH = 4,
+    parameter bit STICKY_OVERFLOW = 1'b0
+)(
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             clear_i, // synchronous clear
+    input  logic             en_i,    // enable the counter
+    input  logic             load_i,  // load a new value
+    input  logic             down_i,  // downcount, default is up
+    input  logic [WIDTH-1:0] d_i,
+    output logic [WIDTH-1:0] q_o,
+    output logic             overflow_o
+);
+    // Step size handed to the generic counter: the value 1, WIDTH bits wide.
+    logic [WIDTH-1:0] unit_step;
+    assign unit_step = {{WIDTH-1{1'b0}}, 1'b1};
+
+    delta_counter #(
+        .WIDTH           ( WIDTH           ),
+        .STICKY_OVERFLOW ( STICKY_OVERFLOW )
+    ) i_counter (
+        .clk_i      ( clk_i      ),
+        .rst_ni     ( rst_ni     ),
+        .clear_i    ( clear_i    ),
+        .en_i       ( en_i       ),
+        .load_i     ( load_i     ),
+        .down_i     ( down_i     ),
+        .delta_i    ( unit_step  ),
+        .d_i        ( d_i        ),
+        .q_o        ( q_o        ),
+        .overflow_o ( overflow_o )
+    );
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/delta_counter.sv b/vendor/pulp-platform/common_cells/src/delta_counter.sv
new file mode 100644
index 0000000000..90b5cffa9a
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/delta_counter.sv
@@ -0,0 +1,74 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Up/down counter with variable delta
+
+// Up/down counter whose step size is supplied at run time on `delta_i`.
+//
+// Control priority, highest first: `clear_i` (count to zero), `load_i` (count to `d_i`),
+// `en_i` (add or subtract `delta_i` depending on `down_i`); otherwise the count holds.
+//
+// Parameters:
+//   WIDTH           - width of the visible count `q_o`.
+//   STICKY_OVERFLOW - 0: `overflow_o` is the extra bit kept above the count.
+//                     1: `overflow_o` is a separate flag that, once set, stays set until
+//                        `clear_i` or `load_i`.
+module delta_counter #(
+    parameter int unsigned WIDTH = 4,
+    parameter bit STICKY_OVERFLOW = 1'b0
+)(
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             clear_i, // synchronous clear
+    input  logic             en_i,    // enable the counter
+    input  logic             load_i,  // load a new value
+    input  logic             down_i,  // downcount, default is up
+    input  logic [WIDTH-1:0] delta_i,
+    input  logic [WIDTH-1:0] d_i,
+    output logic [WIDTH-1:0] q_o,
+    output logic             overflow_o
+);
+    // The count is kept one bit wider than `q_o`; the extra top bit captures the carry
+    // of an addition or the borrow of a subtraction.
+    logic [WIDTH:0] counter_q, counter_d;
+    // Both candidate next values, evaluated at the full WIDTH+1 bits.
+    logic [WIDTH:0] count_up, count_down;
+
+    assign count_up   = counter_q + delta_i;
+    assign count_down = counter_q - delta_i;
+
+    // Only the lower WIDTH bits are visible to the outside.
+    assign q_o = counter_q[WIDTH-1:0];
+
+    // Count register, asynchronously cleared by the active-low reset.
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+           counter_q <= '0;
+        end else begin
+           counter_q <= counter_d;
+        end
+    end
+
+    // Next-count selection; holds the current value when no control input is active.
+    always_comb begin
+        counter_d = counter_q;
+
+        if (clear_i) begin
+            counter_d = '0;
+        end else if (load_i) begin
+            // A load also clears the extra top bit.
+            counter_d = {1'b0, d_i};
+        end else if (en_i && down_i) begin
+            counter_d = count_down;
+        end else if (en_i) begin
+            counter_d = count_up;
+        end
+    end
+
+    if (STICKY_OVERFLOW) begin : gen_sticky_overflow
+        logic overflow_d, overflow_q;
+        // Would this step leave the range representable in WIDTH bits?
+        logic wraps_down, wraps_up;
+
+        assign wraps_down = delta_i > counter_q[WIDTH-1:0];
+        assign wraps_up   = counter_q[WIDTH-1:0] > ({WIDTH{1'b1}} - delta_i);
+
+        // Flag register; the reset value is selected inside the assignment.
+        always_ff @(posedge clk_i or negedge rst_ni) overflow_q <= rst_ni ? overflow_d : 1'b0;
+
+        always_comb begin
+            // Once set, the flag keeps its value until it is explicitly cleared.
+            overflow_d = overflow_q;
+            if (clear_i || load_i) begin
+                overflow_d = 1'b0;
+            end else if (!overflow_q && en_i && down_i) begin
+                overflow_d = wraps_down;
+            end else if (!overflow_q && en_i) begin
+                overflow_d = wraps_up;
+            end
+        end
+        assign overflow_o = overflow_q;
+    end else begin : gen_transient_overflow
+        // counter overflowed if the MSB is set
+        assign overflow_o = counter_q[WIDTH];
+    end
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/clock_divider.sv b/vendor/pulp-platform/common_cells/src/deprecated/clock_divider.sv
new file mode 100644
index 0000000000..343b0a2386
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/clock_divider.sv
@@ -0,0 +1,191 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+//                                                                            //
+// Company:        Multitherman Laboratory @ DEIS - University of Bologna     //
+//                    Viale Risorgimento 2 40136                              //
+//                    Bologna - fax 0512093785 -                              //
+//                                                                            //
+// Engineer:       Antonio Pullini - pullinia@iis.ee.ethz.ch                  //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    13/02/2013                                                 //
+// Design Name:    ULPSoC                                                     //
+// Module Name:    clock_divider                                              //
+// Project Name:   ULPSoC                                                     //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Clock Divider                                              //
+//                                                                            //
+//                                                                            //
+// Revision:                                                                  //
+// Revision v0.1 - File Created                                               //
+// Revision v0.2 - (19/03/2015)   clock_gating swapped in pulp_clock_gating   //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+// Programmable clock divider with an asynchronous configuration handshake and an
+// external clock-gate input.
+//
+// A new divider value is presented on `clk_div_data_i` together with `clk_div_valid_i`.
+// `pulp_sync_wedge` brings the valid signal into the `clk_i` domain (its `serial_o`
+// drives `clk_div_ack_o`, its `r_edge_o` drives the internal "new value" strobe;
+// presumably a one-cycle rising-edge pulse -- confirm against that cell).  The strobe
+// captures the data and starts a short sequence (STOP -> WAIT -> RELEASE) during which
+// the output clock is gated off while `clock_divider_counter` is reconfigured.
+//
+// Parameters:
+//   DIV_INIT    - forwarded to `clock_divider_counter`.
+//   BYPASS_INIT - forwarded to `clock_divider_counter`.
+module clock_divider
+#(
+    parameter DIV_INIT     = 0,
+    parameter BYPASS_INIT  = 1
+)
+(
+    input  logic       clk_i,
+    input  logic       rstn_i,
+    input  logic       test_mode_i,
+    input  logic       clk_gate_async_i,
+    input  logic [7:0] clk_div_data_i,
+    input  logic       clk_div_valid_i,
+    output logic       clk_div_ack_o,
+    output logic       clk_o
+);
+
+   enum                logic [1:0] {IDLE, STOP, WAIT, RELEASE} state, state_next;
+
+   logic               s_clk_out;
+   logic               s_clock_enable;
+   logic               s_clock_enable_gate;
+   logic               s_clk_div_valid;
+
+   logic [7:0]         reg_clk_div;
+   logic               s_clk_div_valid_sync;
+
+   logic               s_rstn_sync;
+
+   // Two-stage shift register for clk_gate_async_i: bit [1] is the first stage,
+   // bit [0] the second (final) stage.
+   logic [1:0]         reg_ext_gate_sync;
+
+    // Gate the output with the state machine's enable and the synchronised external gate.
+    // Bit [0] is selected explicitly; the previous 1-bit & 2-bit expression produced the
+    // same value only through implicit truncation of the result.
+    assign s_clock_enable_gate =  s_clock_enable & reg_ext_gate_sync[0];
+
+`ifndef PULP_FPGA_EMUL
+    rstgen i_rst_gen
+    (
+        // PAD FRAME SIGNALS
+        .clk_i(clk_i),
+        .rst_ni(rstn_i),            //async signal coming from pads
+
+        // TEST MODE
+        .test_mode_i(test_mode_i),
+
+        // OUTPUT RESET
+        .rst_no(s_rstn_sync),
+        .init_no()                 //not used
+    );
+  `else
+  // FPGA emulation: use the incoming reset directly.
+  assign s_rstn_sync = rstn_i;
+`endif
+
+
+    //handle the handshake with the soc_ctrl. Interface is now async
+    pulp_sync_wedge i_edge_prop
+    (
+        .clk_i(clk_i),
+        .rstn_i(s_rstn_sync),
+        .en_i(1'b1),
+        .serial_i(clk_div_valid_i),
+        .serial_o(clk_div_ack_o),
+        .r_edge_o(s_clk_div_valid_sync),
+        .f_edge_o()
+    );
+
+    clock_divider_counter
+    #(
+        .BYPASS_INIT(BYPASS_INIT),
+        .DIV_INIT(DIV_INIT)
+    )
+    i_clkdiv_cnt
+    (
+        .clk(clk_i),
+        .rstn(s_rstn_sync),
+        .test_mode(test_mode_i),
+        .clk_div(reg_clk_div),
+        .clk_div_valid(s_clk_div_valid),
+        .clk_out(s_clk_out)
+    );
+
+    pulp_clock_gating i_clk_gate
+    (
+        .clk_i(s_clk_out),
+        .en_i(s_clock_enable_gate),
+        .test_en_i(test_mode_i),
+        .clk_o(clk_o)
+    );
+
+    // Reconfiguration state machine.  The clock enable is high only in IDLE; the
+    // counter's "new value" strobe is asserted only in STOP.
+    always_comb
+    begin
+        case(state)
+        IDLE:
+        begin
+            s_clock_enable   = 1'b1;
+            s_clk_div_valid  = 1'b0;
+            if (s_clk_div_valid_sync)
+                state_next = STOP;
+            else
+                state_next = IDLE;
+        end
+
+        STOP:
+        begin
+            s_clock_enable   = 1'b0;
+            s_clk_div_valid  = 1'b1;
+            state_next = WAIT;
+        end
+
+        WAIT:
+        begin
+            s_clock_enable   = 1'b0;
+            s_clk_div_valid  = 1'b0;
+            state_next = RELEASE;
+        end
+
+        RELEASE:
+        begin
+            s_clock_enable   = 1'b0;
+            s_clk_div_valid  = 1'b0;
+            state_next = IDLE;
+        end
+        endcase
+    end
+
+    // State register.
+    always_ff @(posedge clk_i or negedge s_rstn_sync)
+    begin
+        if (!s_rstn_sync)
+            state <= IDLE;
+        else
+            state <= state_next;
+    end
+
+    //sample the data when valid has been sync and there is a rise edge
+    always_ff @(posedge clk_i or negedge s_rstn_sync)
+    begin
+        if (!s_rstn_sync)
+            reg_clk_div <= '0;
+        else if (s_clk_div_valid_sync)
+                  reg_clk_div <= clk_div_data_i;
+    end
+
+    // Shift clk_gate_async_i through two flops clocked by clk_i before it is used
+    // in the gate enable above.
+    always_ff @(posedge clk_i or negedge s_rstn_sync)
+    begin
+        if (!s_rstn_sync)
+            reg_ext_gate_sync <= 2'b00;
+        else
+            reg_ext_gate_sync <= {clk_gate_async_i, reg_ext_gate_sync[1]};
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/clock_divider_counter.sv b/vendor/pulp-platform/common_cells/src/deprecated/clock_divider_counter.sv
new file mode 100644
index 0000000000..e5c222af95
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/clock_divider_counter.sv
@@ -0,0 +1,211 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company:        Multitherman Laboratory @ DEIS - University of Bologna     //
+//                    Viale Risorgimento 2 40136                              //
+//                    Bologna - fax 0512093785 -                              //
+//                                                                            //
+// Engineer:       Antonio Pullini - pullinia@iis.ee.ethz.ch                  //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    13/02/2013                                                 //
+// Design Name:    ULPSoC                                                     //
+// Module Name:    clock_divider_counter                                      //
+// Project Name:   ULPSoC                                                     //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    clock_divider_counter                                      //
+//                                                                            //
+//                                                                            //
+// Revision:                                                                  //
+// Revision v0.1 - File Created                                               //
+// Revision v0.2 - (19/03/2015)   clock_gating swapped in pulp_clock_gating   //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+
+// Counter core of the programmable clock divider.
+//
+// Builds `clk_out` from two toggling flops: `div1`, clocked on the rising edge of `clk`,
+// and `div2`, clocked on `clk_inv_test` (the inverted clock outside of DFT test mode).
+// The two are combined by a `pulp_clock_xor2` cell; `div2` only toggles for odd divider
+// values.  When `bypass` or `test_mode` is set, the output mux is steered to `clk`
+// (assuming `clk_sel_i` = 1 selects `clk1_i` -- confirm against the mux cell).
+//
+// Parameters:
+//   BYPASS_INIT - reset value of the `bypass` flag.
+//   DIV_INIT    - reset value of the terminal count `clk_cnt`.
+module clock_divider_counter
+#(
+    parameter BYPASS_INIT = 1,
+    parameter DIV_INIT    = 'hFF
+)
+(
+    input  logic       clk,
+    input  logic       rstn,
+    input  logic       test_mode,
+    input  logic [7:0] clk_div,
+    input  logic       clk_div_valid,
+    output logic       clk_out
+);
+
+    logic [7:0]         counter;        // running cycle count (frozen while bypass is set)
+    logic [7:0]         counter_next;
+    logic [7:0]         clk_cnt;        // terminal count at which `counter` wraps to zero
+    logic               en1;            // toggle enable for div1
+    logic               en2;            // toggle enable for div2
+
+    logic               is_odd;         // set when the programmed divider value is odd
+
+    logic               div1;
+    logic               div2;
+    logic               div2_neg_sync;  // declared but not used in this module
+
+    logic [7:0]         clk_cnt_odd;
+    logic [7:0]         clk_cnt_odd_incr; // only referenced by the commented-out code below
+    logic [7:0]         clk_cnt_even;
+    logic [7:0]         clk_cnt_en2;
+
+    logic               bypass;
+
+    logic               clk_out_gen;
+    logic               clk_div_valid_reg;
+
+    logic               clk_inv_test;
+    logic               clk_inv;
+
+    //        assign clk_cnt_odd_incr = clk_div + 1;
+    //        assign clk_cnt_odd  = {1'b0,clk_cnt_odd_incr[7:1]}; //if odd divider than clk_cnt = (clk_div+1)/2
+    assign clk_cnt_odd  = clk_div - 8'h1; // odd divider: terminal count = clk_div - 1
+    assign clk_cnt_even = (clk_div == 8'h2) ? 8'h0 : ({1'b0,clk_div[7:1]} - 8'h1);   // even divider: terminal count = clk_div/2 - 1 (0 for clk_div == 2)
+    assign clk_cnt_en2  = {1'b0,clk_cnt[7:1]} + 8'h1; // count at which div2 toggles: clk_cnt/2 + 1
+
+    always_comb
+    begin
+        // div1 toggles whenever the counter is at zero.
+        if (counter == 'h0)
+            en1 = 1'b1;
+        else
+            en1 = 1'b0;
+
+        // The counter restarts on a new divider value or at the terminal count.
+        if (clk_div_valid)
+            counter_next = 'h0;
+        else if (counter == clk_cnt)
+                counter_next = 'h0;
+             else
+                counter_next = counter + 1;
+
+        // div2 toggles when the counter reaches clk_cnt_en2, but never while a new
+        // divider value is being applied.
+        if (clk_div_valid)
+            en2 = 1'b0;
+        else if (counter == clk_cnt_en2)
+                en2 = 1'b1;
+             else
+                en2 = 1'b0;
+    end
+
+   // Rising-edge state: counter, configuration registers and div1.
+   always_ff @(posedge clk, negedge rstn)
+   begin
+        if (~rstn)
+        begin
+             counter            <=  'h0;
+             div1               <= 1'b0;
+             bypass             <= BYPASS_INIT;
+             clk_cnt            <= DIV_INIT;
+             is_odd             <= 1'b0;
+             clk_div_valid_reg  <= 1'b0;
+        end
+        else
+        begin
+              // The counter only advances while the divider is active.
+              if (!bypass)
+                  counter <= counter_next;
+
+              // Registered copy of the strobe; used to clear div2 below.
+              clk_div_valid_reg <= clk_div_valid;
+              if (clk_div_valid)
+              begin
+                // Divider values 0 and 1 select bypass.
+                if ((clk_div == 8'h0) || (clk_div == 8'h1))
+                  begin
+                      bypass <= 1'b1;
+                      clk_cnt <= 'h0;
+                      is_odd  <= 1'b0;
+                  end
+                else
+                  begin
+                      bypass <= 1'b0;
+                      if (clk_div[0])
+                        begin
+                          is_odd  <= 1'b1;
+                          clk_cnt <= clk_cnt_odd;
+                        end
+                      else
+                        begin
+                          is_odd  <= 1'b0;
+                          clk_cnt <= clk_cnt_even;
+                        end
+                  end
+                // Any new divider value clears div1.
+                div1 <= 1'b0;
+              end
+              else
+              begin
+                if (en1 && !bypass)
+                  div1 <= ~div1;
+              end
+        end
+    end
+
+    pulp_clock_inverter clk_inv_i
+    (
+        .clk_i(clk),
+        .clk_o(clk_inv)
+    );
+
+// Clock for the div2 flop: with PULP_DFT defined (and not in FPGA emulation) a clock
+// mux selects between clk_inv and clk under control of test_mode; otherwise clk_inv
+// is used directly.
+`ifndef PULP_FPGA_EMUL
+ `ifdef PULP_DFT
+   pulp_clock_mux2 clk_muxinv_i
+     (
+      .clk0_i(clk_inv),
+      .clk1_i(clk),
+      .clk_sel_i(test_mode),
+      .clk_o(clk_inv_test)
+      );
+ `else
+   assign clk_inv_test = clk_inv;
+ `endif
+`else
+   assign clk_inv_test = clk_inv;
+`endif
+
+    // div2 flop, clocked by clk_inv_test.  Cleared by the registered strobe; toggles
+    // only for odd dividers while the divider is active.
+    always_ff @(posedge clk_inv_test or negedge rstn)
+    begin
+        if (!rstn)
+        begin
+            div2    <= 1'b0;
+        end
+        else
+        begin
+            if (clk_div_valid_reg)
+                div2 <= 1'b0;
+            else if (en2 && is_odd && !bypass)
+                    div2 <= ~div2;
+        end
+    end // always_ff @ (posedge clk_inv_test or negedge rstn)
+
+    // Combine the two toggling flops into the generated clock.
+    pulp_clock_xor2 clock_xor_i
+    (
+        .clk_o(clk_out_gen),
+        .clk0_i(div1),
+        .clk1_i(div2)
+    );
+
+    // Output selection between the generated clock and the undivided input clock.
+    pulp_clock_mux2 clk_mux_i
+    (
+        .clk0_i(clk_out_gen),
+        .clk1_i(clk),
+        .clk_sel_i(bypass || test_mode),
+        .clk_o(clk_out)
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv b/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv
new file mode 100644
index 0000000000..31295e80ec
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv
@@ -0,0 +1,57 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+/* verilator lint_off DECLFILENAME */
+// Legacy `fifo` interface, implemented as a wrapper around `fifo_v2`.
+//
+// Every port and parameter is forwarded unchanged, with two exceptions: THRESHOLD is
+// passed as the almost-full threshold and `threshold_o` is driven from `alm_full_o`;
+// the almost-empty output of `fifo_v2` is left unconnected.
+module fifo #(
+    parameter bit          FALL_THROUGH = 1'b0, // fifo is in fall-through mode
+    parameter int unsigned DATA_WIDTH   = 32,   // default data width if the fifo is of type logic
+    parameter int unsigned DEPTH        = 8,    // depth can be arbitrary from 0 to 2**32
+    parameter int unsigned THRESHOLD    = 1,    // fill count until when to assert threshold_o
+    parameter type dtype                = logic [DATA_WIDTH-1:0]
+)(
+    input  logic  clk_i,            // Clock
+    input  logic  rst_ni,           // Asynchronous reset active low
+    input  logic  flush_i,          // flush the queue
+    input  logic  testmode_i,       // test_mode to bypass clock gating
+    // status flags
+    output logic  full_o,           // queue is full
+    output logic  empty_o,          // queue is empty
+    output logic  threshold_o,      // the FIFO is above the specified threshold
+    // as long as the queue is not full we can push new data
+    input  dtype  data_i,           // data to push into the queue
+    input  logic  push_i,           // data is valid and can be pushed to the queue
+    // as long as the queue is not empty we can pop new elements
+    output dtype  data_o,           // output data
+    input  logic  pop_i             // pop head from queue
+);
+    fifo_v2 #(
+        .FALL_THROUGH ( FALL_THROUGH ),
+        .DATA_WIDTH   ( DATA_WIDTH   ),
+        .DEPTH        ( DEPTH        ),
+        .ALM_FULL_TH  ( THRESHOLD    ),
+        .dtype        ( dtype        )
+    ) impl (
+        // Ports that share their name with a signal of this wrapper.
+        .clk_i,
+        .rst_ni,
+        .flush_i,
+        .testmode_i,
+        .full_o,
+        .empty_o,
+        .data_i,
+        .push_i,
+        .data_o,
+        .pop_i,
+        // Renamed or unused ports.
+        .alm_full_o  ( threshold_o ),
+        .alm_empty_o (             )
+    );
+endmodule
+/* verilator lint_on DECLFILENAME */
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv b/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv
new file mode 100644
index 0000000000..9c87ed9692
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv
@@ -0,0 +1,79 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+// FIFO with almost-full / almost-empty flags, implemented on top of `fifo_v3`.
+//
+// `alm_full_o` is asserted while the fill level is >= ALM_FULL_TH and `alm_empty_o`
+// while it is <= ALM_EMPTY_TH.  Both flags are tied low for DEPTH == 0.  All other
+// ports are forwarded to `fifo_v3` unchanged.
+module fifo_v2 #(
+    parameter bit          FALL_THROUGH = 1'b0, // fifo is in fall-through mode
+    parameter int unsigned DATA_WIDTH   = 32,   // default data width if the fifo is of type logic
+    parameter int unsigned DEPTH        = 8,    // depth can be arbitrary from 0 to 2**32
+    parameter int unsigned ALM_EMPTY_TH = 1,    // almost empty threshold (when to assert alm_empty_o)
+    parameter int unsigned ALM_FULL_TH  = 1,    // almost full threshold (when to assert alm_full_o)
+    parameter type dtype                = logic [DATA_WIDTH-1:0],
+    // DO NOT OVERWRITE THIS PARAMETER
+    parameter int unsigned ADDR_DEPTH   = (DEPTH > 1) ? $clog2(DEPTH) : 1
+)(
+    input  logic  clk_i,            // Clock
+    input  logic  rst_ni,           // Asynchronous reset active low
+    input  logic  flush_i,          // flush the queue
+    input  logic  testmode_i,       // test_mode to bypass clock gating
+    // status flags
+    output logic  full_o,           // queue is full
+    output logic  empty_o,          // queue is empty
+    output logic  alm_full_o,       // FIFO fillstate >= the specified threshold
+    output logic  alm_empty_o,      // FIFO fillstate <= the specified threshold
+    // as long as the queue is not full we can push new data
+    input  dtype  data_i,           // data to push into the queue
+    input  logic  push_i,           // data is valid and can be pushed to the queue
+    // as long as the queue is not empty we can pop new elements
+    output dtype  data_o,           // output data
+    input  logic  pop_i             // pop head from queue
+);
+
+    // Fill count reported by fifo_v3.  It is only ADDR_DEPTH = $clog2(DEPTH) bits wide,
+    // so for a power-of-two DEPTH it cannot hold the value DEPTH itself.
+    logic [ADDR_DEPTH-1:0] usage;
+
+    // generate threshold parameters
+    if (DEPTH == 0) begin : gen_no_thresholds
+        assign alm_full_o  = 1'b0; // that signal does not make any sense in a FIFO of depth 0
+        assign alm_empty_o = 1'b0; // that signal does not make any sense in a FIFO of depth 0
+    end else begin : gen_thresholds
+        // Rebuild a fill level with one extra bit so that "DEPTH entries stored" is
+        // representable: when the FIFO reports full the level is DEPTH, otherwise it is
+        // the reported usage.  This relies on full_o meaning that DEPTH entries are
+        // stored (for DEPTH > 0).
+        localparam logic [ADDR_DEPTH:0] FULL_LEVEL = DEPTH[ADDR_DEPTH:0];
+        logic [ADDR_DEPTH:0] fill_level;
+        assign fill_level   = full_o ? FULL_LEVEL : {1'b0, usage};
+
+        // Compare against the untruncated thresholds.  Slicing them to ADDR_DEPTH bits
+        // turned a threshold equal to a power-of-two DEPTH into 0.
+        assign alm_full_o   = (fill_level >= ALM_FULL_TH);
+        assign alm_empty_o  = (fill_level <= ALM_EMPTY_TH);
+    end
+
+    fifo_v3 #(
+        .FALL_THROUGH ( FALL_THROUGH ),
+        .DATA_WIDTH   ( DATA_WIDTH   ),
+        .DEPTH        ( DEPTH        ),
+        .dtype        ( dtype        )
+    ) i_fifo_v3 (
+        .clk_i,
+        .rst_ni,
+        .flush_i,
+        .testmode_i,
+        .full_o,
+        .empty_o,
+        .usage_o (usage),
+        .data_i,
+        .push_i,
+        .data_o,
+        .pop_i
+    );
+
+    // Elaboration-time sanity checks on the thresholds (simulation only).
+    // pragma translate_off
+    `ifndef VERILATOR
+        initial begin
+            assert (ALM_FULL_TH <= DEPTH)  else $error("ALM_FULL_TH can't be larger than the DEPTH.");
+            assert (ALM_EMPTY_TH <= DEPTH) else $error("ALM_EMPTY_TH can't be larger than the DEPTH.");
+        end
+    `endif
+    // pragma translate_on
+
+endmodule // fifo_v2
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/find_first_one.sv b/vendor/pulp-platform/common_cells/src/deprecated/find_first_one.sv
new file mode 100644
index 0000000000..ee3ba20f70
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/find_first_one.sv
@@ -0,0 +1,83 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Deprecated, use lzc unit instead.
+
+/// A leading-one finder / leading zero counter.
+/// Set FLIP to 0 for find_first_one => first_one_o is the index of the first one (from the LSB)
+/// Set FLIP to 1 for leading zero counter => first_one_o is the number of leading zeroes (from the MSB)
+module find_first_one #(
+    /// The width of the input vector. The -1 default is only a placeholder; set a positive width.
+    parameter int WIDTH = -1,
+    parameter int FLIP = 0  // 0: search from the LSB, nonzero: search from the MSB (input is bit-reversed)
+)(
+    input  logic [WIDTH-1:0]         in_i,        // vector to search
+    output logic [$clog2(WIDTH)-1:0] first_one_o, // position of the first one in the (optionally reversed) input
+    output logic                     no_ones_o    // high when in_i contains no one
+);
+    // Number of levels of the binary selection tree built below.
+    localparam int NUM_LEVELS = $clog2(WIDTH);
+
+    // pragma translate_off
+    initial begin
+        assert(WIDTH > 0) else $error("WIDTH must be at least one bit.");
+    end
+    // pragma translate_on
+
+    logic [WIDTH-1:0][NUM_LEVELS-1:0]          index_lut;   // index_lut[j] holds the constant j
+    logic [2**NUM_LEVELS-1:0]                  sel_nodes;   // per tree node: a one exists below this node
+    logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0]  index_nodes; // per tree node: index selected below this node
+    // Input after the optional bit reversal selected by FLIP.
+    logic [WIDTH-1:0] in_tmp;
+
+    for (genvar i = 0; i < WIDTH; i++) begin
+        assign in_tmp[i] = FLIP ? in_i[WIDTH-1-i] : in_i[i];
+    end
+
+    for (genvar j = 0; j < WIDTH; j++) begin
+        assign index_lut[j] = j;
+    end
+
+    for (genvar level = 0; level < NUM_LEVELS; level++) begin
+        // Inner levels: a node ORs its two children and forwards the first child's index if that child is set.
+        if (level < NUM_LEVELS-1) begin
+            for (genvar l = 0; l < 2**level; l++) begin
+                assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
+                assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ?
+                    index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1];
+            end
+        end
+        // Last level: every node combines one pair of input bits; pairs beyond WIDTH are tied to zero.
+        if (level == NUM_LEVELS-1) begin
+            for (genvar k = 0; k < 2**level; k++) begin
+                // if two successive indices are still in the vector...
+                if (k * 2 < WIDTH-1) begin
+                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2] | in_tmp[k*2+1];
+                    assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
+                end
+                // if only the first index is still in the vector...
+                if (k * 2 == WIDTH-1) begin
+                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2];
+                    assign index_nodes[2**level-1+k] = index_lut[k*2];
+                end
+                // if index is out of range
+                if (k * 2 > WIDTH-1) begin
+                    assign sel_nodes[2**level-1+k]   = 1'b0;
+                    assign index_nodes[2**level-1+k] = '0;
+                end
+            end
+        end
+    end
+    // WIDTH == 1 builds no tree (NUM_LEVELS == 0), so the input itself decides whether a one is present.
+    assign first_one_o = NUM_LEVELS > 0 ? index_nodes[0] : '0;
+    assign no_ones_o   = NUM_LEVELS > 0 ? ~sel_nodes[0]  : ~(|in_i);
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/generic_LFSR_8bit.sv b/vendor/pulp-platform/common_cells/src/deprecated/generic_LFSR_8bit.sv
new file mode 100644
index 0000000000..fb0080accf
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/generic_LFSR_8bit.sv
@@ -0,0 +1,64 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Igor Loi <igor.loi@unibo.it>
+
+module generic_LFSR_8bit #(
+  parameter OH_WIDTH  = 4,                  // number of one-hot output bits
+  parameter BIN_WIDTH = $clog2(OH_WIDTH),   // width of the binary output
+  parameter SEED      = 8'b00000000         // value loaded into the shift register on reset
+) (
+  output logic [OH_WIDTH-1:0]  data_OH_o,   // one-hot encoding of data_BIN_o
+  output logic [BIN_WIDTH-1:0] data_BIN_o,  // binary value taken from the LFSR state
+  input  logic                 enable_i,    // advance the LFSR by one step
+  input  logic                 clk,         // clock
+  input  logic                 rst_n        // asynchronous reset, active low
+);
+
+  // Generator built around an 8-bit linear feedback shift register.
+  // A slice of the register is exported in binary (data_BIN_o) and one-hot (data_OH_o) form.
+
+  // 8-bit shift register holding the LFSR state.
+  logic [7:0]           lfsr_q;
+  // Bit shifted into position 0 on every enabled clock edge.
+  logic                 feedback_bit;
+  // Slice of the LFSR state that is exported as the binary value.
+  logic [BIN_WIDTH-1:0] way_idx;
+
+  // Inverted XOR of the taps at bits 7, 3, 2 and 1.
+  assign feedback_bit = !(lfsr_q[7] ^ lfsr_q[3] ^ lfsr_q[2] ^ lfsr_q[1]);
+
+  assign data_BIN_o = way_idx;
+
+  // Shift up by one position and insert the feedback bit; the state is held while enable_i is low.
+  always_ff @(posedge clk, negedge rst_n) begin
+    if (rst_n == 1'b0) begin
+      lfsr_q <= SEED;
+    end else if (enable_i) begin
+      lfsr_q <= {lfsr_q[6:0], feedback_bit};
+    end
+  end
+
+  // The binary value is the state slice that starts at bit 1 (bit 0 is not used).
+  generate
+    if (OH_WIDTH == 2) begin
+      assign way_idx = lfsr_q[1];
+    end else begin
+      assign way_idx = lfsr_q[BIN_WIDTH:1];
+    end
+  endgenerate
+
+  // Binary to one-hot encoder: clear all bits, then set the selected one.
+  always_comb begin
+    data_OH_o          = '0;
+    data_OH_o[way_idx] = 1'b1;
+  end
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo.sv b/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo.sv
new file mode 100644
index 0000000000..ece4aac78e
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo.sv
@@ -0,0 +1,274 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// ============================================================================= //
+// Company:        Multitherman Laboratory @ DEIS - University of Bologna        //
+//                    Viale Risorgimento 2 40136                                 //
+//                    Bologna - fax 0512093785 -                                 //
+//                                                                               //
+// Engineer:       Igor Loi - igor.loi@unibo.it                                  //
+//                                                                               //
+//                                                                               //
+// Additional contributions by:                                                  //
+//                                                                               //
+//                                                                               //
+//                                                                               //
+// Create Date:    01/02/2014                                                    //
+// Design Name:    MISC                                                          //
+// Module Name:    generic_fifo                                                  //
+// Project Name:   PULP                                                          //
+// Language:       SystemVerilog                                                 //
+//                                                                               //
+// Description:   A simple FIFO used in the D_address_decoder, and D_allocator   //
+//                to store the destinations ports                                //
+//                                                                               //
+// Revision:                                                                     //
+// Revision v0.1 - 01/02/2014 : File Created                                     //
+// Revision v0.2 - 02/09/2015 : Updated with a global CG cell                    //
+//                                                                               //
+// ============================================================================= //
+
+module generic_fifo
+#(
+   parameter int unsigned          DATA_WIDTH = 32,  // width of one entry in bits
+   parameter int unsigned          DATA_DEPTH = 8    // number of entries
+)
+(
+   input  logic                                    clk,
+   input  logic                                    rst_n,       // asynchronous reset, active low
+   //PUSH SIDE
+   input  logic [DATA_WIDTH-1:0]                   data_i,      // entry to push
+   input  logic                                    valid_i,     // push request
+   output logic                                    grant_o,     // push is accepted; high in EMPTY and MIDDLE
+   //POP SIDE
+   output logic [DATA_WIDTH-1:0]                   data_o,      // entry at the pop pointer
+   output logic                                    valid_o,     // data_o is valid; high in MIDDLE and FULL
+   input  logic                                    grant_i,     // pop request
+
+   input  logic                                    test_mode_i  // drives test_en_i of the clock-gating cell
+);
+   // Register-based FIFO with a valid/grant handshake on each side and a three-state occupancy FSM.
+   // The storage registers use a clock that is gated off in cycles without a push (unless PULP_FPGA_EMUL is defined).
+   // Local Parameter
+   localparam int unsigned ADDR_DEPTH = $clog2(DATA_DEPTH);
+   enum logic [1:0] { EMPTY, FULL, MIDDLE } CS, NS;
+   // Internal Signals
+
+   logic       gate_clock;   // high in cycles where nothing is written to the storage registers
+   logic       clk_gated;    // clock of the storage registers
+
+   logic [ADDR_DEPTH-1:0]  Pop_Pointer_CS,  Pop_Pointer_NS;
+   logic [ADDR_DEPTH-1:0]  Push_Pointer_CS, Push_Pointer_NS;
+   logic [DATA_WIDTH-1:0]  FIFO_REGISTERS[DATA_DEPTH-1:0];
+   int unsigned            i;
+
+   // Parameter Check
+   // synopsys translate_off
+   initial begin : parameter_check
+      integer param_err_flg;
+      param_err_flg = 0;
+
+      if (DATA_WIDTH < 1) begin
+         param_err_flg = 1;
+         $display("ERROR: %m :\n  Invalid value (%d) for parameter DATA_WIDTH (legal range: greater than 1)", DATA_WIDTH );
+      end
+
+      if (DATA_DEPTH < 1) begin
+         param_err_flg = 1;
+         $display("ERROR: %m :\n  Invalid value (%d) for parameter DATA_DEPTH (legal range: greater than 1)", DATA_DEPTH );
+      end
+   end
+   // synopsys translate_on
+
+`ifndef PULP_FPGA_EMUL
+   cluster_clock_gating cg_cell
+   (
+     .clk_i     ( clk         ),
+     .en_i      (~gate_clock  ),
+     .test_en_i ( test_mode_i ),
+     .clk_o     ( clk_gated   )
+   );
+`else
+   assign clk_gated = clk;
+`endif
+
+   // UPDATE THE STATE
+   always_ff @(posedge clk, negedge rst_n)
+   begin
+       if(rst_n == 1'b0)
+       begin
+               CS              <= EMPTY;
+               Pop_Pointer_CS  <= {ADDR_DEPTH {1'b0}};
+               Push_Pointer_CS <= {ADDR_DEPTH {1'b0}};
+       end
+       else
+       begin
+               CS              <= NS;
+               Pop_Pointer_CS  <= Pop_Pointer_NS;
+               Push_Pointer_CS <= Push_Pointer_NS;
+       end
+   end
+
+
+   // Compute Next State
+   always_comb
+   begin
+      gate_clock      = 1'b0;
+
+      case(CS)
+
+      EMPTY:
+      begin
+          grant_o = 1'b1;
+          valid_o = 1'b0;
+
+          case(valid_i)
+          1'b0 :
+          begin
+                  NS              = EMPTY;
+                  Push_Pointer_NS = Push_Pointer_CS;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+                  gate_clock      = 1'b1;
+          end
+          // Wrap explicitly (as in MIDDLE) so a DATA_DEPTH that is not a power of two never yields an out-of-range pointer.
+          1'b1:
+          begin
+                  NS              = MIDDLE;
+                  Push_Pointer_NS = (Push_Pointer_CS == DATA_DEPTH-1) ? {ADDR_DEPTH {1'b0}} : Push_Pointer_CS + 1'b1;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+
+          endcase
+      end//~EMPTY
+
+      MIDDLE:
+      begin
+          grant_o = 1'b1;
+          valid_o = 1'b1;
+
+          case({valid_i,grant_i})
+
+          2'b01:
+          begin
+                  gate_clock      = 1'b1;
+
+                  if((Pop_Pointer_CS == Push_Pointer_CS -1 ) || ((Pop_Pointer_CS == DATA_DEPTH-1) && (Push_Pointer_CS == 0) ))
+                          NS              = EMPTY;
+                  else
+                          NS              = MIDDLE;
+
+                  Push_Pointer_NS = Push_Pointer_CS;
+
+                  if(Pop_Pointer_CS == DATA_DEPTH-1)
+                          Pop_Pointer_NS  = 0;
+                  else
+                          Pop_Pointer_NS  = Pop_Pointer_CS + 1'b1;
+          end
+
+          2'b00 :
+          begin
+                  gate_clock      = 1'b1;
+                  NS              = MIDDLE;
+                  Push_Pointer_NS = Push_Pointer_CS;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+
+          2'b11:
+          begin
+                  NS              = MIDDLE;
+
+                  if(Push_Pointer_CS == DATA_DEPTH-1)
+                          Push_Pointer_NS = 0;
+                  else
+                          Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                  if(Pop_Pointer_CS == DATA_DEPTH-1)
+                          Pop_Pointer_NS  = 0;
+                  else
+                          Pop_Pointer_NS  = Pop_Pointer_CS  + 1'b1;
+          end
+
+          2'b10:
+          begin
+                  if(( Push_Pointer_CS == Pop_Pointer_CS - 1) || ( (Push_Pointer_CS == DATA_DEPTH-1) && (Pop_Pointer_CS == 0) ))
+                          NS              = FULL;
+                  else
+                          NS        = MIDDLE;
+
+                  if(Push_Pointer_CS == DATA_DEPTH - 1)
+                          Push_Pointer_NS = 0;
+                  else
+                          Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+
+          endcase
+      end
+
+      FULL:
+      begin
+          grant_o     = 1'b0;
+          valid_o     = 1'b1;
+          gate_clock  = 1'b1;
+
+          case(grant_i)
+          1'b1:
+          begin
+                  NS              = MIDDLE;
+
+                  Push_Pointer_NS = Push_Pointer_CS;
+
+                  if(Pop_Pointer_CS == DATA_DEPTH-1)
+                          Pop_Pointer_NS  = 0;
+                  else
+                          Pop_Pointer_NS  = Pop_Pointer_CS  + 1'b1;
+          end
+
+          1'b0:
+          begin
+                  NS              = FULL;
+                  Push_Pointer_NS = Push_Pointer_CS;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+          endcase
+
+      end // end of FULL
+
+      default :
+      begin
+          gate_clock      = 1'b1;
+          grant_o         = 1'b0;
+          valid_o         = 1'b0;
+          NS              = EMPTY;
+          Pop_Pointer_NS  = 0;
+          Push_Pointer_NS = 0;
+      end
+
+      endcase
+   end
+   // Storage registers: cleared on reset, written at the push pointer whenever a push is accepted.
+   always_ff @(posedge clk_gated, negedge rst_n)
+   begin
+      if(rst_n == 1'b0)
+      begin
+      for (i=0; i< DATA_DEPTH; i++)
+         FIFO_REGISTERS[i] <= {DATA_WIDTH {1'b0}};
+      end
+      else
+      begin
+         if((grant_o == 1'b1) && (valid_i == 1'b1))
+            FIFO_REGISTERS[Push_Pointer_CS] <= data_i;
+      end
+   end
+
+   assign data_o = FIFO_REGISTERS[Pop_Pointer_CS];
+
+endmodule // generic_fifo
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo_adv.sv b/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo_adv.sv
new file mode 100644
index 0000000000..df6cc0d796
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/generic_fifo_adv.sv
@@ -0,0 +1,264 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Igor Loi <igor.loi@unibo.it>
+
+module generic_fifo_adv
+#(
+   parameter int unsigned          DATA_WIDTH = 32,  // width of one entry in bits
+   parameter int unsigned          DATA_DEPTH = 8    // number of entries
+)
+(
+   input  logic                                    clk,
+   input  logic                                    rst_n,       // asynchronous reset, active low
+   input  logic                                    clear_i,     // synchronous clear of the FSM state and both pointers
+
+   //PUSH SIDE
+   input  logic [DATA_WIDTH-1:0]                   data_i,      // entry to push
+   input  logic                                    valid_i,     // push request
+   output logic                                    grant_o,     // push is accepted; high in EMPTY and MIDDLE
+
+   //POP SIDE
+   output logic [DATA_WIDTH-1:0]                   data_o,      // entry at the pop pointer
+   output logic                                    valid_o,     // data_o is valid; high in MIDDLE and FULL
+   input  logic                                    grant_i,     // pop request
+
+   input  logic                                    test_mode_i  // drives test_en_i of the clock-gating cell
+);
+   // Register-based FIFO with a valid/grant handshake on each side, a three-state occupancy FSM and a clear input.
+   // The storage registers use a clock that is gated off in cycles without a push (unless PULP_FPGA_EMUL is defined).
+   // Local Parameter
+   localparam int unsigned ADDR_DEPTH = $clog2(DATA_DEPTH);
+   enum logic [1:0] { EMPTY, FULL, MIDDLE } CS, NS;
+   // Internal Signals
+
+   logic       gate_clock;   // high in cycles where nothing is written to the storage registers
+   logic       clk_gated;    // clock of the storage registers
+
+   logic [ADDR_DEPTH-1:0]  Pop_Pointer_CS,  Pop_Pointer_NS;
+   logic [ADDR_DEPTH-1:0]  Push_Pointer_CS, Push_Pointer_NS;
+   logic [DATA_WIDTH-1:0]  FIFO_REGISTERS[DATA_DEPTH-1:0];
+   int unsigned            i;
+
+   // Parameter Check
+   // synopsys translate_off
+   initial
+   begin : parameter_check
+      integer param_err_flg;
+      param_err_flg = 0;
+
+      if (DATA_WIDTH < 1)
+      begin
+         param_err_flg = 1;
+         $display("ERROR: %m :\n  Invalid value (%d) for parameter DATA_WIDTH (legal range: greater than 1)", DATA_WIDTH );
+      end
+
+      if (DATA_DEPTH < 1)
+      begin
+         param_err_flg = 1;
+         $display("ERROR: %m :\n  Invalid value (%d) for parameter DATA_DEPTH (legal range: greater than 1)", DATA_DEPTH );
+      end
+   end
+   // synopsys translate_on
+
+`ifndef PULP_FPGA_EMUL
+   cluster_clock_gating cg_cell
+   (
+     .clk_i     ( clk         ),
+     .en_i      (~gate_clock  ),
+     .test_en_i ( test_mode_i ),
+     .clk_o     ( clk_gated   )
+   );
+`else
+   assign clk_gated = clk;
+`endif
+
+   // UPDATE THE STATE
+   always_ff @(posedge clk, negedge rst_n)
+   begin
+       if(rst_n == 1'b0)
+       begin
+               CS              <= EMPTY;
+               Pop_Pointer_CS  <= {ADDR_DEPTH {1'b0}};
+               Push_Pointer_CS <= {ADDR_DEPTH {1'b0}};
+       end
+       else
+       begin
+               if(clear_i)
+               begin
+                       CS              <= EMPTY;
+                       Pop_Pointer_CS  <= {ADDR_DEPTH {1'b0}};
+                       Push_Pointer_CS <= {ADDR_DEPTH {1'b0}};
+               end
+               else
+               begin
+                       CS              <= NS;
+                       Pop_Pointer_CS  <= Pop_Pointer_NS;
+                       Push_Pointer_CS <= Push_Pointer_NS;
+               end
+       end
+   end
+
+
+   // Compute Next State
+   always_comb
+   begin
+      gate_clock      = 1'b0;
+
+      case(CS)
+
+      EMPTY:
+      begin
+          grant_o = 1'b1;
+          valid_o = 1'b0;
+
+          case(valid_i)
+          1'b0 :
+          begin
+                  NS              = EMPTY;
+                  Push_Pointer_NS = Push_Pointer_CS;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+                  gate_clock      = 1'b1;
+          end
+          // Wrap explicitly (as in MIDDLE) so a DATA_DEPTH that is not a power of two never yields an out-of-range pointer.
+          1'b1:
+          begin
+                  NS              = MIDDLE;
+                  Push_Pointer_NS = (Push_Pointer_CS == DATA_DEPTH-1) ? {ADDR_DEPTH {1'b0}} : Push_Pointer_CS + 1'b1;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+
+          endcase
+      end//~EMPTY
+
+      MIDDLE:
+      begin
+          grant_o = 1'b1;
+          valid_o = 1'b1;
+
+          case({valid_i,grant_i})
+
+          2'b01:
+          begin
+                  gate_clock      = 1'b1;
+
+                  if((Pop_Pointer_CS == Push_Pointer_CS -1 ) || ((Pop_Pointer_CS == DATA_DEPTH-1) && (Push_Pointer_CS == 0) ))
+                          NS              = EMPTY;
+                  else
+                          NS              = MIDDLE;
+
+                  Push_Pointer_NS = Push_Pointer_CS;
+
+                  if(Pop_Pointer_CS == DATA_DEPTH-1)
+                          Pop_Pointer_NS  = 0;
+                  else
+                          Pop_Pointer_NS  = Pop_Pointer_CS + 1'b1;
+          end
+
+          2'b00 :
+          begin
+                  gate_clock      = 1'b1;
+                  NS              = MIDDLE;
+                  Push_Pointer_NS = Push_Pointer_CS;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+
+          2'b11:
+          begin
+                  NS              = MIDDLE;
+
+                  if(Push_Pointer_CS == DATA_DEPTH-1)
+                          Push_Pointer_NS = 0;
+                  else
+                          Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                  if(Pop_Pointer_CS == DATA_DEPTH-1)
+                          Pop_Pointer_NS  = 0;
+                  else
+                          Pop_Pointer_NS  = Pop_Pointer_CS  + 1'b1;
+          end
+
+          2'b10:
+          begin
+                  if(( Push_Pointer_CS == Pop_Pointer_CS - 1) || ( (Push_Pointer_CS == DATA_DEPTH-1) && (Pop_Pointer_CS == 0) ))
+                          NS              = FULL;
+                  else
+                          NS        = MIDDLE;
+
+                  if(Push_Pointer_CS == DATA_DEPTH - 1)
+                          Push_Pointer_NS = 0;
+                  else
+                          Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+
+          endcase
+      end
+
+      FULL:
+      begin
+          grant_o     = 1'b0;
+          valid_o     = 1'b1;
+          gate_clock  = 1'b1;
+
+          case(grant_i)
+          1'b1:
+          begin
+                  NS              = MIDDLE;
+
+                  Push_Pointer_NS = Push_Pointer_CS;
+
+                  if(Pop_Pointer_CS == DATA_DEPTH-1)
+                          Pop_Pointer_NS  = 0;
+                  else
+                          Pop_Pointer_NS  = Pop_Pointer_CS  + 1'b1;
+          end
+
+          1'b0:
+          begin
+                  NS              = FULL;
+                  Push_Pointer_NS = Push_Pointer_CS;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+          endcase
+
+      end // end of FULL
+
+      default :
+      begin
+          gate_clock      = 1'b1;
+          grant_o         = 1'b0;
+          valid_o         = 1'b0;
+          NS              = EMPTY;
+          Pop_Pointer_NS  = 0;
+          Push_Pointer_NS = 0;
+      end
+
+      endcase
+   end
+   // Storage registers: cleared on reset, written at the push pointer whenever a push is accepted.
+   always_ff @(posedge clk_gated, negedge rst_n)
+   begin
+      if(rst_n == 1'b0)
+      begin
+         for (i=0; i< DATA_DEPTH; i++)
+            FIFO_REGISTERS[i] <= {DATA_WIDTH {1'b0}};
+      end
+      else
+      begin
+         if((grant_o == 1'b1) && (valid_i == 1'b1))
+            FIFO_REGISTERS[Push_Pointer_CS] <= data_i;
+      end
+   end
+
+   assign data_o = FIFO_REGISTERS[Pop_Pointer_CS];
+
+endmodule // generic_fifo_adv
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/prioarbiter.sv b/vendor/pulp-platform/common_cells/src/deprecated/prioarbiter.sv
new file mode 100644
index 0000000000..730ceca4bf
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/prioarbiter.sv
@@ -0,0 +1,89 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
+// Date: 16.03.2019
+// Description: Priority arbiter with Lock in. Port 0 has priority over port 1, port 1 over port2
+//              and so on. If the `LOCK_IN` feature is activated the arbitration decision is kept
+//              when the `en_i` is low.
+
+// Dependencies: relies on fast leading zero counter tree "onehot_to_bin" in common_cells
+module prioarbiter #(
+  parameter int unsigned NUM_REQ = 13, // number of request ports
+  parameter int unsigned LOCK_IN = 0   // nonzero: keep the decision in a register while en_i is low
+) (
+  input  logic                       clk_i,
+  input  logic                       rst_ni,  // asynchronous reset, active low
+
+  input  logic                       flush_i, // synchronously clears the lock registers
+  input  logic                       en_i,    // arbiter enable
+  input  logic [NUM_REQ-1:0]         req_i,   // request signals
+
+  output logic [NUM_REQ-1:0]         ack_o,   // acknowledge signals, at most one bit is set
+  output logic                       vld_o,   // at least one request while the arbiter is enabled
+  output logic [$clog2(NUM_REQ)-1:0] idx_o    // binary index output
+);
+
+  localparam SEL_WIDTH = $clog2(NUM_REQ);
+
+  // Lock flag and the index stored together with it (next-state and registered values).
+  logic                 lock_d, lock_q;
+  logic [SEL_WIDTH-1:0] arb_sel_lock_d, arb_sel_lock_q;
+  // Binary index derived from the acknowledge vector.
+  logic [SEL_WIDTH-1:0] idx;
+
+  // The stored index overrides the live one while the lock flag is set.
+  assign vld_o = en_i & (|req_i);
+  assign idx_o = lock_q ? arb_sel_lock_q : idx;
+
+  // Fixed-priority chain: a port is acknowledged with en_i when it requests and no lower port was acknowledged.
+  assign ack_o[0] = req_i[0] ? en_i : 1'b0;
+  for (genvar i = 1; i < NUM_REQ; i++) begin : gen_arb_req_ports
+    assign ack_o[i] = (|ack_o[i-1:0]) ? 1'b0 : (req_i[i] ? en_i : 1'b0);
+  end
+
+  // Binary encoding of the acknowledge vector.
+  onehot_to_bin #(
+    .ONEHOT_WIDTH ( NUM_REQ )
+  ) i_onehot_to_bin (
+    .onehot ( ack_o ),
+    .bin    ( idx   )
+  );
+
+  if (LOCK_IN) begin : gen_lock_in
+    // Raise the lock flag while a request is pending and the arbiter is disabled.
+    // NOTE(review): with en_i low every ack_o bit is 0, so the index captured here depends on
+    // what onehot_to_bin returns for an all-zero input - confirm this is the intended value.
+    assign lock_d         = ~en_i & (|req_i);
+    assign arb_sel_lock_d = idx_o;
+  end else begin
+    // Lock-in disabled: both next-state values are tied to zero.
+    assign lock_d         = 1'b0;
+    assign arb_sel_lock_d = '0;
+  end
+
+  // Lock registers: cleared by reset and by flush_i, loaded with the next-state values otherwise.
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      lock_q         <= 1'b0;
+      arb_sel_lock_q <= '0;
+    end else if (flush_i) begin
+      lock_q         <= 1'b0;
+      arb_sel_lock_q <= '0;
+    end else begin
+      lock_q         <= lock_d;
+      arb_sel_lock_q <= arb_sel_lock_d;
+    end
+  end
+
+endmodule : prioarbiter
+
+
+
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync.sv b/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync.sv
new file mode 100644
index 0000000000..2b436163e5
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync.sv
@@ -0,0 +1,36 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module pulp_sync #(
+  parameter STAGES = 2  // number of flip-flops in the chain
+) (
+  input  logic clk_i,     // clock of the chain
+  input  logic rstn_i,    // asynchronous reset, active low
+  input  logic serial_i,  // input bit, enters the chain at position 0
+  output logic serial_o   // output bit, taken from the last position of the chain
+);
+
+  // Shift register of STAGES flip-flops; every clock edge moves each bit one position up.
+  // NOTE(review): the slice [STAGES-2:0] below presumably requires STAGES >= 2 - confirm.
+  logic [STAGES-1:0] sync_q;
+
+  always_ff @(posedge clk_i, negedge rstn_i) begin
+    if (!rstn_i) begin
+      sync_q <= 'h0;
+    end else begin
+      sync_q <= {sync_q[STAGES-2:0], serial_i};
+    end
+  end
+
+  assign serial_o = sync_q[STAGES-1];
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync_wedge.sv b/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync_wedge.sv
new file mode 100644
index 0000000000..66cee57d2c
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/pulp_sync_wedge.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module pulp_sync_wedge #(
+    parameter int unsigned STAGES = 2  // forwarded to the pulp_sync instance
+) (
+    input  logic clk_i,
+    input  logic rstn_i,    // asynchronous reset, active low
+    input  logic en_i,      // enable of the clock gate in front of the output register
+    input  logic serial_i,  // input bit, fed to pulp_sync
+    output logic r_edge_o,  // pulp_sync output is high while the output register is still low
+    output logic f_edge_o,  // pulp_sync output is low while the output register is still high
+    output logic serial_o   // registered copy of the pulp_sync output
+);
+    logic gated_clk;    // clk_i behind the clock gate
+    logic sync_out;     // output of the pulp_sync instance
+    logic sync_out_q;   // sync_out sampled on gated_clk
+
+    pulp_sync #(
+        .STAGES ( STAGES )
+    ) i_pulp_sync (
+        .clk_i,
+        .rstn_i,
+        .serial_i,
+        .serial_o ( sync_out )
+    );
+
+    pulp_clock_gating i_pulp_clock_gating (
+        .clk_i,
+        .en_i,
+        .test_en_i ( 1'b0      ),
+        .clk_o     ( gated_clk )
+    );
+
+    // Output register; it only advances on edges of the gated clock.
+    always_ff @(posedge gated_clk, negedge rstn_i) begin
+        if (!rstn_i) sync_out_q <= 1'b0;
+        else         sync_out_q <= sync_out;
+    end
+
+    // Edge flags compare the synchronizer output with the registered copy.
+    assign serial_o = sync_out_q;
+    assign r_edge_o = sync_out & ~sync_out_q;
+    assign f_edge_o = sync_out_q & ~sync_out;
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/rrarbiter.sv b/vendor/pulp-platform/common_cells/src/deprecated/rrarbiter.sv
new file mode 100644
index 0000000000..bf806c5e42
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/rrarbiter.sv
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 16.08.2018
+// Description: Fair round robin arbiter with lock feature.
+//
+// The rrarbiter employs fair round robin arbitration - i.e. the priorities
+// rotate each cycle.
+//
+// The lock-in feature prevents the arbiter from changing the arbitration
+// decision when the arbiter is disabled. I.e., the index of the first request
+// that wins the arbitration will be locked until en_i is asserted again.
+//
+// Dependencies: relies on rr_arb_tree from common_cells.
+
+module rrarbiter #( // Deprecated wrapper that maps the rrarbiter ports onto rr_arb_tree.
+  parameter int unsigned NUM_REQ   = 64,   // number of request inputs (rr_arb_tree NumIn)
+  parameter bit          LOCK_IN   = 1'b0  // forwarded to rr_arb_tree LockIn
+) (
+  input logic                         clk_i,
+  input logic                         rst_ni,
+
+  input logic                         flush_i, // clears arbiter state
+  input logic                         en_i,    // arbiter enable
+  input logic [NUM_REQ-1:0]           req_i,   // request signals
+
+  output logic [NUM_REQ-1:0]          ack_o,   // acknowledge signals
+  output logic                        vld_o,   // any request pending while en_i is set
+  output logic [$clog2(NUM_REQ)-1:0]  idx_o    // idx output
+);
+
+  logic req; // req_o of the arbiter tree, looped back into gnt_i below
+  assign vld_o = (|req_i) & en_i;
+
+  rr_arb_tree #(
+    .NumIn     ( NUM_REQ ),
+    .DataWidth ( 1       ), // no payload is arbitrated; data_i is tied to '0
+    .LockIn    ( LOCK_IN ))
+  i_rr_arb_tree (
+    .clk_i   ( clk_i      ),
+    .rst_ni  ( rst_ni     ),
+    .flush_i ( flush_i    ),
+    .rr_i    ( '0         ), // NOTE(review): presumably an external priority input - confirm
+    .req_i   ( req_i      ),
+    .gnt_o   ( ack_o      ),
+    .data_i  ( '0         ),
+    .gnt_i   ( en_i & req ), // grant the tree output only while the arbiter is enabled
+    .req_o   ( req        ),
+    .data_o  (            ), // payload output is unused
+    .idx_o   ( idx_o      )
+  );
+
+endmodule : rrarbiter
diff --git a/vendor/pulp-platform/common_cells/src/deprecated/sram.sv b/vendor/pulp-platform/common_cells/src/deprecated/sram.sv
new file mode 100644
index 0000000000..fca1372bfe
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/deprecated/sram.sv
@@ -0,0 +1,46 @@
+// Copyright 2017, 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Date: 13.10.2017
+// Description: SRAM Behavioral Model
+
+module sram #( // Behavioral memory model: per-bit write enable, registered read address, no reset.
+    int unsigned DATA_WIDTH = 64,   // width of one word in bits
+    int unsigned NUM_WORDS  = 1024  // number of words in the array
+)(
+   input  logic                          clk_i,
+
+   input  logic                          req_i,   // access request; state only changes when set
+   input  logic                          we_i,    // 1 = write, 0 = read
+   input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,
+   input  logic [DATA_WIDTH-1:0]         wdata_i,
+   input  logic [DATA_WIDTH-1:0]         be_i,    // one enable per data bit (DATA_WIDTH wide)
+   output logic [DATA_WIDTH-1:0]         rdata_o  // word at the last captured read address
+);
+    localparam ADDR_WIDTH = $clog2(NUM_WORDS);
+
+    logic [DATA_WIDTH-1:0] ram [NUM_WORDS-1:0];
+    logic [ADDR_WIDTH-1:0] raddr_q; // read address captured on the last read request
+
+    // NOTE(review): the original comment listed "1. randomize array" and "2. randomize output
+    // when no request is active"; neither is implemented in this block.
+    always_ff @(posedge clk_i) begin
+        if (req_i) begin
+            if (!we_i)
+                raddr_q <= addr_i;
+            else // write: the for loop below is the body of this else branch
+            for (int i = 0; i < DATA_WIDTH; i++)
+                if (be_i[i]) ram[addr_i][i] <= wdata_i[i];
+        end
+    end
+
+    assign rdata_o = ram[raddr_q]; // combinational read through the registered address
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/ecc_decode.sv b/vendor/pulp-platform/common_cells/src/ecc_decode.sv
new file mode 100644
index 0000000000..40687e906f
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/ecc_decode.sv
@@ -0,0 +1,128 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+//
+/// # ECC Decoder
+///
+/// Implements SECDED (Single Error Correction, Double Error Detection) Hamming Code
+/// with extended parity bit [1].
+/// The module receives a data word including the parity bits and decodes it according to the
+/// number of data and parity bits.
+///
+/// 1. If no error has been detected, the syndrome will be zero and all flags will be zero.
+/// 2. If a single error has been detected, the syndrome is non-zero, and `single_error_o` will be
+///    asserted. The output word contains the corrected data.
+/// 3. If the parity bit contained an error, the module will assert `parity_error_o`.
+/// 4. In case of a double fault the syndrome is non-zero, `double_error_o` will be asserted.
+///    All other status flags will be de-asserted.
+///
+/// [1] https://en.wikipedia.org/wiki/Hamming_code
+
+module ecc_decode import ecc_pkg::*; #(
+  /// Data width of unencoded word.
+  parameter  int unsigned DataWidth   = 64,
+  // Do not change: the types below are derived from DataWidth.
+  parameter type data_t         = logic [DataWidth-1:0],
+  parameter type parity_t       = logic [get_parity_width(DataWidth)-1:0],
+  parameter type code_word_t    = logic [get_cw_width(DataWidth)-1:0],
+  parameter type encoded_data_t = struct packed {
+                                    logic parity;
+                                    code_word_t code_word;
+                                  }
+  ) (
+  /// Encoded data in
+  input  encoded_data_t data_i,
+  /// Corrected data out
+  output data_t         data_o,
+  /// Error syndrome indicates the erroneous bit position
+  output parity_t       syndrome_o,
+  /// A single error occurred
+  output logic          single_error_o,
+  /// Error received in parity bit (MSB)
+  output logic          parity_error_o,
+  /// A double error occurred
+  output logic          double_error_o
+);
+
+  logic       parity;            // overall parity check result, 1 = mismatch
+  data_t      data_wo_parity;    // data bits extracted from the corrected code word
+  parity_t    syndrome;          // recomputed parity checks over the received code word
+  logic       syndrome_not_zero;
+  code_word_t correct_data;      // code word after the optional single-bit flip
+
+  // Check parity bit. 0 = parity equal, 1 = different parity
+  assign parity = data_i.parity ^ (^data_i.code_word);
+
+  ///!    | 0  1  2  3  4  5  6  7  8  9 10 11 12  13  14
+  ///!    |p1 p2 d1 p4 d2 d3 d4 p8 d5 d6 d7 d8 d9 d10 d11
+  ///! ---|----------------------------------------------
+  ///! p1 | x     x     x     x     x     x     x       x
+  ///! p2 |    x  x        x  x        x  x         x   x
+  ///! p4 |          x  x  x  x              x  x   x   x
+  ///! p8 |                      x  x  x  x  x  x   x   x
+
+  ///! 1. Parity bit 1 covers all bit positions which have the least significant bit
+  ///!    set: bit 1 (the parity bit itself), 3, 5, 7, 9, etc.
+  ///! 2. Parity bit 2 covers all bit positions which have the second least
+  ///!    significant bit set: bit 2 (the parity bit itself), 3, 6, 7, 10, 11, etc.
+  ///! 3. Parity bit 4 covers all bit positions which have the third least
+  ///!    significant bit set: bits 4–7, 12–15, 20–23, etc.
+  ///! 4. Parity bit 8 covers all bit positions which have the fourth least
+  ///!    significant bit set: bits 8–15, 24–31, 40–47, etc.
+  ///! 5. In general each parity bit covers all bits where the bitwise AND of the
+  ///!    parity position and the bit position is non-zero.
+  always_comb begin : calculate_syndrome
+    syndrome = 0;
+    for (int unsigned i = 0; i < unsigned'($bits(parity_t)); i++) begin
+      for (int unsigned j = 0; j < unsigned'($bits(code_word_t)); j++) begin // j + 1 = 1-based position
+        if (|(unsigned'(2**i) & (j + 1))) syndrome[i] = syndrome[i] ^ data_i.code_word[j];
+      end
+    end
+  end
+
+  assign syndrome_not_zero = |syndrome;
+  assign syndrome_o        = syndrome; // fix: this output was declared but never driven
+  // correct the data word if the syndrome is non-zero
+  always_comb begin
+    correct_data = data_i.code_word;
+    if (syndrome_not_zero) begin // also taken for a double error, so data_o is not valid then
+      correct_data[syndrome - 1] = ~data_i.code_word[syndrome - 1]; // syndrome is 1-based
+    end
+  end
+
+  ///! Syndrome | Overall Parity (MSB) | Error Type   | Notes
+  ///! --------------------------------------------------------
+  ///! 0        | 0                    | No Error     |
+  ///! /=0      | 1                    | Single Error | Correctable. Syndrome holds incorrect bit position.
+  ///! 0        | 1                    | Parity Error | Overall parity, MSB is in error and can be corrected.
+  ///! /=0      | 0                    | Double Error | Not correctable.
+  assign single_error_o = parity & syndrome_not_zero;
+  assign parity_error_o = parity & ~syndrome_not_zero;
+  assign double_error_o = ~parity & syndrome_not_zero;
+
+  // Extract data vector
+  always_comb begin
+    automatic int unsigned idx; // bit index
+    data_wo_parity = '0;
+    idx = 0;
+
+    for (int unsigned i = 1; i < unsigned'($bits(code_word_t)) + 1; i++) begin
+      // positions that are a power of two hold parity bits; every other position is data
+      if (unsigned'(2**$clog2(i)) != i) begin
+        data_wo_parity[idx] = correct_data[i - 1];
+        idx++;
+      end
+    end
+  end
+
+  assign data_o = data_wo_parity;
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/ecc_encode.sv b/vendor/pulp-platform/common_cells/src/ecc_encode.sv
new file mode 100644
index 0000000000..8669a082ed
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/ecc_encode.sv
@@ -0,0 +1,78 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+/// # ECC Encoder
+///
+/// Implements SECDED (Single Error Correction, Double Error Detection) Hamming Code
+/// with extended parity bit [1].
+/// The module receives a data word and encodes it using the above-mentioned error
+/// detection and correction code. The corresponding decode module
+/// can be found in `ecc_decode.sv`.
+///
+/// [1] https://en.wikipedia.org/wiki/Hamming_code
+
+module ecc_encode import ecc_pkg::*; #(
+  /// Data width of unencoded word.
+  parameter  int unsigned DataWidth   = 64,
+  // Do not change: the types below are derived from DataWidth.
+  parameter type data_t         = logic [DataWidth-1:0],
+  parameter type parity_t       = logic [get_parity_width(DataWidth)-1:0],
+  parameter type code_word_t    = logic [get_cw_width(DataWidth)-1:0],
+  parameter type encoded_data_t = struct packed {
+                                    logic parity;
+                                    code_word_t code_word;
+                                  }
+) (
+  /// Unencoded data in
+  input  data_t         data_i,
+  /// Encoded data out
+  output encoded_data_t data_o
+);
+
+  parity_t parity_code_word;  // one parity bit per power-of-two position
+  code_word_t data, codeword; // data spread over the code word, and the final code word
+
+  // Expand incoming data to codeword width
+  always_comb begin : expand_data
+    automatic int unsigned idx; // next bit of data_i to place
+    data = '0;                  // power-of-two (parity) positions stay zero here
+    idx = 0;
+    for (int unsigned i = 1; i < unsigned'($bits(code_word_t)) + 1; i++) begin // i is 1-based
+      // if it is not a power of two word it is a normal data index
+      if (unsigned'(2**$clog2(i)) != i) begin
+        data[i - 1] = data_i[idx];
+        idx++;
+      end
+    end
+  end
+
+  // calculate the parity bits (the block label says "syndrome", but this is the encoder side)
+  always_comb begin : calculate_syndrome
+    parity_code_word = 0;
+    for (int unsigned i = 0; i < unsigned'($bits(parity_t)); i++) begin
+      for (int unsigned j = 1; j < unsigned'($bits(code_word_t)) + 1; j++) begin // j is 1-based
+        if (|(unsigned'(2**i) & j)) parity_code_word[i] = parity_code_word[i] ^ data[j - 1];
+      end
+    end
+  end
+
+  // fuse the final codeword
+  always_comb begin : generate_codeword
+      codeword = data;
+      for (int unsigned i = 0; i < unsigned'($bits(parity_t)); i++) begin
+        codeword[2**i-1] = parity_code_word[i]; // parity bit i sits at 1-based position 2**i
+      end
+  end
+
+  assign data_o.code_word = codeword;
+  assign data_o.parity = ^codeword; // extra parity bit: XOR reduction of the whole code word
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/ecc_pkg.sv b/vendor/pulp-platform/common_cells/src/ecc_pkg.sv
new file mode 100644
index 0000000000..fde9f782b9
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/ecc_pkg.sv
@@ -0,0 +1,31 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+//
+/// Contains common ECC definitions and helper functions.
+
+package ecc_pkg;
+
+  // Returns the smallest parity width p >= 2 for which 2**p >= p + data_width + 1.
+  function automatic int unsigned get_parity_width (input int unsigned data_width);
+    // Note: the local `cw_width` counts parity bits, not code word bits.
+    int unsigned cw_width = 2; // the search starts at two parity bits
+    while (unsigned'(2**cw_width) < cw_width + data_width + 1) cw_width++;
+    return cw_width;
+  endfunction
+
+  // Returns the code word width: data bits plus the parity bits from get_parity_width.
+  function automatic int unsigned get_cw_width (input int unsigned data_width);
+    // The additional parity bit (for double error detection) is not added to this value.
+    return data_width + get_parity_width(data_width);
+  endfunction
+
+endpackage
diff --git a/vendor/pulp-platform/common_cells/src/edge_detect.sv b/vendor/pulp-platform/common_cells/src/edge_detect.sv
new file mode 100644
index 0000000000..c6453ba519
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/edge_detect.sv
@@ -0,0 +1,32 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Edge detector, clock needs to oversample for proper edge detection
+
+module edge_detect (
+    input  logic clk_i,   // Clock
+    input  logic rst_ni,  // Asynchronous reset active low
+    input  logic d_i,     // data stream in
+    output logic re_o,    // rising edge detected
+    output logic fe_o     // falling edge detected
+);
+    // Thin wrapper: both edge outputs come straight from sync_wedge.
+    sync_wedge i_sync_wedge (
+        .clk_i    ( clk_i  ),
+        .rst_ni   ( rst_ni ),
+        .en_i     ( 1'b1   ), // enable input is tied high
+        .serial_i ( d_i    ),
+        .r_edge_o ( re_o   ),
+        .f_edge_o ( fe_o   ),
+        .serial_o (        )  // serial output of sync_wedge is unused
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/edge_propagator.sv b/vendor/pulp-platform/common_cells/src/edge_propagator.sv
new file mode 100644
index 0000000000..2e27283111
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/edge_propagator.sv
@@ -0,0 +1,50 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module edge_propagator ( // Carries a pulse on edge_i (clk_tx_i side) to edge_o (clk_rx_i side).
+    input  logic clk_tx_i,  // clock of the sending side registers
+    input  logic rstn_tx_i, // asynchronous reset of the sending side, active low
+    input  logic edge_i,    // sets r_input_reg
+    input  logic clk_rx_i,  // clock handed to the receiving pulp_sync_wedge
+    input  logic rstn_rx_i, // reset handed to the receiving pulp_sync_wedge
+    output logic edge_o     // r_edge_o of the receiving pulp_sync_wedge
+);
+
+    logic [1:0] sync_a; // two-register chain on clk_tx_i that samples sync_b
+    logic       sync_b; // serial_o of the receiving pulp_sync_wedge
+
+    logic r_input_reg;      // request flag: set by edge_i, held until sync_a[0] is set
+    logic s_input_reg_next; // next value of r_input_reg
+
+    assign s_input_reg_next = edge_i | (r_input_reg & (~sync_a[0]));
+
+    always @(negedge rstn_tx_i or posedge clk_tx_i) begin
+        if (~rstn_tx_i) begin
+            r_input_reg <= 1'b0;
+            sync_a      <= 2'b00;
+        end else begin
+            r_input_reg <= s_input_reg_next;
+            sync_a      <= {sync_b,sync_a[1]}; // shift: sync_b -> sync_a[1] -> sync_a[0]
+        end
+    end
+
+    pulp_sync_wedge i_sync_clkb (
+        .clk_i    ( clk_rx_i     ),
+        .rstn_i   ( rstn_rx_i    ),
+        .en_i     ( 1'b1         ), // enable input is tied high
+        .serial_i ( r_input_reg  ),
+        .r_edge_o ( edge_o       ),
+        .f_edge_o (              ), // falling edge output is unused
+        .serial_o ( sync_b       )  // fed back to the sending side through sync_a
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/edge_propagator_rx.sv b/vendor/pulp-platform/common_cells/src/edge_propagator_rx.sv
new file mode 100644
index 0000000000..89532cc27c
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/edge_propagator_rx.sv
@@ -0,0 +1,31 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module edge_propagator_rx ( // Receiving half: wraps pulp_sync_wedge around valid_i.
+    input  logic clk_i,
+    input  logic rstn_i,  // reset handed to pulp_sync_wedge
+    input  logic valid_i, // serial input of pulp_sync_wedge
+    output logic ack_o,   // serial_o of pulp_sync_wedge
+    output logic valid_o  // r_edge_o of pulp_sync_wedge
+);
+    // NOTE(review): presumably paired with edge_propagator_tx (ack_o -> ack_i) - confirm at the caller.
+    pulp_sync_wedge i_sync_clkb (
+        .clk_i    ( clk_i   ),
+        .rstn_i   ( rstn_i  ),
+        .en_i     ( 1'b1    ), // enable input is tied high
+        .serial_i ( valid_i ),
+        .r_edge_o ( valid_o ),
+        .f_edge_o (         ), // falling edge output is unused
+        .serial_o ( ack_o   )
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/edge_propagator_tx.sv b/vendor/pulp-platform/common_cells/src/edge_propagator_tx.sv
new file mode 100644
index 0000000000..0274a43333
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/edge_propagator_tx.sv
@@ -0,0 +1,40 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module edge_propagator_tx ( // Sending half: holds a request flag until ack_i has been sampled.
+    input  logic clk_i,
+    input  logic rstn_i,  // asynchronous reset, active low
+    input  logic valid_i, // sets r_input_reg
+    input  logic ack_i,   // sampled through the two registers of sync_a
+    output logic valid_o  // current value of r_input_reg
+);
+
+    logic [1:0]   sync_a; // two-register chain on clk_i that samples ack_i
+
+    logic    r_input_reg;      // request flag: set by valid_i, held until sync_a[0] is set
+    logic    s_input_reg_next; // next value of r_input_reg
+
+    assign s_input_reg_next = valid_i | (r_input_reg & ~sync_a[0]);
+
+    always @(negedge rstn_i or posedge clk_i) begin
+        if (~rstn_i) begin
+            r_input_reg <= 1'b0;
+            sync_a      <= 2'b00;
+        end else begin
+            r_input_reg <= s_input_reg_next;
+            sync_a      <= {ack_i,sync_a[1]}; // shift: ack_i -> sync_a[1] -> sync_a[0]
+        end
+    end
+
+    assign valid_o = r_input_reg;
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/exp_backoff.sv b/vendor/pulp-platform/common_cells/src/exp_backoff.sv
new file mode 100644
index 0000000000..91dccb075c
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/exp_backoff.sv
@@ -0,0 +1,98 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 10.04.2019
+// Description: exponential backoff counter with randomization.
+//
+// For each failed trial (set_i pulsed), this unit exponentially increases the
+// (average) backoff time by masking an LFSR with a shifted mask in order to
+// create the backoff counter initial value.
+//
+// The shift register mask and the counter value are both reset to '0 in case of
+// a successful trial (clr_i).
+//
+
+module exp_backoff #(
+  /// Seed for 16bit LFSR
+  parameter int unsigned Seed   = 'hffff,
+  /// 2**MaxExp-1 determines the maximum range from which random wait counts are drawn
+  parameter int unsigned MaxExp = 16
+) (
+  input  logic clk_i,
+  input  logic rst_ni,
+  /// Sets the backoff counter (pulse) -> use when trial did not succeed
+  input  logic set_i,
+  /// Clears the backoff counter (pulse) -> use when trial succeeded
+  input  logic clr_i,
+  /// Indicates whether the backoff counter is equal to zero and a new trial can be launched
+  output logic is_zero_o
+);
+
+  // leave this constant
+  localparam int unsigned WIDTH = 16;
+
+  logic [WIDTH-1:0] lfsr_d, lfsr_q, cnt_d, cnt_q, mask_d, mask_q;
+  logic lfsr; // feedback bit shifted into the MSB of the LFSR
+
+  // generate random wait counts
+  // note: we use a flipped lfsr here to
+  // avoid strange correlation effects between
+  // the (left-shifted) mask and the lfsr
+  assign lfsr = lfsr_q[15-15] ^ // taps are bits 0, 2, 3 and 5
+                lfsr_q[15-13] ^
+                lfsr_q[15-12] ^
+                lfsr_q[15-10];
+
+  assign lfsr_d = (set_i) ? {lfsr, lfsr_q[$high(lfsr_q):1]} : // advances only on set_i
+                            lfsr_q;
+
+  // mask the wait counts with exponentially increasing mask (shift reg):
+  // clr_i clears it, each set_i shifts a 1 in at the LSB, bits above MaxExp-1 stay zero
+  assign mask_d = (clr_i) ? '0                                :
+                  (set_i) ? {{(WIDTH-MaxExp){1'b0}},mask_q[MaxExp-2:0], 1'b1} :
+                            mask_q;
+
+  assign cnt_d =  (clr_i)      ? '0                : // clr_i has priority over set_i
+                  (set_i)      ? (mask_q & lfsr_q) : // uses the mask and LFSR from before this set_i
+                  (!is_zero_o) ? cnt_q - 1'b1      : '0; // otherwise count down to zero
+
+  assign is_zero_o = (cnt_q=='0);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      lfsr_q <= WIDTH'(Seed); // Seed is truncated to the LFSR width
+      mask_q <= '0;
+      cnt_q  <= '0;
+    end else begin
+      lfsr_q <= lfsr_d;
+      mask_q <= mask_d;
+      cnt_q  <= cnt_d;
+    end
+  end
+
+///////////////////////////////////////////////////////
+// assertions
+///////////////////////////////////////////////////////
+
+//pragma translate_off
+`ifndef VERILATOR
+  initial begin
+    // assert wrong parameterizations
+    assert (MaxExp>0) // NOTE(review): MaxExp == 1 makes mask_q[MaxExp-2:0] a [-1:0] select - confirm
+      else $fatal(1,"MaxExp must be greater than 0");
+    assert (MaxExp<=16)
+      else $fatal(1,"MaxExp cannot be greater than 16");
+    assert (Seed>0)
+      else $fatal(1,"Zero seed is not allowed for LFSR");
+  end
+`endif
+//pragma translate_on
+
+endmodule // exp_backoff
diff --git a/vendor/pulp-platform/common_cells/src/fall_through_register.sv b/vendor/pulp-platform/common_cells/src/fall_through_register.sv
new file mode 100644
index 0000000000..fcbbe31dbc
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/fall_through_register.sv
@@ -0,0 +1,58 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Fall-through register with a simple stream-like ready/valid handshake.
+// This register does not cut combinatorial paths on any signals: in case the module at its output
+// is ready to accept data within the same clock cycle, they are forwarded. Use this module to get a
+// 'default ready' behavior towards the input.
+module fall_through_register #(
+    parameter type T = logic  // Vivado requires a default value for type parameters.
+) (
+    input  logic    clk_i,          // Clock
+    input  logic    rst_ni,         // Asynchronous active-low reset
+    input  logic    clr_i,          // Synchronous clear
+    input  logic    testmode_i,     // Test mode to bypass clock gating
+    // Input port
+    input  logic    valid_i,
+    output logic    ready_o,        // high whenever the internal FIFO is not full
+    input  T        data_i,
+    // Output port
+    output logic    valid_o,        // high whenever the internal FIFO is not empty
+    input  logic    ready_i,
+    output T        data_o
+);
+
+    logic   fifo_empty,
+            fifo_full;
+
+    fifo_v2 #( // single-entry FIFO instantiated with FALL_THROUGH set
+        .FALL_THROUGH   (1'b1),
+        .DATA_WIDTH     ($bits(T)), // fix: $bits gives the width of T; $size is an array query
+        .DEPTH          (1),
+        .dtype          (T)
+    ) i_fifo (
+        .clk_i          (clk_i),
+        .rst_ni         (rst_ni),
+        .flush_i        (clr_i),
+        .testmode_i     (testmode_i),
+        .full_o         (fifo_full),
+        .empty_o        (fifo_empty),
+        .alm_full_o     ( ),        // almost-full/empty flags are unused
+        .alm_empty_o    ( ),
+        .data_i         (data_i),
+        .push_i         (valid_i & ~fifo_full),  // never push into a full FIFO
+        .data_o         (data_o),
+        .pop_i          (ready_i & ~fifo_empty)  // never pop from an empty FIFO
+    );
+
+    assign ready_o = ~fifo_full;
+    assign valid_o = ~fifo_empty;
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/fifo_v3.sv b/vendor/pulp-platform/common_cells/src/fifo_v3.sv
new file mode 100644
index 0000000000..e417a3e7b0
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/fifo_v3.sv
@@ -0,0 +1,154 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+module fifo_v3 #(
+    parameter bit          FALL_THROUGH = 1'b0, // fifo is in fall-through mode
+    parameter int unsigned DATA_WIDTH   = 32,   // default data width if the fifo is of type logic
+    parameter int unsigned DEPTH        = 8,    // depth can be arbitrary from 0 to 2**32
+    parameter type dtype                = logic [DATA_WIDTH-1:0],
+    // DO NOT OVERWRITE THIS PARAMETER
+    parameter int unsigned ADDR_DEPTH   = (DEPTH > 1) ? $clog2(DEPTH) : 1
+)(
+    input  logic  clk_i,            // Clock
+    input  logic  rst_ni,           // Asynchronous reset active low
+    input  logic  flush_i,          // flush the queue (synchronously resets pointers and count)
+    input  logic  testmode_i,       // test_mode to bypass clock gating
+    // status flags
+    output logic  full_o,           // queue is full
+    output logic  empty_o,          // queue is empty
+    output logic  [ADDR_DEPTH-1:0] usage_o,  // fill pointer (lower ADDR_DEPTH bits of the count)
+    // as long as the queue is not full we can push new data
+    input  dtype  data_i,           // data to push into the queue
+    input  logic  push_i,           // data is valid and can be pushed to the queue
+    // as long as the queue is not empty we can pop new elements
+    output dtype  data_o,           // output data
+    input  logic  pop_i             // pop head from queue
+);
+    // local parameter
+    // FIFO depth - handle the case of pass-through, synthesizer will do constant propagation
+    localparam int unsigned FifoDepth = (DEPTH > 0) ? DEPTH : 1;
+    // clock gating control (1 = memory keeps its value, 0 = memory is written this cycle)
+    logic gate_clock;
+    // pointer to the read and write section of the queue
+    logic [ADDR_DEPTH - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q;
+    // keep a counter to keep track of the current queue status
+    // it is one bit wider than the pointers so that the value FifoDepth is representable
+    logic [ADDR_DEPTH:0] status_cnt_n, status_cnt_q;
+    // actual memory
+    dtype [FifoDepth - 1:0] mem_n, mem_q;
+
+    assign usage_o = status_cnt_q[ADDR_DEPTH-1:0];
+    // status flags
+    if (DEPTH == 0) begin : gen_pass_through
+        assign empty_o     = ~push_i;
+        assign full_o      = ~pop_i;
+    end else begin : gen_fifo
+        assign full_o       = (status_cnt_q == FifoDepth[ADDR_DEPTH:0]);
+        assign empty_o      = (status_cnt_q == 0) & ~(FALL_THROUGH & push_i);
+    end
+    // note: in fall-through mode empty_o depends combinationally on push_i
+
+    // read and write queue logic
+    always_comb begin : read_write_comb
+        // default assignment
+        read_pointer_n  = read_pointer_q;
+        write_pointer_n = write_pointer_q;
+        status_cnt_n    = status_cnt_q;
+        data_o          = (DEPTH == 0) ? data_i : mem_q[read_pointer_q];
+        mem_n           = mem_q;
+        gate_clock      = 1'b1;
+
+        // push a new element to the queue
+        if (push_i && ~full_o) begin
+            // push the data onto the queue
+            mem_n[write_pointer_q] = data_i;
+            // un-gate the clock, we want to write something
+            gate_clock = 1'b0;
+            // increment the write counter
+            if (write_pointer_q == FifoDepth[ADDR_DEPTH-1:0] - 1)
+                write_pointer_n = '0;
+            else
+                write_pointer_n = write_pointer_q + 1;
+            // increment the overall counter
+            status_cnt_n    = status_cnt_q + 1;
+        end
+
+        if (pop_i && ~empty_o) begin
+            // read from the queue is a default assignment
+            // but increment the read pointer (compare the registered value, like the write path)
+            if (read_pointer_q == FifoDepth[ADDR_DEPTH-1:0] - 1)
+                read_pointer_n = '0;
+            else
+                read_pointer_n = read_pointer_q + 1;
+            // ... and decrement the overall count
+            status_cnt_n   = status_cnt_q - 1;
+        end
+
+        // keep the count pointer stable if we push and pop at the same time
+        if (push_i && pop_i &&  ~full_o && ~empty_o)
+            status_cnt_n   = status_cnt_q;
+
+        // fall-through: an empty FIFO forwards data_i; if popped at once, pointers/count stay put
+        if (FALL_THROUGH && (status_cnt_q == 0) && push_i) begin
+            data_o = data_i;
+            if (pop_i) begin
+                status_cnt_n = status_cnt_q;
+                read_pointer_n = read_pointer_q;
+                write_pointer_n = write_pointer_q;
+            end
+        end
+    end
+
+    // sequential process: pointers and fill count (flush_i clears them synchronously)
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if(~rst_ni) begin
+            read_pointer_q  <= '0;
+            write_pointer_q <= '0;
+            status_cnt_q    <= '0;
+        end else begin
+            if (flush_i) begin
+                read_pointer_q  <= '0;
+                write_pointer_q <= '0;
+                status_cnt_q    <= '0;
+             end else begin
+                read_pointer_q  <= read_pointer_n;
+                write_pointer_q <= write_pointer_n;
+                status_cnt_q    <= status_cnt_n;
+            end
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin  // memory: only updated on a push
+        if(~rst_ni) begin
+            mem_q <= '0;
+        end else if (!gate_clock) begin
+            mem_q <= mem_n;
+        end
+    end
+
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin  // NOTE(review): flags DEPTH == 0 although gen_pass_through handles it - confirm
+        assert (DEPTH > 0)             else $error("DEPTH must be greater than 0.");
+    end
+
+    full_write : assert property(
+        @(posedge clk_i) disable iff (~rst_ni) (full_o |-> ~push_i))
+        else $fatal (1, "Trying to push new data although the FIFO is full.");
+
+    empty_read : assert property(
+        @(posedge clk_i) disable iff (~rst_ni) (empty_o |-> ~pop_i))
+        else $fatal (1, "Trying to pop data although the FIFO is empty.");
+`endif
+// pragma translate_on
+
+endmodule // fifo_v3
diff --git a/vendor/pulp-platform/common_cells/src/gray_to_binary.sv b/vendor/pulp-platform/common_cells/src/gray_to_binary.sv
new file mode 100644
index 0000000000..b1ad46f1ef
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/gray_to_binary.sv
@@ -0,0 +1,23 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+/// A gray code to binary converter.
+module gray_to_binary #(
+    parameter int N = -1  // Code width in bits (default -1; presumably meant to be overridden)
+)(
+    input  logic [N-1:0] A,  // Gray-coded input
+    output logic [N-1:0] Z   // Binary-coded output
+);
+    for (genvar pos = 0; pos < N; pos++)  // Z[pos] is the XOR of all Gray bits at positions >= pos
+        assign Z[pos] = ^(A >> pos);
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/id_queue.sv b/vendor/pulp-platform/common_cells/src/id_queue.sv
new file mode 100644
index 0000000000..2ba347e30a
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/id_queue.sv
@@ -0,0 +1,419 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// ID Queue
+//
+// In an ID queue, every element has a numeric ID. Among all elements that have the same ID, the ID
+// queue preserves FIFO order.
+//
+// This ID queue implementation allows to either push (through the `inp_*` signals) or pop (through
+// the `oup_*` signals) one element per clock cycle (depending on the _FULL_BW_ operating mode
+// described below). The `inp_` port has priority and grants a request iff the queue is not full. The
+// `oup_` port dequeues an element iff `oup_pop_i` is asserted during an `oup_` handshake;
+// otherwise, it performs a non-destructive read. `oup_data_o` is valid iff `oup_data_valid_o` is
+// asserted during an `oup_` handshake. If `oup_data_valid_o` is not asserted, the queue did not
+// contain an element with the provided ID.
+//
+// The queue can work in two bandwidth modes:
+//  * !FULL_BW: Input and output cannot be performed simultaneously (max bandwidth: 50%).
+//  *  FULL_BW: Input and output can be performed simultaneously and a popped cell can be reused
+//    immediately in the same clock cycle. Area increase typically 5-10%.
+//
+// This ID queue additionally provides the `exists_` port, which searches for an element anywhere in
+// the queue. The comparison performed during the search can be masked: for every bit that is
+// asserted in `exists_mask_i`, the corresponding bit in the queue element and in `exists_data_i`
+// must be equal for a match; the other bits are not compared. If masking is not required, tie
+// `exists_mask_i` to `'1` and the synthesizer should simplify the comparisons to unmasked ones. The
+// `exists_` port operates independently of the `inp_` and `oup_` ports. If the `exists_` port is
+// unused, tie `exists_req_i` to `1'b0` and the synthesizer should remove the internal comparators.
+//
+// This ID queue can store at most `CAPACITY` elements, independent of their ID. Let
+// - C = `CAPACITY`
+// - B = $bits(data_t)
+// - I = 2**`ID_WIDTH`
+// Then
+// - the queue element storage requires O(C * (B + log2(C))) bit
+// - the ID table requires O(H * log2(C)) bit, where H = min(C, I)
+//
+// Maintainers:
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+module id_queue #(
+    parameter int ID_WIDTH  = 0,        // width of an ID in bits; must be >= 1 (see validate_params)
+    parameter int CAPACITY  = 0,        // number of data cells; must be >= 1 (see validate_params)
+    parameter bit FULL_BW   = 0,        // 1: a push and an output access may share a cycle
+    parameter type data_t   = logic,    // type of the stored elements
+    // Dependent parameters, DO NOT OVERRIDE!
+    localparam type id_t    = logic[ID_WIDTH-1:0],
+    localparam type mask_t  = logic[$bits(data_t)-1:0]
+) (
+    input  logic    clk_i,
+    input  logic    rst_ni,
+
+    input  id_t     inp_id_i,
+    input  data_t   inp_data_i,
+    input  logic    inp_req_i,
+    output logic    inp_gnt_o,
+
+    input  data_t   exists_data_i,
+    input  mask_t   exists_mask_i,
+    input  logic    exists_req_i,
+    output logic    exists_o,
+    output logic    exists_gnt_o,
+
+    input  id_t     oup_id_i,
+    input  logic    oup_pop_i,
+    input  logic    oup_req_i,
+    output data_t   oup_data_o,
+    output logic    oup_data_valid_o,
+    output logic    oup_gnt_o
+);
+
+    // Capacity of the head-tail table, which associates an ID with corresponding head and tail
+    // indices.
+    localparam int NIds = 2**ID_WIDTH;
+    localparam int HtCapacity = (NIds <= CAPACITY) ? NIds : CAPACITY;
+    localparam int unsigned HtIdxWidth = cf_math_pkg::idx_width(HtCapacity);
+    localparam int unsigned LdIdxWidth = cf_math_pkg::idx_width(CAPACITY);
+
+    // Type for indexing the head-tail table.
+    typedef logic [HtIdxWidth-1:0] ht_idx_t;
+
+    // Type for indexing the linked data table.
+    typedef logic [LdIdxWidth-1:0] ld_idx_t;
+
+    // Type of an entry in the head-tail table.
+    typedef struct packed {
+        id_t        id;
+        ld_idx_t    head,
+                    tail;
+        logic       free;
+    } head_tail_t;
+
+    // Type of an entry in the linked data table.
+    typedef struct packed {
+        data_t      data;
+        ld_idx_t    next;
+        logic       free;
+    } linked_data_t;
+
+    head_tail_t [HtCapacity-1:0]    head_tail_d,    head_tail_q;
+
+    linked_data_t [CAPACITY-1:0]    linked_data_d,  linked_data_q;
+
+    logic                           full,
+                                    match_in_id_valid,
+                                    match_out_id_valid,
+                                    no_in_id_match,
+                                    no_out_id_match;
+
+    logic [HtCapacity-1:0]         head_tail_free,
+                                    idx_matches_in_id,
+                                    idx_matches_out_id;
+
+    logic [CAPACITY-1:0]            exists_match,
+                                    linked_data_free;
+
+    id_t                            match_in_id, match_out_id;
+
+    ht_idx_t                        head_tail_free_idx,
+                                    match_in_idx,
+                                    match_out_idx;
+
+    ld_idx_t                        linked_data_free_idx,
+                                    oup_data_free_idx;
+
+    logic                           oup_data_popped,
+                                    oup_ht_popped;
+
+    // Find the index in the head-tail table that matches a given ID (free entries never match).
+    for (genvar i = 0; i < HtCapacity; i++) begin: gen_idx_match
+        assign idx_matches_in_id[i] = match_in_id_valid && (head_tail_q[i].id == match_in_id) &&
+                !head_tail_q[i].free;
+        assign idx_matches_out_id[i] = match_out_id_valid && (head_tail_q[i].id == match_out_id) &&
+                !head_tail_q[i].free;
+    end
+    assign no_in_id_match = !(|idx_matches_in_id);
+    assign no_out_id_match = !(|idx_matches_out_id);
+    onehot_to_bin #(
+        .ONEHOT_WIDTH ( HtCapacity )
+    ) i_id_ohb_in (
+        .onehot ( idx_matches_in_id ),
+        .bin    ( match_in_idx      )
+    );
+    onehot_to_bin #(
+        .ONEHOT_WIDTH ( HtCapacity )
+    ) i_id_ohb_out (
+        .onehot ( idx_matches_out_id ),
+        .bin    ( match_out_idx      )
+    );
+
+    // Find the first free index in the head-tail table.
+    for (genvar i = 0; i < HtCapacity; i++) begin: gen_head_tail_free
+        assign head_tail_free[i] = head_tail_q[i].free;
+    end
+    lzc #(
+        .WIDTH ( HtCapacity ),
+        .MODE  ( 0          ) // Start at index 0.
+    ) i_ht_free_lzc (
+        .in_i    ( head_tail_free     ),
+        .cnt_o   ( head_tail_free_idx ),
+        .empty_o (                    )
+    );
+
+    // Find the first free index in the linked data table.
+    for (genvar i = 0; i < CAPACITY; i++) begin: gen_linked_data_free
+        assign linked_data_free[i] = linked_data_q[i].free;
+    end
+    lzc #(
+        .WIDTH ( CAPACITY ),
+        .MODE  ( 0        ) // Start at index 0.
+    ) i_ld_free_lzc (
+        .in_i    ( linked_data_free     ),
+        .cnt_o   ( linked_data_free_idx ),
+        .empty_o (                      )
+    );
+
+    // The queue is full if and only if there are no free items in the linked data structure.
+    assign full = !(|linked_data_free);
+    // Data potentially freed by the output.
+    assign oup_data_free_idx = head_tail_q[match_out_idx].head;
+
+    // Data can be accepted if the data table is not full, or (FULL_BW only) a cell is popped now.
+    assign inp_gnt_o = ~full || (oup_data_popped && FULL_BW);
+    always_comb begin  // defaults first: no lookup active, state held, outputs deasserted
+        match_in_id         = '0;
+        match_out_id        = '0;
+        match_in_id_valid   = 1'b0;
+        match_out_id_valid  = 1'b0;
+        head_tail_d         = head_tail_q;
+        linked_data_d       = linked_data_q;
+        oup_gnt_o           = 1'b0;
+        oup_data_o          = data_t'('0);
+        oup_data_valid_o    = 1'b0;
+        oup_data_popped     = 1'b0;
+        oup_ht_popped       = 1'b0;
+
+        if (!FULL_BW) begin  // either one push or one output access per cycle
+            if (inp_req_i && !full) begin  // a push takes precedence over the output port
+                match_in_id = inp_id_i;
+                match_in_id_valid = 1'b1;
+                // If the ID does not yet exist in the queue, add a new ID entry.
+                if (no_in_id_match) begin
+                    head_tail_d[head_tail_free_idx] = '{
+                        id: inp_id_i,
+                        head: linked_data_free_idx,
+                        tail: linked_data_free_idx,
+                        free: 1'b0
+                    };
+                // Otherwise append it to the existing ID subqueue.
+                end else begin
+                    linked_data_d[head_tail_q[match_in_idx].tail].next = linked_data_free_idx;
+                    head_tail_d[match_in_idx].tail = linked_data_free_idx;
+                end
+                linked_data_d[linked_data_free_idx] = '{
+                    data: inp_data_i,
+                    next: '0,
+                    free: 1'b0
+                };
+            end else if (oup_req_i) begin  // output access reuses the match_in_* lookup
+                match_in_id = oup_id_i;
+                match_in_id_valid = 1'b1;
+                if (!no_in_id_match) begin
+                    oup_data_o = data_t'(linked_data_q[head_tail_q[match_in_idx].head].data);
+                    oup_data_valid_o = 1'b1;
+                    if (oup_pop_i) begin
+                        // Set free bit (bit 0 of the packed struct); other bits are don't care.
+                        linked_data_d[head_tail_q[match_in_idx].head]      = '0;
+                        linked_data_d[head_tail_q[match_in_idx].head][0]   = 1'b1;
+                        if (head_tail_q[match_in_idx].head == head_tail_q[match_in_idx].tail) begin
+                            head_tail_d[match_in_idx] = '{free: 1'b1, default: '0};
+                        end else begin
+                            head_tail_d[match_in_idx].head =
+                                    linked_data_q[head_tail_q[match_in_idx].head].next;
+                        end
+                    end
+                end
+                // Always grant the output request.  If there was no match, the default, invalid entry
+                // will be returned.
+                oup_gnt_o = 1'b1;
+            end
+        end else begin
+            // FULL_BW: the output access is handled first, then a push may reuse what it freed.
+            if (oup_req_i) begin
+                match_out_id = oup_id_i;
+                match_out_id_valid = 1'b1;
+                if (!no_out_id_match) begin
+                    oup_data_o = data_t'(linked_data_q[head_tail_q[match_out_idx].head].data);
+                    oup_data_valid_o = 1'b1;
+                    if (oup_pop_i) begin
+                        oup_data_popped = 1'b1;
+                        // Set free bit (bit 0 of the packed struct); other bits are don't care.
+                        linked_data_d[head_tail_q[match_out_idx].head]      = '0;
+                        linked_data_d[head_tail_q[match_out_idx].head][0]   = 1'b1;
+                        if (head_tail_q[match_out_idx].head
+                                          == head_tail_q[match_out_idx].tail) begin
+                            oup_ht_popped = 1'b1;
+                            head_tail_d[match_out_idx] = '{free: 1'b1, default: '0};
+                        end else begin
+                            head_tail_d[match_out_idx].head =
+                                    linked_data_q[head_tail_q[match_out_idx].head].next;
+                        end
+                    end
+                end
+                // Always grant the output request.  If there was no match, the default, invalid entry
+                // will be returned.
+                oup_gnt_o = 1'b1;
+            end
+            if (inp_req_i && inp_gnt_o) begin
+                match_in_id = inp_id_i;
+                match_in_id_valid = 1'b1;
+                // If the ID does not yet exist in the queue or was just popped, add a new ID entry.
+                if (oup_ht_popped && (oup_id_i==inp_id_i)) begin
+                    // If output data was popped for this ID, which lead the head_tail to be popped,
+                    // then repopulate this head_tail immediately.
+                    head_tail_d[match_out_idx] = '{
+                        id: inp_id_i,
+                        head: oup_data_free_idx,
+                        tail: oup_data_free_idx,
+                        free: 1'b0
+                    };
+                    linked_data_d[oup_data_free_idx] = '{
+                        data: inp_data_i,
+                        next: '0,
+                        free: 1'b0
+                    };
+                end else if (no_in_id_match) begin
+                    // Else, if no head_tail corresponds to the input id.
+                    if (oup_ht_popped) begin  // reuse the head-tail entry and data cell just freed
+                        head_tail_d[match_out_idx] = '{
+                            id: inp_id_i,
+                            head: oup_data_free_idx,
+                            tail: oup_data_free_idx,
+                            free: 1'b0
+                        };
+                        linked_data_d[oup_data_free_idx] = '{
+                            data: inp_data_i,
+                            next: '0,
+                            free: 1'b0
+                        };
+                    end else begin
+                        if (oup_data_popped) begin  // new head-tail entry, reuse the freed data cell
+                          head_tail_d[head_tail_free_idx] = '{
+                            id: inp_id_i,
+                            head: oup_data_free_idx,
+                            tail: oup_data_free_idx,
+                            free: 1'b0
+                          };
+                          linked_data_d[oup_data_free_idx] = '{
+                              data: inp_data_i,
+                              next: '0,
+                              free: 1'b0
+                          };
+                        end else begin  // nothing popped: take the first free entry of both tables
+                            head_tail_d[head_tail_free_idx] = '{
+                              id: inp_id_i,
+                              head: linked_data_free_idx,
+                              tail: linked_data_free_idx,
+                              free: 1'b0
+                            };
+                            linked_data_d[linked_data_free_idx] = '{
+                                data: inp_data_i,
+                                next: '0,
+                                free: 1'b0
+                            };
+                        end
+                    end
+                end else begin
+                    // Otherwise append it to the existing ID subqueue.
+                    if (oup_data_popped) begin  // append using the data cell freed by the pop
+                        linked_data_d[head_tail_q[match_in_idx].tail].next = oup_data_free_idx;
+                        head_tail_d[match_in_idx].tail = oup_data_free_idx;
+                        linked_data_d[oup_data_free_idx] = '{
+                            data: inp_data_i,
+                            next: '0,
+                            free: 1'b0
+                        };
+                    end else begin  // append using the first free data cell
+                        linked_data_d[head_tail_q[match_in_idx].tail].next = linked_data_free_idx;
+                        head_tail_d[match_in_idx].tail = linked_data_free_idx;
+                        linked_data_d[linked_data_free_idx] = '{
+                            data: inp_data_i,
+                            next: '0,
+                            free: 1'b0
+                        };
+                    end
+                end
+            end
+        end
+    end
+
+    // Exists Lookup: masked compare of exists_data_i against every occupied data cell.
+    for (genvar i = 0; i < CAPACITY; i++) begin: gen_lookup
+        mask_t exists_match_bits;
+        for (genvar j = 0; j < $bits(data_t); j++) begin: gen_mask
+            always_comb begin
+                if (linked_data_q[i].free) begin  // free cells never match
+                    exists_match_bits[j] = 1'b0;
+                end else begin
+                    if (!exists_mask_i[j]) begin  // masked-out bit: counts as matching
+                        exists_match_bits[j] = 1'b1;
+                    end else begin
+                        exists_match_bits[j] = (linked_data_q[i].data[j] == exists_data_i[j]);
+                    end
+                end
+            end
+        end
+        assign exists_match[i] = (&exists_match_bits);
+    end
+    always_comb begin  // the exists request is granted whenever it is made
+        exists_gnt_o = 1'b0;
+        exists_o = '0;
+        if (exists_req_i) begin
+            exists_gnt_o = 1'b1;
+            exists_o = (|exists_match);
+        end
+    end
+
+    // Registers
+    for (genvar i = 0; i < HtCapacity; i++) begin: gen_ht_ffs
+        always_ff @(posedge clk_i, negedge rst_ni) begin
+            if (!rst_ni) begin
+                head_tail_q[i] <= '{free: 1'b1, default: '0};
+            end else begin
+                head_tail_q[i] <= head_tail_d[i];
+            end
+        end
+    end
+    for (genvar i = 0; i < CAPACITY; i++) begin: gen_data_ffs
+        always_ff @(posedge clk_i, negedge rst_ni) begin
+            if (!rst_ni) begin
+                // Set free bit (bit 0 of the packed struct); all other bits are don't care.
+                linked_data_q[i]    <= '0;
+                linked_data_q[i][0] <= 1'b1;
+            end else begin
+                linked_data_q[i]    <= linked_data_d[i];
+            end
+        end
+    end
+
+    // Validate parameters.
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: validate_params
+        assert (ID_WIDTH >= 1)
+            else $fatal(1, "The ID must at least be one bit wide!");
+        assert (CAPACITY >= 1)
+            else $fatal(1, "The queue must have capacity of at least one entry!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/isochronous_4phase_handshake.sv b/vendor/pulp-platform/common_cells/src/isochronous_4phase_handshake.sv
new file mode 100644
index 0000000000..de89bd2197
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/isochronous_4phase_handshake.sv
@@ -0,0 +1,81 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+/// 4-phase handshake between isochronous clock domains
+/// (i.e., clock domains which operate on an integer multiple of each other).
+///
+/// The internals of this module are similar to a clock-domain crossing except that
+/// they do not synchronize the handshake signals as signals can not become metastable (covered by STA).
+/// The upstream circuit will only handshake iff the downstream circuit handshaked.
+///
+/// ## Optionally Passing of Data
+///
+/// If the passing of data is necessary this should be done outside the module, for example:
+/// ```
+/// `FFLNR(dst_data_o, src_data_i, (src_valid_i && src_ready_o), src_clk_i)
+/// ```
+///
+/// This module differs from `isochronous_spill_register` in that it doesn't buffer any data
+/// and only toggles the source handshake once the destination handshake has been toggled.
+///
+/// # Restrictions
+///
+/// Source and destination clock domains must be an integer multiple of each other and
+/// all timing-paths need to be covered by STA. For example a recommended SDC would be:
+///
+/// `create_generated_clock dst_clk_i -name dst_clk  -source src_clk_i -divide_by 2`
+///
+/// There are _no_ restrictions on which clock domain should be the faster, any integer
+/// ratio will work.
+
+`include "common_cells/registers.svh"
+
+module isochronous_4phase_handshake (
+  input  logic src_clk_i,    // clock of the source domain
+  input  logic src_rst_ni,   // reset of the source domain (active low, per the `_ni` suffix)
+  input  logic src_valid_i,  // source offers a handshake
+  output logic src_ready_o,  // source handshake is accepted (no request outstanding)
+  input  logic dst_clk_i,    // clock of the destination domain
+  input  logic dst_rst_ni,   // reset of the destination domain (active low, per the `_ni` suffix)
+  output logic dst_valid_o,  // a request is pending at the destination
+  input  logic dst_ready_i   // destination accepts the pending request
+);
+
+  logic src_req_q, src_ack_q;
+  logic dst_req_q, dst_ack_q;
+
+  // source is making a request: toggle the request flag on every source handshake
+  `FFLARN(src_req_q, ~src_req_q, (src_valid_i && src_ready_o), 1'b0, src_clk_i, src_rst_ni)
+  // "synchronize" the acknowledge into the sending clock-domain
+  `FFARN(src_ack_q, dst_ack_q, 1'b0, src_clk_i, src_rst_ni)
+  // source is ready if there is no outstanding (not yet acknowledged) request
+  assign src_ready_o = (src_req_q == src_ack_q);
+
+  // down-stream circuit is acknowledging the handshake: toggle the acknowledge flag
+  `FFLARN(dst_ack_q, ~dst_ack_q, (dst_valid_o && dst_ready_i), 1'b0, dst_clk_i, dst_rst_ni)
+  // "synchronize" the request into the receiving clock domain
+  `FFARN(dst_req_q, src_req_q, 1'b0, dst_clk_i, dst_rst_ni)
+  // destination is valid if we didn't yet get acknowledge
+  assign dst_valid_o = (dst_req_q != dst_ack_q);
+
+ // pragma translate_off
+ // stability guarantees; disabled only while the respective active-low reset is asserted
+  `ifndef VERILATOR
+  assert property (@(posedge src_clk_i) disable iff (!src_rst_ni)
+    (src_valid_i && !src_ready_o |=> $stable(src_valid_i))) else $error("src_valid_i is unstable");
+  assert property (@(posedge dst_clk_i) disable iff (!dst_rst_ni)
+    (dst_valid_o && !dst_ready_i |=> $stable(dst_valid_o))) else $error("dst_valid_o is unstable");
+  `endif
+  // pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/isochronous_spill_register.sv b/vendor/pulp-platform/common_cells/src/isochronous_spill_register.sv
new file mode 100644
index 0000000000..35c9d6d728
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/isochronous_spill_register.sv
@@ -0,0 +1,111 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+/// A register with handshakes that completely cuts any combinatorial paths
+/// between the input and output in isochronous clock domains.
+///
+/// > Definition of isochronous: In telecommunication, an isochronous signal is a signal
+/// > in which the time interval separating any two significant instants is equal to the
+/// > unit interval or a multiple of the unit interval.
+///
+/// The source and destination clock domains must be derived from the same clock
+/// but can vary in frequency by a constant factor (e.g., double the frequency).
+///
+/// The module is basically a two deep dual-clock fifo with read and write pointers
+/// in different clock domains. As we know the static timing relationship between the
+/// clock domains we can rely on static timing analysis (STA) to get the sampling windows
+/// right and therefore don't need any synchronization.
+///
+/// # Restrictions
+///
+/// Source and destination clock domains must be an integer multiple of each other and
+/// all timing-paths need to be covered by STA. For example a recommended SDC would be:
+///
+/// `create_generated_clock dst_clk_i -name dst_clk  -source src_clk_i -divide_by 2`
+///
+/// There are _no_ restrictions on which clock domain should be the faster, any integer
+/// ratio will work.
+module isochronous_spill_register #(
+  /// Data type of spill register.
+  parameter type T      = logic,
+  /// Make this spill register transparent.
+  parameter bit  Bypass = 1'b0
+) (
+  /// Clock of source clock domain.
+  input  logic src_clk_i,
+  /// Active low async reset in source domain.
+  input  logic src_rst_ni,
+  /// Source input data is valid.
+  input  logic src_valid_i,
+  /// Source is ready to accept.
+  output logic src_ready_o,
+  /// Source input data.
+  input  T     src_data_i,
+  /// Clock of destination clock domain.
+  input  logic dst_clk_i,
+  /// Active low async reset in destination domain.
+  input  logic dst_rst_ni,
+  /// Destination output data is valid.
+  output logic dst_valid_o,
+  /// Destination is ready to accept.
+  input  logic dst_ready_i,
+  /// Destination output data.
+  output T     dst_data_o
+);
+  // Don't generate the spill register: wire source and destination together combinationally.
+  if (Bypass) begin : gen_bypass
+    assign dst_valid_o = src_valid_i;
+    assign src_ready_o = dst_ready_i;
+    assign dst_data_o  = src_data_i;
+  // Generate the spill register
+  end else begin : gen_isochronous_spill_register
+    /// Read/write pointer are one bit wider than necessary.
+    /// We implicitly capture the full and empty state with the second bit:
+    /// If all but the topmost bit of `rd_pointer_q` and `wr_pointer_q` agree, the
+    /// FIFO is in a critical state. If the topmost bit is equal, the FIFO is
+    /// empty, otherwise it is full.
+    logic [1:0] rd_pointer_q, wr_pointer_q;
+    // Advance write pointer if we pushed a new item into the FIFO. (Source clock domain)
+    `FFLARN(wr_pointer_q, wr_pointer_q+1, (src_valid_i && src_ready_o), '0, src_clk_i, src_rst_ni)
+    // Advance read pointer if downstream consumed an item. (Destination clock domain)
+    `FFLARN(rd_pointer_q, rd_pointer_q+1, (dst_valid_o && dst_ready_i), '0, dst_clk_i, dst_rst_ni)
+    // Two-entry storage, written on a source handshake and addressed by the pointer LSBs.
+    T [1:0] mem_d, mem_q;
+    `FFLNR(mem_q, mem_d, (src_valid_i && src_ready_o), src_clk_i)
+    always_comb begin
+      mem_d = mem_q;
+      mem_d[wr_pointer_q[0]] = src_data_i;
+    end
+    // Ready unless full, i.e. unless the pointers differ in the topmost bit only.
+    assign src_ready_o = (rd_pointer_q ^ wr_pointer_q) != 2'b10;
+    // Valid unless empty, i.e. unless both pointers are identical.
+    assign dst_valid_o = (rd_pointer_q ^ wr_pointer_q) != '0;
+    assign dst_data_o = mem_q[rd_pointer_q[0]];
+  end
+
+  // pragma translate_off
+  // stability guarantees; resets are active-low, so checks are disabled only while in reset
+  `ifndef VERILATOR
+  assert property (@(posedge src_clk_i) disable iff (!src_rst_ni)
+    (src_valid_i && !src_ready_o |=> $stable(src_valid_i))) else $error("src_valid_i is unstable");
+  assert property (@(posedge src_clk_i) disable iff (!src_rst_ni)
+    (src_valid_i && !src_ready_o |=> $stable(src_data_i))) else $error("src_data_i is unstable");
+  assert property (@(posedge dst_clk_i) disable iff (!dst_rst_ni)
+    (dst_valid_o && !dst_ready_i |=> $stable(dst_valid_o))) else $error("dst_valid_o is unstable");
+  assert property (@(posedge dst_clk_i) disable iff (!dst_rst_ni)
+    (dst_valid_o && !dst_ready_i |=> $stable(dst_data_o))) else $error("dst_data_o is unstable");
+  `endif
+  // pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/lfsr.sv b/vendor/pulp-platform/common_cells/src/lfsr.sv
new file mode 100644
index 0000000000..aae2e2df83
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/lfsr.sv
@@ -0,0 +1,315 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 26.04.2019
+//
+// Description: This is a parametric LFSR with precomputed coefficients for
+// LFSR lengths from 4 to 64bit.
+
+// Additional block cipher layers can be instantiated to non-linearly transform
+// the pseudo-random LFSR sequence at the output, and hence break the shifting
+// patterns. The additional cipher layers can only be used for an LFSR width
+// of 64bit, since the block cipher has been designed for that block length.
+
+module lfsr #(
+  parameter int unsigned          LfsrWidth     = 64,   // [4,64]
+  parameter int unsigned          OutWidth      = 8,    // [1,LfsrWidth]
+  parameter logic [LfsrWidth-1:0] RstVal        = '1,   // [1,2^LfsrWidth-1]
+  // 0: disabled, the present cipher uses 31, but just a few layers (1-3) are enough
+  // to break linear shifting patterns
+  parameter int unsigned          CipherLayers  = 0,
+  parameter bit                   CipherReg     = 1'b1  // additional output reg after cipher
+) (
+  input  logic                 clk_i,
+  input  logic                 rst_ni,
+  input  logic                 en_i,
+  output logic [OutWidth-1:0]  out_o
+);
+
+// Galois LFSR feedback masks
+// Automatically generated with get_lfsr_masks.py
+// Masks are from https://users.ece.cmu.edu/~koopman/lfsr/
+localparam logic [63:0] Masks [4:64] = '{64'hC,
+                                         64'h1E,
+                                         64'h39,
+                                         64'h7E,
+                                         64'hFA,
+                                         64'h1FD,
+                                         64'h3FC,
+                                         64'h64B,
+                                         64'hD8F,
+                                         64'h1296,
+                                         64'h2496,
+                                         64'h4357,
+                                         64'h8679,
+                                         64'h1030E,
+                                         64'h206CD,
+                                         64'h403FE,
+                                         64'h807B8,
+                                         64'h1004B2,
+                                         64'h2006A8,
+                                         64'h4004B2,
+                                         64'h800B87,
+                                         64'h10004F3,
+                                         64'h200072D,
+                                         64'h40006AE,
+                                         64'h80009E3,
+                                         64'h10000583,
+                                         64'h20000C92,
+                                         64'h400005B6,
+                                         64'h80000EA6,
+                                         64'h1000007A3,
+                                         64'h200000ABF,
+                                         64'h400000842,
+                                         64'h80000123E,
+                                         64'h100000074E,
+                                         64'h2000000AE9,
+                                         64'h400000086A,
+                                         64'h8000001213,
+                                         64'h1000000077E,
+                                         64'h2000000123B,
+                                         64'h40000000877,
+                                         64'h8000000108D,
+                                         64'h100000000AE9,
+                                         64'h200000000E9F,
+                                         64'h4000000008A6,
+                                         64'h80000000191E,
+                                         64'h100000000090E,
+                                         64'h2000000000FB3,
+                                         64'h4000000000D7D,
+                                         64'h80000000016A5,
+                                         64'h10000000000B4B,
+                                         64'h200000000010AF,
+                                         64'h40000000000DDE,
+                                         64'h8000000000181A,
+                                         64'h100000000000B65,
+                                         64'h20000000000102D,
+                                         64'h400000000000CD5,
+                                         64'h8000000000024C1,
+                                         64'h1000000000000EF6,
+                                         64'h2000000000001363,
+                                         64'h4000000000000FCD,
+                                         64'h80000000000019E2};
+
+// this S-box and permutation P has been taken from the Present Cipher,
+// a super lightweight block cipher. use the cipher layers to add additional
+// non-linearity to the LFSR output. note one layer does not fully correspond
+// to the present cipher round, since the key and rekeying function is not applied here.
+//
+// See also:
+// "PRESENT: An Ultra-Lightweight Block Cipher", A. Bogdanov et al., Ches 2007
+// http://www.lightweightcrypto.org/present/present_ches2007.pdf
+
+// this is the sbox from the present cipher
+localparam logic[15:0][3:0] Sbox4 = {4'h2, 4'h1, 4'h7, 4'h4,
+                                     4'h8, 4'hF, 4'hE, 4'h3,
+                                     4'hD, 4'hA, 4'h0, 4'h9,
+                                     4'hB, 4'h6, 4'h5, 4'hC };
+
+// these are the permutation indices of the present cipher
+localparam logic[63:0][5:0] Perm = {6'd63, 6'd47, 6'd31, 6'd15, 6'd62, 6'd46, 6'd30, 6'd14,
+                                    6'd61, 6'd45, 6'd29, 6'd13, 6'd60, 6'd44, 6'd28, 6'd12,
+                                    6'd59, 6'd43, 6'd27, 6'd11, 6'd58, 6'd42, 6'd26, 6'd10,
+                                    6'd57, 6'd41, 6'd25, 6'd09, 6'd56, 6'd40, 6'd24, 6'd08,
+                                    6'd55, 6'd39, 6'd23, 6'd07, 6'd54, 6'd38, 6'd22, 6'd06,
+                                    6'd53, 6'd37, 6'd21, 6'd05, 6'd52, 6'd36, 6'd20, 6'd04,
+                                    6'd51, 6'd35, 6'd19, 6'd03, 6'd50, 6'd34, 6'd18, 6'd02,
+                                    6'd49, 6'd33, 6'd17, 6'd01, 6'd48, 6'd32, 6'd16, 6'd00};
+
+
+function automatic logic [63:0] sbox4_layer(logic [63:0] in);
+  logic [63:0] out;
+  // Substitutes each of the 16 nibbles of `in` through the Sbox4 table. Unrolled form of:
+  //   for (j = 0; j < 16; j++) out[j*4 +: 4] = Sbox4[in[j*4 +: 4]];  (simulates much faster)
+  out[0*4  +: 4] = Sbox4[in[0*4  +: 4]];
+  out[1*4  +: 4] = Sbox4[in[1*4  +: 4]];
+  out[2*4  +: 4] = Sbox4[in[2*4  +: 4]];
+  out[3*4  +: 4] = Sbox4[in[3*4  +: 4]];
+
+  out[4*4  +: 4] = Sbox4[in[4*4  +: 4]];
+  out[5*4  +: 4] = Sbox4[in[5*4  +: 4]];
+  out[6*4  +: 4] = Sbox4[in[6*4  +: 4]];
+  out[7*4  +: 4] = Sbox4[in[7*4  +: 4]];
+
+  out[8*4  +: 4] = Sbox4[in[8*4  +: 4]];
+  out[9*4  +: 4] = Sbox4[in[9*4  +: 4]];
+  out[10*4 +: 4] = Sbox4[in[10*4 +: 4]];
+  out[11*4 +: 4] = Sbox4[in[11*4 +: 4]];
+
+  out[12*4 +: 4] = Sbox4[in[12*4 +: 4]];
+  out[13*4 +: 4] = Sbox4[in[13*4 +: 4]];
+  out[14*4 +: 4] = Sbox4[in[14*4 +: 4]];
+  out[15*4 +: 4] = Sbox4[in[15*4 +: 4]];
+  return out;
+endfunction : sbox4_layer
+
+function automatic logic [63:0] perm_layer(logic [63:0] in);
+  logic [63:0] out;
+  // Bit permutation: input bit j is routed to output bit Perm[j]. Unrolled form of:
+  //   for (j = 0; j < 64; j++) out[Perm[j]] = in[j];  (simulates much faster than the loop)
+  out[Perm[0]] = in[0];
+  out[Perm[1]] = in[1];
+  out[Perm[2]] = in[2];
+  out[Perm[3]] = in[3];
+  out[Perm[4]] = in[4];
+  out[Perm[5]] = in[5];
+  out[Perm[6]] = in[6];
+  out[Perm[7]] = in[7];
+  out[Perm[8]] = in[8];
+  out[Perm[9]] = in[9];
+
+  out[Perm[10]] = in[10];
+  out[Perm[11]] = in[11];
+  out[Perm[12]] = in[12];
+  out[Perm[13]] = in[13];
+  out[Perm[14]] = in[14];
+  out[Perm[15]] = in[15];
+  out[Perm[16]] = in[16];
+  out[Perm[17]] = in[17];
+  out[Perm[18]] = in[18];
+  out[Perm[19]] = in[19];
+
+  out[Perm[20]] = in[20];
+  out[Perm[21]] = in[21];
+  out[Perm[22]] = in[22];
+  out[Perm[23]] = in[23];
+  out[Perm[24]] = in[24];
+  out[Perm[25]] = in[25];
+  out[Perm[26]] = in[26];
+  out[Perm[27]] = in[27];
+  out[Perm[28]] = in[28];
+  out[Perm[29]] = in[29];
+
+  out[Perm[30]] = in[30];
+  out[Perm[31]] = in[31];
+  out[Perm[32]] = in[32];
+  out[Perm[33]] = in[33];
+  out[Perm[34]] = in[34];
+  out[Perm[35]] = in[35];
+  out[Perm[36]] = in[36];
+  out[Perm[37]] = in[37];
+  out[Perm[38]] = in[38];
+  out[Perm[39]] = in[39];
+
+  out[Perm[40]] = in[40];
+  out[Perm[41]] = in[41];
+  out[Perm[42]] = in[42];
+  out[Perm[43]] = in[43];
+  out[Perm[44]] = in[44];
+  out[Perm[45]] = in[45];
+  out[Perm[46]] = in[46];
+  out[Perm[47]] = in[47];
+  out[Perm[48]] = in[48];
+  out[Perm[49]] = in[49];
+
+  out[Perm[50]] = in[50];
+  out[Perm[51]] = in[51];
+  out[Perm[52]] = in[52];
+  out[Perm[53]] = in[53];
+  out[Perm[54]] = in[54];
+  out[Perm[55]] = in[55];
+  out[Perm[56]] = in[56];
+  out[Perm[57]] = in[57];
+  out[Perm[58]] = in[58];
+  out[Perm[59]] = in[59];
+
+  out[Perm[60]] = in[60];
+  out[Perm[61]] = in[61];
+  out[Perm[62]] = in[62];
+  out[Perm[63]] = in[63];
+  return out;
+endfunction : perm_layer
+
+////////////////////////////////////////////////////////////////////////
+// lfsr
+////////////////////////////////////////////////////////////////////////
+
+logic [LfsrWidth-1:0] lfsr_d, lfsr_q;  // next / current LFSR state
+assign lfsr_d =  // en_i: shift right, XOR in the width's mask if lfsr_q[0] was set; else hold
+  (en_i) ? (lfsr_q>>1) ^ ({LfsrWidth{lfsr_q[0]}} & Masks[LfsrWidth][LfsrWidth-1:0]) : lfsr_q;
+// State register: loads RstVal while rst_ni is low, otherwise takes lfsr_d on every clock edge.
+always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+  //$display("%b %h", en_i, lfsr_d);
+  if (!rst_ni) begin
+    lfsr_q <= LfsrWidth'(RstVal);
+  end else begin
+    lfsr_q <= lfsr_d;
+  end
+end
+
+////////////////////////////////////////////////////////////////////////
+// block cipher layers
+////////////////////////////////////////////////////////////////////////
+
+if (CipherLayers > unsigned'(0)) begin : g_cipher_layers
+  logic [63:0] ciph_layer;  // LFSR state after CipherLayers rounds of S-box + permutation
+  localparam int unsigned NumRepl = ((64+LfsrWidth)/LfsrWidth);  // copies of lfsr_q to replicate
+
+  always_comb begin : p_ciph_layer
+    automatic logic [63:0] tmp;
+    tmp = 64'({NumRepl{lfsr_q}});  // replicate the state and cast the result to 64 bit
+    for(int unsigned k = 0; k < CipherLayers; k++) begin
+      tmp = perm_layer(sbox4_layer(tmp));  // one layer: S-box substitution, then permutation
+    end
+    ciph_layer = tmp;
+  end
+
+  // additional output reg after cipher
+  if (CipherReg) begin : g_cipher_reg
+    logic [OutWidth-1:0] out_d, out_q;
+
+    assign out_d = (en_i) ? ciph_layer[OutWidth-1:0] : out_q;  // update only while enabled
+    assign out_o = out_q[OutWidth-1:0];
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+      if (!rst_ni) begin
+        out_q <= '0;
+      end else begin
+        out_q <= out_d;
+      end
+    end
+  // no outreg: the cipher output drives out_o combinationally
+  end else begin : g_no_out_reg
+    assign out_o  = ciph_layer[OutWidth-1:0];
+  end
+
+// no block cipher: out_o is the low OutWidth bits of the raw LFSR state
+end else begin : g_no_cipher_layers
+  assign out_o    = lfsr_q[OutWidth-1:0];
+end
+
+////////////////////////////////////////////////////////////////////////
+// assertions
+////////////////////////////////////////////////////////////////////////
+
+// pragma translate_off
+initial begin
+  // these are the LUT limits
+  assert(OutWidth <= LfsrWidth) else
+    $fatal(1,"OutWidth must be smaller equal the LfsrWidth.");
+  assert(RstVal > unsigned'(0)) else
+    $fatal(1,"RstVal must be nonzero.");
+  assert((LfsrWidth >= $low(Masks)) && (LfsrWidth <= $high(Masks))) else
+    $fatal(1,"Unsupported LfsrWidth.");
+  assert(Masks[LfsrWidth][LfsrWidth-1]) else
+    $fatal(1, "LFSR mask is not correct. The MSB must be 1." );
+  assert((CipherLayers > 0) && (LfsrWidth == 64) || (CipherLayers == 0)) else
+    $fatal(1, "Use additional cipher layers only in conjunction with an LFSR width of 64 bit." );
+end
+
+`ifndef VERILATOR
+  all_zero: assert property (
+    @(posedge clk_i) disable iff (!rst_ni) en_i |-> lfsr_d)
+      else $fatal(1,"Lfsr must not be all-zero.");
+`endif
+// pragma translate_on
+
+endmodule // lfsr
diff --git a/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv b/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv
new file mode 100644
index 0000000000..3fc93c7710
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv
@@ -0,0 +1,68 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba, ETH Zurich
+// Date: 5.11.2018
+// Description: 16-bit LFSR
+
+// --------------
+// 16-bit LFSR
+// --------------
+//
+// Description: Shift register
+//
+module lfsr_16bit #(
+    parameter logic [15:0] SEED  = 16'b0,  // reset value of the shift register
+    parameter int unsigned WIDTH = 16      // width of the one-hot output, at most 16
+)(
+    input  logic                      clk_i,
+    input  logic                      rst_ni,          // asynchronous, active-low reset
+    input  logic                      en_i,            // advance the LFSR by one step
+    output logic [WIDTH-1:0]          refill_way_oh,   // one-hot decode of the low state bits
+    output logic [$clog2(WIDTH)-1:0]  refill_way_bin   // low state bits in binary
+);
+
+    localparam int unsigned LogWidth = $clog2(WIDTH);
+
+    logic [15:0] shift_d, shift_q;
+
+    // Next-state and output logic; both outputs are derived from the current state shift_q.
+    always_comb begin
+        // Feedback is the XNOR of taps 15, 12, 5 and 1, so an all-zero state shifts in a 1.
+        automatic logic shift_in;
+        shift_in = !(shift_q[15] ^ shift_q[12] ^ shift_q[5] ^ shift_q[1]);
+
+        shift_d = shift_q;
+
+        if (en_i)
+            shift_d = {shift_q[14:0], shift_in};
+
+        // output assignment
+        refill_way_oh = 'b0;
+        refill_way_oh[shift_q[LogWidth-1:0]] = 1'b1;
+        refill_way_bin = shift_q;  // implicitly truncated to the width of refill_way_bin
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+        if(~rst_ni) begin
+            shift_q <= SEED;
+        end else begin
+            shift_q <= shift_d;
+        end
+    end
+
+    //pragma translate_off
+    initial begin
+        assert (WIDTH <= 16)
+            else $fatal(1, "WIDTH needs to be at most 16 because of the 16-bit LFSR");
+    end
+    //pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv b/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
new file mode 100644
index 0000000000..60fdf19f7f
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Igor Loi - University of Bologna
+// Author: Florian Zaruba, ETH Zurich
+// Date: 12.11.2017
+// Description: 8-bit LFSR
+
+/// 8 bit Linear Feedback Shift register
+module lfsr_8bit #(
+  parameter logic        [7:0] SEED  = 8'b0,  // reset value of the shift register
+  parameter int unsigned       WIDTH = 8      // width of the one-hot output, at most 8
+) (
+  input  logic                     clk_i,
+  input  logic                     rst_ni,          // asynchronous, active-low reset
+  input  logic                     en_i,            // advance the LFSR by one step
+  output logic [        WIDTH-1:0] refill_way_oh,   // one-hot decode of the low state bits
+  output logic [$clog2(WIDTH)-1:0] refill_way_bin   // low state bits in binary
+);
+
+  localparam int unsigned LogWidth = $clog2(WIDTH);
+
+  logic [7:0] shift_d, shift_q;
+
+  always_comb begin
+    // Feedback is the XNOR of taps 7, 3, 2 and 1, so an all-zero state shifts in a 1.
+    automatic logic shift_in;
+    shift_in = !(shift_q[7] ^ shift_q[3] ^ shift_q[2] ^ shift_q[1]);
+
+    shift_d = shift_q;
+
+    if (en_i) shift_d = {shift_q[6:0], shift_in};
+
+    // output assignment, derived from the current state shift_q
+    refill_way_oh = 'b0;
+    refill_way_oh[shift_q[LogWidth - 1:0]] = 1'b1;
+    refill_way_bin = shift_q;  // implicitly truncated to the width of refill_way_bin
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+    if (~rst_ni) begin
+      shift_q <= SEED;
+    end else begin
+      shift_q <= shift_d;
+    end
+  end
+
+  //pragma translate_off
+  initial begin
+    assert (WIDTH <= 8) else $fatal(1, "WIDTH needs to be at most 8 because of the 8-bit LFSR");
+  end
+  //pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/lzc.sv b/vendor/pulp-platform/common_cells/src/lzc.sv
new file mode 100644
index 0000000000..424eb2ef62
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/lzc.sv
@@ -0,0 +1,112 @@
+// Copyright (c) 2018 - 2019 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+
+/// A trailing zero counter / leading zero counter.
+/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
+/// Set MODE to 1 for leading zero counter  => cnt_o is the number of leading zeros  (from the MSB)
+/// If the input does not contain a one (i.e., it is all-zero), `empty_o` is asserted; see the first
+/// line below, where `cnt_o` holds the maximum number of zeros - 1. For example:
+///   in_i = 000_0000, empty_o = 1, cnt_o = 6 (mode = 0)
+///   in_i = 000_0001, empty_o = 0, cnt_o = 0 (mode = 0)
+///   in_i = 000_1000, empty_o = 0, cnt_o = 3 (mode = 0)
+/// Furthermore, this unit contains a more efficient implementation for Verilator (simulation only).
+/// This speeds up simulation significantly.
+module lzc #(
+  /// The width of the input vector.
+  parameter int unsigned WIDTH = 2,
+  /// Mode selection: 0 -> trailing zero, 1 -> leading zero
+  parameter bit          MODE  = 1'b0,
+  /// Dependent parameter. Do **not** change!
+  ///
+  /// Width of the output signal with the zero count.
+  parameter int unsigned CNT_WIDTH = cf_math_pkg::idx_width(WIDTH)
+) (
+  /// Input vector to be counted.
+  input  logic [WIDTH-1:0]     in_i,
+  /// Count of the leading / trailing zeros.
+  output logic [CNT_WIDTH-1:0] cnt_o,
+  /// Counter is empty: Asserted if all bits in in_i are zero.
+  output logic                 empty_o
+);
+
+  if (WIDTH == 1) begin : gen_degenerate_lzc
+    // Single input bit: count and empty flag are both the inverted input.
+    assign cnt_o[0] = !in_i[0];
+    assign empty_o = !in_i[0];
+
+  end else begin : gen_lzc
+
+    localparam int unsigned NumLevels = $clog2(WIDTH);  // depth of the reduction tree
+
+    // pragma translate_off
+    initial begin
+      assert(WIDTH > 0) else $fatal(1, "input must be at least one bit wide");
+    end
+    // pragma translate_on
+
+    logic [WIDTH-1:0][NumLevels-1:0] index_lut;  // index_lut[j] holds the constant j
+    logic [2**NumLevels-1:0] sel_nodes;  // per tree node (root at [0]): subtree contains a one
+    logic [2**NumLevels-1:0][NumLevels-1:0] index_nodes;  // per tree node: index picked below it
+
+    logic [WIDTH-1:0] in_tmp;  // in_i, bit-reversed when MODE is set
+
+    // reverse vector if required
+    always_comb begin : flip_vector
+      for (int unsigned i = 0; i < WIDTH; i++) begin
+        in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
+      end
+    end
+
+    for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
+      assign index_lut[j] = (NumLevels)'(unsigned'(j));
+    end
+
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : g_levels
+      if (unsigned'(level) == NumLevels - 1) begin : g_last_level
+        for (genvar k = 0; k < 2 ** level; k++) begin : g_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(k) * 2 < WIDTH - 1) begin : g_reduce
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2] | in_tmp[k * 2 + 1];
+            assign index_nodes[2 ** level - 1 + k] = (in_tmp[k * 2] == 1'b1)
+              ? index_lut[k * 2] :
+                index_lut[k * 2 + 1];
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(k) * 2 == WIDTH - 1) begin : g_base
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2];
+            assign index_nodes[2 ** level - 1 + k] = index_lut[k * 2];
+          end
+          // if index is out of range
+          if (unsigned'(k) * 2 > WIDTH - 1) begin : g_out_of_range
+            assign sel_nodes[2 ** level - 1 + k] = 1'b0;
+            assign index_nodes[2 ** level - 1 + k] = '0;
+          end
+        end
+      end else begin : g_not_last_level
+        for (genvar l = 0; l < 2 ** level; l++) begin : g_level
+          assign sel_nodes[2 ** level - 1 + l] =
+              sel_nodes[2 ** (level + 1) - 1 + l * 2] | sel_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+          assign index_nodes[2 ** level - 1 + l] = (sel_nodes[2 ** (level + 1) - 1 + l * 2] == 1'b1)
+            ? index_nodes[2 ** (level + 1) - 1 + l * 2] :
+              index_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+        end
+      end
+    end
+    // The root node (index 0) carries the final index and the "any bit set" flag.
+    assign cnt_o = NumLevels > unsigned'(0) ? index_nodes[0] : {($clog2(WIDTH)) {1'b0}};
+    assign empty_o = NumLevels > unsigned'(0) ? ~sel_nodes[0] : ~(|in_i);
+
+  end : gen_lzc
+
+endmodule : lzc
diff --git a/vendor/pulp-platform/common_cells/src/max_counter.sv b/vendor/pulp-platform/common_cells/src/max_counter.sv
new file mode 100644
index 0000000000..0081fab163
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/max_counter.sv
@@ -0,0 +1,77 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Up/down counter that tracks its maximum value
+
+module max_counter #(
+    parameter int unsigned WIDTH = 4
+) (
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             clear_i,       // synchronous clear for counter
+    input  logic             clear_max_i,   // synchronous clear for maximum value
+    input  logic             en_i,          // enable the counter
+    input  logic             load_i,        // load a new value
+    input  logic             down_i,        // downcount, default is up
+    input  logic [WIDTH-1:0] delta_i,       // counter delta
+    input  logic [WIDTH-1:0] d_i,           // forwarded to the d_i port of delta_counter
+    output logic [WIDTH-1:0] q_o,           // current counter value from delta_counter
+    output logic [WIDTH-1:0] max_o,         // maximum so far, including the current q_o
+    output logic             overflow_o,    // overflow flag of delta_counter
+    output logic             overflow_max_o // registered: overflow_o was seen with a new maximum
+);
+    logic [WIDTH-1:0] max_d, max_q;
+    logic overflow_max_d, overflow_max_q;
+    // The counting itself is delegated to delta_counter; ports are connected by name.
+    delta_counter #(
+        .WIDTH           (WIDTH),
+        .STICKY_OVERFLOW (1'b1)
+    ) i_counter (
+        .clk_i,
+        .rst_ni,
+        .clear_i,
+        .en_i,
+        .load_i,
+        .down_i,
+        .delta_i,
+        .d_i,
+        .q_o,
+        .overflow_o
+    );
+    // Maximum tracking: clear_max_i takes priority over recording a new maximum.
+    always_comb begin
+        max_d = max_q;
+        max_o = max_q;
+        overflow_max_d = overflow_max_q;
+        if (clear_max_i) begin
+            max_d = '0;
+            overflow_max_d = 1'b0;
+        end else if (q_o > max_q) begin
+            max_d = q_o;
+            max_o = q_o;  // the new maximum is visible on max_o in the same cycle
+            if (overflow_o) begin
+                overflow_max_d = 1'b1;
+            end
+        end
+    end
+
+    assign overflow_max_o = overflow_max_q;
+
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+        if (!rst_ni) begin
+           max_q <= '0;
+           overflow_max_q <= 1'b0;
+        end else begin
+           max_q <= max_d;
+           overflow_max_q <= overflow_max_d;
+        end
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/mv_filter.sv b/vendor/pulp-platform/common_cells/src/mv_filter.sv
new file mode 100644
index 0000000000..ddb81f0774
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/mv_filter.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+module mv_filter #(
+    parameter int unsigned WIDTH     = 4,   // width of the sample counter in bits
+    parameter int unsigned THRESHOLD = 10   // compared against the counter as THRESHOLD[WIDTH-1:0]
+)(
+    input  logic clk_i,
+    input  logic rst_ni,
+    input  logic sample_i,   // together with d_i: count this cycle
+    input  logic clear_i,    // synchronous clear of counter and output
+    input  logic d_i,        // input bit to be filtered
+    output logic q_o         // goes high after the counter reached the threshold, until clear_i
+);
+    logic [WIDTH-1:0] counter_q, counter_d;
+    logic d, q;
+
+    assign q_o = q;
+
+    always_comb begin
+        counter_d = counter_q;
+        d = q; // by default the output keeps its value
+
+        if (counter_q >= THRESHOLD[WIDTH-1:0]) begin
+            d = 1'b1; // threshold reached: set the output, the counter stops incrementing
+        end else if (sample_i && d_i) begin
+            counter_d = counter_q + 1;
+        end
+
+        // sync reset: takes precedence over counting and over setting the output
+        if (clear_i) begin
+            counter_d = '0;
+            d = 1'b0;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            counter_q <= '0;
+            q         <= 1'b0;
+        end else begin
+            counter_q <= counter_d;
+            q         <= d;
+        end
+    end
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/onehot_to_bin.sv b/vendor/pulp-platform/common_cells/src/onehot_to_bin.sv
new file mode 100644
index 0000000000..0c33f084b9
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/onehot_to_bin.sv
@@ -0,0 +1,38 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Franceco Conti <fconti@iis.ee.ethz.ch>
+
+module onehot_to_bin #(
+    parameter int unsigned ONEHOT_WIDTH = 16,
+    // Do Not Change
+    parameter int unsigned BIN_WIDTH    = ONEHOT_WIDTH == 1 ? 1 : $clog2(ONEHOT_WIDTH)
+)   (
+    input  logic [ONEHOT_WIDTH-1:0] onehot,  // at most one bit may be set
+    output logic [BIN_WIDTH-1:0]    bin      // binary index of the set bit, zero if none is set
+);
+    // bin[j] is the OR over all one-hot bits whose index has bit j set
+    for (genvar j = 0; j < BIN_WIDTH; j++) begin : jl
+        logic [ONEHOT_WIDTH-1:0] tmp_mask;
+        for (genvar i = 0; i < ONEHOT_WIDTH; i++) begin : il
+            logic [BIN_WIDTH-1:0] tmp_i;
+            assign tmp_i = i;
+            assign tmp_mask[i] = tmp_i[j]; // constant: bit j of index i
+        end
+        assign bin[j] = |(tmp_mask & onehot);
+    end
+
+// pragma translate_off
+`ifndef VERILATOR
+    assert final ($onehot0(onehot)) else
+        $fatal(1, "[onehot_to_bin] More than one bit set in the one-hot signal");
+`endif
+// pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/plru_tree.sv b/vendor/pulp-platform/common_cells/src/plru_tree.sv
new file mode 100644
index 0000000000..78a0a8430e
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/plru_tree.sv
@@ -0,0 +1,120 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: David Schaffenrath, TU Graz
+// Author: Florian Zaruba, ETH Zurich
+//
+// Description: Pseudo Least Recently Used Tree (PLRU)
+// See: https://en.wikipedia.org/wiki/Pseudo-LRU
+
+module plru_tree #(
+  parameter int unsigned ENTRIES = 16
+) (
+  input  logic               clk_i,
+  input  logic               rst_ni,
+  input  logic [ENTRIES-1:0] used_i, // element i was used (one hot)
+  output logic [ENTRIES-1:0] plru_o  // element i is the least recently used (one hot)
+);
+
+    localparam int unsigned LogEntries = $clog2(ENTRIES);
+
+    logic [2*(ENTRIES-1)-1:0] plru_tree_q, plru_tree_d; // the loops below index nodes 0..ENTRIES-2
+
+    always_comb begin : plru_replacement
+        plru_tree_d = plru_tree_q;
+        // The PLRU-tree indexing:
+        // lvl0        0
+        //            / \
+        //           /   \
+        // lvl1     1     2
+        //         / \   / \
+        // lvl2   3   4 5   6
+        //       / \ /\/\  /\
+        //      ... ... ... ...
+        // Just predefine which nodes will be set/cleared
+        // E.g. for a TLB with 8 entries, the for-loop is semantically
+        // equivalent to the following pseudo-code (each node on the path
+        // gets the complement of the matching index bit of the used entry):
+        // used_i[7]: plru_tree_d[0, 2, 6] = {0, 0, 0};
+        // used_i[6]: plru_tree_d[0, 2, 6] = {0, 0, 1};
+        // used_i[5]: plru_tree_d[0, 2, 5] = {0, 1, 0};
+        // used_i[4]: plru_tree_d[0, 2, 5] = {0, 1, 1};
+        // used_i[3]: plru_tree_d[0, 1, 4] = {1, 0, 0};
+        // used_i[2]: plru_tree_d[0, 1, 4] = {1, 0, 1};
+        // used_i[1]: plru_tree_d[0, 1, 3] = {1, 1, 0};
+        // used_i[0]: plru_tree_d[0, 1, 3] = {1, 1, 1};
+        // (no bit of used_i set: the tree keeps its value)
+        for (int unsigned i = 0; i < ENTRIES; i++) begin
+            automatic int unsigned idx_base, shift, new_index;
+            // we got a hit, so make every node on the path point away from this entry
+            if (used_i[i]) begin
+                // Set the nodes to the values we would expect
+                for (int unsigned lvl = 0; lvl < LogEntries; lvl++) begin
+                  idx_base = $unsigned((2**lvl)-1);
+                  // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                  shift = LogEntries - lvl;
+                  // to circumvent the 32 bit integer arithmetic assignment
+                  new_index =  ~((i >> (shift-1)) & 1);
+                  plru_tree_d[idx_base + (i >> shift)] = new_index[0];
+                end
+            end
+        end
+        // Decode tree to write enable signals
+        // Next for-loop basically creates the following logic for e.g. an 8 entry
+        // TLB (note: pseudo-code obviously):
+        // plru_o[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
+        // plru_o[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
+        // plru_o[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
+        // plru_o[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
+        // plru_o[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
+        // plru_o[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
+        // plru_o[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
+        // plru_o[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
+        // For each entry traverse the tree. If every tree node on the path
+        // matches the corresponding bit of the entry's index, this is
+        // the next entry to replace.
+        for (int unsigned i = 0; i < ENTRIES; i += 1) begin
+            automatic logic en;
+            automatic int unsigned idx_base, shift, new_index;
+            en = 1'b1;
+            for (int unsigned lvl = 0; lvl < LogEntries; lvl++) begin
+                idx_base = $unsigned((2**lvl)-1);
+                // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                shift = LogEntries - lvl;
+                // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
+                new_index =  (i >> (shift-1)) & 1;
+                if (new_index[0]) begin
+                  en &= plru_tree_q[idx_base + (i>>shift)];
+                end else begin
+                  en &= ~plru_tree_q[idx_base + (i>>shift)];
+                end
+            end
+            plru_o[i] = en;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            plru_tree_q <= '0;
+        end else begin
+            plru_tree_q <= plru_tree_d;
+        end
+    end
+
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin
+        assert (ENTRIES == 2**LogEntries) else $error("Entries must be a power of two");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/popcount.sv b/vendor/pulp-platform/common_cells/src/popcount.sv
new file mode 100644
index 0000000000..72b9b71f0f
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/popcount.sv
@@ -0,0 +1,60 @@
+// Copyright (C) 2013-2018 ETH Zurich, University of Bologna
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Manuel Eggimann <meggimann@iis.ee.ethz.ch>
+
+// Description: This module calculates the hamming weight (number of ones) in
+// its input vector using a balanced binary adder tree. Recursive instantiation
+// is used to build the tree.  Any unsigned INPUT_WIDTH larger or equal 2 is
+// legal.  The module pads the signal internally to the next power of two.  The
+// output result width is ceil(log2(INPUT_WIDTH))+1.
+
+module popcount #(
+    parameter int unsigned INPUT_WIDTH = 256,
+    localparam int unsigned PopcountWidth = $clog2(INPUT_WIDTH)+1
+) (
+    input logic [INPUT_WIDTH-1:0]     data_i,
+    output logic [PopcountWidth-1:0] popcount_o
+);
+
+   localparam int unsigned PaddedWidth = 1 << $clog2(INPUT_WIDTH);
+
+   logic [PaddedWidth-1:0]           padded_input;
+   logic [PopcountWidth-2:0]         left_child_result, right_child_result;
+
+   // Zero-pad the input to the next power of two
+   always_comb begin
+     padded_input = '0;
+     padded_input[INPUT_WIDTH-1:0] = data_i;
+   end
+
+   // Recursive instantiation to build the binary adder tree; widths 1 and 2 end the recursion
+   if (INPUT_WIDTH == 1) begin : single_node
+     assign left_child_result  = 1'b0;
+     assign right_child_result = padded_input[0];
+   end else if (INPUT_WIDTH == 2) begin : leaf_node
+     assign left_child_result  = padded_input[1];
+     assign right_child_result = padded_input[0];
+   end else begin : non_leaf_node
+     popcount #(.INPUT_WIDTH(PaddedWidth / 2))
+         left_child(
+                    .data_i(padded_input[PaddedWidth-1:PaddedWidth/2]),
+                    .popcount_o(left_child_result));
+
+     popcount #(.INPUT_WIDTH(PaddedWidth / 2))
+         right_child(
+                     .data_i(padded_input[PaddedWidth/2-1:0]),
+                     .popcount_o(right_child_result));
+   end
+
+   // Output assignment: sum of the counts of the upper and the lower half
+   assign popcount_o = left_child_result + right_child_result;
+
+endmodule : popcount
diff --git a/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv b/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
new file mode 100644
index 0000000000..90301c822c
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
@@ -0,0 +1,348 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+//         Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>, ETH Zurich
+// Date: 02.04.2019
+// Description: logarithmic arbitration tree with round robin arbitration scheme.
+
+/// The rr_arb_tree employs non-starving round robin-arbitration - i.e., the priorities
+/// rotate each cycle.
+///
+/// ## Fair vs. unfair Arbitration
+///
+/// This refers to fair throughput distribution when not all inputs have active requests.
+/// This module has an internal state `rr_q` which defines the highest priority input. (When
+/// `ExtPrio` is `1'b1` this state is provided from the outside.) The arbitration tree will
+/// choose the input with the same index as currently defined by the state if it has an active
+/// request. Otherwise a *random* other active input is selected. The parameter `FairArb` is used
+/// to distinguish between two methods of calculating the next state.
+/// * `1'b0`: The next state is calculated by advancing the current state by one. This leads to the
+///           state being calculated without the context of the active request. Leading to an
+///           unfair throughput distribution if not all inputs have active requests.
+/// * `1'b1`: The next state jumps to the next unserved request with higher index.
+///           This is achieved by using two trailing-zero-counters (`lzc`). The upper has the masked
+///           `req_i` signal with all indices which will have a higher priority in the next state.
+///           The trailing zero count defines the input index with the next highest priority after
+///           the current one is served. When the upper is empty the lower `lzc` provides the
+///           wrapped index if there are outstanding requests with lower or same priority.
+/// The implication of throughput fairness on the module timing are:
+/// * The trailing zero counter (`lzc`) has a loglog relation of input to output timing. This means
+///   that in this module the input to register path scales with Log(Log(`NumIn`)).
+/// * The `rr_arb_tree` data multiplexing scales with Log(`NumIn`). This means that the input to output
+///   timing path of this module also scales with Log(`NumIn`).
+/// This implies that in this module the input to output path is always longer than the input to
+/// register path. As the output data usually also terminates in a register the parameter `FairArb`
+/// only has implications on the area. When it is `1'b0` a static plus one adder is instantiated.
+/// If it is `1'b1` two `lzc`, a masking logic stage and a two input multiplexer are instantiated.
+/// However these are small in respect of the data multiplexers needed, as the width of the `req_i`
+/// signal is usually less than `DataWidth`.
+module rr_arb_tree #(
+  /// Number of inputs to be arbitrated.
+  parameter int unsigned NumIn      = 64,
+  /// Data width of the payload in bits. Not needed if `DataType` is overwritten.
+  parameter int unsigned DataWidth  = 32,
+  /// Data type of the payload, can be overwritten with a custom type. The only use of `DataWidth`.
+  parameter type         DataType   = logic [DataWidth-1:0],
+  /// The `ExtPrio` option allows to override the internal round robin counter via the
+  /// `rr_i` signal. This can be useful in case multiple arbiters need to have
+  /// rotating priorities that are operating in lock-step. If static priority arbitration
+  /// is needed, just connect `rr_i` to '0.
+  ///
+  /// Set to 1'b1 to enable.
+  parameter bit          ExtPrio    = 1'b0,
+  /// If `AxiVldRdy` is set, the req/gnt signals are compliant with the AXI style vld/rdy
+  /// handshake. Namely, upstream vld (req) must not depend on rdy (gnt), as it can be deasserted
+  /// again even though vld is asserted. Enabling `AxiVldRdy` leads to a reduction of arbiter
+  /// delay and area.
+  ///
+  /// Set to `1'b1` to treat req/gnt as vld/rdy.
+  parameter bit          AxiVldRdy  = 1'b0,
+  /// The `LockIn` option prevents the arbiter from changing the arbitration
+  /// decision when the arbiter is disabled. I.e., the index of the first request
+  /// that wins the arbitration will be locked in case the destination is not
+  /// able to grant the request in the same cycle.
+  ///
+  /// Set to `1'b1` to enable.
+  parameter bit          LockIn     = 1'b0,
+  /// When set, ensures that throughput gets distributed evenly between all inputs.
+  ///
+  /// Set to `1'b0` to disable.
+  parameter bit          FairArb    = 1'b1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Width of the arbitration priority signal and the arbitrated index.
+  parameter int unsigned IdxWidth   = (NumIn > 32'd1) ? unsigned'($clog2(NumIn)) : 32'd1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Type for defining the arbitration priority and arbitrated index signal.
+  parameter type         idx_t      = logic [IdxWidth-1:0]
+) (
+  /// Clock, positive edge triggered.
+  input  logic                clk_i,
+  /// Asynchronous reset, active low.
+  input  logic                rst_ni,
+  /// Clears the arbiter state. Only used if `ExtPrio` is `1'b0` or `LockIn` is `1'b1`.
+  input  logic                flush_i,
+  /// External round-robin priority. Only used if `ExtPrio` is `1'b1`.
+  input  idx_t                rr_i,
+  /// Input requests arbitration.
+  input  logic    [NumIn-1:0] req_i,
+  /* verilator lint_off UNOPTFLAT */
+  /// Input request is granted.
+  output logic    [NumIn-1:0] gnt_o,
+  /* verilator lint_on UNOPTFLAT */
+  /// Input data for arbitration.
+  input  DataType [NumIn-1:0] data_i,
+  /// Output request is valid.
+  output logic                req_o,
+  /// Output request is granted.
+  input  logic                gnt_i,
+  /// Output data.
+  output DataType             data_o,
+  /// Index from which input the data came from.
+  output idx_t                idx_o
+);
+
+  // pragma translate_off
+  `ifndef VERILATOR
+  `ifndef XSIM
+  // Default SVA reset
+  default disable iff (!rst_ni || flush_i);
+  `endif
+  `endif
+  // pragma translate_on
+
+  // single input: nothing to arbitrate, just pass everything through
+  if (NumIn == unsigned'(1)) begin : gen_pass_through
+    assign req_o    = req_i[0];
+    assign gnt_o[0] = gnt_i;
+    assign data_o   = data_i[0];
+    assign idx_o    = '0;
+  // non-degenerate cases
+  end else begin : gen_arbiter
+    localparam int unsigned NumLevels = unsigned'($clog2(NumIn));
+
+    /* verilator lint_off UNOPTFLAT */
+    idx_t    [2**NumLevels-2:0] index_nodes; // used to propagate the indices
+    DataType [2**NumLevels-2:0] data_nodes;  // used to propagate the data
+    logic    [2**NumLevels-2:0] gnt_nodes;   // used to propagate the grant to masters
+    logic    [2**NumLevels-2:0] req_nodes;   // used to propagate the requests to slave
+    /* lint_off */
+    idx_t                       rr_q;
+    logic [NumIn-1:0]           req_d;
+
+    // the final arbitration decision can be taken from the root of the tree
+    assign req_o        = req_nodes[0];
+    assign data_o       = data_nodes[0];
+    assign idx_o        = index_nodes[0];
+
+    if (ExtPrio) begin : gen_ext_rr
+      assign rr_q       = rr_i;
+      assign req_d      = req_i;
+    end else begin : gen_int_rr
+      idx_t rr_d;
+
+      // lock arbiter decision in case we got at least one req and no acknowledge
+      if (LockIn) begin : gen_lock
+        logic  lock_d, lock_q;
+        logic [NumIn-1:0] req_q;
+
+        assign lock_d     = req_o & ~gnt_i;
+        assign req_d      = (lock_q) ? req_q : req_i;
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg
+          if (!rst_ni) begin
+            lock_q <= '0;
+          end else begin
+            if (flush_i) begin
+              lock_q <= '0;
+            end else begin
+              lock_q <= lock_d;
+            end
+          end
+        end
+
+        // pragma translate_off
+        `ifndef VERILATOR
+          lock: assert property(
+            @(posedge clk_i) LockIn |-> req_o &&
+                             (!gnt_i && !flush_i) |=> idx_o == $past(idx_o)) else
+                $fatal (1, "Lock implies same arbiter decision in next cycle if output is not \
+                            ready.");
+
+          logic [NumIn-1:0] req_tmp;
+          assign req_tmp = req_q & req_i;
+          lock_req: assume property(
+            @(posedge clk_i) LockIn |-> lock_d |=> req_tmp == req_q) else
+                $fatal (1, "It is disallowed to deassert unserved request signals when LockIn is \
+                            enabled.");
+        `endif
+        // pragma translate_on
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_req_regs
+          if (!rst_ni) begin
+            req_q  <= '0;
+          end else begin
+            if (flush_i) begin
+              req_q  <= '0;
+            end else begin
+              req_q  <= req_d;
+            end
+          end
+        end
+      end else begin : gen_no_lock
+        assign req_d = req_i;
+      end
+
+      if (FairArb) begin : gen_fair_arb
+        logic [NumIn-1:0] upper_mask,  lower_mask;
+        idx_t             upper_idx,   lower_idx,   next_idx;
+        logic             upper_empty, lower_empty;
+
+        for (genvar i = 0; i < NumIn; i++) begin : gen_mask
+          assign upper_mask[i] = (i >  rr_q) ? req_d[i] : 1'b0;
+          assign lower_mask[i] = (i <= rr_q) ? req_d[i] : 1'b0;
+        end
+
+        lzc #(
+          .WIDTH ( NumIn ),
+          .MODE  ( 1'b0  )
+        ) i_lzc_upper (
+          .in_i    ( upper_mask  ),
+          .cnt_o   ( upper_idx   ),
+          .empty_o ( upper_empty )
+        );
+
+        lzc #(
+          .WIDTH ( NumIn ),
+          .MODE  ( 1'b0  )
+        ) i_lzc_lower (
+          .in_i    ( lower_mask  ),
+          .cnt_o   ( lower_idx   ),
+          .empty_o ( /*unused*/  )
+        );
+
+        assign next_idx = upper_empty      ? lower_idx : upper_idx;
+        assign rr_d     = (gnt_i && req_o) ? next_idx  : rr_q;
+
+      end else begin : gen_unfair_arb
+        assign rr_d = (gnt_i && req_o) ? ((rr_q == idx_t'(NumIn-1)) ? '0 : rr_q + 1'b1) : rr_q;
+      end
+
+      // this register holds the index of the input that currently has the highest priority
+      always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs
+        if (!rst_ni) begin
+          rr_q   <= '0;
+        end else begin
+          if (flush_i) begin
+            rr_q   <= '0;
+          end else begin
+            rr_q   <= rr_d;
+          end
+        end
+      end
+    end
+
+    assign gnt_nodes[0] = gnt_i;
+
+    // arbiter tree
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels
+      for (genvar l = 0; l < 2**level; l++) begin : gen_level
+        // local select signal
+        logic sel;
+        // index calcs
+        localparam int unsigned Idx0 = 2**level-1+l;// current node
+        localparam int unsigned Idx1 = 2**(level+1)-1+l*2;// lower child, upper child is Idx1+1
+        //////////////////////////////////////////////////////////////
+        // uppermost level where data is fed in from the inputs
+        if (unsigned'(level) == NumLevels-1) begin : gen_first_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(l) * 2 < NumIn-1) begin : gen_reduce
+            assign req_nodes[Idx0]   = req_d[l*2] | req_d[l*2+1];
+
+            // round robin: take the upper input if the lower is idle, or if it requests and the rr_q bit is set
+            assign sel =  ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level];
+
+            assign index_nodes[Idx0] = idx_t'(sel);
+            assign data_nodes[Idx0]  = (sel) ? data_i[l*2+1] : data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2])   & ~sel;
+            assign gnt_o[l*2+1]      = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2+1]) & sel;
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(l) * 2 == NumIn-1) begin : gen_first
+            assign req_nodes[Idx0]   = req_d[l*2];
+            assign index_nodes[Idx0] = '0;// always zero in this case
+            assign data_nodes[Idx0]  = data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]);
+          end
+          // if index is out of range, fill up with zeros (will get pruned)
+          if (unsigned'(l) * 2 > NumIn-1) begin : gen_out_of_range
+            assign req_nodes[Idx0]   = 1'b0;
+            assign index_nodes[Idx0] = idx_t'('0);
+            assign data_nodes[Idx0]  = DataType'('0);
+          end
+        //////////////////////////////////////////////////////////////
+        // general case for other levels within the tree
+        end else begin : gen_other_levels
+          assign req_nodes[Idx0]   = req_nodes[Idx1] | req_nodes[Idx1+1];
+
+          // round robin: take the upper child if the lower is idle, or if it requests and the rr_q bit is set
+          assign sel =  ~req_nodes[Idx1] | req_nodes[Idx1+1] & rr_q[NumLevels-1-level];
+
+          assign index_nodes[Idx0] = (sel) ?
+            idx_t'({1'b1, index_nodes[Idx1+1][NumLevels-unsigned'(level)-2:0]}) :
+            idx_t'({1'b0, index_nodes[Idx1][NumLevels-unsigned'(level)-2:0]});
+
+          assign data_nodes[Idx0]  = (sel) ? data_nodes[Idx1+1] : data_nodes[Idx1];
+          assign gnt_nodes[Idx1]   = gnt_nodes[Idx0] & ~sel;
+          assign gnt_nodes[Idx1+1] = gnt_nodes[Idx0] & sel;
+        end
+        //////////////////////////////////////////////////////////////
+      end
+    end
+
+    // pragma translate_off
+    `ifndef VERILATOR
+    `ifndef XSIM
+    initial begin : p_assert
+      assert(NumIn)
+        else $fatal(1, "Input must be at least one element wide.");
+      assert(!(LockIn && ExtPrio))
+        else $fatal(1,"Cannot use LockIn feature together with external ExtPrio.");
+    end
+
+    hot_one : assert property(
+      @(posedge clk_i) $onehot0(gnt_o))
+        else $fatal (1, "Grant signal must be hot1 or zero.");
+
+    gnt0 : assert property(
+      @(posedge clk_i) |gnt_o |-> gnt_i)
+        else $fatal (1, "Grant out implies grant in.");
+
+    gnt1 : assert property(
+      @(posedge clk_i) req_o |-> gnt_i |-> |gnt_o)
+        else $fatal (1, "Req out and grant in implies grant out.");
+
+    gnt_idx : assert property(
+      @(posedge clk_i) req_o |->  gnt_i |-> gnt_o[idx_o])
+        else $fatal (1, "Idx_o / gnt_o do not match.");
+
+    req0 : assert property(
+      @(posedge clk_i) |req_i |-> req_o)
+        else $fatal (1, "Req in implies req out.");
+
+    req1 : assert property(
+      @(posedge clk_i) req_o |-> |req_i)
+        else $fatal (1, "Req out implies req in.");
+    `endif
+    `endif
+    // pragma translate_on
+  end
+
+endmodule : rr_arb_tree
diff --git a/vendor/pulp-platform/common_cells/src/rstgen.sv b/vendor/pulp-platform/common_cells/src/rstgen.sv
new file mode 100644
index 0000000000..a7dccc63b0
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/rstgen.sv
@@ -0,0 +1,30 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi <davide.rossi@unibo.it>
+
+module rstgen (
+    input  logic clk_i,        // clock
+    input  logic rst_ni,       // asynchronous reset, active low
+    input  logic test_mode_i,  // forwarded to the wrapped rstgen_bypass
+    output logic rst_no,       // reset output of the wrapped rstgen_bypass
+    output logic init_no       // init output of the wrapped rstgen_bypass
+);
+    // Thin wrapper: `rst_ni` doubles as the test-mode reset of `rstgen_bypass`.
+    rstgen_bypass i_rstgen_bypass (
+        .rst_test_mode_ni ( rst_ni ),
+        .clk_i,
+        .rst_ni,
+        .test_mode_i,
+        .rst_no,
+        .init_no
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv b/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv
new file mode 100644
index 0000000000..c51ee83548
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv
@@ -0,0 +1,57 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+// Description: This module is a reset synchronizer with a dedicated reset bypass pin for testmode reset.
+// Pro Tip: The wise Dr. Schaffner recommends at least 4 registers!
+
+module rstgen_bypass #(
+    parameter int unsigned NumRegs = 4   // number of synchronizer stages
+) (
+    input  logic clk_i,
+    input  logic rst_ni,            // asynchronous reset, active low (used while test_mode_i is low)
+    input  logic rst_test_mode_ni,  // reset that is passed straight through while test_mode_i is high
+    input  logic test_mode_i,       // high: bypass the synchronizer registers
+    output logic rst_no,
+    output logic init_no
+);
+
+    // internal reset: asynchronous reset of the synchronizer registers
+    logic rst_n;
+
+    logic [NumRegs-1:0] synch_regs_q;
+    // bypass mode: select between the synchronized reset and the test-mode reset
+    always_comb begin
+        if (test_mode_i == 1'b0) begin
+            rst_n   = rst_ni;
+            rst_no  = synch_regs_q[NumRegs-1];
+            init_no = synch_regs_q[NumRegs-1];
+        end else begin
+            rst_n   = rst_test_mode_ni;
+            rst_no  = rst_test_mode_ni;
+            init_no = 1'b1;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_n) begin
+        if (~rst_n) begin
+            synch_regs_q <= '0;
+        end else begin // shift in ones; NOTE(review): the part-select assumes NumRegs >= 2 -- confirm
+            synch_regs_q <= {synch_regs_q[NumRegs-2:0], 1'b1};
+        end
+    end
+    // pragma translate_off
+    `ifndef VERILATOR
+    initial begin : p_assertions
+        if (NumRegs < 1) $fatal(1, "At least one register is required.");
+    end
+    `endif
+    // pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/serial_deglitch.sv b/vendor/pulp-platform/common_cells/src/serial_deglitch.sv
new file mode 100644
index 0000000000..3302e1ae6b
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/serial_deglitch.sv
@@ -0,0 +1,50 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Deglitches a serial line by taking multiple samples until
+//              asserting the output high/low.
+
+module serial_deglitch #(
+    parameter int unsigned SIZE = 4  // count value at which the output is driven high
+)(
+    input  logic clk_i,    // clock
+    input  logic rst_ni,   // asynchronous reset active low
+    input  logic en_i,     // enable
+    input  logic d_i,      // serial data in
+    output logic q_o       // filtered data out
+);
+    // Saturating up/down counter: counts up on d_i == 1 and down on d_i == 0 within [0, SIZE].
+    logic [SIZE-1:0] count_q;
+    // Last decided output level; holds q_o while the counter is between the two bounds.
+    logic q;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            count_q <= '0;
+            q       <= 1'b0;
+        end else begin
+            // Remember the level that is currently driven on the output.
+            q <= q_o;
+            if (en_i) begin
+                // Stop at SIZE when counting up and at 0 when counting down (no wrap-around).
+                if (d_i == 1'b1 && count_q != SIZE[SIZE-1:0]) count_q <= count_q + 1;
+                else if (d_i == 1'b0 && count_q != '0)        count_q <= count_q - 1;
+            end
+        end
+    end
+
+    // Output process: high at SIZE, low at 0, otherwise the registered level (avoids a latch).
+    always_comb begin
+        q_o = q;
+        if (count_q == SIZE[SIZE-1:0]) q_o = 1'b1;
+        else if (count_q == '0)        q_o = 1'b0;
+    end
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/shift_reg.sv b/vendor/pulp-platform/common_cells/src/shift_reg.sv
new file mode 100644
index 0000000000..7193fbcd81
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/shift_reg.sv
@@ -0,0 +1,53 @@
+
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: <zarubaf@iis.ee.ethz.ch>
+//
+// Description: Simple shift register for arbitrary depth and types
+
+module shift_reg #(
+    parameter type dtype         = logic,  // type of one data element
+    parameter int unsigned Depth = 1       // number of register stages, 0 = wire
+)(
+    input  logic clk_i,    // clock
+    input  logic rst_ni,   // asynchronous reset, active low
+    input  dtype d_i,      // data entering the first stage
+    output dtype d_o       // data leaving the last stage
+);
+
+    // The implementation is chosen at elaboration time from Depth.
+
+    // Depth 0: no storage, the output follows the input combinationally.
+    if (Depth == 0) begin : gen_pass_through
+        assign d_o = d_i;
+    // Depth 1: the output itself is the only register; it is cleared on reset.
+    end else if (Depth == 1) begin : gen_register
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (!rst_ni) d_o <= '0;
+            else         d_o <= d_i;
+        end
+    // Depth > 1: chain of Depth registers; stage 0 takes d_i, stage Depth-1 drives d_o.
+    end else if (Depth > 1) begin : gen_shift_reg
+        dtype [Depth-1:0] pipe_d, pipe_q;
+
+        // Next state: every stage moves up by one position and d_i enters at the bottom.
+        assign pipe_d = {pipe_q[Depth-2:0], d_i};
+        // The oldest element is presented at the output.
+        assign d_o    = pipe_q[Depth-1];
+
+        // All stages are cleared by the asynchronous reset.
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (!rst_ni) pipe_q <= '0;
+            else         pipe_q <= pipe_d;
+        end
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/spill_register.sv b/vendor/pulp-platform/common_cells/src/spill_register.sv
new file mode 100644
index 0000000000..80ff37f149
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/spill_register.sv
@@ -0,0 +1,46 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+
+/// Wrapper around the flushable spill register to maintain back-ward
+/// compatibility.
+module spill_register #(
+  parameter type T      = logic,  // payload type
+  parameter bit  Bypass = 1'b0    // 1: make this spill register transparent
+) (
+  input  logic clk_i,    // clock
+  input  logic rst_ni,   // asynchronous reset, active low
+  input  logic valid_i,  // input handshake: valid
+  output logic ready_o,  // input handshake: ready
+  input  T     data_i,   // input payload
+  output logic valid_o,  // output handshake: valid
+  input  logic ready_i,  // output handshake: ready
+  output T     data_o    // output payload
+);
+
+  // Same register as the flushable variant, with the flush request tied off.
+  spill_register_flushable #(
+    .T      (T),
+    .Bypass (Bypass)
+  ) spill_register_flushable_i (
+    .clk_i   (clk_i),
+    .rst_ni  (rst_ni),
+    .valid_i (valid_i),
+    .flush_i (1'b0),
+    .ready_o (ready_o),
+    .data_i  (data_i),
+    .valid_o (valid_o),
+    .ready_i (ready_i),
+    .data_o  (data_o)
+  );
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv b/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv
new file mode 100644
index 0000000000..c03ad274de
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv
@@ -0,0 +1,105 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+
+/// A register with handshakes that completely cuts any combinational paths
+/// between the input and output. This spill register can be flushed.
+module spill_register_flushable #(
+  parameter type T           = logic,
+  parameter bit  Bypass      = 1'b0   // make this spill register transparent
+) (
+  input  logic clk_i   ,  // clock
+  input  logic rst_ni  ,  // asynchronous reset, active low
+  input  logic valid_i ,  // input handshake: valid
+  input  logic flush_i ,  // empties both registers; must not coincide with valid_i
+  output logic ready_o ,  // input handshake: ready
+  input  T     data_i  ,  // input payload
+  output logic valid_o ,  // output handshake: valid
+  input  logic ready_i ,  // output handshake: ready
+  output T     data_o     // output payload
+);
+  // Bypass mode: both handshakes and the payload are wired straight through.
+  if (Bypass) begin : gen_bypass
+    assign valid_o = valid_i;
+    assign ready_o = ready_i;
+    assign data_o  = data_i;
+  end else begin : gen_spill_reg
+    // The A register: takes its data from the input.
+    T a_data_q;
+    logic a_full_q;
+    logic a_fill, a_drain;
+    // Payload of A: captured from the input whenever A is filled.
+    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_data
+      if (!rst_ni)
+        a_data_q <= '0;
+      else if (a_fill)
+        a_data_q <= data_i;
+    end
+    // Fill state of A: set by a fill, cleared by a drain without a simultaneous fill.
+    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_full
+      if (!rst_ni)
+        a_full_q <= 0;
+      else if (a_fill || a_drain)
+        a_full_q <= a_fill;
+    end
+
+    // The B register: takes its data from the A register.
+    T b_data_q;
+    logic b_full_q;
+    logic b_fill, b_drain;
+    // Payload of B: captured from A whenever B is filled.
+    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_data
+      if (!rst_ni)
+        b_data_q <= '0;
+      else if (b_fill)
+        b_data_q <= a_data_q;
+    end
+    // Fill state of B: set by a fill, cleared by a drain without a simultaneous fill.
+    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_full
+      if (!rst_ni)
+        b_full_q <= 0;
+      else if (b_fill || b_drain)
+        b_full_q <= b_fill;
+    end
+
+    // Fill the A register on an input handshake that is not flushed. Drain the A register
+    // whenever it is full and the B register is empty, or if a flush is requested.
+    assign a_fill = valid_i && ready_o && (!flush_i);
+    assign a_drain = (a_full_q && !b_full_q) || flush_i;
+
+    // Fill the B register whenever the A register is drained, but the downstream
+    // circuit is not ready. Drain the B register whenever it is full and the
+    // downstream circuit is ready, or if a flush is requested.
+    assign b_fill = a_drain && (!ready_i) && (!flush_i);
+    assign b_drain = (b_full_q && ready_i) || flush_i;
+
+    // We can accept input as long as at least one of the two registers is empty.
+    // Note: flush_i and valid_i must not be high at the same time,
+    // otherwise an invalid handshake may occur
+    assign ready_o = !a_full_q || !b_full_q;
+
+    // The unit provides output as long as one of the registers is filled.
+    assign valid_o = a_full_q | b_full_q;
+
+    // Register B (the spill register) is emptied before register A (the slice register).
+    assign data_o = b_full_q ? b_data_q : a_data_q;
+
+    // pragma translate_off
+    `ifndef VERILATOR
+    flush_valid : assert property (
+      @(posedge clk_i) disable iff (~rst_ni) (flush_i |-> ~valid_i)) else
+      $warning("Trying to flush and feed the spill register simultaneously. You will lose data!");
+   `endif
+     // pragma translate_on
+  end
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_arbiter.sv b/vendor/pulp-platform/common_cells/src/stream_arbiter.sv
new file mode 100644
index 0000000000..c8ca2a8769
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_arbiter.sv
@@ -0,0 +1,49 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
+// handshaking with dependency rules as in AXI4) to a single output stream.  Once `oup_valid_o` is
+// asserted, `oup_data_o` remains invariant until the output handshake has occurred.  The
+// arbitration scheme is selected by `ARBITER`, see `stream_arbiter_flushable` for details.
+
+module stream_arbiter #(
+    parameter type      DATA_T = logic,   // Vivado requires a default value for type parameters.
+    parameter integer   N_INP = -1,       // Synopsys DC requires a default value for parameters.
+    parameter           ARBITER = "rr"    // "rr" or "prio"
+) (
+    input  logic              clk_i,
+    input  logic              rst_ni,
+    // Input streams, one handshake and one payload per input.
+    input  DATA_T [N_INP-1:0] inp_data_i,
+    input  logic  [N_INP-1:0] inp_valid_i,
+    output logic  [N_INP-1:0] inp_ready_o,
+    // Single arbitrated output stream.
+    output DATA_T             oup_data_o,
+    output logic              oup_valid_o,
+    input  logic              oup_ready_i
+);
+  // Non-flushable variant: all parameters and ports are forwarded unchanged to the
+  // flushable arbiter, whose flush input is tied to zero.
+  stream_arbiter_flushable #(
+    .DATA_T  ( DATA_T  ),
+    .N_INP   ( N_INP   ),
+    .ARBITER ( ARBITER )
+  ) i_arb (
+    .flush_i ( 1'b0 ),
+    .clk_i,
+    .rst_ni,
+    .inp_data_i,
+    .inp_valid_i,
+    .inp_ready_o,
+    .oup_data_o,
+    .oup_valid_o,
+    .oup_ready_i
+  );
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv b/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv
new file mode 100644
index 0000000000..32946e6859
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv
@@ -0,0 +1,82 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
+// handshaking with dependency rules as in AXI4) to a single output stream.  Once `oup_valid_o` is
+// asserted, `oup_data_o` remains invariant until the output handshake has occurred.  The
+// arbitration scheme is fair round-robin tree, see `rr_arb_tree` for details.
+
+module stream_arbiter_flushable #(
+    parameter type      DATA_T = logic,   // Vivado requires a default value for type parameters.
+    parameter integer   N_INP = -1,       // Synopsys DC requires a default value for parameters.
+    parameter           ARBITER = "rr"    // "rr" or "prio"
+) (
+    input  logic              clk_i,
+    input  logic              rst_ni,
+    input  logic              flush_i,
+    // Input streams.
+    input  DATA_T [N_INP-1:0] inp_data_i,
+    input  logic  [N_INP-1:0] inp_valid_i,
+    output logic  [N_INP-1:0] inp_ready_o,
+    // Arbitrated output stream.
+    output DATA_T             oup_data_o,
+    output logic              oup_valid_o,
+    input  logic              oup_ready_i
+);
+  // Both legal settings instantiate rr_arb_tree and differ only in ExtPrio.
+  if (ARBITER == "rr") begin : gen_rr_arb
+    // ExtPrio is switched off here; rr_i is tied to zero.
+    rr_arb_tree #(
+      .NumIn     ( N_INP  ),
+      .DataType  ( DATA_T ),
+      .ExtPrio   ( 1'b0   ),
+      .AxiVldRdy ( 1'b1   ),
+      .LockIn    ( 1'b1   )
+    ) i_arbiter (
+      .clk_i   ( clk_i       ),
+      .rst_ni  ( rst_ni      ),
+      .flush_i ( flush_i     ),
+      .rr_i    ( '0          ),
+      .req_i   ( inp_valid_i ),
+      .gnt_o   ( inp_ready_o ),
+      .data_i  ( inp_data_i  ),
+      .gnt_i   ( oup_ready_i ),
+      .req_o   ( oup_valid_o ),
+      .data_o  ( oup_data_o  ),
+      .idx_o   (             )
+    );
+  end else if (ARBITER == "prio") begin : gen_prio_arb
+    // ExtPrio is switched on while rr_i stays tied to zero.
+    rr_arb_tree #(
+      .NumIn     ( N_INP  ),
+      .DataType  ( DATA_T ),
+      .ExtPrio   ( 1'b1   ),
+      .AxiVldRdy ( 1'b1   ),
+      .LockIn    ( 1'b1   )
+    ) i_arbiter (
+      .clk_i   ( clk_i       ),
+      .rst_ni  ( rst_ni      ),
+      .flush_i ( flush_i     ),
+      .rr_i    ( '0          ),
+      .req_i   ( inp_valid_i ),
+      .gnt_o   ( inp_ready_o ),
+      .data_i  ( inp_data_i  ),
+      .gnt_i   ( oup_ready_i ),
+      .req_o   ( oup_valid_o ),
+      .data_o  ( oup_data_o  ),
+      .idx_o   (             )
+    );
+  end else begin : gen_arb_error
+    // Any other value of ARBITER is rejected during elaboration.
+    // pragma translate_off
+    $fatal(1, "Invalid value for parameter 'ARBITER'!");
+    // pragma translate_on
+  end
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_delay.sv b/vendor/pulp-platform/common_cells/src/stream_delay.sv
new file mode 100644
index 0000000000..5051b6c24b
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_delay.sv
@@ -0,0 +1,132 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba, zarubaf@iis.ee.ethz.ch
+// Description: Delay (or randomize) AXI-like handshaking
+
+module stream_delay #(
+    parameter bit   StallRandom = 0,      // 1: take the delay from an LFSR instead of FixedDelay
+    parameter int   FixedDelay  = 1,      // delay setting used when StallRandom is 0
+    parameter type  payload_t  = logic    // payload type; the payload itself is not stored
+)(
+    input  logic     clk_i,
+    input  logic     rst_ni,
+    // upstream side
+    input  payload_t payload_i,
+    output logic     ready_o,
+    input  logic     valid_i,
+    // downstream side
+    output payload_t payload_o,
+    input  logic     ready_i,
+    output logic     valid_o
+);
+
+    if (FixedDelay == 0 && !StallRandom) begin : gen_pass_through
+        assign ready_o = ready_i;
+        assign valid_o = valid_i;
+        assign payload_o = payload_i;
+    end else begin : gen_delay
+        // Width of the delay counter and of its load value.
+        localparam int unsigned CounterBits = 4;
+
+        typedef enum logic [1:0] {
+            Idle, Valid, Ready
+        } state_e;
+
+        state_e state_d, state_q;
+
+        logic                   load;       // load the counter with counter_load
+        logic [CounterBits-1:0] count_out;  // current counter value
+        logic                   en;         // enable input of the counter
+
+        logic [CounterBits-1:0] counter_load;
+        // The payload is passed through combinationally; only the handshake is delayed.
+        assign payload_o = payload_i;
+
+        always_comb begin
+            state_d = state_q;
+            valid_o = 1'b0;
+            ready_o = 1'b0;
+            load    = 1'b0;
+            en      = 1'b0;
+
+            unique case (state_q)
+                Idle: begin
+                    if (valid_i) begin
+                        load = 1'b1;
+                        state_d = Valid;
+                        // Just one cycle delay. FixedDelay only counts when it is actually in use.
+                        if ((!StallRandom && FixedDelay == 1) ||
+                            ( StallRandom && counter_load == 1)) begin
+                            state_d = Ready;
+                        end
+                        if (StallRandom && counter_load == 0) begin  // zero delay: handshake now
+                            valid_o = 1'b1;
+                            ready_o = ready_i;
+                            if (ready_i) state_d = Idle;
+                            else state_d = Ready;
+                        end
+                    end
+                end
+                Valid: begin  // count until the counter value is zero
+                    en = 1'b1;
+                    if (count_out == 0) begin
+                        state_d = Ready;
+                    end
+                end
+                // Present the item downstream and wait until it is accepted.
+                Ready: begin
+                    valid_o = 1'b1;
+                    ready_o = ready_i;
+                    if (ready_i) state_d = Idle;
+                end
+                default : /* default */;
+            endcase
+
+        end
+        // Source of the value that is loaded into the counter.
+        if (StallRandom) begin : gen_random_stall
+            lfsr_16bit #(
+              .WIDTH ( 16 )
+            ) i_lfsr_16bit (
+                .clk_i          ( clk_i        ),
+                .rst_ni         ( rst_ni       ),
+                .en_i           ( load         ),
+                .refill_way_oh  (              ),
+                .refill_way_bin ( counter_load )
+            );
+        end else begin : gen_fixed_delay
+            assign counter_load = FixedDelay;  // truncated to CounterBits bits
+        end
+        // Delay counter: loaded in Idle, enabled in Valid; its down_i input is tied high.
+        counter #(
+            .WIDTH      ( CounterBits )
+        ) i_counter (
+            .clk_i      ( clk_i        ),
+            .rst_ni     ( rst_ni       ),
+            .clear_i    ( 1'b0         ),
+            .en_i       ( en           ),
+            .load_i     ( load         ),
+            .down_i     ( 1'b1         ),
+            .d_i        ( counter_load ),
+            .q_o        ( count_out    ),
+            .overflow_o (              )
+        );
+        // State register.
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (~rst_ni) begin
+                state_q <= Idle;
+            end else begin
+                state_q <= state_d;
+            end
+        end
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_demux.sv b/vendor/pulp-platform/common_cells/src/stream_demux.sv
new file mode 100644
index 0000000000..69ad3099b1
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_demux.sv
@@ -0,0 +1,36 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Connects the input stream (valid-ready) handshake to one of `N_OUP` output stream handshakes.
+///
+/// This module has no data ports because stream data does not need to be demultiplexed: the data of
+/// the input stream can just be applied at all output streams.
+module stream_demux #(
+  /// Number of output handshakes.
+  parameter int unsigned N_OUP     = 32'd1,
+  /// Width of `oup_sel_i`; derived from `N_OUP`, DO NOT OVERRIDE!
+  parameter int unsigned LOG_N_OUP = (N_OUP > 32'd1) ? unsigned'($clog2(N_OUP)) : 1'b1
+) (
+  /// Input stream handshake.
+  input  logic                 inp_valid_i,
+  output logic                 inp_ready_o,
+  /// Index of the output the input stream is currently connected to.
+  input  logic [LOG_N_OUP-1:0] oup_sel_i,
+  /// Output stream handshakes.
+  output logic [N_OUP-1:0]     oup_valid_o,
+  input  logic [N_OUP-1:0]     oup_ready_i
+);
+  // Only the selected output sees the input valid; its ready is returned to the input.
+  always_comb begin
+    inp_ready_o            = oup_ready_i[oup_sel_i];
+    oup_valid_o            = '0;
+    oup_valid_o[oup_sel_i] = inp_valid_i;
+  end
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_fifo.sv b/vendor/pulp-platform/common_cells/src/stream_fifo.sv
new file mode 100644
index 0000000000..e7c60e57e2
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_fifo.sv
@@ -0,0 +1,66 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Georg Rutishauser <georgr@iis.ee.ethz.ch>
+
+module stream_fifo #(
+    /// FIFO is in fall-through mode
+    parameter bit          FALL_THROUGH = 1'b0,
+    /// Default data width if the fifo is of type logic
+    parameter int unsigned DATA_WIDTH   = 32,
+    /// Depth can be arbitrary from 0 to 2**32
+    parameter int unsigned DEPTH        = 8,
+    parameter type         T            = logic [DATA_WIDTH-1:0],
+    // DO NOT OVERWRITE THIS PARAMETER
+    parameter int unsigned ADDR_DEPTH  = (DEPTH > 1) ? $clog2(DEPTH) : 1
+) (
+    input  logic                  clk_i,      // Clock
+    input  logic                  rst_ni,     // Asynchronous reset active low
+    input  logic                  flush_i,    // flush the fifo
+    input  logic                  testmode_i, // test_mode to bypass clock gating
+    output logic [ADDR_DEPTH-1:0] usage_o,    // fill pointer
+    // input stream (valid/ready)
+    input  T                      data_i,     // data to push into the fifo
+    input  logic                  valid_i,    // input data valid
+    output logic                  ready_o,    // fifo is not full
+    // output stream (valid/ready)
+    output T                      data_o,     // output data
+    output logic                  valid_o,    // fifo is not empty
+    input  logic                  ready_i     // pop head from fifo
+);
+    // Status flags of the wrapped FIFO and the push/pop strobes derived from them.
+    logic fifo_full, fifo_empty;
+    logic do_push, do_pop;
+
+    // The stream handshakes are the inverted status flags; a transfer needs both sides.
+    assign ready_o = !fifo_full;
+    assign valid_o = !fifo_empty;
+    assign do_push = valid_i && ready_o;
+    assign do_pop  = ready_i && valid_o;
+    fifo_v3 #(
+        .FALL_THROUGH ( FALL_THROUGH ),
+        .DATA_WIDTH   ( DATA_WIDTH   ),
+        .DEPTH        ( DEPTH        ),
+        .dtype        ( T            )
+    ) fifo_i (
+        .clk_i      ( clk_i      ),
+        .rst_ni     ( rst_ni     ),
+        .flush_i    ( flush_i    ),
+        .testmode_i ( testmode_i ),
+        .full_o     ( fifo_full  ),
+        .empty_o    ( fifo_empty ),
+        .usage_o    ( usage_o    ),
+        .data_i     ( data_i     ),
+        .push_i     ( do_push    ),
+        .data_o     ( data_o     ),
+        .pop_i      ( do_pop     )
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_filter.sv b/vendor/pulp-platform/common_cells/src/stream_filter.sv
new file mode 100644
index 0000000000..52a5835e77
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_filter.sv
@@ -0,0 +1,26 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream filter: If `drop_i` is `1`, signal `ready` to the upstream regardless of the downstream,
+// and do not propagate `valid` downstream.  Otherwise, connect upstream to downstream.
+module stream_filter (
+    input  logic valid_i,  // upstream valid
+    output logic ready_o,  // upstream ready
+    // 1: consume the upstream item without forwarding it
+    input  logic drop_i,
+    // downstream handshake
+    output logic valid_o,
+    input  logic ready_i
+);
+    // Dropping masks the valid that goes downstream ...
+    assign valid_o = valid_i & ~drop_i;
+    // ... and forces the ready that goes upstream, independent of the downstream.
+    assign ready_o = ready_i | drop_i;
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_fork.sv b/vendor/pulp-platform/common_cells/src/stream_fork.sv
new file mode 100644
index 0000000000..650038d263
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_fork.sv
@@ -0,0 +1,133 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream fork: Connects the input stream (ready-valid) handshake to *all* of `N_OUP` output stream
+// handshakes. For each input stream handshake, every output stream handshakes exactly once. The
+// input stream only handshakes when all output streams have handshaked, but the output streams do
+// not have to handshake simultaneously.
+//
+// This module has no data ports because stream data does not need to be forked: the data of the
+// input stream can just be applied at all output streams.
+
+module stream_fork #(
+    parameter int unsigned N_OUP = 0    // Synopsys DC requires a default value for parameters.
+) (
+    input  logic                clk_i,      // clock
+    input  logic                rst_ni,     // asynchronous reset, active low
+    input  logic                valid_i,    // input stream valid
+    output logic                ready_o,    // input stream ready
+    output logic [N_OUP-1:0]    valid_o,    // output stream valids
+    input  logic [N_OUP-1:0]    ready_i     // output stream readys
+);
+    // Two-state encoding shared by the input FSM and the per-output FSMs.
+    typedef enum logic {READY, WAIT} state_t;
+    // oup_ready[i]: output i does not block the input handshake; all_ones: all-ones mask.
+    logic [N_OUP-1:0]   oup_ready,
+                        all_ones;
+
+    state_t inp_state_d, inp_state_q;
+
+    // Input control FSM: raises ready_o once every output has handshaked.
+    always_comb begin
+        // ready_o     = 1'b0;
+        inp_state_d = inp_state_q;
+
+        unique case (inp_state_q)
+            READY: begin
+                if (valid_i) begin
+                    if (valid_o == all_ones && ready_i == all_ones) begin
+                        // If handshake on all outputs, handshake on input.
+                        ready_o = 1'b1;
+                    end else begin
+                        ready_o = 1'b0;
+                        // Otherwise, wait for outputs that did not handshake yet.
+                        inp_state_d = WAIT;
+                    end
+                end else begin
+                    ready_o = 1'b0;
+                end
+            end
+            WAIT: begin  // handshake on the input as soon as no output blocks any more
+                if (valid_i && oup_ready == all_ones) begin
+                    ready_o = 1'b1;
+                    inp_state_d = READY;
+                end else begin
+                    ready_o = 1'b0;
+                end
+            end
+            default: begin
+                inp_state_d = READY;
+                ready_o = 1'b0;
+            end
+        endcase
+    end
+    // State register of the input FSM.
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+        if (!rst_ni) begin
+            inp_state_q <= READY;
+        end else begin
+            inp_state_q <= inp_state_d;
+        end
+    end
+
+    // Output control FSM, one instance per output.
+    for (genvar i = 0; i < N_OUP; i++) begin: gen_oup_state
+        state_t oup_state_d, oup_state_q;
+        // Defaults: output i does not block the input and presents no valid.
+        always_comb begin
+            oup_ready[i]    = 1'b1;
+            valid_o[i]      = 1'b0;
+            oup_state_d     = oup_state_q;
+
+            unique case (oup_state_q)
+                READY: begin
+                    if (valid_i) begin
+                        valid_o[i] = 1'b1;
+                        if (ready_i[i]) begin   // Output handshake
+                            if (!ready_o) begin     // No input handshake yet
+                                oup_state_d = WAIT;
+                            end
+                        end else begin          // No output handshake
+                            oup_ready[i] = 1'b0;
+                        end
+                    end
+                end
+                WAIT: begin  // this output is done; valid_o[i] stays low until the input handshake
+                    if (valid_i && ready_o) begin   // Input handshake
+                        oup_state_d = READY;
+                    end
+                end
+                default: begin
+                    oup_state_d = READY;
+                end
+            endcase
+        end
+        // State register of output i.
+        always_ff @(posedge clk_i, negedge rst_ni) begin
+            if (!rst_ni) begin
+                oup_state_q <= READY;
+            end else begin
+                oup_state_q <= oup_state_d;
+            end
+        end
+    end
+
+    assign all_ones = '1;   // Synthesis fix for Vivado, which does not correctly compute the width
+                            // of the '1 literal when assigned to a port of parametrized width.
+
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: p_assertions
+        assert (N_OUP >= 1) else $fatal(1, "Number of outputs must be at least 1!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_fork_dynamic.sv b/vendor/pulp-platform/common_cells/src/stream_fork_dynamic.sv
new file mode 100644
index 0000000000..e4720f704f
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_fork_dynamic.sv
@@ -0,0 +1,95 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Authors:
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+/// Dynamic stream fork: Connects the input stream (ready-valid) handshake to a combination of
+/// output stream handshakes.  The combination is determined dynamically through another stream,
+/// which provides a bitmask (`sel_i`) for the fork.  For each input stream handshake, every output
+/// selected in the mask handshakes exactly once; deselected outputs keep `valid_o` low.  `ready_o`
+/// and `sel_ready_o` are driven identically; outputs do not have to handshake simultaneously.
+///
+/// This module has no data ports because stream data does not need to be forked: the data of the
+/// input stream can just be applied at all output streams.
+module stream_fork_dynamic #(
+  /// Number of output streams
+  parameter int unsigned N_OUP = 32'd0 // Synopsys DC requires a default value for parameters.
+) (
+  /// Clock
+  input  logic             clk_i,
+  /// Asynchronous reset, active low
+  input  logic             rst_ni,
+  /// Input stream valid handshake; only forwarded to the fork while `sel_valid_i` is set
+  input  logic             valid_i,
+  /// Input stream ready handshake; held low while `sel_valid_i` is low
+  output logic             ready_o,
+  /// Selection mask for the output handshake: bit `i` set lets output `i` take part in the fork
+  input  logic [N_OUP-1:0] sel_i,
+  /// Selection mask valid
+  input  logic             sel_valid_i,
+  /// Selection mask ready; driven with the same value as `ready_o`
+  output logic             sel_ready_o,
+  /// Output streams valid handshakes; bits deselected by `sel_i` stay low
+  output logic [N_OUP-1:0] valid_o,
+  /// Output streams ready handshakes
+  input  logic [N_OUP-1:0] ready_i
+);
+
+  logic             int_inp_valid,  int_inp_ready;  // Input handshake of the internal `i_fork`
+  logic [N_OUP-1:0] int_oup_valid,  int_oup_ready;  // Output handshakes of the internal `i_fork`
+
+  // Output handshaking: gate every output of the internal fork with the selection mask.
+  for (genvar i = 0; i < N_OUP; i++) begin : gen_oups
+    always_comb begin
+      valid_o[i]       = 1'b0;  // Default: output idle while no valid mask is present
+      int_oup_ready[i] = 1'b0;  // Default: stall the internal fork output
+      if (sel_valid_i) begin
+        if (sel_i[i]) begin
+          valid_o[i]       = int_oup_valid[i];  // Selected: pass the handshake through
+          int_oup_ready[i] = ready_i[i];
+        end else begin
+          int_oup_ready[i] = 1'b1;  // Deselected: acknowledge towards the internal fork
+        end
+      end
+    end
+  end
+
+  // Input handshaking: the input stream is only connected while a selection mask is valid.
+  always_comb begin
+    int_inp_valid = 1'b0;
+    ready_o       = 1'b0;
+    sel_ready_o   = 1'b0;
+    if (sel_valid_i) begin
+      int_inp_valid = valid_i;
+      ready_o       = int_inp_ready;
+      sel_ready_o   = int_inp_ready;  // Mask is acknowledged with the same signal as the input
+    end
+  end
+
+  stream_fork #(
+    .N_OUP  ( N_OUP )
+  ) i_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i ( int_inp_valid ),
+    .ready_o ( int_inp_ready ),
+    .valid_o ( int_oup_valid ),
+    .ready_i ( int_oup_ready )
+  );
+
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin: p_assertions
+    assert (N_OUP >= 1) else $fatal(1, "N_OUP must be at least 1!");
+  end
+`endif
+// pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_intf.sv b/vendor/pulp-platform/common_cells/src/stream_intf.sv
new file mode 100644
index 0000000000..32f2d8b6b8
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_intf.sv
@@ -0,0 +1,49 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+/// A stream interface with custom payload of type `payload_t`.
+/// Handshaking rules as defined in the AXI standard.
+interface STREAM_DV #(
+  /// Custom payload type.
+  parameter type payload_t = logic
+)(
+  /// Interface clock; only sampled by the assertions at the end of this interface.
+  input logic clk_i
+);
+  payload_t data;   // Payload, driven through modport `Out`
+  logic valid;      // Driven through modport `Out`
+  logic ready;      // Driven through modport `In`
+
+  // Receiving side: drives `ready`, observes `valid` and `data`.
+  modport In (
+    output ready,
+    input valid, data
+  );
+
+  // Sending side: drives `valid` and `data`, observes `ready`.
+  modport Out (
+    output valid, data,
+    input ready
+  );
+
+  /// Passive modport for scoreboard and monitors.
+  modport Passive (
+    input valid, ready, data
+  );
+
+  // Make sure that `valid` and the payload stay stable while a transfer is pending (no `ready`).
+  // pragma translate_off
+  `ifndef VERILATOR
+  assert property (@(posedge clk_i) (valid && !ready |=> $stable(data)));
+  assert property (@(posedge clk_i) (valid && !ready |=> valid));
+  `endif
+  // pragma translate_on
+endinterface
diff --git a/vendor/pulp-platform/common_cells/src/stream_join.sv b/vendor/pulp-platform/common_cells/src/stream_join.sv
new file mode 100644
index 0000000000..2f210bc7e6
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_join.sv
@@ -0,0 +1,43 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Authors:
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+/// Stream join: Merges `N_INP` input streams (valid-ready handshaking with dependency rules as in
+/// AXI4) into one output stream.  The output only becomes valid once every input is valid, and all
+/// inputs are acknowledged together with the output.  The data channel flows outside this module.
+module stream_join #(
+  /// Number of input streams to join
+  parameter int unsigned N_INP = 32'd0 // Synopsys DC requires a default value for parameters.
+) (
+  /// Valid of each input stream
+  input  logic  [N_INP-1:0] inp_valid_i,
+  /// Ready towards each input stream
+  output logic  [N_INP-1:0] inp_ready_o,
+  /// Valid of the joined output stream
+  output logic              oup_valid_o,
+  /// Ready from the consumer of the joined output stream
+  input  logic              oup_ready_i
+);
+
+  logic join_hs;  // Handshake of the joined stream, broadcast as ready to every input
+  assign oup_valid_o = &inp_valid_i;
+  assign join_hs     = oup_ready_i & oup_valid_o;
+  assign inp_ready_o = {$bits(inp_ready_o){join_hs}};
+
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin : p_assertions
+    assert (N_INP > 0) else $fatal(1, "N_INP must be at least 1!");
+  end
+`endif
+// pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_mux.sv b/vendor/pulp-platform/common_cells/src/stream_mux.sv
new file mode 100644
index 0000000000..34607d916d
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_mux.sv
@@ -0,0 +1,46 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Stream multiplexer: connects the output to one of `N_INP` data streams with valid-ready
+/// handshaking.  The stream is chosen combinationally by `inp_sel_i`; the module holds no state.
+
+module stream_mux #(
+  parameter type DATA_T = logic,  // Vivado requires a default value for type parameters.
+  parameter integer N_INP = 0,    // Synopsys DC requires a default value for value parameters.
+  /// Dependent parameters, DO NOT OVERRIDE!  NOTE(review): 0 for N_INP = 1, i.e. inp_sel_i[-1:0].
+  parameter integer LOG_N_INP = $clog2(N_INP)
+) (
+  input  DATA_T [N_INP-1:0]     inp_data_i,   // Payload of every input stream
+  input  logic  [N_INP-1:0]     inp_valid_i,  // Valid of every input stream
+  output logic  [N_INP-1:0]     inp_ready_o,  // Ready towards every input stream
+
+  input  logic  [LOG_N_INP-1:0] inp_sel_i,    // Index of the input connected to the output
+
+  output DATA_T                 oup_data_o,   // Payload of the selected input
+  output logic                  oup_valid_o,  // Valid of the selected input
+  input  logic                  oup_ready_i   // Ready of the output stream
+);
+
+  always_comb begin
+    inp_ready_o = '0;                       // Inputs that are not selected are never ready
+    inp_ready_o[inp_sel_i] = oup_ready_i;   // Only the selected input sees the output's ready
+  end
+  assign oup_data_o   = inp_data_i[inp_sel_i];
+  assign oup_valid_o  = inp_valid_i[inp_sel_i];
+
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin: p_assertions
+    assert (N_INP >= 1) else $fatal (1, "The number of inputs must be at least 1!");
+  end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_omega_net.sv b/vendor/pulp-platform/common_cells/src/stream_omega_net.sv
new file mode 100644
index 0000000000..ad8e11d8ea
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_omega_net.sv
@@ -0,0 +1,301 @@
+// Copyright (c) 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Wolfgang Roenninger <wroennin@ethz.ch>
+
+/// Omega network using multiple `stream_xbar` as switches.
+///
+/// An omega network is isomorphic to a butterfly network.
+///
+/// Handshaking rules as defined by the `AMBA AXI` standard on default.
+module stream_omega_net #(
+  /// Number of inputs into the network (`> 0`).
+  parameter int unsigned NumInp      = 32'd0,
+  /// Number of outputs from the network (`> 0`).
+  parameter int unsigned NumOut      = 32'd0,
+  /// Radix of the individual switch points of the network.
+  /// Currently supported are `32'd2` and `32'd4`.
+  parameter int unsigned Radix       = 32'd2,
+  /// Data width of the stream. Can be overwritten by defining the type parameter `payload_t`.
+  parameter int unsigned DataWidth   = 32'd1,
+  /// Payload type of the data ports, only usage of parameter `DataWidth`.
+  parameter type         payload_t   = logic [DataWidth-1:0],
+  /// Adds a spill register stage at each output.
+  parameter bit          SpillReg = 1'b0,
+  /// Use external priority for the `rr_arb_tree`s; only forwarded if a single `stream_xbar` is used.
+  parameter int unsigned ExtPrio     = 1'b0,
+  /// Use strict AXI valid ready handshaking.
+  /// To be protocol conform also the parameter `LockIn` has to be set.
+  parameter int unsigned AxiVldRdy   = 1'b1,
+  /// Lock in the arbitration decision of the `rr_arb_tree`.
+  /// When this is set, valids have to be asserted until the corresponding transaction is indicated
+  /// by ready.
+  parameter int unsigned LockIn      = 1'b1,
+  /// Derived parameter, do **not** overwrite!
+  ///
+  /// Width of the output selection signal.
+  parameter int unsigned SelWidth = (NumOut > 32'd1) ? unsigned'($clog2(NumOut)) : 32'd1,
+  /// Derived parameter, do **not** overwrite!
+  ///
+  /// Signal type definition for selecting the output at the inputs.
+  parameter type sel_oup_t = logic[SelWidth-1:0],
+  /// Derived parameter, do **not** overwrite!
+  ///
+  /// Width of the input index signal.
+  parameter int unsigned IdxWidth = (NumInp > 32'd1) ? unsigned'($clog2(NumInp)) : 32'd1,
+  /// Derived parameter, do **not** overwrite!
+  ///
+  /// Signal type definition indicating from which input the output came.
+  parameter type idx_inp_t = logic[IdxWidth-1:0]
+) (
+  /// Clock, positive edge triggered.
+  input  logic                  clk_i,
+  /// Asynchronous reset, active low.
+  input  logic                  rst_ni,
+  /// Flush the state of the internal `rr_arb_tree` modules.
+  /// If not used set to `0`.
+  /// Flush should only be used if there are no active `valid_i`, otherwise it will
+  /// not adhere to the AXI handshaking.
+  input  logic                  flush_i,
+  /// Provide an external state for the `rr_arb_tree` models.
+  /// Will only do something if ExtPrio is `1` otherwise tie to `0`.
+  input  idx_inp_t [NumOut-1:0] rr_i,
+  /// Input data ports.
+  /// Has to be stable as long as `valid_i` is asserted when parameter `AxiVldRdy` is set.
+  input  payload_t [NumInp-1:0] data_i,
+  /// Selection of the output port where the data should be routed.
+  /// Has to be stable as long as `valid_i` is asserted and parameter `AxiVldRdy` is set.
+  input  sel_oup_t [NumInp-1:0] sel_i,
+  /// Input is valid.
+  input  logic     [NumInp-1:0] valid_i,
+  /// Input is ready to accept data.
+  output logic     [NumInp-1:0] ready_o,
+  /// Output data ports. Valid if `valid_o = 1`
+  output payload_t [NumOut-1:0] data_o,
+  /// Index of the input port where data came from.
+  output idx_inp_t [NumOut-1:0] idx_o,
+  /// Output is valid.
+  output logic     [NumOut-1:0] valid_o,
+  /// Output can be accepted.
+  input  logic     [NumOut-1:0] ready_i
+);
+  if (NumInp <= Radix && NumOut <= Radix) begin : gen_degenerate_omega_net
+    // If both Number of inputs and number of outputs are smaller or the same as the radix
+    // just instantiate a `stream_xbar`.
+    stream_xbar #(
+      .NumInp      ( NumInp    ),
+      .NumOut      ( NumOut    ),
+      .payload_t   ( payload_t ),
+      .OutSpillReg ( SpillReg  ),
+      .ExtPrio     ( ExtPrio   ),
+      .AxiVldRdy   ( AxiVldRdy ),
+      .LockIn      ( LockIn    )
+    ) i_stream_xbar (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .rr_i    ( rr_i    ),
+      .data_i  ( data_i  ),
+      .sel_i   ( sel_i   ),
+      .valid_i ( valid_i ),
+      .ready_o ( ready_o ),
+      .data_o  ( data_o  ),
+      .idx_o   ( idx_o   ),
+      .valid_o ( valid_o ),
+      .ready_i ( ready_i )
+    );
+  end else begin : gen_omega_net
+    // Find the next power of radix of either the number of inputs or number of outputs.
+    // This normalizes the network to a power of the radix. Unused inputs and outputs are tied off.
+    // If the radix is poorly chosen with respect to the number of input/outputs ports
+    // will lead to an explosion of tied off lanes, which will be removed during optimization.
+    // Can lead however to RTL simulation overhead.
+    // Dividing through the log base 2 of `Radix` leads to a change of base.
+    localparam int unsigned NumLanes = (NumOut > NumInp) ?
+        unsigned'(Radix**(cf_math_pkg::ceil_div($clog2(NumOut), $clog2(Radix)))) :
+        unsigned'(Radix**(cf_math_pkg::ceil_div($clog2(NumInp), $clog2(Radix))));
+
+    // Find the number of routing levels needed.
+    localparam int unsigned NumLevels = unsigned'(($clog2(NumLanes)+$clog2(Radix)-1)/$clog2(Radix));
+
+    // Find the number of routes per network stage. Can use a normal division here, as
+    // `NumLanes % Radix == 0`.
+    localparam int unsigned NumRouters = NumLanes / Radix;
+
+    // Define the type of sel signal to send through the network. It has to be sliced for the
+    // individual sel signals of a stage. This slicing has to align with `$clog2(Radix)`.
+    // For example `Radix = 4`, `NumOut = 17` will lead to the sel signal of an individual stage to
+    // be 2 bit wide, whereas signal `sel_i` of the module will be 5 bit wide.
+    // To prevent slicing into an undefined field the overall sel signal is then defined with
+    // width 6.
+    typedef logic [$clog2(NumLanes)-1:0] sel_dst_t;
+
+    // Selection signal type of an individual router
+    localparam int unsigned SelW = unsigned'($clog2(Radix));
+    initial begin : proc_selw  // Debug print of the derived selection widths
+      $display("SelW is:    %0d", SelW);
+      $display("SelDstW is: %0d", $bits(sel_dst_t));
+    end
+    typedef logic [SelW-1:0] sel_t;
+
+    // Define the payload which should be routed through the network.
+    typedef struct packed {
+      sel_dst_t sel_oup; // Selection of output, where it should be routed
+      payload_t payload; // External payload data
+      idx_inp_t idx_inp; // Index of the input of this packet
+    } omega_data_t;
+
+    // signal definitions
+    omega_data_t [NumLevels-1:0][NumRouters-1:0][Radix-1:0] inp_router_data;
+    logic        [NumLevels-1:0][NumRouters-1:0][Radix-1:0] inp_router_valid, inp_router_ready;
+    omega_data_t [NumLevels-1:0][NumRouters-1:0][Radix-1:0] out_router_data;
+    logic        [NumLevels-1:0][NumRouters-1:0][Radix-1:0] out_router_valid, out_router_ready;
+
+    // Generate the shuffling between the routers
+    for (genvar i = 0; unsigned'(i) < NumLevels-1; i++) begin : gen_shuffle_levels
+      for (genvar j = 0; unsigned'(j) < NumRouters; j++) begin : gen_shuffle_routers
+        for (genvar k = 0; unsigned'(k) < Radix; k++) begin : gen_shuffle_radix
+          // This parameter is from `0` to `NumLanes-1`
+          localparam int unsigned IdxLane = Radix * j + k;
+          // Do the perfect shuffle
+          assign inp_router_data[i+1][IdxLane%NumRouters][IdxLane/NumRouters] =
+              out_router_data[i][j][k];
+
+          assign inp_router_valid[i+1][IdxLane%NumRouters][IdxLane/NumRouters] =
+              out_router_valid[i][j][k];
+
+          assign out_router_ready[i][j][k] =
+              inp_router_ready[i+1][IdxLane%NumRouters][IdxLane/NumRouters];
+
+          // Do the first input shuffle of layer 0.
+          // The inputs are connected in reverse. The reason is that the optimization then
+          // leaves the biggest possible network diameter.
+          if (i == 0) begin : gen_shuffle_inp
+            // Reverse the order of the input ports
+            if ((NumLanes-IdxLane) <= NumInp) begin : gen_inp_ports
+              localparam int unsigned IdxInp = NumLanes - IdxLane - 32'd1;
+              assign inp_router_data[0][IdxLane%NumRouters][IdxLane/NumRouters] = '{
+                    sel_oup: sel_dst_t'(sel_i[IdxInp]),
+                    payload: data_i[IdxInp],
+                    idx_inp: idx_inp_t'(IdxInp)
+                  };
+
+              assign inp_router_valid[0][IdxLane%NumRouters][IdxLane/NumRouters] = valid_i[IdxInp];
+              assign ready_o[IdxInp] = inp_router_ready[0][IdxLane%NumRouters][IdxLane/NumRouters];
+
+            end else begin : gen_tie_off
+              assign inp_router_data[0][IdxLane%NumRouters][IdxLane/NumRouters] = '{ default: '0};
+              assign inp_router_valid[0][IdxLane%NumRouters][IdxLane/NumRouters] = 1'b0;
+            end
+          end
+        end
+      end
+    end
+
+    // Generate the `stream_xbar_routers`
+    for (genvar i = 0; unsigned'(i) < NumLevels; i++) begin : gen_router_levels
+      for (genvar j = 0; unsigned'(j) < NumRouters; j++) begin : gen_routers
+        sel_t [Radix-1:0] sel_router;
+        for (genvar k = 0; unsigned'(k) < Radix; k++) begin : gen_router_sel
+          // For the inter stage routing some bits of the overall selection are important.
+          // The `MSB` is for stage `0`, `MSB-1` for stage `1` and so on for the `Radix=2` case.
+          // For higher radices a bit slice following the same pattern is used.
+          // This is the reason that the internal network is expanded to a power of two, so that
+          // the selection slicing always has a valid index.
+          assign sel_router[k] = inp_router_data[i][j][k].sel_oup[SelW*(NumLevels-i-1)+:SelW];
+        end
+
+        stream_xbar #(
+          .NumInp      ( Radix        ),
+          .NumOut      ( Radix        ),
+          .payload_t   ( omega_data_t ),
+          .OutSpillReg ( SpillReg     ),
+          .ExtPrio     ( 1'b0         ),
+          .AxiVldRdy   ( AxiVldRdy    ),
+          .LockIn      ( LockIn       )
+        ) i_stream_xbar (
+          .clk_i,
+          .rst_ni,
+          .flush_i,
+          .rr_i    ( '0                     ),
+          .data_i  ( inp_router_data[i][j]  ),
+          .sel_i   ( sel_router             ),
+          .valid_i ( inp_router_valid[i][j] ),
+          .ready_o ( inp_router_ready[i][j] ),
+          .data_o  ( out_router_data[i][j]  ),
+          .idx_o   ( /* not used */         ),
+          .valid_o ( out_router_valid[i][j] ),
+          .ready_i ( out_router_ready[i][j] )
+        );
+      end
+    end
+
+    // outputs are on the last level
+    for (genvar i = 0; unsigned'(i) < NumLanes; i++) begin : gen_outputs
+      if (i < NumOut) begin : gen_connect
+        assign data_o[i]  = out_router_data[NumLevels-1][i/Radix][i%Radix].payload;
+        assign idx_o[i]   = out_router_data[NumLevels-1][i/Radix][i%Radix].idx_inp;
+        assign valid_o[i] = out_router_valid[NumLevels-1][i/Radix][i%Radix];
+        assign out_router_ready[NumLevels-1][i/Radix][i%Radix] = ready_i[i];
+      end else begin : gen_tie_off
+        assign out_router_ready[NumLevels-1][i/Radix][i%Radix] = 1'b0;
+      end
+    end
+
+    initial begin : proc_debug_print
+      $display("NumInp:     %0d", NumInp);
+      $display("NumOut:     %0d", NumOut);
+      $display("Radix:      %0d", Radix);
+      $display("NumLanes:   %0d", NumLanes);
+      $display("NumLevels:  %0d", NumLevels);
+      $display("NumRouters: %0d", NumRouters);
+    end
+
+    // Assertions: output selection range, plus handshake and payload stability.
+    // `NumOut` is compared unsized: casting it to `sel_oup_t` wraps to 0 for power-of-two `NumOut`.
+    // pragma translate_off
+    `ifndef VERILATOR
+    default disable iff (!rst_ni);  // Reset is active low: only mute the checks while in reset.
+    for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_sel_assertions
+      assert property (@(posedge clk_i) (valid_i[i] |-> sel_i[i] < NumOut)) else
+          $fatal(1, "Non-existing output is selected!");
+    end
+
+    if (AxiVldRdy) begin : gen_handshake_assertions
+      for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_inp_assertions
+        assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> $stable(data_i[i]))) else
+            $error("data_i is unstable at input: %0d", i);
+        assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> $stable(sel_i[i]))) else
+            $error("sel_i is unstable at input: %0d", i);
+        assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> valid_i[i])) else
+            $error("valid_i at input %0d has been taken away without a ready.", i);
+      end
+      for (genvar i = 0; unsigned'(i) < NumOut; i++) begin : gen_out_assertions
+        assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> $stable(data_o[i]))) else
+            $error("data_o is unstable at output: %0d Check that parameter LockIn is set.", i);
+        assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> $stable(idx_o[i]))) else
+            $error("idx_o is unstable at output: %0d Check that parameter LockIn is set.", i);
+        assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> valid_o[i])) else
+            $error("valid_o at output %0d has been taken away without a ready.", i);
+      end
+    end
+
+    initial begin : proc_parameter_assertions
+      assert ((2**$clog2(Radix) == Radix) && (Radix > 32'd1)) else
+          $fatal(1, "Radix %0d is not power of two.", Radix);
+      assert (2**$clog2(NumRouters) == NumRouters) else
+          $fatal(1, "NumRouters %0d is not power of two.", NumRouters);
+      assert ($clog2(NumLanes) % SelW == 0) else
+          $fatal(1, "Bit slicing of the internal selection signal is broken.");
+    end
+    `endif
+    // pragma translate_on
+  end
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_register.sv b/vendor/pulp-platform/common_cells/src/stream_register.sv
new file mode 100644
index 0000000000..f529d6a291
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_register.sv
@@ -0,0 +1,57 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Single-entry register stage for a stream with ready/valid handshake.
+/// The control paths are not fully cut combinationally; use the `spill_register` when a complete
+/// cut of all handshake signals is required.
+module stream_register #(
+    parameter type T = logic  // Vivado requires a default value for type parameters.
+) (
+    input  logic    clk_i,          // Clock
+    input  logic    rst_ni,         // Asynchronous active-low reset
+    input  logic    clr_i,          // Synchronous clear
+    input  logic    testmode_i,     // Test mode to bypass clock gating
+    // Upstream (input) side
+    input  logic    valid_i,
+    output logic    ready_o,
+    input  T        data_i,
+    // Downstream (output) side
+    output logic    valid_o,
+    input  logic    ready_i,
+    output T        data_o
+);
+
+    // Occupancy flags of the one-entry FIFO that holds the registered payload.
+    logic reg_full, reg_empty;
+
+    assign valid_o = !reg_empty;    // Output is valid while the FIFO is not empty
+    assign ready_o = !reg_full;     // Input is accepted while the FIFO is not full
+
+    fifo_v2 #(
+        .dtype          ( T        ),
+        .DATA_WIDTH     ( $bits(T) ),
+        .DEPTH          ( 1        ),
+        .FALL_THROUGH   ( 1'b0     )
+    ) i_fifo (
+        .clk_i          ( clk_i             ),
+        .rst_ni         ( rst_ni            ),
+        .testmode_i     ( testmode_i        ),
+        .flush_i        ( clr_i             ),
+        .data_i         ( data_i            ),
+        .push_i         ( valid_i & ready_o ),
+        .full_o         ( reg_full          ),
+        .alm_full_o     (                   ),
+        .data_o         ( data_o            ),
+        .pop_i          ( ready_i & valid_o ),
+        .empty_o        ( reg_empty         ),
+        .alm_empty_o    (                   )
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_to_mem.sv b/vendor/pulp-platform/common_cells/src/stream_to_mem.sv
new file mode 100644
index 0000000000..00c30863fa
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_to_mem.sv
@@ -0,0 +1,134 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Authors:
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+/// `stream_to_mem`: Allows to use memories with flow control (`valid`/`ready`) for requests but without flow
+/// control for output data to be used in streams.
+`include "common_cells/registers.svh"
+module stream_to_mem #(
+  /// Memory request payload type, usually write enable, write data, etc.
+  parameter type         mem_req_t  = logic,
+  /// Memory response payload type, usually read data
+  parameter type         mem_resp_t = logic,
+  /// Number of buffered responses (fall-through, thus no additional latency).  This defines the
+  /// maximum number of outstanding requests on the memory interface. If the attached memory
+  /// responds in the same cycle a request is applied, this MUST be 0. If the attached memory
+  /// responds at least one cycle after a request, this MUST be >= 1 and should be equal to the
+  /// response latency of the memory to saturate bandwidth.
+  parameter int unsigned BufDepth   = 32'd1
+) (
+  /// Clock
+  input  logic      clk_i,
+  /// Asynchronous reset, active low
+  input  logic      rst_ni,
+  /// Request stream interface, payload
+  input  mem_req_t  req_i,
+  /// Request stream interface, payload is valid for transfer
+  input  logic      req_valid_i,
+  /// Request stream interface, payload can be accepted
+  output logic      req_ready_o,
+  /// Response stream interface, payload
+  output mem_resp_t resp_o,
+  /// Response stream interface, payload is valid for transfer
+  output logic      resp_valid_o,
+  /// Response stream interface, payload can be accepted
+  input  logic      resp_ready_i,
+  /// Memory request interface, payload
+  output mem_req_t  mem_req_o,
+  /// Memory request interface, payload is valid for transfer
+  output logic      mem_req_valid_o,
+  /// Memory request interface, payload can be accepted
+  input  logic      mem_req_ready_i,
+  /// Memory response interface, payload
+  input  mem_resp_t mem_resp_i,
+  /// Memory response interface, payload is valid
+  input  logic      mem_resp_valid_i
+);
+
+  typedef logic [$clog2(BufDepth+1):0] cnt_t;
+
+  cnt_t cnt_d, cnt_q;   // Outstanding-request counter; only driven when `BufDepth > 0`
+  logic buf_ready,      // Response buffer can take a memory response; only checked by assertion
+        req_ready;      // Another request may be issued to the memory
+
+  if (BufDepth > 0) begin : gen_buf
+    // Count number of outstanding requests.
+    always_comb begin
+      cnt_d = cnt_q;
+      if (req_valid_i && req_ready_o) begin
+        cnt_d++;
+      end
+      if (resp_valid_o && resp_ready_i) begin
+        cnt_d--;
+      end
+    end
+
+    // Can issue another request if the counter is not at its limit or a response is delivered in
+    // the current cycle.
+    assign req_ready = (cnt_q < BufDepth) | (resp_valid_o & resp_ready_i);
+
+    // Control request and memory request interface handshakes.
+    assign req_ready_o = mem_req_ready_i & req_ready;
+    assign mem_req_valid_o = req_valid_i & req_ready;
+
+    // Buffer responses.
+    stream_fifo #(
+      .FALL_THROUGH ( 1'b1       ),
+      .DEPTH        ( BufDepth   ),
+      .T            ( mem_resp_t )
+    ) i_resp_buf (
+      .clk_i,
+      .rst_ni,
+      .flush_i    ( 1'b0             ),
+      .testmode_i ( 1'b0             ),
+      .data_i     ( mem_resp_i       ),
+      .valid_i    ( mem_resp_valid_i ),
+      .ready_o    ( buf_ready        ),
+      .data_o     ( resp_o           ),
+      .valid_o    ( resp_valid_o     ),
+      .ready_i    ( resp_ready_i     ),
+      .usage_o    ( /* unused */     )
+    );
+
+    // Register
+    `FFARN(cnt_q, cnt_d, '0, clk_i, rst_ni)
+
+  end else begin : gen_no_buf
+    // Control request, memory request, and response interface handshakes.
+    assign mem_req_valid_o = req_valid_i;
+    assign resp_valid_o    = mem_req_valid_o & mem_req_ready_i & mem_resp_valid_i;
+    assign req_ready_o     = resp_ready_i    & resp_valid_o;
+
+    // Forward responses.
+    assign resp_o = mem_resp_i;
+  end
+
+  // Forward requests.
+  assign mem_req_o = req_i;
+
+// Assertions
+// pragma translate_off
+`ifndef VERILATOR
+  if (BufDepth > 0) begin : gen_buf_asserts
+    assert property (@(posedge clk_i) mem_resp_valid_i |-> buf_ready)
+      else $error("Memory response lost!");
+    assert property (@(posedge clk_i) cnt_q == '0 |=> cnt_q != '1)
+      else $error("Counter underflowed!");
+    assert property (@(posedge clk_i) cnt_q == BufDepth |=> cnt_q != BufDepth + 1)
+      else $error("Counter overflowed!");
+  end else begin : gen_no_buf_asserts
+    assume property (@(posedge clk_i) mem_req_valid_o & mem_req_ready_i |-> mem_resp_valid_i)
+      else $error("With BufDepth = 0, the memory must respond in the same cycle!");
+  end
+`endif
+// pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/stream_xbar.sv b/vendor/pulp-platform/common_cells/src/stream_xbar.sv
new file mode 100644
index 0000000000..957400680b
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/stream_xbar.sv
@@ -0,0 +1,198 @@
+// Copyright (c) 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Wolfgang Roenninger <wroennin@ethz.ch>
+
+/// Fully connected stream crossbar.
+///
+/// Each input is steered by its `sel_i` through a `stream_demux` towards exactly one output.
+/// Each output arbitrates between all inputs with an `rr_arb_tree` and passes the winner
+/// (payload plus index of the originating input) through a `spill_register`, which is bypassed
+/// unless `OutSpillReg` is set.
+///
+/// Handshaking rules as defined by the `AMBA AXI` standard by default.
+module stream_xbar #(
+  /// Number of inputs into the crossbar (`> 0`).
+  parameter int unsigned NumInp      = 32'd0,
+  /// Number of outputs from the crossbar (`> 0`).
+  parameter int unsigned NumOut      = 32'd0,
+  /// Data width of the stream. Can be overwritten by defining the type parameter `payload_t`.
+  parameter int unsigned DataWidth   = 32'd1,
+  /// Payload type of the data ports, only usage of parameter `DataWidth`.
+  parameter type         payload_t   = logic [DataWidth-1:0],
+  /// Adds a spill register stage at each output.
+  parameter bit          OutSpillReg = 1'b0,
+  /// Use external priority for the individual `rr_arb_trees`.
+  parameter int unsigned ExtPrio     = 1'b0,
+  /// Use strict AXI valid ready handshaking.
+  /// To be protocol conform also the parameter `LockIn` has to be set.
+  parameter int unsigned AxiVldRdy   = 1'b1,
+  /// Lock in the arbitration decision of the `rr_arb_tree`.
+  /// When this is set, valids have to be asserted until the corresponding transaction is indicated
+  /// by ready.
+  parameter int unsigned LockIn      = 1'b1,
+  /// Derived parameter, do **not** overwrite!
+  ///
+  /// Width of the output selection signal.
+  parameter int unsigned SelWidth = (NumOut > 32'd1) ? unsigned'($clog2(NumOut)) : 32'd1,
+  /// Derived parameter, do **not** overwrite!
+  ///
+  /// Signal type definition for selecting the output at the inputs.
+  parameter type sel_oup_t = logic[SelWidth-1:0],
+  /// Derived parameter, do **not** overwrite!
+  ///
+  /// Width of the input index signal.
+  parameter int unsigned IdxWidth = (NumInp > 32'd1) ? unsigned'($clog2(NumInp)) : 32'd1,
+  /// Derived parameter, do **not** overwrite!
+  ///
+  /// Signal type definition indicating from which input the output came.
+  parameter type idx_inp_t = logic[IdxWidth-1:0]
+) (
+  /// Clock, positive edge triggered.
+  input  logic                  clk_i,
+  /// Asynchronous reset, active low.
+  input  logic                  rst_ni,
+  /// Flush the state of the internal `rr_arb_tree` modules.
+  /// If not used set to `0`.
+  /// Flush should only be used if there are no active `valid_i`, otherwise it will
+  /// not adhere to the AXI handshaking.
+  input  logic                  flush_i,
+  /// Provide an external state for the `rr_arb_tree` models.
+  /// Will only do something if ExtPrio is `1` otherwise tie to `0`.
+  input  idx_inp_t [NumOut-1:0] rr_i,
+  /// Input data ports.
+  /// Has to be stable as long as `valid_i` is asserted when parameter `AxiVldRdy` is set.
+  input  payload_t [NumInp-1:0] data_i,
+  /// Selection of the output port where the data should be routed.
+  /// Has to be stable as long as `valid_i` is asserted and parameter `AxiVldRdy` is set.
+  input  sel_oup_t [NumInp-1:0] sel_i,
+  /// Input is valid.
+  input  logic     [NumInp-1:0] valid_i,
+  /// Input is ready to accept data.
+  output logic     [NumInp-1:0] ready_o,
+  /// Output data ports. Valid if `valid_o = 1`
+  output payload_t [NumOut-1:0] data_o,
+  /// Index of the input port where data came from.
+  output idx_inp_t [NumOut-1:0] idx_o,
+  /// Output is valid.
+  output logic     [NumOut-1:0] valid_o,
+  /// Output can be accepted.
+  input  logic     [NumOut-1:0] ready_i
+);
+  // Payload and originating input index travel together through the spill register.
+  typedef struct packed {
+    payload_t data;
+    idx_inp_t idx;
+  } spill_data_t;
+
+  // Handshake signals as seen from the inputs: indexed [input][output].
+  logic     [NumInp-1:0][NumOut-1:0] inp_valid;
+  logic     [NumInp-1:0][NumOut-1:0] inp_ready;
+
+  // The same signals as seen from the outputs: indexed [output][input].
+  payload_t [NumOut-1:0][NumInp-1:0] out_data;
+  logic     [NumOut-1:0][NumInp-1:0] out_valid;
+  logic     [NumOut-1:0][NumInp-1:0] out_ready;
+
+  // Generate the input selection
+  for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_inps
+    stream_demux #(
+      .N_OUP ( NumOut )
+    ) i_stream_demux (
+      .inp_valid_i ( valid_i[i]   ),
+      .inp_ready_o ( ready_o[i]   ),
+      .oup_sel_i   ( sel_i[i]     ),
+      .oup_valid_o ( inp_valid[i] ),
+      .oup_ready_i ( inp_ready[i] )
+    );
+
+    // Do the switching cross of the signals.
+    for (genvar j = 0; unsigned'(j) < NumOut; j++) begin : gen_cross
+      // Propagate the data from this input to all outputs.
+      assign out_data[j][i]  = data_i[i];
+      // switch handshaking (transpose [input][output] <-> [output][input])
+      assign out_valid[j][i] = inp_valid[i][j];
+      assign inp_ready[i][j] = out_ready[j][i];
+    end
+  end
+
+  // Generate the output arbitration.
+  for (genvar j = 0; unsigned'(j) < NumOut; j++) begin : gen_outs
+    spill_data_t arb;
+    logic        arb_valid, arb_ready;
+
+    rr_arb_tree #(
+      .NumIn     ( NumInp    ),
+      .DataType  ( payload_t ),
+      .ExtPrio   ( ExtPrio   ),
+      .AxiVldRdy ( AxiVldRdy ),
+      .LockIn    ( LockIn    )
+    ) i_rr_arb_tree (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .rr_i    ( rr_i[j]      ),
+      .req_i   ( out_valid[j] ),
+      .gnt_o   ( out_ready[j] ),
+      .data_i  ( out_data[j]  ),
+      .req_o   ( arb_valid    ),
+      .gnt_i   ( arb_ready    ),
+      .data_o  ( arb.data     ),
+      .idx_o   ( arb.idx      )
+    );
+
+    spill_data_t spill;
+
+    // Bypassed (purely combinational) unless `OutSpillReg` is set.
+    spill_register #(
+      .T      ( spill_data_t ),
+      .Bypass ( !OutSpillReg )
+    ) i_spill_register (
+      .clk_i,
+      .rst_ni,
+      .valid_i ( arb_valid  ),
+      .ready_o ( arb_ready  ),
+      .data_i  ( arb        ),
+      .valid_o ( valid_o[j] ),
+      .ready_i ( ready_i[j] ),
+      .data_o  ( spill      )
+    );
+    // Assign the outputs (deaggregate the data).
+    assign data_o[j] = spill.data;
+    assign idx_o[j]  = spill.idx;
+  end
+
+  // Assertions
+  // Make sure that the handshake and payload is stable
+  // pragma translate_off
+  `ifndef VERILATOR
+  // `rst_ni` is active low: the properties have to be disabled while it is `0` (reset asserted).
+  // Disabling on `rst_ni` itself would switch every check off during normal operation.
+  default disable iff (!rst_ni);
+  for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_sel_assertions
+    // Compare against the full-width `NumOut`. Casting it to `sel_oup_t` truncates the value to
+    // `0` whenever `NumOut` is a power of two, which would make the check fail for any selection.
+    assert property (@(posedge clk_i) (valid_i[i] |-> sel_i[i] < NumOut)) else
+        $fatal(1, "Non-existing output is selected!");
+  end
+
+  if (AxiVldRdy) begin : gen_handshake_assertions
+    // A stalled input (valid without ready) must keep valid, data and selection unchanged.
+    for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_inp_assertions
+      assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> $stable(data_i[i]))) else
+          $error("data_i is unstable at input: %0d", i);
+      assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> $stable(sel_i[i]))) else
+          $error("sel_i is unstable at input: %0d", i);
+      assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> valid_i[i])) else
+          $error("valid_i at input %0d has been taken away without a ready.", i);
+    end
+    // A stalled output must keep valid, data and index unchanged as well.
+    for (genvar i = 0; unsigned'(i) < NumOut; i++) begin : gen_out_assertions
+      assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> $stable(data_o[i]))) else
+          $error("data_o is unstable at output: %0d Check that parameter LockIn is set.", i);
+      assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> $stable(idx_o[i]))) else
+          $error("idx_o is unstable at output: %0d Check that parameter LockIn is set.", i);
+      assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> valid_o[i])) else
+          $error("valid_o at output %0d has been taken away without a ready.", i);
+    end
+  end
+
+  initial begin : proc_parameter_assertions
+    assert (NumInp > 32'd0) else $fatal(1, "NumInp has to be > 0!");
+    assert (NumOut > 32'd0) else $fatal(1, "NumOut has to be > 0!");
+  end
+  `endif
+  // pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/sub_per_hash.sv b/vendor/pulp-platform/common_cells/src/sub_per_hash.sv
new file mode 100644
index 0000000000..d4938ad4fe
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/sub_per_hash.sv
@@ -0,0 +1,173 @@
+// Copyright (c) 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Wolfgang Roenninger <wroennin@ethz.ch>
+
+// This module implements a fully parameterizable substitution-permutation hash
+// function. The hash is structured in stages consisting of a shuffle of the input bits
+// and then xoring for each bit 3 pseudo-random bits of the shuffled vector.
+// The hash function is NOT cryptographically secure!
+// From the keys it computes a sequence of pseudo-random numbers, which determine the permutations
+// and substitutions. As pseudo random generator a multiplicative linear congruential
+// generator is used and uses different constants for the computation of the permutation
+// and substitution respectively.
+// The permutation shuffles the bits using a variant of the Fisher-Yates shuffle algorithm.
+// The substitution per stage is the xor of 3 pseudo random bits of the previous stage.
+// As shifting and xoring of a signal do not change its distribution, the distribution
+// of the output hash is the same as the one of the input data.
+//
+// Parameters:
+// - `InpWidth`:   The input width of the vector `data_i`.
+// - `HashWidth`:  The output width of the substitution-permutation hash.
+// - `NoRounds`:   The amount of permutation, substitution stages generated. Translates
+//                 into how many levels of xor's there will be before optimization.
+// - `PermuteKey`: The Key for the pseudo-random generator used for determining the exact
+//                 permutation (shuffled wiring between each xor stage) at compile/elaboration.
+//                 Any `int unsigned` value can be used as key, however one should examine the
+//                 output of the hash function.
+// - `XorKey`:     The Key for the pseudo-random generator used for determining the xor
+//                 of bits between stages. The same principles as for `PermuteKey` applies,
+//                 however one should look that both keys have a greatest common divisor of 1.
+
+module sub_per_hash #(
+  parameter int unsigned InpWidth   = 32'd11,
+  parameter int unsigned HashWidth  = 32'd5,
+  parameter int unsigned NoRounds   = 32'd1,
+  parameter int unsigned PermuteKey = 32'd299034753,
+  parameter int unsigned XorKey     = 32'd4094834
+) (
+  // The module is purely combinational: there is no clock or reset.
+  input  logic [InpWidth-1:0]     data_i,
+  output logic [HashWidth-1:0]    hash_o,
+  output logic [2**HashWidth-1:0] hash_onehot_o
+);
+
+  // typedefs and respective localparams
+  // Per round: for every bit position, the index of the source bit it is wired to.
+  typedef int unsigned perm_lists_t [NoRounds][InpWidth];
+  localparam perm_lists_t PERMUTATIONS = get_permutations(PermuteKey);
+  // Per round and bit position: the inner most array holds the indices of the three
+  // permuted bits that are xored together to form that bit (see `gen_sub_per` below).
+  typedef int unsigned xor_stages_t [NoRounds][InpWidth][3];
+  localparam xor_stages_t XorStages = get_xor_stages(XorKey);
+
+  // stage signals
+  logic [NoRounds-1:0][InpWidth-1:0] permuted, xored;
+
+  // for each round
+  for (genvar r = 0; r < NoRounds; r++) begin : gen_round
+    // for each bit
+    for (genvar i = 0; i < InpWidth ; i++) begin : gen_sub_per
+
+      // assign the permutation
+      // Round 0 permutes the input; later rounds take the `permuted` vector of the previous
+      // round as source.
+      if (r == 0) begin : gen_input
+        assign permuted[r][i] = data_i[PERMUTATIONS[r][i]];
+      end else begin : gen_permutation
+        assign permuted[r][i] = permuted[r-1][PERMUTATIONS[r][i]];
+      end
+
+      // assign the xor substitution: always three bits of this round's permuted vector
+      assign xored[r][i] = permuted[r][XorStages[r][i][0]] ^
+                           permuted[r][XorStages[r][i][1]] ^
+                           permuted[r][XorStages[r][i][2]];
+    end
+  end
+
+  // output assignment, take the bottom bits of the last round
+  // (the part-select is only in range if `HashWidth <= InpWidth`)
+  assign hash_o = xored[NoRounds-1][HashWidth-1:0];
+  // for onehot run through a decoder
+  assign hash_onehot_o = 1 << hash_o;
+
+  // Computes the bit permutation of every round at elaboration time from `seed`.
+  // PRG is a linear congruential generator
+  // Constant values the same as RtlUniform from Native API
+  // X(n+1) = (a*X(n)+c) mod m
+  // a: multiplier
+  // c: increment
+  // m: modulus
+  // Shuffling is a variation of the Fisher-Yates shuffle algorithm
+  function automatic perm_lists_t get_permutations(input int unsigned seed);
+    perm_lists_t indices;
+    // Not explicitly initialised; entries are written by the shuffle below.
+    perm_lists_t perm_array;
+    longint unsigned A = 2147483629;
+    longint unsigned C = 2147483587;
+    longint unsigned M = 2**31 - 1;
+    longint unsigned index   = 0;
+    longint unsigned advance = 0;
+    longint unsigned rand_number = (A * seed + C) % M;
+
+    // do it for each round
+    for (int unsigned r = 0; r < NoRounds; r++) begin
+      // initialize the index array
+      for (int unsigned i = 0; i < InpWidth; i++) begin
+        indices[r][i] = i;
+      end
+      // do the shuffling
+      for (int unsigned i = 0; i < InpWidth; i++) begin
+        // get the 'random' number
+        // For i == 0 no number is drawn and `index` keeps the value it had before
+        // (0 in the first round, the last drawn index of the previous round afterwards).
+        if (i > 0) begin
+          rand_number = (A * rand_number + C) % M;
+          // `% i` yields a position in [0, i-1], i.e. strictly below the current one
+          index = rand_number % i;
+        end
+        // do the shuffling: move the entry at `index` to position i and place i at `index`
+        if (i != index) begin
+          perm_array[r][i]     = perm_array[r][index];
+          perm_array[r][index] = indices[r][i];
+        end
+      end
+      // advance the PRG a bit (by a pseudo-random number of steps below `NoRounds`)
+      rand_number = (A * rand_number + C) % M;
+      advance     = rand_number % NoRounds;
+      for (int unsigned i = 0; i < advance; i++) begin
+        rand_number = (A * rand_number + C) % M;
+      end
+    end
+    return perm_array;
+  endfunction : get_permutations
+
+  // Computes, at elaboration time from `seed`, which three bits are xored for every bit
+  // position of every round.
+  // PRG is a linear congruential generator
+  // Constant values the same as Numerical Recipes
+  // X(n+1) = (a*X(n)+c) mod m
+  // a: multiplier
+  // c: increment
+  // m: modulus
+  function automatic xor_stages_t get_xor_stages(input int unsigned seed);
+    xor_stages_t xor_array;
+    longint unsigned A = 1664525;
+    longint unsigned C = 1013904223;
+    longint unsigned M = 2**32;
+    longint unsigned index   = 0;
+    // int unsigned even    = 0;
+    longint unsigned advance = 0;
+    longint unsigned rand_number = (A * seed + C) % M;
+
+    // fill the array with 'random' bit indices
+    // every xor gets three inputs; the two/three input selection (`even`) is commented out
+    // for each round
+    for (int unsigned r = 0; r < NoRounds; r++) begin
+      // for each bit
+      for (int unsigned i = 0; i < InpWidth; i++) begin
+        // This number is drawn but not used; the step still advances the PRG sequence.
+        rand_number = (A * rand_number + C) % M;
+        // even = rand_number[3];
+        for (int unsigned j = 0; j < 3; j++) begin
+          rand_number = (A * rand_number + C) % M;
+          index = rand_number % InpWidth;
+          xor_array[r][i][j] = index;
+        end
+      end
+      // advance the PRG a bit (by a pseudo-random number of steps below `NoRounds`)
+      rand_number = (A * rand_number + C) % M;
+      advance     = rand_number % NoRounds;
+      for (int unsigned i = 0; i < advance; i++) begin
+        rand_number = (A * rand_number + C) % M;
+      end
+    end
+    return xor_array;
+  endfunction : get_xor_stages
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/sync.sv b/vendor/pulp-platform/common_cells/src/sync.sv
new file mode 100644
index 0000000000..7d8e0a1f42
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/sync.sv
@@ -0,0 +1,35 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+// Shift-register synchronizer: `serial_i` is sampled into a chain of `STAGES` flip-flops
+// clocked by `clk_i`; `serial_o` is the output of the last flip-flop in the chain.
+//
+// Parameters:
+// - `STAGES`:     Number of flip-flops in the chain (`>= 1`).
+// - `ResetValue`: Value every flip-flop takes while `rst_ni` is low.
+module sync #(
+    parameter int unsigned STAGES = 2,
+    parameter bit ResetValue = 1'b0
+) (
+    input  logic clk_i,
+    input  logic rst_ni,    // asynchronous reset, active low
+    input  logic serial_i,
+    output logic serial_o
+);
+
+    logic [STAGES-1:0] reg_q;
+
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+        if (!rst_ni) begin
+            reg_q <= {STAGES{ResetValue}};
+        end else begin
+            // Shift towards the MSB. Written per bit instead of with the part-select
+            // `reg_q[STAGES-2:0]`, which is out of range for `STAGES == 1`.
+            reg_q[0] <= serial_i;
+            for (int unsigned i = 1; i < STAGES; i++) begin
+                reg_q[i] <= reg_q[i-1];
+            end
+        end
+    end
+
+    assign serial_o = reg_q[STAGES-1];
+
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/sync_wedge.sv b/vendor/pulp-platform/common_cells/src/sync_wedge.sv
new file mode 100644
index 0000000000..58f1279808
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/sync_wedge.sv
@@ -0,0 +1,56 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+// Synchronizes `serial_i` with a `sync` instance and reports edges on the synchronized
+// signal by comparing it with a copy that is registered on a clock gated by `en_i`.
+module sync_wedge #(
+    parameter int unsigned STAGES = 2
+) (
+    input  logic clk_i,
+    input  logic rst_ni,
+    input  logic en_i,
+    input  logic serial_i,
+    output logic r_edge_o,
+    output logic f_edge_o,
+    output logic serial_o
+);
+    logic serial, serial_q;
+    logic clk_gated;
+
+    // Bring the input into the `clk_i` domain.
+    sync #(
+        .STAGES (STAGES)
+    ) i_sync (
+        .clk_i,
+        .rst_ni,
+        .serial_i,
+        .serial_o ( serial )
+    );
+
+    // The history register below is clocked through this gate.
+    pulp_clock_gating i_pulp_clock_gating (
+        .clk_i,
+        .en_i,
+        .test_en_i ( 1'b0      ),
+        .clk_o     ( clk_gated )
+    );
+
+    // Previous value of the synchronized signal, updated only while `en_i` is set.
+    always_ff @(posedge clk_gated, negedge rst_ni) begin
+        if (!rst_ni) begin
+            serial_q <= 1'b0;
+        end else if (en_i) begin
+            serial_q <= serial;
+        end
+    end
+
+    // Rising edge: now high, previously low. Falling edge: now low, previously high.
+    assign r_edge_o = (~serial_q) & serial;
+    assign f_edge_o = serial_q & (~serial);
+    assign serial_o = serial_q;
+endmodule
diff --git a/vendor/pulp-platform/common_cells/src/unread.sv b/vendor/pulp-platform/common_cells/src/unread.sv
new file mode 100644
index 0000000000..80e7356237
--- /dev/null
+++ b/vendor/pulp-platform/common_cells/src/unread.sv
@@ -0,0 +1,21 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 29.10.2018
+// Description: Dummy circuit to mitigate Open Pin warnings
+
+/* verilator lint_off UNUSED */
+// Sink with a single input and no logic inside. Connecting an otherwise unused signal
+// to `d_i` gives that signal a reader, which is how the open pin warnings are mitigated.
+module unread (
+    input logic d_i  // signal to be marked as read; deliberately not used inside the module
+);
+
+endmodule
+/* verilator lint_on UNUSED */
diff --git a/vendor/pulp-platform/fpnew/.gitignore b/vendor/pulp-platform/fpnew/.gitignore
new file mode 100644
index 0000000000..0e866a8180
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/.gitignore
@@ -0,0 +1,3 @@
+*~
+html
+Bender.lock
diff --git a/vendor/pulp-platform/fpnew/LICENSE b/vendor/pulp-platform/fpnew/LICENSE
new file mode 100644
index 0000000000..5ca76ba6b9
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/LICENSE
@@ -0,0 +1,176 @@
+SOLDERPAD HARDWARE LICENSE version 0.51
+
+This license is based closely on the Apache License Version 2.0, but is not
+approved or endorsed by the Apache Foundation. A copy of the non-modified
+Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0.
+
+As this license is not currently OSI or FSF approved, the Licensor permits any
+Work licensed under this License, at the option of the Licensee, to be treated
+as licensed under the Apache License Version 2.0 (which is so approved).
+
+This License is licensed under the terms of this License and in particular
+clause 7 below (Disclaimer of Warranties) applies in relation to its use.
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+“License” shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+“Licensor” shall mean the Rights owner or entity authorized by the Rights owner
+that is granting the License.
+
+“Legal Entity” shall mean the union of the acting entity and all other entities
+that control, are controlled by, or are under common control with that entity.
+For the purposes of this definition, “control” means (i) the power, direct or
+indirect, to cause the direction or management of such entity, whether by
+contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+“You” (or “Your”) shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+“Rights” means copyright and any similar right including design right (whether
+registered or unregistered), semiconductor topography (mask) rights and
+database rights (but excluding Patents and Trademarks).
+
+“Source” form shall mean the preferred form for making modifications, including
+but not limited to source code, net lists, board layouts, CAD files,
+documentation source, and configuration files.
+
+“Object” form shall mean any form resulting from mechanical transformation or
+translation of a Source form, including but not limited to compiled object
+code, generated documentation, the instantiation of a hardware design and
+conversions to other media types, including intermediate forms such as
+bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask
+works).
+
+“Work” shall mean the work of authorship, whether in Source form or other
+Object form, made available under the License, as indicated by a Rights notice
+that is included in or attached to the work (an example is provided in the
+Appendix below).
+
+“Derivative Works” shall mean any work, whether in Source or Object form, that
+is based on (or derived from) the Work and for which the editorial revisions,
+annotations, elaborations, or other modifications represent, as a whole, an
+original work of authorship. For the purposes of this License, Derivative Works
+shall not include works that remain separable from, or merely link (or bind by
+name) or physically connect to or interoperate with the interfaces of, the Work
+and Derivative Works thereof.
+
+“Contribution” shall mean any design or work of authorship, including the
+original version of the Work and any modifications or additions to that Work or
+Derivative Works thereof, that is intentionally submitted to Licensor for
+inclusion in the Work by the Rights owner or by an individual or Legal Entity
+authorized to submit on behalf of the Rights owner. For the purposes of this
+definition, “submitted” means any form of electronic, verbal, or written
+communication sent to the Licensor or its representatives, including but not
+limited to communication on electronic mailing lists, source code control
+systems, and issue tracking systems that are managed by, or on behalf of, the
+Licensor for the purpose of discussing and improving the Work, but excluding
+communication that is conspicuously marked or otherwise designated in writing
+by the Rights owner as “Not a Contribution.”
+
+“Contributor” shall mean Licensor and any individual or Legal Entity on behalf
+of whom a Contribution has been received by Licensor and subsequently
+incorporated within the Work.
+
+2. Grant of License. Subject to the terms and conditions of this License, each
+Contributor hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable license under the Rights to reproduce,
+prepare Derivative Works of, publicly display, publicly perform, sublicense,
+and distribute the Work and such Derivative Works in Source or Object form and
+do anything in relation to the Work as if the Rights did not exist.
+
+3. Grant of Patent License. Subject to the terms and conditions of this
+License, each Contributor hereby grants to You a perpetual, worldwide,
+non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this
+section) patent license to make, have made, use, offer to sell, sell, import,
+and otherwise transfer the Work, where such license applies only to those
+patent claims licensable by such Contributor that are necessarily infringed by
+their Contribution(s) alone or by combination of their Contribution(s) with the
+Work to which such Contribution(s) was submitted. If You institute patent
+litigation against any entity (including a cross-claim or counterclaim in a
+lawsuit) alleging that the Work or a Contribution incorporated within the Work
+constitutes direct or contributory patent infringement, then any patent
+licenses granted to You under this License for that Work shall terminate as of
+the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the Work or
+Derivative Works thereof in any medium, with or without modifications, and in
+Source or Object form, provided that You meet the following conditions:
+
+    You must give any other recipients of the Work or Derivative Works a copy
+    of this License; and
+
+    You must cause any modified files to carry prominent notices stating that
+    You changed the files; and
+
+    You must retain, in the Source form of any Derivative Works that You
+    distribute, all copyright, patent, trademark, and attribution notices from
+    the Source form of the Work, excluding those notices that do not pertain to
+    any part of the Derivative Works; and
+
+    If the Work includes a “NOTICE” text file as part of its distribution, then
+    any Derivative Works that You distribute must include a readable copy of
+    the attribution notices contained within such NOTICE file, excluding those
+    notices that do not pertain to any part of the Derivative Works, in at
+    least one of the following places: within a NOTICE text file distributed as
+    part of the Derivative Works; within the Source form or documentation, if
+    provided along with the Derivative Works; or, within a display generated by
+    the Derivative Works, if and wherever such third-party notices normally
+    appear. The contents of the NOTICE file are for informational purposes only
+    and do not modify the License. You may add Your own attribution notices
+    within Derivative Works that You distribute, alongside or as an addendum to
+    the NOTICE text from the Work, provided that such additional attribution
+    notices cannot be construed as modifying the License. You may add Your own
+    copyright statement to Your modifications and may provide additional or
+    different license terms and conditions for use, reproduction, or
+    distribution of Your modifications, or for any such Derivative Works as a
+    whole, provided Your use, reproduction, and distribution of the Work
+    otherwise complies with the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise, any
+Contribution intentionally submitted for inclusion in the Work by You to the
+Licensor shall be under the terms and conditions of this License, without any
+additional terms or conditions. Notwithstanding the above, nothing herein shall
+supersede or modify the terms of any separate license agreement you may have
+executed with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade names,
+trademarks, service marks, or product names of the Licensor, except as required
+for reasonable and customary use in describing the origin of the Work and
+reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or agreed to in
+writing, Licensor provides the Work (and each Contributor provides its
+Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied, including, without limitation, any warranties
+or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any risks
+associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory, whether in
+tort (including negligence), contract, or otherwise, unless required by
+applicable law (such as deliberate and grossly negligent acts) or agreed to in
+writing, shall any Contributor be liable to You for damages, including any
+direct, indirect, special, incidental, or consequential damages of any
+character arising as a result of this License or out of the use or inability to
+use the Work (including but not limited to damages for loss of goodwill, work
+stoppage, computer failure or malfunction, or any and all other commercial
+damages or losses), even if such Contributor has been advised of the
+possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing the Work or
+Derivative Works thereof, You may choose to offer, and charge a fee for,
+acceptance of support, warranty, indemnity, or other liability obligations
+and/or rights consistent with this License. However, in accepting such
+obligations, You may act only on Your own behalf and on Your sole
+responsibility, not on behalf of any other Contributor, and only if You agree
+to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
\ No newline at end of file
diff --git a/vendor/pulp-platform/fpnew/README.md b/vendor/pulp-platform/fpnew/README.md
new file mode 100644
index 0000000000..7bcb9ee0dc
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/README.md
@@ -0,0 +1,147 @@
+# FPnew - New Floating-Point Unit with Transprecision Capabilities
+
+Parametric floating-point unit with support for standard RISC-V formats and operations as well as transprecision formats, written in SystemVerilog.
+
+Maintainer: Stefan Mach <smach@iis.ee.ethz.ch>
+
+## Features
+
+The FPU is a parametric design that allows generating FP hardware units for various use cases.
+Even though mainly designed for use in RISC-V processors, the FPU or its sub-blocks can easily be utilized in other environments.
+Our design aims to be compliant with IEEE 754-2008 and provides the following features:
+
+### Formats
+Any IEEE 754-2008 style binary floating-point format can be supported, including single-, double-, quad- and half-precision (`binary32`, `binary64`, `binary128`, `binary16`).
+Formats can be defined with arbitrary number of exponent and mantissa bits through parameters and are always symmetrically biased.
+Multiple FP formats can be supported concurrently, and the number of formats supported is not limited.
+
+Multiple integer formats with arbitrary number of bits (as source or destination of conversions) can also be defined.
+
+### Operations
+- Addition/Subtraction
+- Multiplication
+- Fused multiply-add in four flavours (`fmadd`, `fmsub`, `fnmadd`, `fnmsub`)
+- Division<sup>1</sup>
+- Square root<sup>1</sup>
+- Minimum/Maximum<sup>2</sup>
+- Comparisons
+- Sign-Injections (`copy`, `abs`, `negate`, `copySign` etc.)
+- Conversions among all supported FP formats
+- Conversions between FP formats and integers (signed & unsigned) and vice versa
+- Classification
+
+Multi-format FMA operations (i.e. multiplication in one format, accumulation in another) are optionally supported.
+
+Optionally, *packed-SIMD* versions of all the above operations can be generated for formats narrower than the FPU datapath width.
+E.g.: Support for double-precision (64bit) operations and two simultaneous single-precision (32bit) operations.
+
+It is also possible to generate only a subset of operations if e.g. divisions are not needed.
+
+<sup>1</sup>Some compliance issues with IEEE 754-2008 are currently known to exist<br>
+<sup>2</sup>Implementing IEEE 754-201x `minimumNumber` and `maximumNumber`, respectively
+
+### Rounding modes
+All IEEE 754-2008 rounding modes are supported, namely
+- `roundTiesToEven`
+- `roundTiesToAway`
+- `roundTowardPositive`
+- `roundTowardNegative`
+- `roundTowardZero`
+
+### Status Flags
+All IEEE 754-2008 status flags are supported, namely
+- Invalid operation (`NV`)
+- Division by zero (`DZ`)
+- Overflow (`OF`)
+- Underflow (`UF`)
+- Inexact (`NX`)
+
+## Getting Started
+
+### Dependencies
+
+FPnew currently depends on the following:
+- `lzc` and `rr_arb_tree` from the `common_cells` repository (https://github.com/pulp-platform/common_cells.git)
+- optional: Divider and square-root unit from the `fpu-div-sqrt-mvp` repository (https://github.com/pulp-platform/fpu_div_sqrt_mvp.git)
+
+These two repositories are included in the source code directory as git submodules. Use
+```bash
+git submodule update --init --recursive
+```
+if you want to load these dependencies there.
+
+Consider using [Bender](https://github.com/fabianschuiki/bender.git) for managing dependencies in your projects. FPnew comes with Bender support!
+
+### Usage
+
+The top-level module of the FPU is called `fpnew_top` and can be directly instantiated in your design.
+Make sure you compile the package `fpnew_pkg` ahead of any files making references to types, parameters or functions defined there.
+
+It is discouraged to `import` all of `fpnew_pkg` into your source files. Instead, explicitly scope references into the package like so: `fpnew_pkg::foo`.
+
+#### Example Instantiation
+
+```SystemVerilog
+// FPU instance
+fpnew_top #(
+  .Features       ( fpnew_pkg::RV64D          ),
+  .Implementation ( fpnew_pkg::DEFAULT_NOREGS ),
+  .TagType        ( logic                     )
+) i_fpnew_top (
+  .clk_i,
+  .rst_ni,
+  .operands_i,
+  .rnd_mode_i,
+  .op_i,
+  .op_mod_i,
+  .src_fmt_i,
+  .dst_fmt_i,
+  .int_fmt_i,
+  .vectorial_op_i,
+  .tag_i,
+  .in_valid_i,
+  .in_ready_o,
+  .flush_i,
+  .result_o,
+  .status_o,
+  .tag_o,
+  .out_valid_o,
+  .out_ready_i,
+  .busy_o
+);
+```
+
+### Documentation
+
+More in-depth documentation on the FPnew configuration, interfaces and architecture is provided in [`docs/README.md`](docs/README.md).
+
+### Issues and Contributing
+
+In case you find any issues with FPnew that have not been reported yet, don't hesitate to open a new [issue](https://github.com/pulp-platform/fpnew/issues) here on GitHub.
+Please, don't use the issue tracker for support questions.
+Instead, consider contacting the maintainers or consulting the [PULP forums](https://pulp-platform.org/community/index.php).
+
+In case you would like to contribute to the project, please refer to the contributing guidelines in [`docs/CONTRIBUTING.md`](docs/CONTRIBUTING.md) before opening a pull request.
+
+
+### Repository Structure
+
+HDL source code can be found in the `src` directory while documentation is located in `docs`.
+A changelog is kept at [`docs/CHANGELOG.md`](docs/CHANGELOG.md).
+
+This repository loosely follows the [GitFlow](https://nvie.com/posts/a-successful-git-branching-model/) branching model.
+This means that the `master` branch is considered stable and used to publish releases of the FPU while the `develop` branch contains features and bugfixes that have not yet been properly released.
+
+Furthermore, this repository tries to adhere to [SemVer](https://semver.org/), as outlined in the [changelog](docs/CHANGELOG.md).
+
+## Licensing
+
+FPnew is released under the *SolderPad Hardware License*, which is a permissive license based on Apache 2.0. Please refer to the [license file](LICENSE) for further information.
+
+## Acknowledgement
+
+This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement No 732631.
+
+For further information, visit [oprecomp.eu](http://oprecomp.eu).
+
+![OPRECOMP](docs/fig/oprecomp_logo_inline1.png)
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/.gitignore b/vendor/pulp-platform/fpnew/src/common_cells/.gitignore
new file mode 100644
index 0000000000..10a88888c1
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/.gitignore
@@ -0,0 +1,7 @@
+.*
+!.git*
+*.out
+*~
+/Bender.lock
+/Bender.local
+build
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/CHANGELOG.md b/vendor/pulp-platform/fpnew/src/common_cells/CHANGELOG.md
new file mode 100644
index 0000000000..70cb337bce
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/CHANGELOG.md
@@ -0,0 +1,210 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
+and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
+
+## Unreleased
+
+## 1.13.1 - 2019-06-01
+
+### Changed
+
+- Fix path in `src_files.yml` for `stream_arbiter` and `stream_arbiter_flushable`
+
+## 1.13.0 - 2019-05-29
+
+### Added
+
+- Added exponential backoff window module
+- Added parametric Galois LFSR module with optional whitening feature
+- Added `cf_math_pkg`: Constant Function implementations of mathematical functions for HDL elaboration
+
+### Changed
+- Parametric payload data type for `rr_arb_tree`
+
+### Deprecated
+- The following arbiter implementations are deprecated and superseded by `rr_arb_tree`:
+- Priority arbiter `prioarbiter`
+- Round-robin arbiter `rrarbiter`
+
+### Fixed
+
+## 1.12.0 - 2019-04-09
+
+### Added
+- Add priority arbiter
+- Add Pseudo Least Recently Used tree
+- Add round robin arbiter mux tree
+
+### Changed
+- Add selectable arbiter implementation for `stream_arbiter` and `stream_arbiter_flushable`. One can choose between priority (`prio`) and round-robin arbitration (`rr`).
+- Add `$onehot0` assertion in one-hot to bin
+- Rework `rrarbiter` unit (uses `rr_arb_tree` implementation underneath)
+
+## 1.11.0 - 2019-03-20
+
+### Added
+- Add stream fork
+- Add fall-through register
+- Add stream filter
+- Add ID queue
+
+### Changed
+- `sync_wedge` uses the existing synchronizer. This defines a single place where a tech-specific synchronizer can be defined.
+
+### Fixed
+- Fix FIFO push and pop signals in `stream_register` to observe interface prerequisites.
+- In `fifo_v3`, fix data output when pushing into empty fall-through FIFO. Previously, the data
+  output of an empty fall-through FIFO with data at its input (and `push_i=1`) depended on
+  `pop_i`: When `pop_i=0`, old, invalid data were visible at the output (even though `empty_o=0`,
+  indicating that the data output is valid). Only when `pop_i=1`, the data from the input fell
+  through. One consequence of this bug was that `data_o` of the `fall_through_register` could change
+  while `valid_o=1`, violating the basic stream specification.
+
+## 1.10.0 - 2018-12-18
+
+### Added
+- Add `fifo_v3` with generic fill count
+- Add 16 bit LFSR
+- Add stream delayer
+- Add stream arbiter
+- Add register macros for RTL
+- Add shift register
+
+### Changed
+- Make number of registers of `rstgen_bypass` a parameter.
+
+### Fixed
+- Fix `valid_i` and `grant_i` guarantees in `generic_fifo` for backward compatibility.
+- LZC: Synthesis of streaming operators in ternary operators
+- Add missing entry for `popcount` to `Bender.yml`.
+- Add default values for parameters to improve compatibility with Synopsys DC and Vivado.
+
+## 1.9.0 - 2018-11-02
+
+### Added
+- Add popcount circuit `popcount`
+
+## 1.8.0 - 2018-10-15
+
+### Added
+- Add lock feature to the rrarbiter. This prevents the arbiter from changing the decision when we have pending requests that remain unacknowledged for several cycles.
+- Add deglitching circuit
+- Add generic clock divider
+- Add edge detector as alias to sync_wedge (name is more expressive)
+- Add generic counter
+- Add moving deglitcher
+
+## 1.7.6 - 2018-09-27
+
+### Added
+- Add reset synchronizer with explicit reset bypass in testmode
+
+## 1.7.5 - 2018-09-06
+### Fixed
+- Fix incompatibility with verilator
+- Fix dependency to open-source repo
+
+## 1.7.4 - 2018-09-06
+- Fix assertions in `fifo_v2` (write on full / read on empty did not trigger properly)
+
+## 1.7.3 - 2018-08-27
+### Fixed
+- Use proper `fifo_v2` in `generic_fifo` module.
+
+## 1.7.2 - 2018-08-27
+### Added
+- Almost full/empty flags to FIFO, as `fifo_v2`.
+
+### Changed
+- FIFO moved to `fifo_v1` and instantiates `fifo_v2`.
+
+## 1.7.1 - 2018-08-27
+### Fixed
+- Revert breaking changes to `fifo`.
+
+## 1.7.0 - 2018-08-24
+### Added
+- Add stream register (`stream_register`).
+- Add stream multiplexer and demultiplexer (`stream_mux`, `stream_demux`).
+- Add round robin arbiter (`rrarbiter`).
+- Add leading zero counter (`lzc`).
+
+### Changed
+- Deprecate `find_first_one` in favor of `lzc`.
+
+## 1.6.0 - 2018-04-03
+### Added
+- Add binary to Gray code converter.
+- Add Gray code to binary converter.
+- Add Gray code testbench.
+- Add CDC FIFO based on Gray counters. This is a faster alternative to the 2-phase FIFO which also works if a domain's clock has stopped.
+
+### Changed
+- Rename `cdc_fifo` to `cdc_fifo_2phase`.
+- Adjust CDC FIFO testbench to cover both implementations.
+
+## 1.5.4 - 2018-03-31
+### Changed
+- Replace explicit clock gate in `fifo` with implicit one.
+
+## 1.5.3 - 2018-03-16
+### Changed
+- Remove duplicate deprecated modules.
+
+## 1.5.2 - 2018-03-16
+### Changed
+- Remove deprecated `rstgen` and fix interface.
+
+## 1.5.1 - 2018-03-16
+### Changed
+- Remove deprecated `onehot_to_bin`.
+
+## 1.5.0 - 2018-03-14
+### Added
+- Add behavioural SRAM model
+
+## 1.4.0 - 2018-03-14
+### Added
+- Clock domain crossing FIFO
+
+### Changed
+- Re-name new sync modules to resolve namespace collisions
+
+## 1.3.0 - 2018-03-12
+### Added
+- 2-phase clock domain crossing
+- Add old common cells as deprecated legacy modules
+
+## 1.2.3 - 2018-03-09
+### Added
+- Backwards compatibility wrapper for `generic_LFSR_8bit`
+
+## 1.2.2 - 2018-03-09
+### Added
+- Backwards compatibility wrapper for `generic_fifo`
+
+## 1.2.1 - 2018-03-09
+### Fixed
+- Fix an issue in the spill register which causes transactions to be lost
+
+## 1.2.0 - 2018-03-09
+### Added
+- Add spill register
+
+## 1.1.0 - 2018-03-06
+### Added
+- Find first zero
+
+## 1.0.0 - 2018-03-02
+### Added
+- Re-implementation of the generic FIFO supporting all kinds of use-cases
+- Testbench for FIFO
+
+### Changed
+- Re-formatting and artistic code clean-up
+
+## 0.1.0 - 2018-02-23
+### Added
+- Fork of PULP common cells repository
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/LICENSE b/vendor/pulp-platform/fpnew/src/common_cells/LICENSE
new file mode 100644
index 0000000000..18e4f67692
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/LICENSE
@@ -0,0 +1,176 @@
+SOLDERPAD HARDWARE LICENSE version 0.51
+
+This license is based closely on the Apache License Version 2.0, but is not
+approved or endorsed by the Apache Foundation. A copy of the non-modified
+Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0.
+
+As this license is not currently OSI or FSF approved, the Licensor permits any
+Work licensed under this License, at the option of the Licensee, to be treated
+as licensed under the Apache License Version 2.0 (which is so approved).
+
+This License is licensed under the terms of this License and in particular
+clause 7 below (Disclaimer of Warranties) applies in relation to its use.
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the Rights owner or entity authorized by the Rights owner
+that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other entities
+that control, are controlled by, or are under common control with that entity.
+For the purposes of this definition, "control" means (i) the power, direct or
+indirect, to cause the direction or management of such entity, whether by
+contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+"Rights" means copyright and any similar right including design right (whether
+registered or unregistered), semiconductor topography (mask) rights and
+database rights (but excluding Patents and Trademarks).
+
+"Source" form shall mean the preferred form for making modifications, including
+but not limited to source code, net lists, board layouts, CAD files,
+documentation source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical transformation or
+translation of a Source form, including but not limited to compiled object
+code, generated documentation, the instantiation of a hardware design and
+conversions to other media types, including intermediate forms such as
+bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask
+works).
+
+"Work" shall mean the work of authorship, whether in Source form or other
+Object form, made available under the License, as indicated by a Rights notice
+that is included in or attached to the work (an example is provided in the
+Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form, that
+is based on (or derived from) the Work and for which the editorial revisions,
+annotations, elaborations, or other modifications represent, as a whole, an
+original work of authorship. For the purposes of this License, Derivative Works
+shall not include works that remain separable from, or merely link (or bind by
+name) or physically connect to or interoperate with the interfaces of, the Work
+and Derivative Works thereof.
+
+"Contribution" shall mean any design or work of authorship, including the
+original version of the Work and any modifications or additions to that Work or
+Derivative Works thereof, that is intentionally submitted to Licensor for
+inclusion in the Work by the Rights owner or by an individual or Legal Entity
+authorized to submit on behalf of the Rights owner. For the purposes of this
+definition, "submitted" means any form of electronic, verbal, or written
+communication sent to the Licensor or its representatives, including but not
+limited to communication on electronic mailing lists, source code control
+systems, and issue tracking systems that are managed by, or on behalf of, the
+Licensor for the purpose of discussing and improving the Work, but excluding
+communication that is conspicuously marked or otherwise designated in writing
+by the Rights owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
+of whom a Contribution has been received by Licensor and subsequently
+incorporated within the Work.
+
+2. Grant of License. Subject to the terms and conditions of this License, each
+Contributor hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable license under the Rights to reproduce,
+prepare Derivative Works of, publicly display, publicly perform, sublicense,
+and distribute the Work and such Derivative Works in Source or Object form and
+do anything in relation to the Work as if the Rights did not exist.
+
+3. Grant of Patent License. Subject to the terms and conditions of this
+License, each Contributor hereby grants to You a perpetual, worldwide,
+non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this
+section) patent license to make, have made, use, offer to sell, sell, import,
+and otherwise transfer the Work, where such license applies only to those
+patent claims licensable by such Contributor that are necessarily infringed by
+their Contribution(s) alone or by combination of their Contribution(s) with the
+Work to which such Contribution(s) was submitted. If You institute patent
+litigation against any entity (including a cross-claim or counterclaim in a
+lawsuit) alleging that the Work or a Contribution incorporated within the Work
+constitutes direct or contributory patent infringement, then any patent
+licenses granted to You under this License for that Work shall terminate as of
+the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the Work or
+Derivative Works thereof in any medium, with or without modifications, and in
+Source or Object form, provided that You meet the following conditions:
+
+    You must give any other recipients of the Work or Derivative Works a copy
+    of this License; and
+
+    You must cause any modified files to carry prominent notices stating that
+    You changed the files; and
+
+    You must retain, in the Source form of any Derivative Works that You
+    distribute, all copyright, patent, trademark, and attribution notices from
+    the Source form of the Work, excluding those notices that do not pertain to
+    any part of the Derivative Works; and
+
+    If the Work includes a "NOTICE" text file as part of its distribution, then
+    any Derivative Works that You distribute must include a readable copy of
+    the attribution notices contained within such NOTICE file, excluding those
+    notices that do not pertain to any part of the Derivative Works, in at
+    least one of the following places: within a NOTICE text file distributed as
+    part of the Derivative Works; within the Source form or documentation, if
+    provided along with the Derivative Works; or, within a display generated by
+    the Derivative Works, if and wherever such third-party notices normally
+    appear. The contents of the NOTICE file are for informational purposes only
+    and do not modify the License. You may add Your own attribution notices
+    within Derivative Works that You distribute, alongside or as an addendum to
+    the NOTICE text from the Work, provided that such additional attribution
+    notices cannot be construed as modifying the License. You may add Your own
+    copyright statement to Your modifications and may provide additional or
+    different license terms and conditions for use, reproduction, or
+    distribution of Your modifications, or for any such Derivative Works as a
+    whole, provided Your use, reproduction, and distribution of the Work
+    otherwise complies with the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise, any
+Contribution intentionally submitted for inclusion in the Work by You to the
+Licensor shall be under the terms and conditions of this License, without any
+additional terms or conditions. Notwithstanding the above, nothing herein shall
+supersede or modify the terms of any separate license agreement you may have
+executed with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade names,
+trademarks, service marks, or product names of the Licensor, except as required
+for reasonable and customary use in describing the origin of the Work and
+reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or agreed to in
+writing, Licensor provides the Work (and each Contributor provides its
+Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied, including, without limitation, any warranties
+or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any risks
+associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory, whether in
+tort (including negligence), contract, or otherwise, unless required by
+applicable law (such as deliberate and grossly negligent acts) or agreed to in
+writing, shall any Contributor be liable to You for damages, including any
+direct, indirect, special, incidental, or consequential damages of any
+character arising as a result of this License or out of the use or inability to
+use the Work (including but not limited to damages for loss of goodwill, work
+stoppage, computer failure or malfunction, or any and all other commercial
+damages or losses), even if such Contributor has been advised of the
+possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing the Work or
+Derivative Works thereof, You may choose to offer, and charge a fee for,
+acceptance of support, warranty, indemnity, or other liability obligations
+and/or rights consistent with this License. However, in accepting such
+obligations, You may act only on Your own behalf and on Your sole
+responsibility, not on behalf of any other Contributor, and only if You agree
+to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/README.md b/vendor/pulp-platform/fpnew/src/common_cells/README.md
new file mode 100644
index 0000000000..cf68ec1dcc
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/README.md
@@ -0,0 +1,117 @@
+# Common Cells Repository
+
+Maintainer: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+This repository contains commonly used cells and headers for use in various projects.
+
+## Cell Contents
+
+This repository currently contains the following cells, ordered by categories.
+Please note that cells with status *deprecated* are not to be used for new designs and only serve to provide compatibility with old code.
+
+### Clocks and Resets
+
+|           Name          |                     Description                     |    Status    | Superseded By |
+|-------------------------|-----------------------------------------------------|--------------|---------------|
+| `clk_div`               | Clock divider with integer divisor                  | active       |               |
+| `clock_divider`         | Clock divider with configuration registers          | *deprecated* | `clk_div`     |
+| `clock_divider_counter` | Clock divider using a counter                       | *deprecated* | `clk_div`     |
+| `rstgen`                | Reset synchronizer                                  | active       |               |
+| `rstgen_bypass`         | Reset synchronizer with dedicated test reset bypass | active       |               |
+
+### Clock Domains and Asynchronous Crossings
+
+|         Name         |                                   Description                                    |    Status    | Superseded By |
+|----------------------|----------------------------------------------------------------------------------|--------------|---------------|
+| `cdc_2phase`         | Clock domain crossing using two-phase handshake, with ready/valid interface      | active       |               |
+| `cdc_fifo_2phase`    | Clock domain crossing FIFO using two-phase handshake, with ready/valid interface | active       |               |
+| `cdc_fifo_gray`      | Clock domain crossing FIFO using a gray-counter, with ready/valid interface      | active       |               |
+| `edge_detect`        | Rising/falling edge detector                                                     | active       |               |
+| `edge_propagator`    | **ANTONIO ADD DESCRIPTION**                                                      | active       |               |
+| `edge_propagator_rx` | **ANTONIO ADD DESCRIPTION**                                                      | active       |               |
+| `edge_propagator_tx` | **ANTONIO ADD DESCRIPTION**                                                      | active       |               |
+| `pulp_sync`          | Serial line synchronizer                                                         | *deprecated* | `sync`        |
+| `pulp_sync_wedge`    | Serial line synchronizer with edge detector                                      | *deprecated* | `sync_wedge`  |
+| `serial_deglitch`    | Serial line deglitcher                                                           | active       |               |
+| `sync`               | Serial line synchronizer                                                         | active       |               |
+| `sync_wedge`         | Serial line synchronizer with edge detector                                      | active       |               |
+
+### Counters and Shift Registers
+
+|         Name        |                   Description                                     |    Status    | Superseded By |
+|---------------------|-------------------------------------------------------------------|--------------|---------------|
+| `counter`           | Generic up/down counter with overflow detection                   | active       |               |
+| `generic_LFSR_8bit` | 8-bit linear feedback shift register (LFSR)                       | *deprecated* | `lfsr_8bit`   |
+| `lfsr_8bit`         | 8-bit linear feedback shift register (LFSR)                       | active       |               |
+| `lfsr_16bit`        | 16-bit linear feedback shift register (LFSR)                      | active       |               |
+| `lfsr`              | 4...64-bit parametric Galois LFSR with optional whitening feature | active       |               |
+| `mv_filter`         | **ZARUBAF ADD DESCRIPTION**                                       | active       |               |
+
+### Data Path Elements
+
+| Name                         | Description                                                                    | Status         | Superseded By |
+| :--------------------------- | :----------------------------------------------------------------------------- | :------------- | :------------ |
+| `binary_to_gray`             | Binary to gray code converter                                                  | active         |               |
+| `find_first_one`             | Leading-one finder / leading-zero counter                                      | *deprecated*   | `lzc`         |
+| `gray_to_binary`             | Gray code to binary converter                                                  | active         |               |
+| `lzc`                        | Leading/trailing-zero counter                                                  | active         |               |
+| `onehot_to_bin`              | One-hot to binary converter                                                    | active         |               |
+| `shift_reg`                  | Shift register for arbitrary types                                             | active         |               |
+| `rr_arb_tree`                | Round-robin arbiter for req/gnt and vld/rdy interfaces with optional priority  | active         |               |
+| `rrarbiter`                  | Round-robin arbiter for req/ack interface with look-ahead                      | *deprecated*   | `rr_arb_tree` |
+| `prioarbiter`                | Priority arbiter for req/ack interface with look-ahead                         | *deprecated*   | `rr_arb_tree` |
+| `fall_through_register`      | Fall-through register with ready/valid interface                               | active         |               |
+| `spill_register`             | Register with ready/valid interface to cut all combinational interface paths   | active         |               |
+| `stream_arbiter`             | Round-robin arbiter for ready/valid stream interface                           | active         |               |
+| `stream_arbiter_flushable`   | Round-robin arbiter for ready/valid stream interface and flush functionality   | active         |               |
+| `stream_demux`               | Ready/valid interface demultiplexer                                            | active         |               |
+| `stream_mux`                 | Ready/valid interface multiplexer                                              | active         |               |
+| `stream_register`            | Register with ready/valid interface                                            | active         |               |
+| `stream_fork`                | Ready/valid fork                                                               | active         |               |
+| `stream_filter`              | Ready/valid filter                                                             | active         |               |
+| `stream_delay`               | Randomize or delay ready/valid interface                                       | active         |               |
+| `popcount`                   | Combinatorial popcount (hamming weight)                                        | active         |               |
+
+### Data Structures
+
+| Name                 | Description                                     | Status         | Superseded By |
+| :------------------- | :---------------------------------------------- | :------------- | :------------ |
+| `fifo`               | FIFO register with upper threshold              | *deprecated*   | `fifo_v3`     |
+| `fifo_v2`            | FIFO register with upper and lower threshold    | *deprecated*   | `fifo_v3`     |
+| `fifo_v3`            | FIFO register with generic fill counts          | active         |               |
+| `generic_fifo`       | FIFO register without thresholds                | *deprecated*   | `fifo_v3`     |
+| `generic_fifo_adv`   | FIFO register without thresholds                | *deprecated*   | `fifo_v3`     |
+| `sram`               | SRAM behavioral model                           | active         |               |
+| `plru_tree`          | Pseudo least recently used tree                 | active         |               |
+| `unread`             | Empty module to sink unconnected outputs into   | active         |               |
+
+
+## Header Contents
+
+This repository currently contains the following header files.
+
+### RTL Register Macros
+
+The header file `registers.svh` contains macros that expand to descriptions of registers.
+To avoid misuse of `always_ff` blocks, only the following macros shall be used to describe sequential behavior.
+The use of linter rules that flag explicit uses of `always_ff` in source code is encouraged.
+
+|         Macro         |                            Arguments                            |                               Description                               |
+|-----------------------|-----------------------------------------------------------------|-------------------------------------------------------------------------|
+| <code>\`FF</code>     | `q_sig`, `d_sig`, `rst_val`                                     | Flip-flop with asynchronous active-low reset (implicit)                 |
+| <code>\`FFAR</code>   | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `arst_sig`              | Flip-flop with asynchronous active-high reset                           |
+| <code>\`FFARN</code>  | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `arstn_sig`             | Flip-flop with asynchronous active-low reset                            |
+| <code>\`FFSR</code>   | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `rst_sig`               | Flip-flop with synchronous active-high reset                            |
+| <code>\`FFSRN</code>  | `q_sig`, `d_sig`, `rst_val`, `clk_sig`, `rstn_sig`              | Flip-flop with synchronous active-low reset                             |
+| <code>\`FFNR</code>   | `q_sig`, `d_sig`, `clk_sig`                                     | Flip-flop without reset                                                 |
+|                       |                                                                 |                                                                         |
+| <code>\`FFL</code>    | `q_sig`, `d_sig`, `load_ena`, `rst_val`                         | Flip-flop with load-enable and asynchronous active-low reset (implicit) |
+| <code>\`FFLAR</code>  | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `arst_sig`  | Flip-flop with load-enable and asynchronous active-high reset           |
+| <code>\`FFLARN</code> | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `arstn_sig` | Flip-flop with load-enable and asynchronous active-low reset            |
+| <code>\`FFLSR</code>  | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `rst_sig`   | Flip-flop with load-enable and synchronous active-high reset            |
+| <code>\`FFLSRN</code> | `q_sig`, `d_sig`, `load_ena`, `rst_val`, `clk_sig`, `rstn_sig`  | Flip-flop with load-enable and synchronous active-low reset             |
+| <code>\`FFLNR</code>  | `q_sig`, `d_sig`, `load_ena`, `clk_sig`                         | Flip-flop with load-enable without reset                                |
+- *The names of the clock and reset signals for implicit variants are `clk_i` and `rst_ni`, respectively.*
+- *Arguments with the suffix `_sig` are signal names: the present state (`q_sig`), the next state (`d_sig`), and the clocks and resets.*
+- *Argument `rst_val` specifies the value literal to be assigned upon reset.*
+- *Argument `load_ena` specifies the boolean expression that forms the load enable of the register.*
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/include/common_cells/registers.svh b/vendor/pulp-platform/fpnew/src/common_cells/include/common_cells/registers.svh
new file mode 100644
index 0000000000..c1975edcb3
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/include/common_cells/registers.svh
@@ -0,0 +1,224 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Common register defines for RTL designs
+`ifndef COMMON_CELLS_REGISTERS_SVH_
+`define COMMON_CELLS_REGISTERS_SVH_
+
+// Abridged Summary of available FF macros:
+// `FF:      asynchronous active-low reset (implicit clock and reset)
+// `FFAR:    asynchronous active-high reset
+// `FFARN:   asynchronous active-low reset
+// `FFSR:    synchronous active-high reset
+// `FFSRN:   synchronous active-low reset
+// `FFNR:    without reset
+// `FFL:     load-enable and asynchronous active-low reset (implicit clock and reset)
+// `FFLAR:   load-enable and asynchronous active-high reset
+// `FFLARN:  load-enable and asynchronous active-low reset
+// `FFLARNC: load-enable and asynchronous active-low reset and synchronous active-high clear
+// `FFLSR:   load-enable and synchronous active-high reset
+// `FFLSRN:  load-enable and synchronous active-low reset
+// `FFLNR:   load-enable without reset
+
+
+// `FF: flip-flop with asynchronous active-low reset; clock and reset are taken implicitly from the expansion scope
+// __q: Q output of FF (the signal assigned by the generated always_ff block)
+// __d: D input of FF (expression loaded into __q on a rising edge of clk_i while rst_ni is high)
+// __reset_value: value assigned to __q while rst_ni is low
+// Implicit (signals with exactly these names must be visible where the macro is expanded):
+// clk_i: clock input (rising-edge triggered)
+// rst_ni: reset input (asynchronous, active low)
+`define FF(__q, __d, __reset_value)                  \
+  always_ff @(posedge clk_i or negedge rst_ni) begin \
+    if (!rst_ni) begin                               \
+      __q <= (__reset_value);                        \
+    end else begin                                   \
+      __q <= (__d);                                  \
+    end                                              \
+  end
+
+// `FFAR: flip-flop with asynchronous active-high reset and explicitly named clock and reset
+// __q: Q output of FF (the signal assigned by the generated always_ff block)
+// __d: D input of FF (expression loaded into __q on a rising edge of __clk while __arst is low)
+// __reset_value: value assigned to __q while __arst is high
+// __clk: clock input (rising-edge triggered)
+// __arst: asynchronous reset, active high (appears as `posedge` in the sensitivity list)
+`define FFAR(__q, __d, __reset_value, __clk, __arst)     \
+  always_ff @(posedge (__clk) or posedge (__arst)) begin \
+    if (__arst) begin                                    \
+      __q <= (__reset_value);                            \
+    end else begin                                       \
+      __q <= (__d);                                      \
+    end                                                  \
+  end
+
+// `FFARN: flip-flop with asynchronous active-low reset and explicitly named clock and reset
+// __q: Q output of FF (the signal assigned by the generated always_ff block)
+// __d: D input of FF (expression loaded into __q on a rising edge of __clk while __arst_n is high)
+// __reset_value: value assigned to __q while __arst_n is low
+// __clk: clock input (rising-edge triggered)
+// __arst_n: asynchronous reset, active low; pass a plain signal name, since it is negated as `!__arst_n` without extra parentheses
+`define FFARN(__q, __d, __reset_value, __clk, __arst_n)    \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin \
+    if (!__arst_n) begin                                   \
+      __q <= (__reset_value);                              \
+    end else begin                                         \
+      __q <= (__d);                                        \
+    end                                                    \
+  end
+
+// `FFSR: flip-flop with synchronous active-high reset; unless VERILATOR is defined, a `synopsys sync_set_reset` pragma comment naming the reset is emitted first
+// __q: Q output of FF (the signal assigned by the generated always_ff block)
+// __d: D input of FF (expression loaded into __q when __reset_clk is low)
+// __reset_value: value assigned to __q on a rising edge of __clk while __reset_clk is high
+// __clk: clock input (rising-edge triggered; the only signal in the sensitivity list)
+// __reset_clk: synchronous reset, active high; also stringified into the pragma, so pass a plain signal name
+`define FFSR(__q, __d, __reset_value, __clk, __reset_clk) \
+  `ifndef VERILATOR                       \
+  /``* synopsys sync_set_reset `"__reset_clk`" *``/       \
+    `endif                        \
+  always_ff @(posedge (__clk)) begin                      \
+    __q <= (__reset_clk) ? (__reset_value) : (__d);       \
+  end
+
+// `FFSRN: flip-flop with synchronous active-low reset; unless VERILATOR is defined, a `synopsys sync_set_reset` pragma comment naming the reset is emitted first
+// __q: Q output of FF (the signal assigned by the generated always_ff block)
+// __d: D input of FF (expression loaded into __q when __reset_n_clk is high)
+// __reset_value: value assigned to __q on a rising edge of __clk while __reset_n_clk is low
+// __clk: clock input (rising-edge triggered; the only signal in the sensitivity list)
+// __reset_n_clk: synchronous reset, active low; pass a plain signal name (it is negated as `!__reset_n_clk` and stringified into the pragma)
+`define FFSRN(__q, __d, __reset_value, __clk, __reset_n_clk) \
+    `ifndef VERILATOR                       \
+  /``* synopsys sync_set_reset `"__reset_n_clk`" *``/        \
+    `endif                        \
+  always_ff @(posedge (__clk)) begin                         \
+    __q <= (!__reset_n_clk) ? (__reset_value) : (__d);       \
+  end
+
+// `FFNR: always-enabled flip-flop without any reset; __q takes the value of __d on every rising edge of __clk
+// __q: Q output of FF (the signal assigned by the generated always_ff block; never given a reset value by this macro)
+// __d: D input of FF (expression loaded into __q)
+// __clk: clock input (rising-edge triggered)
+`define FFNR(__q, __d, __clk)        \
+  always_ff @(posedge (__clk)) begin \
+    __q <= (__d);                    \
+  end
+
+// `FFL: flip-flop with load-enable and asynchronous active-low reset; clock and reset are taken implicitly from the expansion scope
+// __q: Q output of FF (the signal assigned by the generated always_ff block)
+// __d: D input of FF (expression loaded into __q when __load is true)
+// __load: load enable; when false, __q is re-assigned its own value (i.e. it holds)
+// __reset_value: value assigned to __q while rst_ni is low
+// Implicit (signals with exactly these names must be visible where the macro is expanded):
+// clk_i: clock input (rising-edge triggered)
+// rst_ni: reset input (asynchronous, active low)
+`define FFL(__q, __d, __load, __reset_value)         \
+  always_ff @(posedge clk_i or negedge rst_ni) begin \
+    if (!rst_ni) begin                               \
+      __q <= (__reset_value);                        \
+    end else begin                                   \
+      __q <= (__load) ? (__d) : (__q);               \
+    end                                              \
+  end
+
+// `FFLAR: flip-flop with load-enable and asynchronous active-high reset, with explicitly named clock and reset
+// __q: Q output of FF (the signal assigned by the generated always_ff block)
+// __d: D input of FF (expression loaded into __q when __load is true)
+// __load: load enable; when false, __q is re-assigned its own value (i.e. it holds)
+// __reset_value: value assigned to __q while __arst is high
+// __clk: clock input (rising-edge triggered)
+// __arst: asynchronous reset, active high (appears as `posedge` in the sensitivity list)
+`define FFLAR(__q, __d, __load, __reset_value, __clk, __arst) \
+  always_ff @(posedge (__clk) or posedge (__arst)) begin      \
+    if (__arst) begin                                         \
+      __q <= (__reset_value);                                 \
+    end else begin                                            \
+      __q <= (__load) ? (__d) : (__q);                        \
+    end                                                       \
+  end
+
+// `FFLARN: flip-flop with load-enable and asynchronous active-low reset, with explicitly named clock and reset
+// __q: Q output of FF (the signal assigned by the generated always_ff block)
+// __d: D input of FF (expression loaded into __q when __load is true)
+// __load: load enable; when false, __q is re-assigned its own value (i.e. it holds)
+// __reset_value: value assigned to __q while __arst_n is low
+// __clk: clock input (rising-edge triggered)
+// __arst_n: asynchronous reset, active low; pass a plain signal name, since it is negated as `!__arst_n` without extra parentheses
+`define FFLARN(__q, __d, __load, __reset_value, __clk, __arst_n) \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin       \
+    if (!__arst_n) begin                                         \
+      __q <= (__reset_value);                                    \
+    end else begin                                               \
+      __q <= (__load) ? (__d) : (__q);                           \
+    end                                                          \
+  end
+
+// `FFLSR: flip-flop with load-enable and synchronous active-high reset; unless VERILATOR is defined, a `synopsys sync_set_reset` pragma comment naming the reset is emitted first
+// __q: Q output of FF (the signal assigned by the generated always_ff block)
+// __d: D input of FF (expression loaded into __q when __load is true and __reset_clk is low)
+// __load: load enable; when false (and not in reset), __q is re-assigned its own value
+// __reset_value: value assigned to __q on a rising edge of __clk while __reset_clk is high (reset wins over __load)
+// __clk: clock input (rising-edge triggered; the only signal in the sensitivity list)
+// __reset_clk: synchronous reset, active high; also stringified into the pragma, so pass a plain signal name
+`define FFLSR(__q, __d, __load, __reset_value, __clk, __reset_clk)       \
+    `ifndef VERILATOR                       \
+  /``* synopsys sync_set_reset `"__reset_clk`" *``/                      \
+    `endif                        \
+  always_ff @(posedge (__clk)) begin                                     \
+    __q <= (__reset_clk) ? (__reset_value) : ((__load) ? (__d) : (__q)); \
+  end
+
+// `FFLSRN: flip-flop with load-enable and synchronous active-low reset; unless VERILATOR is defined, a `synopsys sync_set_reset` pragma comment naming the reset is emitted first
+// __q: Q output of FF (the signal assigned by the generated always_ff block)
+// __d: D input of FF (expression loaded into __q when __load is true and __reset_n_clk is high)
+// __load: load enable; when false (and not in reset), __q is re-assigned its own value
+// __reset_value: value assigned to __q on a rising edge of __clk while __reset_n_clk is low (reset wins over __load)
+// __clk: clock input (rising-edge triggered; the only signal in the sensitivity list)
+// __reset_n_clk: synchronous reset, active low; pass a plain signal name (it is negated as `!__reset_n_clk` and stringified into the pragma)
+`define FFLSRN(__q, __d, __load, __reset_value, __clk, __reset_n_clk)       \
+    `ifndef VERILATOR                       \
+  /``* synopsys sync_set_reset `"__reset_n_clk`" *``/                       \
+    `endif                        \
+  always_ff @(posedge (__clk)) begin                                        \
+    __q <= (!__reset_n_clk) ? (__reset_value) : ((__load) ? (__d) : (__q)); \
+  end
+
+// `FFLARNC: flip-flop with load-enable, asynchronous active-low reset and synchronous active-high clear; unless VERILATOR is defined, a `synopsys sync_set_reset` pragma comment naming __clear is emitted first
+// __q: Q output of FF (the signal assigned by the generated always_ff block)
+// __d: D input of FF (expression loaded into __q when __load is true and __clear is false)
+// __load: load enable; when false (and neither reset nor clear is active), __q is re-assigned its own value
+// __clear: synchronous clear, active high; assigns __reset_value to __q and takes priority over __load
+// __reset_value: value assigned to __q by both the asynchronous reset and the synchronous clear
+// __clk: clock input (rising-edge triggered)
+// __arst_n: asynchronous reset, active low; pass a plain signal name, since it is negated as `!__arst_n` without extra parentheses
+`define FFLARNC(__q, __d, __load, __clear, __reset_value, __clk, __arst_n) \
+    `ifndef VERILATOR                       \
+  /``* synopsys sync_set_reset `"__clear`" *``/                       \
+    `endif                        \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin                 \
+    if (!__arst_n) begin                                                   \
+      __q <= (__reset_value);                                              \
+    end else begin                                                         \
+      __q <= (__clear) ? (__reset_value) : (__load) ? (__d) : (__q);       \
+    end                                                                    \
+  end
+
+// `FFLNR: flip-flop with load-enable and without any reset
+// __q: Q output of FF (the signal assigned by the generated always_ff block; never given a reset value by this macro)
+// __d: D input of FF (expression loaded into __q when __load is true)
+// __load: load enable; when false, __q is re-assigned its own value (i.e. it holds)
+// __clk: clock input (rising-edge triggered)
+`define FFLNR(__q, __d, __load, __clk) \
+  always_ff @(posedge (__clk)) begin   \
+    __q <= (__load) ? (__d) : (__q);   \
+  end
+
+`endif
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_2phase.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_2phase.sv
new file mode 100644
index 0000000000..8e770abfa1
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_2phase.sv
@@ -0,0 +1,175 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+/// A two-phase (toggle-signalled) clock domain crossing that moves items of type T from the src_* valid/ready port to the dst_* valid/ready port.
+/// This wrapper only instantiates cdc_2phase_src and cdc_2phase_dst and connects them through async_req, async_ack and async_data.
+/// CONSTRAINT: Requires max_delay of min_period(src_clk_i, dst_clk_i) through
+/// the paths async_req, async_ack, async_data.
+/* verilator lint_off DECLFILENAME */
+module cdc_2phase #(
+  parameter type T = logic // payload type carried across the crossing
+)(
+  input  logic src_rst_ni,  // source-domain reset, asynchronous, active low
+  input  logic src_clk_i,   // source-domain clock
+  input  T     src_data_i,  // item to send; accepted when src_valid_i && src_ready_o
+  input  logic src_valid_i, // sender has an item
+  output logic src_ready_o, // crossing can accept an item
+
+  input  logic dst_rst_ni,  // destination-domain reset, asynchronous, active low
+  input  logic dst_clk_i,   // destination-domain clock
+  output T     dst_data_o,  // received item
+  output logic dst_valid_o, // an item is available on dst_data_o
+  input  logic dst_ready_i  // receiver takes the item when dst_valid_o && dst_ready_i
+);
+
+  // Asynchronous handshake signals. These are the three nets named in the CONSTRAINT above.
+  (* dont_touch = "true" *) logic async_req;  // request level, driven from the source domain
+  (* dont_touch = "true" *) logic async_ack;  // acknowledge level, driven from the destination domain
+  (* dont_touch = "true" *) T async_data;     // payload, driven from the source domain
+
+  // The sender in the source domain.
+  cdc_2phase_src #(.T(T)) i_src (
+    .rst_ni       ( src_rst_ni  ),
+    .clk_i        ( src_clk_i   ),
+    .data_i       ( src_data_i  ),
+    .valid_i      ( src_valid_i ),
+    .ready_o      ( src_ready_o ),
+    .async_req_o  ( async_req   ),
+    .async_ack_i  ( async_ack   ),
+    .async_data_o ( async_data  )
+  );
+
+  // The receiver in the destination domain.
+  cdc_2phase_dst #(.T(T)) i_dst (
+    .rst_ni       ( dst_rst_ni  ),
+    .clk_i        ( dst_clk_i   ),
+    .data_o       ( dst_data_o  ),
+    .valid_o      ( dst_valid_o ),
+    .ready_i      ( dst_ready_i ),
+    .async_req_i  ( async_req   ),
+    .async_ack_o  ( async_ack   ),
+    .async_data_i ( async_data  )
+  );
+
+endmodule
+
+
+/// Half of the two-phase clock domain crossing located in the source domain: it toggles async_req_o for every accepted item and is ready again once the synchronized async_ack_i has followed.
+module cdc_2phase_src #(
+  parameter type T = logic // payload type
+)(
+  input  logic rst_ni,       // asynchronous reset, active low
+  input  logic clk_i,        // source-domain clock
+  input  T     data_i,       // item to send
+  input  logic valid_i,      // sender has an item
+  output logic ready_o,      // high while request and synchronized acknowledge levels agree
+  output logic async_req_o,  // request level towards the destination domain
+  input  logic async_ack_i,  // acknowledge level from the destination domain (asynchronous to clk_i)
+  output T     async_data_o  // registered payload towards the destination domain
+);
+
+  (* dont_touch = "true" *)
+  logic req_src_q, ack_src_q, ack_q; // request level, and the two stages that sample async_ack_i
+  (* dont_touch = "true" *)
+  T data_src_q; // payload held stable while the transfer is in flight
+
+  // The req_src and data_src registers change only on a handshake (valid_i && ready_o):
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      req_src_q  <= 0;
+      data_src_q <= '0;
+    end else if (valid_i && ready_o) begin
+      req_src_q  <= ~req_src_q; // a level change (not a pulse) signals the new item
+      data_src_q <= data_i;
+    end
+  end
+
+  // The ack_src and ack registers act as synchronization stages for async_ack_i.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ack_src_q <= 0;
+      ack_q     <= 0;
+    end else begin
+      ack_src_q <= async_ack_i; // first stage: samples the asynchronous input
+      ack_q     <= ack_src_q;   // second stage: the value used by ready_o
+    end
+  end
+
+  // Output assignments.
+  assign ready_o = (req_src_q == ack_q); // equal levels: the last request has been acknowledged
+  assign async_req_o = req_src_q;
+  assign async_data_o = data_src_q;
+
+endmodule
+
+
+/// Half of the two-phase clock domain crossing located in the destination
+/// domain: it synchronizes async_req_i, captures async_data_i, and toggles async_ack_o once the item has been taken via valid_o/ready_i.
+module cdc_2phase_dst #(
+  parameter type T = logic // payload type
+)(
+  input  logic rst_ni,       // asynchronous reset, active low
+  input  logic clk_i,        // destination-domain clock
+  output T     data_o,       // captured item
+  output logic valid_o,      // high while a received item has not been acknowledged yet
+  input  logic ready_i,      // receiver takes the item when valid_o && ready_i
+  input  logic async_req_i,  // request level from the source domain (asynchronous to clk_i)
+  output logic async_ack_o,  // acknowledge level towards the source domain
+  input  T     async_data_i  // payload from the source domain (asynchronous to clk_i)
+);
+
+  (* dont_touch = "true" *)
+  (* async_reg = "true" *)
+  logic req_dst_q, req_q0, req_q1, ack_dst_q; // NOTE(review): both attributes precede this one declaration, so they also cover ack_dst_q, which is not a synchronizer stage - confirm intended
+  (* dont_touch = "true" *)
+  T data_dst_q; // captured payload
+
+  // The ack_dst register toggles when the item is handed to the receiver (valid_o && ready_i).
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      ack_dst_q  <= 0;
+    end else if (valid_o && ready_i) begin
+      ack_dst_q  <= ~ack_dst_q; // makes valid_o drop, since ack_dst_q equals req_q1 again
+    end
+  end
+
+  // The data_dst register captures async_data_i in the cycle where the synchronized request has
+  // changed at req_q0 but not yet at req_q1, provided no item is pending (valid_o low).
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      data_dst_q <= '0;
+    end else if (req_q0 != req_q1 && !valid_o) begin
+      data_dst_q <= async_data_i;
+    end
+  end
+
+  // The req_dst and req registers form a three-stage shift chain that samples async_req_i.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      req_dst_q <= 0;
+      req_q0    <= 0;
+      req_q1    <= 0;
+    end else begin
+      req_dst_q <= async_req_i; // first stage: samples the asynchronous input
+      req_q0    <= req_dst_q;
+      req_q1    <= req_q0;      // last stage: the value compared against ack_dst_q
+    end
+  end
+
+  // Output assignments.
+  assign valid_o = (ack_dst_q != req_q1); // differing levels: a request is waiting for its acknowledge
+  assign data_o = data_dst_q;
+  assign async_ack_o = ack_dst_q;
+
+endmodule
+/* verilator lint_on DECLFILENAME */
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_2phase.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_2phase.sv
new file mode 100644
index 0000000000..58939ccaf6
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_2phase.sv
@@ -0,0 +1,134 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+/// A clock domain crossing FIFO, using 2-phase handshakes to exchange its pointers.
+///
+/// This FIFO has its push port (src_*) and pop port (dst_*) in two separate clock
+/// domains. Its size can only be a power of two, which is why its depth is given as
+/// 2**LOG_DEPTH. LOG_DEPTH must be at least 1.
+///
+/// CONSTRAINT: See the constraints for `cdc_2phase`. An additional maximum
+/// delay path needs to be specified from fifo_data_q to dst_data_o.
+module cdc_fifo_2phase #(
+  /// The data type of the payload transported by the FIFO.
+  parameter type T = logic,
+  /// The FIFO's depth given as 2**LOG_DEPTH.
+  parameter int LOG_DEPTH = 3
+)(
+  input  logic src_rst_ni,  // push-side reset, asynchronous, active low
+  input  logic src_clk_i,   // push-side clock
+  input  T     src_data_i,  // item to push
+  input  logic src_valid_i, // push request
+  output logic src_ready_o, // FIFO is not full (as seen from the push side)
+
+  input  logic dst_rst_ni,  // pop-side reset, asynchronous, active low
+  input  logic dst_clk_i,   // pop-side clock
+  output T     dst_data_o,  // item at the read pointer
+  output logic dst_valid_o, // FIFO is not empty (as seen from the pop side)
+  input  logic dst_ready_i  // pop request
+);
+
+  // Check the invariants.
+  //pragma translate_off
+  initial begin
+    assert(LOG_DEPTH > 0); // checked once at time zero, outside of synthesis
+  end
+  //pragma translate_on
+
+  localparam int PTR_WIDTH = LOG_DEPTH+1; // one wrap bit on top of the index bits
+  typedef logic [PTR_WIDTH-1:0] pointer_t;
+  typedef logic [LOG_DEPTH-1:0] index_t;
+
+  localparam pointer_t PTR_FULL  = (1 << LOG_DEPTH); // pointers differ in the wrap bit only
+  localparam pointer_t PTR_EMPTY = '0;               // pointers are identical
+
+  // Allocate the registers for the FIFO memory with its separate write and read
+  // ports. The FIFO has the following ports:
+  //
+  // - write: fifo_widx, fifo_wdata, fifo_write, src_clk_i
+  // - read: fifo_ridx, fifo_rdata
+  index_t fifo_widx, fifo_ridx;
+  logic fifo_write;
+  T fifo_wdata, fifo_rdata;
+  T fifo_data_q [2**LOG_DEPTH];
+
+  assign fifo_rdata = fifo_data_q[fifo_ridx]; // combinational read; storage is clocked by src_clk_i, the index comes from the dst domain
+
+  for (genvar i = 0; i < 2**LOG_DEPTH; i++) begin : g_word
+    always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+      if (!src_rst_ni)
+        fifo_data_q[i] <= '0;
+      else if (fifo_write && fifo_widx == i) // one write-enable decode per word
+        fifo_data_q[i] <= fifo_wdata;
+    end
+  end
+
+  // Allocate the read and write pointers in the source and destination domain.
+  pointer_t src_wptr_q, dst_wptr, src_rptr, dst_rptr_q; // *_q: locally registered; dst_wptr/src_rptr: copies received from the other domain
+
+  always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+    if (!src_rst_ni)
+      src_wptr_q <= 0;
+    else if (src_valid_i && src_ready_o) // advance on every accepted push
+      src_wptr_q <= src_wptr_q + 1;
+  end
+
+  always_ff @(posedge dst_clk_i, negedge dst_rst_ni) begin
+    if (!dst_rst_ni)
+      dst_rptr_q <= 0;
+    else if (dst_valid_o && dst_ready_i) // advance on every accepted pop
+      dst_rptr_q <= dst_rptr_q + 1;
+  end
+
+  // The pointers into the FIFO are one bit wider than the actual address into
+  // the FIFO. This makes detecting critical states very simple: if all but the
+  // topmost bit of rptr and wptr agree, the FIFO is in a critical state. If the
+  // topmost bit is equal as well, the FIFO is empty, otherwise it is full.
+  assign src_ready_o = ((src_wptr_q ^ src_rptr) != PTR_FULL);
+  assign dst_valid_o = ((dst_rptr_q ^ dst_wptr) != PTR_EMPTY);
+
+  // Transport the read and write pointers across the clock domain boundary. Both crossings have
+  // valid and ready tied high, so each one keeps re-sending the current pointer value.
+  cdc_2phase #(pointer_t) i_cdc_wptr (
+    .src_rst_ni  ( src_rst_ni ),
+    .src_clk_i   ( src_clk_i  ),
+    .src_data_i  ( src_wptr_q ),
+    .src_valid_i ( 1'b1       ),
+    .src_ready_o (            ),
+    .dst_rst_ni  ( dst_rst_ni ),
+    .dst_clk_i   ( dst_clk_i  ),
+    .dst_data_o  ( dst_wptr   ),
+    .dst_valid_o (            ),
+    .dst_ready_i ( 1'b1       )
+  );
+
+  cdc_2phase #(pointer_t) i_cdc_rptr (
+    .src_rst_ni  ( dst_rst_ni ),
+    .src_clk_i   ( dst_clk_i  ),
+    .src_data_i  ( dst_rptr_q ),
+    .src_valid_i ( 1'b1       ),
+    .src_ready_o (            ),
+    .dst_rst_ni  ( src_rst_ni ),
+    .dst_clk_i   ( src_clk_i  ),
+    .dst_data_o  ( src_rptr   ),
+    .dst_valid_o (            ),
+    .dst_ready_i ( 1'b1       )
+  );
+
+  // Drive the FIFO write and read ports.
+  assign fifo_widx  = src_wptr_q; // implicit truncation: index_t keeps the low LOG_DEPTH bits and drops the wrap bit
+  assign fifo_wdata = src_data_i;
+  assign fifo_write = src_valid_i && src_ready_o;
+  assign fifo_ridx  = dst_rptr_q; // implicit truncation, as for fifo_widx
+  assign dst_data_o = fifo_rdata;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_gray.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_gray.sv
new file mode 100644
index 0000000000..8b50e2b96c
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/cdc_fifo_gray.sv
@@ -0,0 +1,158 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+/// A clock domain crossing FIFO, using Gray-coded pointers.
+///
+/// This FIFO has its push port (src_*) and pop port (dst_*) in two separate clock
+/// domains. Its size can only be a power of two, which is why its depth is given as
+/// 2**LOG_DEPTH. LOG_DEPTH must be at least 1.
+///
+/// # Constraints
+///
+/// The following constraints need to be set:
+/// - max_delay -from src_wptr_gray_q -to dst_wptr_gray_q
+/// - max_delay -from dst_rptr_gray_q -to src_rptr_gray_q
+/// - max_delay -from fifo_data_q -to fifo_rdata
+module cdc_fifo_gray #(
+  /// The data type of the payload transported by the FIFO.
+  parameter type T = logic,
+  /// The FIFO's depth given as 2**LOG_DEPTH.
+  parameter int LOG_DEPTH = 3
+)(
+  input  logic src_rst_ni,  // push-side reset, asynchronous, active low
+  input  logic src_clk_i,   // push-side clock
+  input  T     src_data_i,  // item to push
+  input  logic src_valid_i, // push request
+  output logic src_ready_o, // FIFO is not full (as seen from the push side)
+
+  input  logic dst_rst_ni,  // pop-side reset, asynchronous, active low
+  input  logic dst_clk_i,   // pop-side clock
+  output T     dst_data_o,  // item at the read pointer
+  output logic dst_valid_o, // FIFO is not empty (as seen from the pop side)
+  input  logic dst_ready_i  // pop request
+);
+
+  // Check the invariants.
+  //pragma translate_off
+  initial begin
+    assert(LOG_DEPTH > 0); // checked once at time zero, outside of synthesis
+  end
+  //pragma translate_on
+
+  localparam int PTR_WIDTH = LOG_DEPTH+1; // one wrap bit on top of the index bits
+  typedef logic [PTR_WIDTH-1:0] pointer_t;
+  typedef logic [LOG_DEPTH-1:0] index_t;
+
+  localparam pointer_t PTR_FULL  = (1 << LOG_DEPTH); // pointers differ in the wrap bit only
+  localparam pointer_t PTR_EMPTY = '0;               // pointers are identical
+
+  // Allocate the registers for the FIFO memory with its separate write and read
+  // ports. The FIFO has the following ports:
+  //
+  // - write: fifo_widx, fifo_wdata, fifo_write, src_clk_i
+  // - read: fifo_ridx, fifo_rdata
+  index_t fifo_widx, fifo_ridx;
+  logic fifo_write;
+  T fifo_wdata, fifo_rdata;
+  T fifo_data_q [2**LOG_DEPTH];
+
+  assign fifo_rdata = fifo_data_q[fifo_ridx]; // combinational read; storage is clocked by src_clk_i, the index comes from the dst domain
+
+  for (genvar i = 0; i < 2**LOG_DEPTH; i++) begin : g_word
+    always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+      if (!src_rst_ni)
+        fifo_data_q[i] <= '0;
+      else if (fifo_write && fifo_widx == i) // one write-enable decode per word
+        fifo_data_q[i] <= fifo_wdata;
+    end
+  end
+
+  // Create the write and read pointers in the source and destination domain.
+  // These are binary counters combined with a Gray encoder. Both the binary and
+  // the Gray coded output are registered; the binary one for use in the local
+  // domain, the Gray one for synchronization into the other domain.
+  pointer_t src_wptr_bin_q, src_wptr_gray_q, dst_rptr_bin_q, dst_rptr_gray_q;
+  pointer_t src_wptr_bin_d, src_wptr_gray_d, dst_rptr_bin_d, dst_rptr_gray_d;
+
+  assign src_wptr_bin_d = src_wptr_bin_q + 1; // next write pointer, registered only on an accepted push
+  assign dst_rptr_bin_d = dst_rptr_bin_q + 1; // next read pointer, registered only on an accepted pop
+
+  binary_to_gray #(PTR_WIDTH) i_src_b2g (src_wptr_bin_d, src_wptr_gray_d); // encodes the NEXT value, so bin and Gray registers update together
+  binary_to_gray #(PTR_WIDTH) i_dst_b2g (dst_rptr_bin_d, dst_rptr_gray_d);
+
+  always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+    if (!src_rst_ni) begin
+      src_wptr_bin_q  <= '0;
+      src_wptr_gray_q <= '0;
+    end else if (src_valid_i && src_ready_o) begin
+      src_wptr_bin_q  <= src_wptr_bin_d;
+      src_wptr_gray_q <= src_wptr_gray_d;
+    end
+  end
+
+  always_ff @(posedge dst_clk_i, negedge dst_rst_ni) begin
+    if (!dst_rst_ni) begin
+      dst_rptr_bin_q  <= '0;
+      dst_rptr_gray_q <= '0;
+    end else if (dst_valid_o && dst_ready_i) begin
+      dst_rptr_bin_q  <= dst_rptr_bin_d;
+      dst_rptr_gray_q <= dst_rptr_gray_d;
+    end
+  end
+
+  // Move the Gray-coded pointers over into the other clock domain through two
+  // register stages each, to reduce the probability of metastability.
+  pointer_t src_rptr_gray_q, src_rptr_gray_q2;
+  pointer_t dst_wptr_gray_q, dst_wptr_gray_q2;
+
+  always_ff @(posedge src_clk_i, negedge src_rst_ni) begin
+    if (!src_rst_ni) begin
+      src_rptr_gray_q  <= '0;
+      src_rptr_gray_q2 <= '0;
+    end else begin
+      src_rptr_gray_q  <= dst_rptr_gray_q; // first stage: samples the register of the other domain
+      src_rptr_gray_q2 <= src_rptr_gray_q; // second stage: the value that gets decoded
+    end
+  end
+
+  always_ff @(posedge dst_clk_i, negedge dst_rst_ni) begin
+    if (!dst_rst_ni) begin
+      dst_wptr_gray_q  <= '0;
+      dst_wptr_gray_q2 <= '0;
+    end else begin
+      dst_wptr_gray_q  <= src_wptr_gray_q; // first stage: samples the register of the other domain
+      dst_wptr_gray_q2 <= dst_wptr_gray_q; // second stage: the value that gets decoded
+    end
+  end
+
+  // Reverse the Gray coding of the synchronized pointers.
+  pointer_t src_rptr_bin, dst_wptr_bin;
+
+  gray_to_binary #(PTR_WIDTH) i_src_g2b (src_rptr_gray_q2, src_rptr_bin);
+  gray_to_binary #(PTR_WIDTH) i_dst_g2b (dst_wptr_gray_q2, dst_wptr_bin);
+
+  // The pointers into the FIFO are one bit wider than the actual address into
+  // the FIFO. This makes detecting critical states very simple: if all but the
+  // topmost bit of rptr and wptr agree, the FIFO is in a critical state. If the
+  // topmost bit is equal as well, the FIFO is empty, otherwise it is full.
+  assign src_ready_o = ((src_wptr_bin_q ^ src_rptr_bin) != PTR_FULL);
+  assign dst_valid_o = ((dst_rptr_bin_q ^ dst_wptr_bin) != PTR_EMPTY);
+
+  // Drive the FIFO write and read ports.
+  assign fifo_widx  = src_wptr_bin_q; // implicit truncation: index_t keeps the low LOG_DEPTH bits and drops the wrap bit
+  assign fifo_wdata = src_data_i;
+  assign fifo_write = src_valid_i && src_ready_o;
+  assign fifo_ridx  = dst_rptr_bin_q; // implicit truncation, as for fifo_widx
+  assign dst_data_o = fifo_rdata;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/cf_math_pkg.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/cf_math_pkg.sv
new file mode 100644
index 0000000000..93e92b6d81
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/cf_math_pkg.sv
@@ -0,0 +1,49 @@
+// Copyright 2016 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// cf_math_pkg: Constant Function Implementations of Mathematical Functions for HDL Elaboration
+//
+// This package contains a collection of mathematical functions that are commonly used when defining
+// the value of constants in HDL code.  These functions are implemented as Verilog constants
+// functions.  Introduced in Verilog 2001 (IEEE Std 1364-2001), a constant function (§ 10.3.5) is a
+// function whose value can be evaluated at compile time or during elaboration.  A constant function
+// must be called with arguments that are constants.
+
+package automatic cf_math_pkg;
+
+    // ceil_div: quotient of two natural numbers, rounded up to the next integer.
+    // Evaluated by repeated subtraction, one loop iteration per unit of the result.
+    function integer ceil_div (input longint dividend, input longint divisor);
+        automatic longint left;
+        // pragma translate_off
+        `ifndef VERILATOR
+        // Reject operands outside the natural numbers, and a zero divisor.
+        if (dividend < 0) begin
+            $fatal(1, "Dividend %0d is not a natural number!", dividend);
+        end
+        if (divisor < 0) begin
+            $fatal(1, "Divisor %0d is not a natural number!", divisor);
+        end
+        if (divisor == 0) begin
+            $fatal(1, "Division by zero!");
+        end
+        `endif
+        // pragma translate_on
+
+        // Count how often the divisor has to be taken away until nothing is left.
+        left     = dividend;
+        ceil_div = 0;
+        while (left > 0) begin
+            left -= divisor;
+            ceil_div++;
+        end
+    endfunction
+
+endpackage
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/clk_div.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/clk_div.sv
new file mode 100644
index 0000000000..70ed084990
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/clk_div.sv
@@ -0,0 +1,42 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Divides the clock by an integer factor
+module clk_div #(
+    parameter int unsigned RATIO = 4 // clk_o pulses once every RATIO enabled clk_i cycles
+)(
+    input  logic clk_i,      // Clock
+    input  logic rst_ni,     // Asynchronous reset active low
+    input  logic testmode_i, // testmode: clk_o is driven by clk_i directly
+    input  logic en_i,       // enable clock divider
+    output logic clk_o       // divided clock out
+);
+    logic [RATIO-1:0] counter_q;  // enabled clk_i cycles counted in the current period
+    logic             clk_q;      // registered divided clock, high for one clk_i cycle per period
+    logic             last_cycle; // counter_q has reached RATIO-1
+    assign last_cycle = (counter_q == (RATIO[RATIO-1:0] - 1));
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            clk_q     <= 1'b0;
+            counter_q <= '0;
+        end else begin
+            clk_q <= 1'b0;
+            if (en_i) begin
+                clk_q     <= last_cycle;
+                // Wrap to start a new period; a counter parked at RATIO-1 would pulse every cycle.
+                counter_q <= last_cycle ? '0 : counter_q + 1;
+            end
+        end
+    end
+    // output assignment - bypass in testmode
+    assign clk_o = testmode_i ? clk_i : clk_q;
+endmodule
\ No newline at end of file
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/counter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/counter.sv
new file mode 100644
index 0000000000..ad5ee91b1b
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/counter.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Generic up/down counter
+
+module counter #(
+    parameter int unsigned WIDTH = 4 // number of bits of the visible count value
+)(
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             clear_i, // synchronous clear
+    input  logic             en_i,    // enable the counter
+    input  logic             load_i,  // load a new value
+    input  logic             down_i,  // downcount, default is up
+    input  logic [WIDTH-1:0] d_i,
+    output logic [WIDTH-1:0] q_o,
+    output logic             overflow_o
+);
+    // The count is kept one bit wider than WIDTH: the extra MSB is reported as overflow_o,
+    // the lower WIDTH bits are the count value q_o.
+    logic [WIDTH:0] counter_q, counter_d;
+
+    assign q_o        = counter_q[WIDTH-1:0];
+    assign overflow_o = counter_q[WIDTH];
+
+    // Next-state selection, highest priority first:
+    //   clear_i -> zero, load_i -> d_i with the MSB cleared, en_i -> one step, otherwise hold.
+    always_comb begin
+        if (clear_i) begin
+            counter_d = '0;
+        end else if (load_i) begin
+            counter_d = {1'b0, d_i};
+        end else if (en_i && down_i) begin
+            counter_d = counter_q - 1;
+        end else if (en_i) begin
+            counter_d = counter_q + 1;
+        end else begin
+            counter_d = counter_q;
+        end
+    end
+
+    // State register, cleared asynchronously while rst_ni is low
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) counter_q <= '0;
+        else         counter_q <= counter_d;
+    end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider.sv
new file mode 100644
index 0000000000..343b0a2386
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider.sv
@@ -0,0 +1,191 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+//                                                                            //
+// Company:        Multitherman Laboratory @ DEIS - University of Bologna     //
+//                    Viale Risorgimento 2 40136                              //
+//                    Bologna - fax 0512093785 -                              //
+//                                                                            //
+// Engineer:       Antonio Pullini - pullinia@iis.ee.ethz.ch                  //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    13/02/2013                                                 //
+// Design Name:    ULPSoC                                                     //
+// Module Name:    clock_divider                                              //
+// Project Name:   ULPSoC                                                     //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Clock Divider                                              //
+//                                                                            //
+//                                                                            //
+// Revision:                                                                  //
+// Revision v0.1 - File Created                                               //
+// Revision v0.2 - (19/03/2015)   clock_gating swapped in pulp_clock_gating   //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+module clock_divider
+#(
+    parameter DIV_INIT     = 0,   // handed to clock_divider_counter as its DIV_INIT
+    parameter BYPASS_INIT  = 1    // handed to clock_divider_counter as its BYPASS_INIT
+)
+(
+    input  logic       clk_i,             // input clock to be divided
+    input  logic       rstn_i,            // active-low reset (see s_rstn_sync below)
+    input  logic       test_mode_i,
+    input  logic       clk_gate_async_i,  // output clock enable, passed through two flops on clk_i
+    input  logic [7:0] clk_div_data_i,    // new division value
+    input  logic       clk_div_valid_i,   // request to take over clk_div_data_i
+    output logic       clk_div_ack_o,     // serial_o of the pulp_sync_wedge fed by clk_div_valid_i
+    output logic       clk_o              // divided clock after the output clock gate
+);
+   // FSM that keeps the output clock gated off while a new division value is taken over
+   enum                logic [1:0] {IDLE, STOP, WAIT, RELEASE} state, state_next;
+
+   logic               s_clk_out;
+   logic               s_clock_enable;
+   logic               s_clock_enable_gate;
+   logic               s_clk_div_valid;
+
+   logic [7:0]         reg_clk_div;
+   logic               s_clk_div_valid_sync;
+
+   logic               s_rstn_sync;
+
+   logic [1:0]         reg_ext_gate_sync;
+    // 1-bit target: of the 2-bit AND result only bit 0 (the second synchronizer flop) is kept
+    assign s_clock_enable_gate =  s_clock_enable & reg_ext_gate_sync;
+    // s_rstn_sync resets every register below: rstgen output, or rstn_i as is for PULP_FPGA_EMUL
+`ifndef PULP_FPGA_EMUL
+    rstgen i_rst_gen
+    (
+        // PAD FRAME SIGNALS
+        .clk_i(clk_i),
+        .rst_ni(rstn_i),            //async signal coming from pads
+
+        // TEST MODE
+        .test_mode_i(test_mode_i),
+
+        // OUTPUT RESET
+        .rst_no(s_rstn_sync),
+        .init_no()                 //not used
+    );
+  `else
+  assign s_rstn_sync = rstn_i;
+`endif
+
+
+    //handle the handshake with the soc_ctrl. Interface is now async
+    pulp_sync_wedge i_edge_prop
+    (
+        .clk_i(clk_i),
+        .rstn_i(s_rstn_sync),
+        .en_i(1'b1),
+        .serial_i(clk_div_valid_i),
+        .serial_o(clk_div_ack_o),
+        .r_edge_o(s_clk_div_valid_sync),
+        .f_edge_o()
+    );
+    //divider core: gets the registered division value and a one-cycle valid from the FSM
+    clock_divider_counter
+    #(
+        .BYPASS_INIT(BYPASS_INIT),
+        .DIV_INIT(DIV_INIT)
+    )
+    i_clkdiv_cnt
+    (
+        .clk(clk_i),
+        .rstn(s_rstn_sync),
+        .test_mode(test_mode_i),
+        .clk_div(reg_clk_div),
+        .clk_div_valid(s_clk_div_valid),
+        .clk_out(s_clk_out)
+    );
+    //output gate: enabled by the FSM and by the synchronized external gate request
+    pulp_clock_gating i_clk_gate
+    (
+        .clk_i(s_clk_out),
+        .en_i(s_clock_enable_gate),
+        .test_en_i(test_mode_i),
+        .clk_o(clk_o)
+    );
+    //FSM outputs and next state: IDLE -> STOP -> WAIT -> RELEASE -> IDLE
+    always_comb
+    begin
+        case(state)
+        IDLE:
+        begin
+            s_clock_enable   = 1'b1;
+            s_clk_div_valid  = 1'b0;
+            if (s_clk_div_valid_sync)
+                state_next = STOP;
+            else
+                state_next = IDLE;
+        end
+        //clock disabled; s_clk_div_valid is high in this state only
+        STOP:
+        begin
+            s_clock_enable   = 1'b0;
+            s_clk_div_valid  = 1'b1;
+            state_next = WAIT;
+        end
+        //WAIT and RELEASE keep the clock disabled for two more clk_i cycles
+        WAIT:
+        begin
+            s_clock_enable   = 1'b0;
+            s_clk_div_valid  = 1'b0;
+            state_next = RELEASE;
+        end
+
+        RELEASE:
+        begin
+            s_clock_enable   = 1'b0;
+            s_clk_div_valid  = 1'b0;
+            state_next = IDLE;
+        end
+        endcase
+    end
+    //FSM state register, reset to IDLE
+    always_ff @(posedge clk_i or negedge s_rstn_sync)
+    begin
+        if (!s_rstn_sync)
+            state <= IDLE;
+        else
+            state <= state_next;
+    end
+
+    //sample the data when valid has been sync and there is a rise edge
+    always_ff @(posedge clk_i or negedge s_rstn_sync)
+    begin
+        if (!s_rstn_sync)
+            reg_clk_div <= '0;
+        else if (s_clk_div_valid_sync)
+                  reg_clk_div <= clk_div_data_i;
+    end
+
+    //two-flop shift register on clk_i: clk_gate_async_i enters at bit 1 and moves on to bit 0
+    always_ff @(posedge clk_i or negedge s_rstn_sync)
+    begin
+        if (!s_rstn_sync)
+            reg_ext_gate_sync <= 2'b00;
+        else
+            reg_ext_gate_sync <= {clk_gate_async_i, reg_ext_gate_sync[1]};
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider_counter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider_counter.sv
new file mode 100644
index 0000000000..e5c222af95
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/clock_divider_counter.sv
@@ -0,0 +1,211 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company:        Multitherman Laboratory @ DEIS - University of Bologna     //
+//                    Viale Risorgimento 2 40136                              //
+//                    Bologna - fax 0512093785 -                              //
+//                                                                            //
+// Engineer:       Antonio Pullini - pullinia@iis.ee.ethz.ch                  //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    13/02/2013                                                 //
+// Design Name:    ULPSoC                                                     //
+// Module Name:    clock_divider_counter                                      //
+// Project Name:   ULPSoC                                                     //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    clock_divider_counter                                      //
+//                                                                            //
+//                                                                            //
+// Revision:                                                                  //
+// Revision v0.1 - File Created                                               //
+// Revision v0.2 - (19/03/2015)   clock_gating swapped in pulp_clock_gating   //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+
+module clock_divider_counter
+#(
+    parameter BYPASS_INIT = 1,      // reset value of bypass
+    parameter DIV_INIT    = 'hFF    // reset value of clk_cnt
+)
+(
+    input  logic       clk,
+    input  logic       rstn,
+    input  logic       test_mode,
+    input  logic [7:0] clk_div,        // requested division factor; 0 and 1 select bypass
+    input  logic       clk_div_valid,  // take over clk_div and restart the divider
+    output logic       clk_out
+);
+    // counter runs from 0 up to clk_cnt and then wraps; it only advances while bypass is low
+    logic [7:0]         counter;
+    logic [7:0]         counter_next;
+    logic [7:0]         clk_cnt;
+    logic               en1;
+    logic               en2;
+
+    logic               is_odd;
+
+    logic               div1;
+    logic               div2;
+    logic               div2_neg_sync;      // declared but not used in this module
+
+    logic [7:0]         clk_cnt_odd;
+    logic [7:0]         clk_cnt_odd_incr;   // only referenced by the commented-out code below
+    logic [7:0]         clk_cnt_even;
+    logic [7:0]         clk_cnt_en2;
+
+    logic               bypass;
+
+    logic               clk_out_gen;
+    logic               clk_div_valid_reg;
+
+    logic               clk_inv_test;
+    logic               clk_inv;
+
+    //        assign clk_cnt_odd_incr = clk_div + 1;
+    //        assign clk_cnt_odd  = {1'b0,clk_cnt_odd_incr[7:1]}; //if odd divider than clk_cnt = (clk_div+1)/2
+    assign clk_cnt_odd  = clk_div - 8'h1; //if odd divider then clk_cnt = clk_div - 1
+    assign clk_cnt_even = (clk_div == 8'h2) ? 8'h0 : ({1'b0,clk_div[7:1]} - 8'h1);   //if even divider then clk_cnt = clk_div/2 - 1
+    assign clk_cnt_en2  = {1'b0,clk_cnt[7:1]} + 8'h1; //counter value at which en2 is raised: clk_cnt/2 + 1
+    // en1: counter is at 0 (div1 toggle point); en2: counter is at clk_cnt_en2 (div2 toggle point)
+    always_comb
+    begin
+        if (counter == 'h0)
+            en1 = 1'b1;
+        else
+            en1 = 1'b0;
+        // counter restarts on a new division value or after reaching clk_cnt
+        if (clk_div_valid)
+            counter_next = 'h0;
+        else if (counter == clk_cnt)
+                counter_next = 'h0;
+             else
+                counter_next = counter + 1;
+        // en2 is suppressed in the cycle in which a new division value arrives
+        if (clk_div_valid)
+            en2 = 1'b0;
+        else if (counter == clk_cnt_en2)
+                en2 = 1'b1;
+             else
+                en2 = 1'b0;
+    end
+   // configuration registers, counter and div1; div1 toggles each time the counter passes 0
+   always_ff @(posedge clk, negedge rstn)
+   begin
+        if (~rstn)
+        begin
+             counter            <=  'h0;
+             div1               <= 1'b0;
+             bypass             <= BYPASS_INIT;
+             clk_cnt            <= DIV_INIT;
+             is_odd             <= 1'b0;
+             clk_div_valid_reg  <= 1'b0;
+        end
+        else
+        begin
+              if (!bypass)
+                  counter <= counter_next;
+              // one-cycle delayed copy of clk_div_valid, used to clear div2
+              clk_div_valid_reg <= clk_div_valid;
+              if (clk_div_valid)
+              begin
+                if ((clk_div == 8'h0) || (clk_div == 8'h1))
+                  begin
+                      bypass <= 1'b1;
+                      clk_cnt <= 'h0;
+                      is_odd  <= 1'b0;
+                  end
+                else
+                  begin
+                      bypass <= 1'b0;
+                      if (clk_div[0])
+                        begin
+                          is_odd  <= 1'b1;
+                          clk_cnt <= clk_cnt_odd;
+                        end
+                      else
+                        begin
+                          is_odd  <= 1'b0;
+                          clk_cnt <= clk_cnt_even;
+                        end
+                  end
+                div1 <= 1'b0;
+              end
+              else
+              begin
+                if (en1 && !bypass)
+                  div1 <= ~div1;
+              end
+        end
+    end
+    // clk_inv clocks the div2 register below (via clk_inv_test)
+    pulp_clock_inverter clk_inv_i
+    (
+        .clk_i(clk),
+        .clk_o(clk_inv)
+    );
+    // only with PULP_DFT (and without PULP_FPGA_EMUL) is clk_inv_test muxed with clk by test_mode
+`ifndef PULP_FPGA_EMUL
+ `ifdef PULP_DFT
+   pulp_clock_mux2 clk_muxinv_i
+     (
+      .clk0_i(clk_inv),
+      .clk1_i(clk),
+      .clk_sel_i(test_mode),
+      .clk_o(clk_inv_test)
+      );
+ `else
+   assign clk_inv_test = clk_inv;
+ `endif
+`else
+   assign clk_inv_test = clk_inv;
+`endif
+    // div2 is clocked by clk_inv_test and toggles only for odd division factors (is_odd)
+    always_ff @(posedge clk_inv_test or negedge rstn)
+    begin
+        if (!rstn)
+        begin
+            div2    <= 1'b0;
+        end
+        else
+        begin
+            if (clk_div_valid_reg)
+                div2 <= 1'b0;
+            else if (en2 && is_odd && !bypass)
+                    div2 <= ~div2;
+        end
+    end // always_ff @ (posedge clk_inv_test or negedge rstn)
+    // generated clock is built from div1 and div2 by the clock XOR cell
+    pulp_clock_xor2 clock_xor_i
+    (
+        .clk_o(clk_out_gen),
+        .clk0_i(div1),
+        .clk1_i(div2)
+    );
+    // clk is on clk1_i, presumably selected while bypass or test_mode is high (confirm in pulp_clock_mux2)
+    pulp_clock_mux2 clk_mux_i
+    (
+        .clk0_i(clk_out_gen),
+        .clk1_i(clk),
+        .clk_sel_i(bypass || test_mode),
+        .clk_o(clk_out)
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v1.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v1.sv
new file mode 100644
index 0000000000..31295e80ec
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v1.sv
@@ -0,0 +1,57 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+/* verilator lint_off DECLFILENAME */
+module fifo #(
+    parameter bit          FALL_THROUGH = 1'b0, // fifo is in fall-through mode
+    parameter int unsigned DATA_WIDTH   = 32,   // default data width if the fifo is of type logic
+    parameter int unsigned DEPTH        = 8,    // depth can be arbitrary from 0 to 2**32
+    parameter int unsigned THRESHOLD    = 1,    // fill count until when to assert threshold_o
+    parameter type dtype                = logic [DATA_WIDTH-1:0]
+)(
+    input  logic  clk_i,            // Clock
+    input  logic  rst_ni,           // Asynchronous reset active low
+    input  logic  flush_i,          // flush the queue
+    input  logic  testmode_i,       // test_mode to bypass clock gating
+    // status flags
+    output logic  full_o,           // queue is full
+    output logic  empty_o,          // queue is empty
+    output logic  threshold_o,      // the FIFO is above the specified threshold
+    // as long as the queue is not full we can push new data
+    input  dtype  data_i,           // data to push into the queue
+    input  logic  push_i,           // data is valid and can be pushed to the queue
+    // as long as the queue is not empty we can pop new elements
+    output dtype  data_o,           // output data
+    input  logic  pop_i             // pop head from queue
+);
+    // Version-1 interface kept for existing instantiations.  All behaviour comes from the
+    // fifo_v2 instance below; this module only renames THRESHOLD / threshold_o to the
+    // almost-full parameter and flag of fifo_v2.
+    fifo_v2 #(
+        .FALL_THROUGH ( FALL_THROUGH ),
+        .DATA_WIDTH   ( DATA_WIDTH   ),
+        .DEPTH        ( DEPTH        ),
+        .ALM_FULL_TH  ( THRESHOLD    ),
+        .dtype        ( dtype        )
+    ) impl (
+        // ports with identical names on both sides are connected by name
+        .clk_i, .rst_ni, .flush_i, .testmode_i,
+        .full_o, .empty_o,
+        .data_i, .push_i,
+        .data_o, .pop_i,
+        // threshold_o is the almost-full flag; the almost-empty flag is left unconnected
+        .alm_full_o  ( threshold_o ),
+        .alm_empty_o (             )
+    );
+
+endmodule
+/* verilator lint_on DECLFILENAME */
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v2.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v2.sv
new file mode 100644
index 0000000000..9c87ed9692
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/fifo_v2.sv
@@ -0,0 +1,79 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+module fifo_v2 #(
+    parameter bit          FALL_THROUGH = 1'b0, // fifo is in fall-through mode
+    parameter int unsigned DATA_WIDTH   = 32,   // default data width if the fifo is of type logic
+    parameter int unsigned DEPTH        = 8,    // depth can be arbitrary from 0 to 2**32
+    parameter int unsigned ALM_EMPTY_TH = 1,    // almost empty threshold (when to assert alm_empty_o)
+    parameter int unsigned ALM_FULL_TH  = 1,    // almost full threshold (when to assert alm_full_o)
+    parameter type dtype                = logic [DATA_WIDTH-1:0],
+    // DO NOT OVERWRITE THIS PARAMETER
+    parameter int unsigned ADDR_DEPTH   = (DEPTH > 1) ? $clog2(DEPTH) : 1
+)(
+    input  logic  clk_i,            // Clock
+    input  logic  rst_ni,           // Asynchronous reset active low
+    input  logic  flush_i,          // flush the queue
+    input  logic  testmode_i,       // test_mode to bypass clock gating
+    // status flags
+    output logic  full_o,           // queue is full
+    output logic  empty_o,          // queue is empty
+    output logic  alm_full_o,       // FIFO fillstate >= the specified threshold
+    output logic  alm_empty_o,      // FIFO fillstate <= the specified threshold
+    // as long as the queue is not full we can push new data
+    input  dtype  data_i,           // data to push into the queue
+    input  logic  push_i,           // data is valid and can be pushed to the queue
+    // as long as the queue is not empty we can pop new elements
+    output dtype  data_o,           // output data
+    input  logic  pop_i             // pop head from queue
+);
+    // Fill level reported by fifo_v3.  It is only ADDR_DEPTH bits wide, so for a power-of-two
+    // DEPTH it cannot hold the value DEPTH itself; the completely full state is taken from full_o.
+    logic [ADDR_DEPTH-1:0] usage;
+    // generate threshold parameters
+    if (DEPTH == 0) begin
+        assign alm_full_o  = 1'b0; // that signal does not make any sense in a FIFO of depth 0
+        assign alm_empty_o = 1'b0; // that signal does not make any sense in a FIFO of depth 0
+    end else begin
+        // Compare at the full parameter width: slicing a threshold down to ADDR_DEPTH bits
+        // would turn a threshold equal to a power-of-two DEPTH (allowed below) into 0.
+        assign alm_full_o   = full_o | (32'(usage) >= ALM_FULL_TH);
+        assign alm_empty_o  = full_o ? (DEPTH <= ALM_EMPTY_TH) : (32'(usage) <= ALM_EMPTY_TH);
+    end
+
+    fifo_v3 #(
+        .FALL_THROUGH ( FALL_THROUGH ),
+        .DATA_WIDTH   ( DATA_WIDTH   ),
+        .DEPTH        ( DEPTH        ),
+        .dtype        ( dtype        )
+    ) i_fifo_v3 (
+        .clk_i,
+        .rst_ni,
+        .flush_i,
+        .testmode_i,
+        .full_o,
+        .empty_o,
+        .usage_o (usage),
+        .data_i,
+        .push_i,
+        .data_o,
+        .pop_i
+    );
+    // pragma translate_off
+    `ifndef VERILATOR
+        initial begin
+            assert (ALM_FULL_TH <= DEPTH)  else $error("ALM_FULL_TH can't be larger than the DEPTH.");
+            assert (ALM_EMPTY_TH <= DEPTH) else $error("ALM_EMPTY_TH can't be larger than the DEPTH.");
+        end
+    `endif
+    // pragma translate_on
+endmodule // fifo_v2
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/find_first_one.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/find_first_one.sv
new file mode 100644
index 0000000000..ee3ba20f70
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/find_first_one.sv
@@ -0,0 +1,83 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Deprecated, use lzc unit instead.
+
+/// A leading-one finder / leading zero counter.
+/// Set FLIP to 0 for find_first_one => first_one_o is the index of the first one (from the LSB)
+/// Set FLIP to 1 for leading zero counter => first_one_o is the number of leading zeroes (from the MSB)
+module find_first_one #(
+    /// The width of the input vector.
+    parameter int WIDTH = -1, // the default fails the WIDTH >= 0 assertion below, so set it explicitly
+    parameter int FLIP = 0    // non-zero: the input is bit-reversed before the search
+)(
+    input  logic [WIDTH-1:0]         in_i,
+    output logic [$clog2(WIDTH)-1:0] first_one_o, // index_nodes[0], i.e. the index selected at the tree root
+    output logic                     no_ones_o    // inverse of sel_nodes[0], the OR collected at the tree root
+);
+    // depth of the binary selection tree built below
+    localparam int NUM_LEVELS = $clog2(WIDTH);
+
+    // pragma translate_off
+    initial begin
+        assert(WIDTH >= 0);
+    end
+    // pragma translate_on
+    // index_lut[j] holds the constant j; sel_nodes / index_nodes hold the tree (root at index 0)
+    logic [WIDTH-1:0][NUM_LEVELS-1:0]          index_lut;
+    logic [2**NUM_LEVELS-1:0]                  sel_nodes;
+    logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0]  index_nodes;
+    // search vector: in_i, or in_i with the bit order reversed when FLIP is set
+    logic [WIDTH-1:0] in_tmp;
+
+    for (genvar i = 0; i < WIDTH; i++) begin
+        assign in_tmp[i] = FLIP ? in_i[WIDTH-1-i] : in_i[i];
+    end
+
+    for (genvar j = 0; j < WIDTH; j++) begin
+        assign index_lut[j] = j;
+    end
+    // node l of a level sits at index 2**level-1+l; its children at 2**(level+1)-1+2*l and +1
+    for (genvar level = 0; level < NUM_LEVELS; level++) begin
+        // inner levels: OR the children's flags and pass on the index of the first child that has one
+        if (level < NUM_LEVELS-1) begin
+            for (genvar l = 0; l < 2**level; l++) begin
+                assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
+                assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ?
+                    index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1];
+            end
+        end
+        // leaf level: node k looks at in_tmp[2*k] and in_tmp[2*k+1]; the even bit has priority
+        if (level == NUM_LEVELS-1) begin
+            for (genvar k = 0; k < 2**level; k++) begin
+                // if two successive indices are still in the vector...
+                if (k * 2 < WIDTH-1) begin
+                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2] | in_tmp[k*2+1];
+                    assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
+                end
+                // if only the first index is still in the vector...
+                if (k * 2 == WIDTH-1) begin
+                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2];
+                    assign index_nodes[2**level-1+k] = index_lut[k*2];
+                end
+                // if index is out of range
+                if (k * 2 > WIDTH-1) begin
+                    assign sel_nodes[2**level-1+k]   = 1'b0;
+                    assign index_nodes[2**level-1+k] = '0;
+                end
+            end
+        end
+    end
+    // NOTE(review): WIDTH == 1 gives NUM_LEVELS == 0, so no_ones_o is tied high whatever in_i is - confirm intended
+    assign first_one_o = NUM_LEVELS > 0 ? index_nodes[0] : '0;
+    assign no_ones_o   = NUM_LEVELS > 0 ? ~sel_nodes[0]  : '1;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_LFSR_8bit.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_LFSR_8bit.sv
new file mode 100644
index 0000000000..fb0080accf
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_LFSR_8bit.sv
@@ -0,0 +1,64 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Igor Loi <igor.loi@unibo.it>
+
+module generic_LFSR_8bit
+#(
+    parameter OH_WIDTH  = 4,                  // number of one-hot output bits
+    parameter BIN_WIDTH = $clog2(OH_WIDTH),   // number of binary output bits
+    parameter SEED      = 8'b00000000         // register value loaded on reset
+)
+(
+    output logic [OH_WIDTH-1:0]  data_OH_o,   // one-hot encoding of the index
+    output logic [BIN_WIDTH-1:0] data_BIN_o,  // binary encoding of the index
+    input  logic                 enable_i,    // shift the register on the next clock edge
+    input  logic                 clk,         // rising-edge clock
+    input  logic                 rst_n        // asynchronous reset, active low
+);
+
+    // 8-bit linear-feedback shift register. A slice of its state is exported both
+    // as a binary index (data_BIN_o) and as the matching one-hot vector (data_OH_o).
+
+    logic [7:0]           lfsr_q;        // shift-register state
+    logic                 feedback_bit;  // bit shifted in at position 0
+    logic [BIN_WIDTH-1:0] way_idx;       // index taken from the state bits
+
+    // Feedback: inverted XOR of state bits 7, 3, 2 and 1, written here as a
+    // reduction XNOR over the concatenated taps.
+    assign feedback_bit = ~^{lfsr_q[7], lfsr_q[3], lfsr_q[2], lfsr_q[1]};
+
+    assign data_BIN_o = way_idx;
+
+    // State register: SEED on reset, otherwise shift left by one when enabled.
+    always_ff @(posedge clk or negedge rst_n) begin
+        if (!rst_n) begin
+            lfsr_q <= SEED;
+        end else if (enable_i) begin
+            lfsr_q <= {lfsr_q[6:0], feedback_bit};
+        end
+    end
+
+    // The index is read from the state starting at bit 1 (bit 0 is skipped).
+    // OH_WIDTH == 2 selects the single bit 1; otherwise bits BIN_WIDTH down to 1.
+    generate
+        if (OH_WIDTH == 2)
+            assign way_idx = lfsr_q[1];
+        else
+            assign way_idx = lfsr_q[BIN_WIDTH:1];
+    endgenerate
+
+    // Binary to one-hot: clear every bit, then set the indexed one.
+    always_comb begin
+        data_OH_o          = '0;
+        data_OH_o[way_idx] = 1'b1;
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo.sv
new file mode 100644
index 0000000000..ece4aac78e
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo.sv
@@ -0,0 +1,274 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// ============================================================================= //
+// Company:        Multitherman Laboratory @ DEIS - University of Bologna        //
+//                    Viale Risorgimento 2 40136                                 //
+//                    Bologna - fax 0512093785 -                                 //
+//                                                                               //
+// Engineer:       Igor Loi - igor.loi@unibo.it                                  //
+//                                                                               //
+//                                                                               //
+// Additional contributions by:                                                  //
+//                                                                               //
+//                                                                               //
+//                                                                               //
+// Create Date:    01/02/2014                                                    //
+// Design Name:    MISC                                                          //
+// Module Name:    generic_fifo                                                  //
+// Project Name:   PULP                                                          //
+// Language:       SystemVerilog                                                 //
+//                                                                               //
+// Description:   A simple FIFO used in the D_address_decoder, and D_allocator   //
+//                to store the destinations ports                                //
+//                                                                               //
+// Revision:                                                                     //
+// Revision v0.1 - 01/02/2014 : File Created                                     //
+// Revision v0.2 - 02/09/2015 : Updated with a global CG cell                    //
+//                                                                               //
+// ============================================================================= //
+
+module generic_fifo
+#(
+   parameter int unsigned          DATA_WIDTH = 32,   // bits per stored word
+   parameter int unsigned          DATA_DEPTH = 8     // number of storage entries
+)
+(
+   input  logic                                    clk,
+   input  logic                                    rst_n,        // asynchronous reset, active low
+   //PUSH SIDE
+   input  logic [DATA_WIDTH-1:0]                   data_i,
+   input  logic                                    valid_i,      // producer offers data_i
+   output logic                                    grant_o,      // 1 in EMPTY and MIDDLE: the push is accepted
+   //POP SIDE
+   output logic [DATA_WIDTH-1:0]                   data_o,       // entry addressed by the pop pointer
+   output logic                                    valid_o,      // 1 in MIDDLE and FULL: data_o is a stored word
+   input  logic                                    grant_i,      // consumer takes data_o
+
+   input  logic                                    test_mode_i   // drives test_en_i of the clock-gating cell
+);
+   // Register-array FIFO with valid/grant handshakes on both sides. A three-state FSM
+   // (EMPTY / MIDDLE / FULL) produces grant_o, valid_o and the next pointer values.
+   // Local Parameter
+   localparam int unsigned ADDR_DEPTH = $clog2(DATA_DEPTH);   // pointer width in bits
+   enum logic [1:0] { EMPTY, FULL, MIDDLE } CS, NS;
+   // Internal Signals
+
+   logic       gate_clock;   // 1 = no write this cycle, inverted into the clock-gate enable
+   logic       clk_gated;    // clock of the storage registers
+
+   logic [ADDR_DEPTH-1:0]  Pop_Pointer_CS,  Pop_Pointer_NS;
+   logic [ADDR_DEPTH-1:0]  Push_Pointer_CS, Push_Pointer_NS;
+   logic [DATA_WIDTH-1:0]  FIFO_REGISTERS[DATA_DEPTH-1:0];
+   int unsigned            i;   // loop index of the storage reset
+
+   // Parameter Check
+   // synopsys translate_off
+   initial begin : parameter_check
+      integer param_err_flg;   // set on a violation; not read anywhere in this block
+      param_err_flg = 0;
+
+      if (DATA_WIDTH < 1) begin
+         param_err_flg = 1;
+         $display("ERROR: %m :\n  Invalid value (%d) for parameter DATA_WIDTH (legal range: greater than 1)", DATA_WIDTH );
+      end
+      // NOTE(review): both messages say "greater than 1" while the tests only reject values below 1.
+      if (DATA_DEPTH < 1) begin
+         param_err_flg = 1;
+         $display("ERROR: %m :\n  Invalid value (%d) for parameter DATA_DEPTH (legal range: greater than 1)", DATA_DEPTH );
+      end
+   end
+   // synopsys translate_on
+   // Storage clock: gated by cluster_clock_gating unless PULP_FPGA_EMUL is defined.
+`ifndef PULP_FPGA_EMUL
+   cluster_clock_gating cg_cell
+   (
+     .clk_i     ( clk         ),
+     .en_i      (~gate_clock  ),
+     .test_en_i ( test_mode_i ),
+     .clk_o     ( clk_gated   )
+   );
+`else
+   assign clk_gated = clk;
+`endif
+
+   // UPDATE THE STATE (FSM state and both pointers run on the ungated clock)
+   always_ff @(posedge clk, negedge rst_n)
+   begin
+       if(rst_n == 1'b0)
+       begin
+               CS              <= EMPTY;
+               Pop_Pointer_CS  <= {ADDR_DEPTH {1'b0}};
+               Push_Pointer_CS <= {ADDR_DEPTH {1'b0}};
+       end
+       else
+       begin
+               CS              <= NS;
+               Pop_Pointer_CS  <= Pop_Pointer_NS;
+               Push_Pointer_CS <= Push_Pointer_NS;
+       end
+   end
+
+
+   // Compute Next State
+   always_comb
+   begin
+      gate_clock      = 1'b0;
+
+      case(CS)
+
+      EMPTY:
+      begin
+          grant_o = 1'b1;
+          valid_o = 1'b0;
+
+          case(valid_i)
+          1'b0 :   // nothing offered: hold everything
+          begin
+                  NS              = EMPTY;
+                  Push_Pointer_NS = Push_Pointer_CS;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+                  gate_clock      = 1'b1;
+          end
+          // first word accepted; wrap at DATA_DEPTH-1 as in MIDDLE so non-power-of-two depths stay in range
+          1'b1:
+          begin
+                  NS              = MIDDLE;
+                  Push_Pointer_NS = (Push_Pointer_CS == DATA_DEPTH-1) ? {ADDR_DEPTH {1'b0}} : Push_Pointer_CS + 1'b1;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+
+          endcase
+      end//~EMPTY
+
+      MIDDLE:
+      begin
+          grant_o = 1'b1;
+          valid_o = 1'b1;
+
+          case({valid_i,grant_i})
+
+          2'b01:   // pop only
+          begin
+                  gate_clock      = 1'b1;
+                  // last stored word leaves when pop is one behind push; second term is the wrapped case
+                  if((Pop_Pointer_CS == Push_Pointer_CS -1 ) || ((Pop_Pointer_CS == DATA_DEPTH-1) && (Push_Pointer_CS == 0) ))
+                          NS              = EMPTY;
+                  else
+                          NS              = MIDDLE;
+
+                  Push_Pointer_NS = Push_Pointer_CS;
+
+                  if(Pop_Pointer_CS == DATA_DEPTH-1)
+                          Pop_Pointer_NS  = 0;
+                  else
+                          Pop_Pointer_NS  = Pop_Pointer_CS + 1'b1;
+          end
+
+          2'b00 :   // idle
+          begin
+                  gate_clock      = 1'b1;
+                  NS              = MIDDLE;
+                  Push_Pointer_NS = Push_Pointer_CS;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+
+          2'b11:   // push and pop together: both pointers advance, state stays MIDDLE
+          begin
+                  NS              = MIDDLE;
+
+                  if(Push_Pointer_CS == DATA_DEPTH-1)
+                          Push_Pointer_NS = 0;
+                  else
+                          Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                  if(Pop_Pointer_CS == DATA_DEPTH-1)
+                          Pop_Pointer_NS  = 0;
+                  else
+                          Pop_Pointer_NS  = Pop_Pointer_CS  + 1'b1;
+          end
+
+          2'b10:   // push only: FULL when push is one behind pop (second term is the wrapped case)
+          begin
+                  if(( Push_Pointer_CS == Pop_Pointer_CS - 1) || ( (Push_Pointer_CS == DATA_DEPTH-1) && (Pop_Pointer_CS == 0) ))
+                          NS              = FULL;
+                  else
+                          NS        = MIDDLE;
+
+                  if(Push_Pointer_CS == DATA_DEPTH - 1)
+                          Push_Pointer_NS = 0;
+                  else
+                          Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+
+          endcase
+      end
+
+      FULL:
+      begin
+          grant_o     = 1'b0;
+          valid_o     = 1'b1;
+          gate_clock  = 1'b1;   // grant_o is 0 here, so the storage is never written in FULL
+
+          case(grant_i)
+          1'b1:   // pop
+          begin
+                  NS              = MIDDLE;
+
+                  Push_Pointer_NS = Push_Pointer_CS;
+
+                  if(Pop_Pointer_CS == DATA_DEPTH-1)
+                          Pop_Pointer_NS  = 0;
+                  else
+                          Pop_Pointer_NS  = Pop_Pointer_CS  + 1'b1;
+          end
+
+          1'b0:   // hold
+          begin
+                  NS              = FULL;
+                  Push_Pointer_NS = Push_Pointer_CS;
+                  Pop_Pointer_NS  = Pop_Pointer_CS;
+          end
+          endcase
+
+      end // end of FULL
+
+      default :   // unused state encoding: go back to EMPTY with cleared pointers
+      begin
+          gate_clock      = 1'b1;
+          grant_o         = 1'b0;
+          valid_o         = 1'b0;
+          NS              = EMPTY;
+          Pop_Pointer_NS  = 0;
+          Push_Pointer_NS = 0;
+      end
+
+      endcase
+   end
+   // Storage: cleared on reset, written at the push pointer when a push is accepted.
+   always_ff @(posedge clk_gated, negedge rst_n)
+   begin
+      if(rst_n == 1'b0)
+      begin
+      for (i=0; i< DATA_DEPTH; i++)
+         FIFO_REGISTERS[i] <= {DATA_WIDTH {1'b0}};
+      end
+      else
+      begin
+         if((grant_o == 1'b1) && (valid_i == 1'b1))
+            FIFO_REGISTERS[Push_Pointer_CS] <= data_i;
+      end
+   end
+   // Read side is combinational: data_o always shows the entry at the pop pointer.
+   assign data_o = FIFO_REGISTERS[Pop_Pointer_CS];
+
+endmodule // generic_fifo
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo_adv.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo_adv.sv
new file mode 100644
index 0000000000..df6cc0d796
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/generic_fifo_adv.sv
@@ -0,0 +1,264 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Igor Loi <igor.loi@unibo.it>
+
+module generic_fifo_adv
+#(
+   parameter int unsigned          DATA_WIDTH = 32,   // bits per stored word
+   parameter int unsigned          DATA_DEPTH = 8     // number of storage entries
+   )
+   (
+    input  logic                                    clk,
+    input  logic                                    rst_n,        // asynchronous reset, active low
+    input  logic                                    clear_i,      // synchronous: state to EMPTY, both pointers to 0
+    // Same structure as generic_fifo plus clear_i; the stored words themselves are not cleared by it.
+    //PUSH SIDE
+    input  logic [DATA_WIDTH-1:0]                   data_i,
+    input  logic                                    valid_i,      // producer offers data_i
+    output logic                                    grant_o,      // 1 in EMPTY and MIDDLE: the push is accepted
+
+    //POP SIDE
+    output logic [DATA_WIDTH-1:0]                   data_o,       // entry addressed by the pop pointer
+    output logic                                    valid_o,      // 1 in MIDDLE and FULL: data_o is a stored word
+    input  logic                                    grant_i,      // consumer takes data_o
+
+    input  logic                                    test_mode_i   // drives test_en_i of the clock-gating cell
+    );
+   // Register-array FIFO with valid/grant handshakes on both sides. A three-state FSM
+   // (EMPTY / MIDDLE / FULL) produces grant_o, valid_o and the next pointer values.
+   // Local Parameter
+   localparam  int unsigned ADDR_DEPTH = $clog2(DATA_DEPTH);   // pointer width in bits
+   enum logic [1:0] { EMPTY, FULL, MIDDLE } CS, NS;
+   // Internal Signals
+
+   logic                   gate_clock;   // 1 = no write this cycle, inverted into the clock-gate enable
+   logic                   clk_gated;    // clock of the storage registers
+
+   logic [ADDR_DEPTH-1:0]  Pop_Pointer_CS,  Pop_Pointer_NS;
+   logic [ADDR_DEPTH-1:0]  Push_Pointer_CS, Push_Pointer_NS;
+   logic [DATA_WIDTH-1:0]  FIFO_REGISTERS[DATA_DEPTH-1:0];
+   int unsigned            i;            // loop index of the storage reset
+
+   // Parameter Check
+   // synopsys translate_off
+   initial
+   begin : parameter_check
+      integer param_err_flg;   // set on a violation; not read anywhere in this block
+      param_err_flg = 0;
+
+      if (DATA_WIDTH < 1)
+        begin
+           param_err_flg = 1;
+           $display("ERROR: %m :\n  Invalid value (%d) for parameter DATA_WIDTH (legal range: greater than 1)", DATA_WIDTH );
+        end
+      // NOTE(review): both messages say "greater than 1" while the tests only reject values below 1.
+      if (DATA_DEPTH < 1)
+        begin
+           param_err_flg = 1;
+           $display("ERROR: %m :\n  Invalid value (%d) for parameter DATA_DEPTH (legal range: greater than 1)", DATA_DEPTH );
+        end
+   end
+   // synopsys translate_on
+   // Storage clock: gated by cluster_clock_gating unless PULP_FPGA_EMUL is defined.
+`ifndef PULP_FPGA_EMUL
+   cluster_clock_gating cg_cell
+     (
+      .clk_i     ( clk         ),
+      .en_i      (~gate_clock  ),
+      .test_en_i ( test_mode_i ),
+      .clk_o     ( clk_gated   )
+      );
+`else
+   assign clk_gated = clk;
+`endif
+
+   // UPDATE THE STATE (FSM state and both pointers run on the ungated clock)
+   always_ff @(posedge clk, negedge rst_n)
+     begin
+        if(rst_n == 1'b0)
+          begin
+             CS              <= EMPTY;
+             Pop_Pointer_CS  <= {ADDR_DEPTH {1'b0}};
+             Push_Pointer_CS <= {ADDR_DEPTH {1'b0}};
+          end
+        else
+          begin
+             if(clear_i)   // clear wins over the computed next state
+               begin
+                  CS              <= EMPTY;
+                  Pop_Pointer_CS  <= {ADDR_DEPTH {1'b0}};
+                  Push_Pointer_CS <= {ADDR_DEPTH {1'b0}};
+               end
+             else
+               begin
+                  CS              <= NS;
+                  Pop_Pointer_CS  <= Pop_Pointer_NS;
+                  Push_Pointer_CS <= Push_Pointer_NS;
+               end
+          end
+     end
+
+
+   // Compute Next State
+   always_comb
+     begin
+        gate_clock      = 1'b0;
+
+        case(CS)
+
+          EMPTY:
+            begin
+               grant_o = 1'b1;
+               valid_o = 1'b0;
+
+               case(valid_i)
+                 1'b0 :   // nothing offered: hold everything
+                   begin
+                      NS              = EMPTY;
+                      Push_Pointer_NS = Push_Pointer_CS;
+                      Pop_Pointer_NS  = Pop_Pointer_CS;
+                      gate_clock      = 1'b1;
+                   end
+                 // first word accepted; wrap at DATA_DEPTH-1 as in MIDDLE so non-power-of-two depths stay in range
+                 1'b1:
+                   begin
+                      NS              = MIDDLE;
+                      Push_Pointer_NS = (Push_Pointer_CS == DATA_DEPTH-1) ? {ADDR_DEPTH {1'b0}} : Push_Pointer_CS + 1'b1;
+                      Pop_Pointer_NS  = Pop_Pointer_CS;
+                   end
+
+               endcase
+            end//~EMPTY
+
+          MIDDLE:
+            begin
+               grant_o = 1'b1;
+               valid_o = 1'b1;
+
+               case({valid_i,grant_i})
+
+                 2'b01:   // pop only
+                   begin
+                      gate_clock      = 1'b1;
+                      // last stored word leaves when pop is one behind push; second term is the wrapped case
+                      if((Pop_Pointer_CS == Push_Pointer_CS -1 ) || ((Pop_Pointer_CS == DATA_DEPTH-1) && (Push_Pointer_CS == 0) ))
+                        NS              = EMPTY;
+                      else
+                        NS              = MIDDLE;
+
+                      Push_Pointer_NS = Push_Pointer_CS;
+
+                      if(Pop_Pointer_CS == DATA_DEPTH-1)
+                        Pop_Pointer_NS  = 0;
+                      else
+                        Pop_Pointer_NS  = Pop_Pointer_CS + 1'b1;
+                   end
+
+                 2'b00 :   // idle
+                   begin
+                      gate_clock      = 1'b1;
+                      NS              = MIDDLE;
+                      Push_Pointer_NS = Push_Pointer_CS;
+                      Pop_Pointer_NS  = Pop_Pointer_CS;
+                   end
+
+                 2'b11:   // push and pop together: both pointers advance, state stays MIDDLE
+                   begin
+                      NS              = MIDDLE;
+
+                      if(Push_Pointer_CS == DATA_DEPTH-1)
+                        Push_Pointer_NS = 0;
+                      else
+                        Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                      if(Pop_Pointer_CS == DATA_DEPTH-1)
+                        Pop_Pointer_NS  = 0;
+                      else
+                        Pop_Pointer_NS  = Pop_Pointer_CS  + 1'b1;
+                   end
+
+                 2'b10:   // push only: FULL when push is one behind pop (second term is the wrapped case)
+                   begin
+                      if(( Push_Pointer_CS == Pop_Pointer_CS - 1) || ( (Push_Pointer_CS == DATA_DEPTH-1) && (Pop_Pointer_CS == 0) ))
+                        NS              = FULL;
+                      else
+                        NS        = MIDDLE;
+
+                      if(Push_Pointer_CS == DATA_DEPTH - 1)
+                        Push_Pointer_NS = 0;
+                      else
+                        Push_Pointer_NS = Push_Pointer_CS + 1'b1;
+
+                      Pop_Pointer_NS  = Pop_Pointer_CS;
+                   end
+
+               endcase
+            end
+
+          FULL:
+            begin
+               grant_o     = 1'b0;
+               valid_o     = 1'b1;
+               gate_clock  = 1'b1;   // grant_o is 0 here, so the storage is never written in FULL
+
+               case(grant_i)
+                 1'b1:   // pop
+                   begin
+                      NS              = MIDDLE;
+
+                      Push_Pointer_NS = Push_Pointer_CS;
+
+                      if(Pop_Pointer_CS == DATA_DEPTH-1)
+                        Pop_Pointer_NS  = 0;
+                      else
+                        Pop_Pointer_NS  = Pop_Pointer_CS  + 1'b1;
+                   end
+
+                 1'b0:   // hold
+                   begin
+                      NS              = FULL;
+                      Push_Pointer_NS = Push_Pointer_CS;
+                      Pop_Pointer_NS  = Pop_Pointer_CS;
+                   end
+               endcase
+
+            end // end of FULL
+
+          default :   // unused state encoding: go back to EMPTY with cleared pointers
+            begin
+               gate_clock      = 1'b1;
+               grant_o         = 1'b0;
+               valid_o         = 1'b0;
+               NS              = EMPTY;
+               Pop_Pointer_NS  = 0;
+               Push_Pointer_NS = 0;
+            end
+
+        endcase
+     end
+   // NOTE(review): this write ignores clear_i, so a push granted in a clear_i cycle is stored but dropped by the pointer reset.
+   always_ff @(posedge clk_gated, negedge rst_n)
+     begin
+        if(rst_n == 1'b0)
+          begin
+             for (i=0; i< DATA_DEPTH; i++)
+               FIFO_REGISTERS[i] <= {DATA_WIDTH {1'b0}};
+          end
+        else
+          begin
+             if((grant_o == 1'b1) && (valid_i == 1'b1))
+               FIFO_REGISTERS[Push_Pointer_CS] <= data_i;
+          end
+     end
+   // Read side is combinational: data_o always shows the entry at the pop pointer.
+   assign data_o = FIFO_REGISTERS[Pop_Pointer_CS];
+
+endmodule // generic_fifo_adv
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/prioarbiter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/prioarbiter.sv
new file mode 100644
index 0000000000..730ceca4bf
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/prioarbiter.sv
@@ -0,0 +1,89 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
+// Date: 16.03.2019
+// Description: Priority arbiter with Lock in. Port 0 has priority over port 1, port 1 over port2
+//              and so on. If the `LOCK_IN` feature is activated the arbitration decision is kept
+//              when the `en_i` is low.
+
+// Dependencies: relies on fast leading zero counter tree "onehot_to_bin" in common_cells
+module prioarbiter #(
+  parameter int unsigned NUM_REQ = 13,  // number of request/acknowledge lines
+  parameter int unsigned LOCK_IN = 0    // non-zero selects the gen_lock_in branch below
+) (
+  input logic                         clk_i,
+  input logic                         rst_ni,  // asynchronous reset, active low
+
+  input logic                         flush_i, // clears the fsm and control signal registers
+  input logic                         en_i,    // arbiter enable
+  input logic [NUM_REQ-1:0]           req_i,   // request signals
+
+  output logic [NUM_REQ-1:0]          ack_o,   // acknowledge signals
+  output logic                        vld_o,   // request ack'ed
+  output logic [$clog2(NUM_REQ)-1:0]  idx_o    // idx output
+);
+
+  localparam SEL_WIDTH = $clog2(NUM_REQ);  // width of the binary index
+
+  logic [SEL_WIDTH-1:0] arb_sel_lock_d, arb_sel_lock_q;  // stored index (next / registered)
+  logic lock_d, lock_q;                                  // lock flag (next / registered)
+
+  logic [$clog2(NUM_REQ)-1:0] idx;  // binary form of ack_o, produced by onehot_to_bin
+
+  // shared
+  assign vld_o = (|req_i) & en_i;
+  assign idx_o  = (lock_q) ? arb_sel_lock_q : idx;  // registered index while locked, live index otherwise
+
+  // Arbiter
+  // Port 0 has priority over all other ports
+  assign ack_o[0] = (req_i[0]) ? en_i : 1'b0;
+  // Port i is acknowledged only when it requests and no lower-numbered port was acknowledged.
+  for (genvar i = 1; i < NUM_REQ; i++) begin : gen_arb_req_ports
+      // for every subsequent port check the priorities of the previous port
+      assign ack_o[i] = (req_i[i] & ~(|ack_o[i-1:0])) ? en_i : 1'b0;
+  end
+  // Convert the acknowledge vector into a binary index.
+  onehot_to_bin #(
+    .ONEHOT_WIDTH ( NUM_REQ )
+  ) i_onehot_to_bin (
+    .onehot ( ack_o ),
+    .bin    ( idx   )
+  );
+  // NOTE(review): with en_i low every ack_o bit is 0, so the idx captured below comes from an all-zero onehot input - confirm.
+  if (LOCK_IN) begin : gen_lock_in
+    // latch decision in case we got at least one req and no acknowledge
+    assign lock_d         = (|req_i) & ~en_i;
+    assign arb_sel_lock_d = idx_o;
+  end else begin
+    // disable
+    assign lock_d         = '0;
+    assign arb_sel_lock_d = '0;
+  end
+  // Lock registers: cleared by reset and by flush_i, otherwise loaded every cycle.
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      lock_q         <= 1'b0;
+      arb_sel_lock_q <= '0;
+    end else begin
+      if (flush_i) begin
+        lock_q         <= 1'b0;
+        arb_sel_lock_q <= '0;
+      end else begin
+        lock_q         <= lock_d;
+        arb_sel_lock_q <= arb_sel_lock_d;
+      end
+    end
+  end
+
+endmodule : prioarbiter
+
+
+
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync.sv
new file mode 100644
index 0000000000..2b436163e5
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync.sv
@@ -0,0 +1,36 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module pulp_sync #(
+    parameter STAGES = 2    // number of flip-flops in the chain
+) (
+    input  logic clk_i,     // clock of the chain
+    input  logic rstn_i,    // asynchronous reset, active low
+    input  logic serial_i,  // bit entering the chain
+    output logic serial_o   // bit leaving the chain, taken from the last stage
+);
+
+    // Chain of STAGES flip-flops; index 0 is loaded from serial_i and the
+    // highest index drives serial_o.
+    logic [STAGES-1:0] chain_q;
+
+    assign serial_o = chain_q[STAGES-1];
+
+    // Each clock edge moves every bit one position up and takes in serial_i.
+    always_ff @(posedge clk_i or negedge rstn_i) begin
+        if (~rstn_i) begin
+            chain_q <= '0;
+        end else begin
+            chain_q <= {chain_q[STAGES-2:0], serial_i};
+        end
+    end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync_wedge.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync_wedge.sv
new file mode 100644
index 0000000000..66cee57d2c
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/pulp_sync_wedge.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module pulp_sync_wedge #(
+    parameter int unsigned STAGES = 2  // forwarded to the pulp_sync instance
+) (
+    input  logic clk_i,
+    input  logic rstn_i,    // asynchronous reset, active low
+    input  logic en_i,      // enable input of the clock gate that clocks the history flop
+    input  logic serial_i,  // input bit, passed through pulp_sync first
+    output logic r_edge_o,  // synchronized bit is 1 while the history flop holds 0
+    output logic f_edge_o,  // synchronized bit is 0 while the history flop holds 1
+    output logic serial_o   // content of the history flop
+);
+    logic clk_gated;  // clk_o of the clock-gating cell
+    logic sync_d;     // output of the synchronizer
+    logic sync_q;     // sync_d as captured on the previous clk_gated edge
+
+    // Synchronizer chain for the incoming bit.
+    pulp_sync #(
+        .STAGES ( STAGES )
+    ) i_pulp_sync (
+        .clk_i    ( clk_i    ),
+        .rstn_i   ( rstn_i   ),
+        .serial_i ( serial_i ),
+        .serial_o ( sync_d   )
+    );
+
+    // Clock for the history flop comes from pulp_clock_gating, controlled by en_i.
+    pulp_clock_gating i_pulp_clock_gating (
+        .clk_i     ( clk_i     ),
+        .en_i      ( en_i      ),
+        .test_en_i ( 1'b0      ),
+        .clk_o     ( clk_gated )
+    );
+
+    always_ff @(posedge clk_gated or negedge rstn_i) begin
+        if (~rstn_i) sync_q <= 1'b0;
+        else         sync_q <= sync_d;
+    end
+
+    // Edge flags compare the synchronized bit with its stored previous value.
+    assign r_edge_o = sync_d & ~sync_q;
+    assign f_edge_o = sync_q & ~sync_d;
+    assign serial_o = sync_q;
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/rrarbiter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/rrarbiter.sv
new file mode 100644
index 0000000000..bf806c5e42
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/deprecated/rrarbiter.sv
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 16.08.2018
+// Description: Fair round robin arbiter with lock feature.
+//
+// The rrarbiter employs fair round robin arbitration - i.e. the priorities
+// rotate each cycle.
+//
+// The lock-in feature prevents the arbiter from changing the arbitration
+// decision when the arbiter is disabled. I.e., the index of the first request
+// that wins the arbitration will be locked until en_i is asserted again.
+//
+// Dependencies: relies on rr_arb_tree from common_cells.
+
+module rrarbiter #(
+  parameter int unsigned NUM_REQ   = 64,    // number of request lines
+  parameter bit          LOCK_IN   = 1'b0   // forwarded to rr_arb_tree as LockIn
+) (
+  input logic                         clk_i,
+  input logic                         rst_ni,
+
+  input logic                         flush_i, // forwarded to the flush_i port of rr_arb_tree
+  input logic                         en_i,    // arbiter enable
+  input logic [NUM_REQ-1:0]           req_i,   // one request bit per port
+
+  output logic [NUM_REQ-1:0]          ack_o,   // gnt_o vector of the tree
+  output logic                        vld_o,   // enabled and at least one request pending
+  output logic [$clog2(NUM_REQ)-1:0]  idx_o    // idx_o of the tree
+);
+
+  // req_o of the tree, looped back (ANDed with en_i) into its gnt_i input.
+  logic tree_req;
+
+  rr_arb_tree #(
+    .NumIn     ( NUM_REQ ),
+    .DataWidth ( 1       ),
+    .LockIn    ( LOCK_IN )
+  ) i_rr_arb_tree (
+    .clk_i   ( clk_i           ),
+    .rst_ni  ( rst_ni          ),
+    .flush_i ( flush_i         ),
+    .rr_i    ( '0              ),
+    .req_i   ( req_i           ),
+    .gnt_o   ( ack_o           ),
+    .data_i  ( '0              ),  // data inputs tied to zero
+    .data_o  (                 ),  // data output left unconnected
+    .req_o   ( tree_req        ),
+    .gnt_i   ( tree_req & en_i ),
+    .idx_o   ( idx_o           )
+  );
+  assign vld_o = en_i & (|req_i);
+endmodule : rrarbiter
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/edge_detect.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_detect.sv
new file mode 100644
index 0000000000..7fe89a7bf4
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_detect.sv
@@ -0,0 +1,32 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Edge detector, clock needs to oversample for proper edge detection
+
+module edge_detect (
+    input  logic clk_i,   // clock, forwarded to sync_wedge
+    input  logic rst_ni,  // asynchronous reset, active low
+    input  logic d_i,     // monitored data stream
+    output logic re_o,    // rising-edge flag taken from sync_wedge
+    output logic fe_o     // falling-edge flag taken from sync_wedge
+);
+    // Pure wrapper: a sync_wedge instance with its enable tied high.
+    sync_wedge i_sync_wedge (
+        .serial_i ( d_i    ),
+        .en_i     ( 1'b1   ),
+        .r_edge_o ( re_o   ),
+        .f_edge_o ( fe_o   ),
+        .serial_o (        ),  // serial output is not used here
+        .clk_i    ( clk_i  ),
+        .rst_ni   ( rst_ni )
+    );
+
+endmodule
\ No newline at end of file
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator.sv
new file mode 100644
index 0000000000..2e27283111
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator.sv
@@ -0,0 +1,50 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module edge_propagator (
+    input  logic clk_tx_i,   // transmit-side clock
+    input  logic rstn_tx_i,  // transmit-side reset, asynchronous, active low
+    input  logic edge_i,     // event request, sampled on clk_tx_i
+    input  logic clk_rx_i,   // receive-side clock, forwarded to the synchronizer instance
+    input  logic rstn_rx_i,  // receive-side reset, forwarded to the synchronizer instance
+    output logic edge_o      // r_edge_o of the receive-side synchronizer instance
+);
+
+    // Request flop: set by edge_i and held until the acknowledge has reached sync_a[0].
+    logic       r_input_reg;
+    // Two-stage shift register that samples sync_b on clk_tx_i (enters at [1], leaves at [0]).
+    logic [1:0] sync_a;
+    // serial_o of the receive-side synchronizer instance, used as the acknowledge.
+    logic       sync_b;
+
+    always_ff @(posedge clk_tx_i or negedge rstn_tx_i) begin : p_tx_regs
+        if (!rstn_tx_i) begin
+            sync_a      <= '0;
+            r_input_reg <= 1'b0;
+        end else begin
+            sync_a[1]   <= sync_b;
+            sync_a[0]   <= sync_a[1];
+            r_input_reg <= edge_i || (r_input_reg && !sync_a[0]);  // set wins over clear
+        end
+    end
+
+    pulp_sync_wedge i_sync_clkb (
+        .clk_i    ( clk_rx_i    ),
+        .rstn_i   ( rstn_rx_i   ),
+        .en_i     ( 1'b1        ),
+        .serial_i ( r_input_reg ),
+        .serial_o ( sync_b      ),
+        .r_edge_o ( edge_o      ),
+        .f_edge_o (             )
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_rx.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_rx.sv
new file mode 100644
index 0000000000..89532cc27c
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_rx.sv
@@ -0,0 +1,31 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module edge_propagator_rx (
+    input  logic clk_i,    // clock of this (receiving) side
+    input  logic rstn_i,   // reset, active low by its name; handed to the instance unchanged
+    input  logic valid_i,  // incoming request level, fed to serial_i of the instance
+    output logic ack_o,    // serial_o of the instance, intended to be returned to the sender
+    output logic valid_o   // r_edge_o of the instance
+);
+    // Pure wrapper: no logic of its own. NOTE(review): pulp_sync_wedge is defined elsewhere.
+    pulp_sync_wedge i_sync_clkb (
+        .clk_i    ( clk_i   ),
+        .rstn_i   ( rstn_i  ),
+        .en_i     ( 1'b1    ),  // permanently enabled
+        .serial_i ( valid_i ),
+        .r_edge_o ( valid_o ),
+        .f_edge_o (         ),  // deliberately left unconnected
+        .serial_o ( ack_o   )
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_tx.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_tx.sv
new file mode 100644
index 0000000000..0274a43333
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/edge_propagator_tx.sv
@@ -0,0 +1,40 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module edge_propagator_tx (
+    input  logic clk_i,    // clock
+    input  logic rstn_i,   // asynchronous reset, active low
+    input  logic valid_i,  // sets the pending request
+    input  logic ack_i,    // acknowledge, passed through two flops before it is used
+    output logic valid_o   // pending request (registered)
+);
+
+    // Pending-request flop, driven out on valid_o.
+    logic       r_input_reg;
+    // ack_i enters at sync_a[1] and reaches sync_a[0] one clk_i cycle later.
+    logic [1:0] sync_a;
+
+    assign valid_o = r_input_reg;
+
+    always_ff @(posedge clk_i or negedge rstn_i) begin : p_tx_regs
+        if (!rstn_i) begin
+            sync_a      <= '0;
+            r_input_reg <= 1'b0;
+        end else begin
+            sync_a[1]   <= ack_i;
+            sync_a[0]   <= sync_a[1];
+            // valid_i (re)arms the request; otherwise it is held until sync_a[0] is high.
+            r_input_reg <= valid_i || (r_input_reg && !sync_a[0]);
+        end
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/exp_backoff.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/exp_backoff.sv
new file mode 100644
index 0000000000..fe63798cc4
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/exp_backoff.sv
@@ -0,0 +1,94 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 10.04.2019
+// Description: exponential backoff counter with randomization.
+//
+// For each failed trial (set_i pulsed), this unit exponentially increases the
+// (average) backoff time by masking an LFSR with a shifted mask in order to
+// create the backoff counter initial value.
+//
+// The shift register mask and the counter value are both reset to '0 in case of
+// a successful trial (clr_i).
+//
+
+module exp_backoff #(
+  parameter int unsigned Seed   = 'hffff, // seed for 16bit lfsr (its low 16 bits must not all be zero)
+  parameter int unsigned MaxExp = 16      // 2**MaxExp-1 determines the maximum range from which random wait counts are drawn, [1,16]
+) (
+  input  logic clk_i,
+  input  logic rst_ni,
+  //
+  input  logic set_i,     // sets the backoff counter (pulse) -> use when trial did not succeed
+  input  logic clr_i,     // clears the backoff counter (pulse) -> use when trial succeeded
+  output logic is_zero_o  // indicates whether the backoff counter is equal to zero and a new trial can be launched
+);
+
+  // leave this constant (the lfsr taps below are written for 16 bits)
+  localparam int unsigned      WIDTH    = 16;
+  // widest mask: MaxExp low bits set; stays legal for MaxExp==1, unlike a [MaxExp-2:0] part-select
+  localparam logic [WIDTH-1:0] MAX_MASK = WIDTH'((32'd1 << MaxExp) - 32'd1);
+  logic [WIDTH-1:0] lfsr_d, lfsr_q, cnt_d, cnt_q, mask_d, mask_q;
+  logic lfsr;
+
+  // generate random wait counts
+  // note: we use a flipped (right-shifting) lfsr here to avoid strange
+  // correlation effects between the (left-shifted) mask and the lfsr
+  assign lfsr = lfsr_q[15-15] ^
+                lfsr_q[15-13] ^
+                lfsr_q[15-12] ^
+                lfsr_q[15-10];
+
+  // the lfsr only advances when a backoff is requested
+  assign lfsr_d = (set_i) ? {lfsr, lfsr_q[$high(lfsr_q):1]} : lfsr_q;
+
+  // mask the wait counts with exponentially increasing mask (shift reg, saturates at MAX_MASK)
+  assign mask_d = (clr_i) ? '0                                   :
+                  (set_i) ? {mask_q[WIDTH-2:0], 1'b1} & MAX_MASK :
+                            mask_q;
+
+  assign cnt_d =  (clr_i)      ? '0                :
+                  (set_i)      ? (mask_q & lfsr_q) :
+                  (!is_zero_o) ? cnt_q - 1'b1      : '0;
+
+  assign is_zero_o = (cnt_q=='0);
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (!rst_ni) begin
+      lfsr_q <= WIDTH'(Seed);
+      mask_q <= '0;
+      cnt_q  <= '0;
+    end else begin
+      lfsr_q <= lfsr_d;
+      mask_q <= mask_d;
+      cnt_q  <= cnt_d;
+    end
+  end
+
+///////////////////////////////////////////////////////
+// assertions
+///////////////////////////////////////////////////////
+
+//pragma translate_off
+`ifndef VERILATOR
+  initial begin
+    // assert wrong parameterizations (the seed check looks at the bits that are actually loaded)
+    assert (MaxExp>0)
+      else $fatal(1,"MaxExp must be greater than 0");
+    assert (MaxExp<=16)
+      else $fatal(1,"MaxExp cannot be greater than 16");
+    assert (WIDTH'(Seed) != '0)
+      else $fatal(1,"Zero seed is not allowed for LFSR");
+  end
+`endif
+//pragma translate_on
+
+endmodule // exp_backoff
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/fall_through_register.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/fall_through_register.sv
new file mode 100644
index 0000000000..fcbbe31dbc
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/fall_through_register.sv
@@ -0,0 +1,58 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Fall-through register with a simple stream-like ready/valid handshake.
+// This register does not cut combinatorial paths on any signals: in case the module at its output
+// is ready to accept data within the same clock cycle, they are forwarded. Use this module to get a
+// 'default ready' behavior towards the input.
+module fall_through_register #(
+    parameter type T = logic  // Payload type. Vivado requires a default value for type parameters.
+) (
+    input  logic    clk_i,          // Clock
+    input  logic    rst_ni,         // Asynchronous active-low reset
+    input  logic    clr_i,          // Synchronous clear, wired to flush_i of the FIFO
+    input  logic    testmode_i,     // Test mode to bypass clock gating, forwarded to the FIFO
+    // Input port
+    input  logic    valid_i,
+    output logic    ready_o,        // high whenever the FIFO is not full
+    input  T        data_i,
+    // Output port
+    output logic    valid_o,        // high whenever the FIFO does not report empty
+    input  logic    ready_i,
+    output T        data_o
+);
+    // Status flags of the one-entry FIFO below; the handshake signals are derived from them.
+    logic   fifo_empty,
+            fifo_full;
+    // NOTE(review): fifo_v2 is defined elsewhere; its fall-through behaviour is assumed, not shown.
+    fifo_v2 #(
+        .FALL_THROUGH   (1'b1),
+        .DATA_WIDTH     ($size(T)),  // NOTE(review): presumably unused since dtype is set - confirm
+        .DEPTH          (1),
+        .dtype          (T)
+    ) i_fifo (
+        .clk_i          (clk_i),
+        .rst_ni         (rst_ni),
+        .flush_i        (clr_i),
+        .testmode_i     (testmode_i),
+        .full_o         (fifo_full),
+        .empty_o        (fifo_empty),
+        .alm_full_o     ( ),         // almost-full/-empty flags are not used here
+        .alm_empty_o    ( ),
+        .data_i         (data_i),
+        .push_i         (valid_i & ~fifo_full),   // never push into a full FIFO
+        .data_o         (data_o),
+        .pop_i          (ready_i & ~fifo_empty)   // never pop while the FIFO reports empty
+    );
+
+    assign ready_o = ~fifo_full;
+    assign valid_o = ~fifo_empty;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/fifo_v3.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/fifo_v3.sv
new file mode 100644
index 0000000000..8c487730a4
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/fifo_v3.sv
@@ -0,0 +1,153 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+module fifo_v3 #(
+    parameter bit          FALL_THROUGH = 1'b0, // 1: a push into an empty fifo shows up on data_o in the same cycle
+    parameter int unsigned DATA_WIDTH   = 32,   // default data width if the fifo is of type logic
+    parameter int unsigned DEPTH        = 8,    // number of entries; 0 selects the pass-through branches, but the check at the bottom demands > 0
+    parameter type dtype                = logic [DATA_WIDTH-1:0],
+    // DO NOT OVERWRITE THIS PARAMETER (pointer width derived from DEPTH, at least one bit)
+    parameter int unsigned ADDR_DEPTH   = (DEPTH > 1) ? $clog2(DEPTH) : 1
+)(
+    input  logic  clk_i,            // Clock
+    input  logic  rst_ni,           // Asynchronous reset active low
+    input  logic  flush_i,          // flush the queue (synchronous; pointers and counter only)
+    input  logic  testmode_i,       // test_mode to bypass clock gating (not referenced in this module)
+    // status flags
+    output logic  full_o,           // queue is full
+    output logic  empty_o,          // queue is empty
+    output logic  [ADDR_DEPTH-1:0] usage_o,  // fill counter without its MSB
+    // as long as the queue is not full we can push new data
+    input  dtype  data_i,           // data to push into the queue
+    input  logic  push_i,           // data is valid and can be pushed to the queue
+    // as long as the queue is not empty we can pop new elements
+    output dtype  data_o,           // output data
+    input  logic  pop_i             // pop head from queue
+);
+    // local parameter
+    // FIFO depth - handle the case of pass-through, synthesizer will do constant propagation
+    localparam int unsigned FIFO_DEPTH = (DEPTH > 0) ? DEPTH : 1;
+    // write-enable control for the memory: 1 keeps mem_q unchanged, 0 lets mem_n be stored
+    logic gate_clock;
+    // pointer to the read and write section of the queue
+    logic [ADDR_DEPTH - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q;
+    // keep a counter to keep track of the current queue status
+    logic [ADDR_DEPTH:0] status_cnt_n, status_cnt_q; // one bit wider than the pointers so that the value FIFO_DEPTH fits
+    // actual memory
+    dtype [FIFO_DEPTH - 1:0] mem_n, mem_q;
+
+    assign usage_o = status_cnt_q[ADDR_DEPTH-1:0];
+    // status flags
+    if (DEPTH == 0) begin
+        assign empty_o     = ~push_i;
+        assign full_o      = ~pop_i;
+    end else begin
+        assign full_o       = (status_cnt_q == FIFO_DEPTH[ADDR_DEPTH:0]);
+        assign empty_o      = (status_cnt_q == 0) & ~(FALL_THROUGH & push_i);
+    end
+
+    // read and write queue logic: computes all next-state values and the data_o mux.
+    // Later statements override earlier ones, so the order of the if-blocks below matters.
+    always_comb begin : read_write_comb
+        // default assignment: hold state, read the head entry, keep the memory unchanged
+        read_pointer_n  = read_pointer_q;
+        write_pointer_n = write_pointer_q;
+        status_cnt_n    = status_cnt_q;
+        data_o          = (DEPTH == 0) ? data_i : mem_q[read_pointer_q];
+        mem_n           = mem_q;
+        gate_clock      = 1'b1;
+
+        // push a new element to the queue
+        if (push_i && ~full_o) begin
+            // push the data onto the queue
+            mem_n[write_pointer_q] = data_i;
+            // un-gate the clock, we want to write something
+            gate_clock = 1'b0;
+            // increment the write pointer, wrapping after the last entry
+            if (write_pointer_q == FIFO_DEPTH[ADDR_DEPTH-1:0] - 1)
+                write_pointer_n = '0;
+            else
+                write_pointer_n = write_pointer_q + 1;
+            // increment the overall counter
+            status_cnt_n    = status_cnt_q + 1;
+        end
+
+        if (pop_i && ~empty_o) begin
+            // read from the queue is a default assignment
+            // but increment the read pointer (read_pointer_n still equals read_pointer_q here)
+            if (read_pointer_n == FIFO_DEPTH[ADDR_DEPTH-1:0] - 1)
+                read_pointer_n = '0;
+            else
+                read_pointer_n = read_pointer_q + 1;
+            // ... and decrement the overall count
+            status_cnt_n   = status_cnt_q - 1;
+        end
+
+        // keep the count pointer stable if we push and pop at the same time
+        if (push_i && pop_i &&  ~full_o && ~empty_o)
+            status_cnt_n   = status_cnt_q;
+
+        // FIFO is in pass through mode -> forward data_i; if it is popped at once, undo the pointer updates
+        if (FALL_THROUGH && (status_cnt_q == 0) && push_i) begin
+            data_o = data_i;
+            if (pop_i) begin
+                status_cnt_n = status_cnt_q;
+                read_pointer_n = read_pointer_q;
+                write_pointer_n = write_pointer_q;
+            end
+        end
+    end
+
+    // sequential process: pointers and fill counter (flush_i has priority over the next-state values)
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if(~rst_ni) begin
+            read_pointer_q  <= '0;
+            write_pointer_q <= '0;
+            status_cnt_q    <= '0;
+        end else begin
+            if (flush_i) begin
+                read_pointer_q  <= '0;
+                write_pointer_q <= '0;
+                status_cnt_q    <= '0;
+             end else begin
+                read_pointer_q  <= read_pointer_n;
+                write_pointer_q <= write_pointer_n;
+                status_cnt_q    <= status_cnt_n;
+            end
+        end
+    end
+    // memory: only written while gate_clock is low; flush_i does not touch the stored data
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if(~rst_ni) begin
+            mem_q <= '0;
+        end else if (!gate_clock) begin
+            mem_q <= mem_n;
+        end
+    end
+
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin
+        assert (DEPTH > 0)             else $error("DEPTH must be greater than 0.");
+    end
+    // a push while full_o is high is treated as a fatal protocol violation
+    full_write : assert property(
+        @(posedge clk_i) disable iff (~rst_ni) (full_o |-> ~push_i))
+        else $fatal (1, "Trying to push new data although the FIFO is full.");
+    // a pop while empty_o is high is treated as a fatal protocol violation
+    empty_read : assert property(
+        @(posedge clk_i) disable iff (~rst_ni) (empty_o |-> ~pop_i))
+        else $fatal (1, "Trying to pop data although the FIFO is empty.");
+`endif
+// pragma translate_on
+
+endmodule // fifo_v3
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/graycode.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/graycode.sv
new file mode 100644
index 0000000000..b9ef43a7b0
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/graycode.sv
@@ -0,0 +1,33 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+/// A binary to gray code converter.
+module binary_to_gray #(parameter int N = -1) (
+    input  logic [N-1:0] A,  // binary-coded input word
+    output logic [N-1:0] Z   // Gray-coded output word
+);
+    // Every Gray bit is a binary bit XORed with its upper neighbour; the MSB passes through.
+    wire [N-1:0] a_upper = A >> 1;
+    assign Z = a_upper ^ A;
+endmodule
+
+/// A gray code to binary converter.
+module gray_to_binary #(parameter int N = -1) (
+    input  logic [N-1:0] A,  // Gray-coded input word
+    output logic [N-1:0] Z   // binary-coded output word
+);
+    // Binary bit i is the parity of Gray bits N-1 down to i (the shift drops the bits below i).
+    always_comb begin
+        for (int i = 0; i < N; i++) Z[i] = ^(A >> i);
+    end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/id_queue.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/id_queue.sv
new file mode 100644
index 0000000000..c432af4b94
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/id_queue.sv
@@ -0,0 +1,268 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// ID Queue
+//
+// In an ID queue, every element has a numeric ID. Among all elements that have the same ID, the ID
+// queue preserves FIFO order.
+//
+// This ID queue implementation allows to either push (through the `inp_*` signals) or pop (through
+// the `oup_*` signals) one element per clock cycle. The `inp_` port has priority and grants a
+// request iff the queue is not full. The `oup_` port dequeues an element iff `oup_pop_i` is
+// asserted during an `oup_` handshake; otherwise, it performs a non-destructive read. `oup_data_o`
+// is valid iff `oup_data_valid_o` is asserted during an `oup_` handshake. If `oup_data_valid_o` is
+// not asserted, the queue did not contain an element with the provided ID.
+//
+// This ID queue additionally provides the `exists_` port, which searches for an element anywhere in
+// the queue. The comparison performed during the search can be masked: for every bit that is
+// asserted in `exists_mask_i`, the corresponding bit in the queue element and in `exists_data_i`
+// must be equal for a match; the other bits are not compared. If masking is not required, tie
+// `exists_mask_i` to `'1` and the synthesizer should simplify the comparisons to unmasked ones. The
+// `exists_` port operates independently of the `inp_` and `oup_` ports. If the `exists_` port is
+// unused, tie `exists_req_i` to `1'b0` and the synthesizer should remove the internal comparators.
+//
+// This ID queue can store at most `CAPACITY` elements, independent of their ID. Let
+// - C = `CAPACITY`
+// - B = $bits(data_t)
+// - I = 2**`ID_WIDTH`
+// Then
+// - the queue element storage requires O(C * (B + log2(C))) bit
+// - the ID table requires O(H * log2(C)) bit, where H = min(C, I)
+//
+// Maintainers:
+// - Andreas Kurth <akurth@iis.ee.ethz.ch>
+
+module id_queue #(
+    parameter int ID_WIDTH  = 0,        // width of an ID in bits, must be >= 1 (checked below)
+    parameter int CAPACITY  = 0,        // number of storable elements, must be >= 1 (checked below)
+    parameter type data_t   = logic,    // type of a stored element
+    // Dependent parameters, DO NOT OVERRIDE!
+    localparam type id_t    = logic[ID_WIDTH-1:0],
+    localparam type mask_t  = logic[$bits(data_t)-1:0]
+) (
+    input  logic    clk_i,
+    input  logic    rst_ni,             // asynchronous reset, active low
+
+    input  id_t     inp_id_i,
+    input  data_t   inp_data_i,
+    input  logic    inp_req_i,
+    output logic    inp_gnt_o,          // high whenever the queue is not full
+
+    input  data_t   exists_data_i,
+    input  mask_t   exists_mask_i,      // bits set to 1 take part in the comparison
+    input  logic    exists_req_i,
+    output logic    exists_o,           // only driven to a known value while exists_req_i is high
+    output logic    exists_gnt_o,
+
+    input  id_t     oup_id_i,
+    input  logic    oup_pop_i,          // 1: dequeue the matching element, 0: read it only
+    input  logic    oup_req_i,
+    output data_t   oup_data_o,
+    output logic    oup_data_valid_o,   // set when a served oup request found its ID
+    output logic    oup_gnt_o
+);
+
+    // Capacity of the head-tail table, which associates an ID with corresponding head and tail
+    // indices.
+    localparam int N_IDS = 2**ID_WIDTH;
+    localparam int HT_CAPACITY = (N_IDS <= CAPACITY) ? N_IDS : CAPACITY;
+
+    // Type for indexing the head-tail table.
+    typedef logic [$clog2(HT_CAPACITY)-1:0] ht_idx_t;
+
+    // Type for indexing the linked data table.
+    typedef logic [$clog2(CAPACITY)-1:0] ld_idx_t;
+
+    // Type of an entry in the head-tail table.
+    typedef struct packed {
+        id_t        id;
+        ld_idx_t    head,
+                    tail;
+        logic       free;
+    } head_tail_t;
+
+    // Type of an entry in the linked data table.
+    typedef struct packed {
+        data_t      data;
+        ld_idx_t    next;
+        logic       free;
+    } linked_data_t;
+
+    head_tail_t [HT_CAPACITY-1:0]   head_tail_d,    head_tail_q;
+    linked_data_t [CAPACITY-1:0]    linked_data_d,  linked_data_q;
+
+    logic                           full, match_id_valid, no_id_match;
+    logic [HT_CAPACITY-1:0]         head_tail_free, idx_matches_id;
+    logic [CAPACITY-1:0]            exists_match, linked_data_free;
+    id_t                            match_id;
+    ht_idx_t                        head_tail_free_idx, match_idx;
+    ld_idx_t                        linked_data_free_idx;
+
+    // Find the index in the head-tail table that matches a given ID.
+    for (genvar i = 0; i < HT_CAPACITY; i++) begin: gen_idx_match
+        assign idx_matches_id[i] = match_id_valid && (head_tail_q[i].id == match_id) &&
+                !head_tail_q[i].free;
+    end
+    assign no_id_match = !(|idx_matches_id);
+    onehot_to_bin #(
+        .ONEHOT_WIDTH (HT_CAPACITY)
+    ) i_id_ohb (
+        .onehot (idx_matches_id),
+        .bin    (match_idx)
+    );
+
+    // Find the first free index in the head-tail table.
+    for (genvar i = 0; i < HT_CAPACITY; i++) begin: gen_head_tail_free
+        assign head_tail_free[i] = head_tail_q[i].free;
+    end
+    lzc #(
+        .WIDTH  (HT_CAPACITY),
+        .MODE   (0)         // Start at index 0.
+    ) i_ht_free_lzc (
+        .in_i       (head_tail_free),
+        .cnt_o      (head_tail_free_idx),
+        .empty_o    ()
+    );
+
+    // Find the first free index in the linked data table.
+    for (genvar i = 0; i < CAPACITY; i++) begin: gen_linked_data_free
+        assign linked_data_free[i] = linked_data_q[i].free;
+    end
+    lzc #(
+        .WIDTH  (CAPACITY),
+        .MODE   (0)         // Start at index 0.
+    ) i_ld_free_lzc (
+        .in_i       (linked_data_free),
+        .cnt_o      (linked_data_free_idx),
+        .empty_o    ()
+    );
+
+    // The queue is full if and only if there are no free items in the linked data structure.
+    assign full = !(|linked_data_free);
+
+    assign inp_gnt_o = ~full;
+    // Push/pop control: an accepted push (inp_req_i && !full) wins over a simultaneous oup request,
+    // which is then not granted in that cycle. Both directions share the ID match logic above.
+    always_comb begin
+        match_id            = 'x;
+        match_id_valid      = 1'b0;
+        head_tail_d         = head_tail_q;
+        linked_data_d       = linked_data_q;
+        oup_gnt_o           = 1'b0;
+        oup_data_o          = data_t'('x);
+        oup_data_valid_o    = 1'b0;
+        if (inp_req_i && !full) begin
+            match_id = inp_id_i;
+            match_id_valid = 1'b1;
+            // If the ID does not yet exist in the queue, add a new ID entry.
+            if (no_id_match) begin
+                head_tail_d[head_tail_free_idx] = '{
+                    id: inp_id_i,
+                    head: linked_data_free_idx,
+                    tail: linked_data_free_idx,
+                    free: 1'b0
+                };
+            // Otherwise append it to the existing ID subqueue.
+            end else begin
+                linked_data_d[head_tail_q[match_idx].tail].next = linked_data_free_idx;
+                head_tail_d[match_idx].tail = linked_data_free_idx;
+            end
+            linked_data_d[linked_data_free_idx] = '{
+                data: inp_data_i,
+                next: 'x,
+                free: 1'b0
+            };
+        end else if (oup_req_i) begin
+            match_id = oup_id_i;
+            match_id_valid = 1'b1;
+            if (!no_id_match) begin
+                oup_data_o = data_t'(linked_data_q[head_tail_q[match_idx].head].data);
+                oup_data_valid_o = 1'b1;
+                if (oup_pop_i) begin
+                    // Set free bit of linked data entry, all other bits are don't care.
+                    linked_data_d[head_tail_q[match_idx].head]      = 'x;
+                    linked_data_d[head_tail_q[match_idx].head][0]   = 1'b1;
+                    if (head_tail_q[match_idx].head == head_tail_q[match_idx].tail) begin
+                        head_tail_d[match_idx] = '{free: 1'b1, default: 'x};
+                    end else begin
+                        head_tail_d[match_idx].head = linked_data_q[head_tail_q[match_idx].head].next;
+                    end
+                end
+            end
+            // Always grant the output request.  If there was no match, the default, invalid entry
+            // will be returned.
+            oup_gnt_o = 1'b1;
+        end
+    end
+
+    // Exists Lookup
+    for (genvar i = 0; i < CAPACITY; i++) begin: gen_lookup
+        mask_t exists_match_bits;
+        for (genvar j = 0; j < $bits(data_t); j++) begin: gen_mask
+            always_comb begin
+                if (linked_data_q[i].free) begin
+                    exists_match_bits[j] = 1'b0;
+                end else begin
+                    if (!exists_mask_i[j]) begin
+                        exists_match_bits[j] = 1'b1;
+                    end else begin
+                        exists_match_bits[j] = (linked_data_q[i].data[j] == exists_data_i[j]);
+                    end
+                end
+            end
+        end
+        assign exists_match[i] = (&exists_match_bits);
+    end
+    always_comb begin
+        exists_gnt_o = 1'b0;
+        exists_o = 'x;
+        if (exists_req_i) begin
+            exists_gnt_o = 1'b1;
+            exists_o = (|exists_match);
+        end
+    end
+
+    // Registers: the head-tail table has HT_CAPACITY entries, which can be fewer than CAPACITY,
+    // so each table gets its own generate loop bounded by its own size.
+    for (genvar i = 0; i < HT_CAPACITY; i++) begin: gen_ht_ffs
+        always_ff @(posedge clk_i, negedge rst_ni) begin
+            if (!rst_ni) begin
+                head_tail_q[i]      <= '{free: 1'b1, default: 'x};
+            end else begin
+                head_tail_q[i]      <= head_tail_d[i];
+            end
+        end
+    end
+    for (genvar i = 0; i < CAPACITY; i++) begin: gen_ld_ffs
+        always_ff @(posedge clk_i, negedge rst_ni) begin
+            if (!rst_ni) begin
+                // Set free bit of linked data entries, all other bits are don't care.
+                linked_data_q[i]    <= 'x;
+                linked_data_q[i][0] <= 1'b1;
+            end else begin
+                linked_data_q[i]    <= linked_data_d[i];
+            end
+        end
+    end
+
+    // Validate parameters.
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: validate_params
+        // $fatal takes the finish number as its first argument; the message comes second.
+        assert (ID_WIDTH >= 1)
+            else $fatal(1, "The ID must at least be one bit wide!");
+        assert (CAPACITY >= 1)
+            else $fatal(1, "The queue must have capacity of at least one entry!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr.sv
new file mode 100644
index 0000000000..0b730d6221
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr.sv
@@ -0,0 +1,310 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 26.04.2019
+//
+// Description: This is a parametric LFSR with precomputed coefficients for
+// LFSR lengths from 4 to 64bit.
+
+// Additional block cipher layers can be instantiated to non-linearly transform
+// the pseudo-random LFSR sequence at the output, and hence break the shifting
+// patterns. The additional cipher layers can only be used for an LFSR width
+// of 64bit, since the block cipher has been designed for that block length.
+
+module lfsr #(
+  parameter int unsigned          LfsrWidth     = 64,   // LFSR state width in bit, [4,64]
+  parameter int unsigned          OutWidth      = 8,    // width of out_o, [1,LfsrWidth]
+  parameter logic [LfsrWidth-1:0] RstVal        = '1,   // state after reset, [1,2^LfsrWidth-1]
+  // number of cipher layers; 0: disabled, the present cipher uses 31, but just a few
+  // layers (1-3) are enough to break linear shifting patterns
+  parameter int unsigned          CipherLayers  = 0,
+  parameter bit                   CipherReg     = 1'b1  // additional output reg after cipher
+) (
+  input  logic                 clk_i,
+  input  logic                 rst_ni, // asynchronous reset, active low
+  input  logic                 en_i,   // advance the LFSR (and the cipher output reg) when high
+  output logic [OutWidth-1:0]  out_o   // low OutWidth bits of the state or of the ciphered state
+);
+
+// Galois LFSR feedback masks, indexed by the LFSR width (masks[w] belongs to a w bit LFSR).
+// Automatically generated with get_lfsr_masks.py
+// Masks are from https://users.ece.cmu.edu/~koopman/lfsr/
+localparam logic [63:0] masks [4:64] = '{64'hC,
+                                         64'h1E,
+                                         64'h39,
+                                         64'h7E,
+                                         64'hFA,
+                                         64'h1FD,
+                                         64'h3FC,
+                                         64'h64B,
+                                         64'hD8F,
+                                         64'h1296,
+                                         64'h2496,
+                                         64'h4357,
+                                         64'h8679,
+                                         64'h1030E,
+                                         64'h206CD,
+                                         64'h403FE,
+                                         64'h807B8,
+                                         64'h1004B2,
+                                         64'h2006A8,
+                                         64'h4004B2,
+                                         64'h800B87,
+                                         64'h10004F3,
+                                         64'h200072D,
+                                         64'h40006AE,
+                                         64'h80009E3,
+                                         64'h10000583,
+                                         64'h20000C92,
+                                         64'h400005B6,
+                                         64'h80000EA6,
+                                         64'h1000007A3,
+                                         64'h200000ABF,
+                                         64'h400000842,
+                                         64'h80000123E,
+                                         64'h100000074E,
+                                         64'h2000000AE9,
+                                         64'h400000086A,
+                                         64'h8000001213,
+                                         64'h1000000077E,
+                                         64'h2000000123B,
+                                         64'h40000000877,
+                                         64'h8000000108D,
+                                         64'h100000000AE9,
+                                         64'h200000000E9F,
+                                         64'h4000000008A6,
+                                         64'h80000000191E,
+                                         64'h100000000090E,
+                                         64'h2000000000FB3,
+                                         64'h4000000000D7D,
+                                         64'h80000000016A5,
+                                         64'h10000000000B4B,
+                                         64'h200000000010AF,
+                                         64'h40000000000DDE,
+                                         64'h8000000000181A,
+                                         64'h100000000000B65,
+                                         64'h20000000000102D,
+                                         64'h400000000000CD5,
+                                         64'h8000000000024C1,
+                                         64'h1000000000000EF6,
+                                         64'h2000000000001363,
+                                         64'h4000000000000FCD,
+                                         64'h80000000000019E2};
+
+// This S-box and permutation P have been taken from the Present Cipher,
+// a super lightweight block cipher. Use the cipher layers to add additional
+// non-linearity to the LFSR output. Note that one layer does not fully correspond
+// to a present cipher round, since the key and rekeying function are not applied here.
+//
+// See also:
+// "PRESENT: An Ultra-Lightweight Block Cipher", A. Bogdanov et al., Ches 2007
+// http://www.lightweightcrypto.org/present/present_ches2007.pdf
+
+// this is the sbox from the present cipher; sbox4[x] is the substitute for the nibble x
+localparam logic[15:0][3:0] sbox4 = {4'h2, 4'h1, 4'h7, 4'h4,
+                                     4'h8, 4'hF, 4'hE, 4'h3,
+                                     4'hD, 4'hA, 4'h0, 4'h9,
+                                     4'hB, 4'h6, 4'h5, 4'hC };
+
+// these are the permutation indices of the present cipher; input bit j moves to bit perm[j]
+localparam logic[63:0][5:0] perm = {6'd63, 6'd47, 6'd31, 6'd15, 6'd62, 6'd46, 6'd30, 6'd14, 6'd61, 6'd45, 6'd29, 6'd13, 6'd60, 6'd44, 6'd28, 6'd12,
+                                    6'd59, 6'd43, 6'd27, 6'd11, 6'd58, 6'd42, 6'd26, 6'd10, 6'd57, 6'd41, 6'd25, 6'd09, 6'd56, 6'd40, 6'd24, 6'd08,
+                                    6'd55, 6'd39, 6'd23, 6'd07, 6'd54, 6'd38, 6'd22, 6'd06, 6'd53, 6'd37, 6'd21, 6'd05, 6'd52, 6'd36, 6'd20, 6'd04,
+                                    6'd51, 6'd35, 6'd19, 6'd03, 6'd50, 6'd34, 6'd18, 6'd02, 6'd49, 6'd33, 6'd17, 6'd01, 6'd48, 6'd32, 6'd16, 6'd00};
+
+// Substitutes each of the 16 nibbles of a 64 bit word through the sbox4 lookup table.
+function automatic logic [63:0] sbox4_layer(logic [63:0] in);
+  logic [63:0] out;
+  //for (logic [4:0] j = '0; j<16; j++) out[j*4 +: 4] = sbox4[in[j*4 +: 4]];
+  // this simulates much faster than the loop
+  out[0*4  +: 4] = sbox4[in[0*4  +: 4]];
+  out[1*4  +: 4] = sbox4[in[1*4  +: 4]];
+  out[2*4  +: 4] = sbox4[in[2*4  +: 4]];
+  out[3*4  +: 4] = sbox4[in[3*4  +: 4]];
+
+  out[4*4  +: 4] = sbox4[in[4*4  +: 4]];
+  out[5*4  +: 4] = sbox4[in[5*4  +: 4]];
+  out[6*4  +: 4] = sbox4[in[6*4  +: 4]];
+  out[7*4  +: 4] = sbox4[in[7*4  +: 4]];
+
+  out[8*4  +: 4] = sbox4[in[8*4  +: 4]];
+  out[9*4  +: 4] = sbox4[in[9*4  +: 4]];
+  out[10*4 +: 4] = sbox4[in[10*4 +: 4]];
+  out[11*4 +: 4] = sbox4[in[11*4 +: 4]];
+
+  out[12*4 +: 4] = sbox4[in[12*4 +: 4]];
+  out[13*4 +: 4] = sbox4[in[13*4 +: 4]];
+  out[14*4 +: 4] = sbox4[in[14*4 +: 4]];
+  out[15*4 +: 4] = sbox4[in[15*4 +: 4]];
+  return out;
+endfunction : sbox4_layer
+// Reorders the 64 bits of a word: input bit j is written to output bit perm[j].
+function automatic logic [63:0] perm_layer(logic [63:0] in);
+  logic [63:0] out;
+  // for (logic [7:0] j = '0; j<64; j++) out[perm[j]] = in[j];
+  // this simulates much faster than the loop
+  out[perm[0]] = in[0];
+  out[perm[1]] = in[1];
+  out[perm[2]] = in[2];
+  out[perm[3]] = in[3];
+  out[perm[4]] = in[4];
+  out[perm[5]] = in[5];
+  out[perm[6]] = in[6];
+  out[perm[7]] = in[7];
+  out[perm[8]] = in[8];
+  out[perm[9]] = in[9];
+
+  out[perm[10]] = in[10];
+  out[perm[11]] = in[11];
+  out[perm[12]] = in[12];
+  out[perm[13]] = in[13];
+  out[perm[14]] = in[14];
+  out[perm[15]] = in[15];
+  out[perm[16]] = in[16];
+  out[perm[17]] = in[17];
+  out[perm[18]] = in[18];
+  out[perm[19]] = in[19];
+
+  out[perm[20]] = in[20];
+  out[perm[21]] = in[21];
+  out[perm[22]] = in[22];
+  out[perm[23]] = in[23];
+  out[perm[24]] = in[24];
+  out[perm[25]] = in[25];
+  out[perm[26]] = in[26];
+  out[perm[27]] = in[27];
+  out[perm[28]] = in[28];
+  out[perm[29]] = in[29];
+
+  out[perm[30]] = in[30];
+  out[perm[31]] = in[31];
+  out[perm[32]] = in[32];
+  out[perm[33]] = in[33];
+  out[perm[34]] = in[34];
+  out[perm[35]] = in[35];
+  out[perm[36]] = in[36];
+  out[perm[37]] = in[37];
+  out[perm[38]] = in[38];
+  out[perm[39]] = in[39];
+
+  out[perm[40]] = in[40];
+  out[perm[41]] = in[41];
+  out[perm[42]] = in[42];
+  out[perm[43]] = in[43];
+  out[perm[44]] = in[44];
+  out[perm[45]] = in[45];
+  out[perm[46]] = in[46];
+  out[perm[47]] = in[47];
+  out[perm[48]] = in[48];
+  out[perm[49]] = in[49];
+
+  out[perm[50]] = in[50];
+  out[perm[51]] = in[51];
+  out[perm[52]] = in[52];
+  out[perm[53]] = in[53];
+  out[perm[54]] = in[54];
+  out[perm[55]] = in[55];
+  out[perm[56]] = in[56];
+  out[perm[57]] = in[57];
+  out[perm[58]] = in[58];
+  out[perm[59]] = in[59];
+
+  out[perm[60]] = in[60];
+  out[perm[61]] = in[61];
+  out[perm[62]] = in[62];
+  out[perm[63]] = in[63];
+  return out;
+endfunction : perm_layer
+
+////////////////////////////////////////////////////////////////////////
+// lfsr
+////////////////////////////////////////////////////////////////////////
+// While en_i is high: shift right by one and XOR in the mask if the old LSB was 1; else hold.
+logic [LfsrWidth-1:0] lfsr_d, lfsr_q;
+assign lfsr_d = (en_i) ? (lfsr_q>>1) ^ ({LfsrWidth{lfsr_q[0]}} & masks[LfsrWidth][LfsrWidth-1:0]) : lfsr_q;
+
+always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+  //$display("%b %h", en_i, lfsr_d);
+  if (!rst_ni) begin
+    lfsr_q <= LfsrWidth'(RstVal);
+  end else begin
+    lfsr_q <= lfsr_d;
+  end
+end
+
+////////////////////////////////////////////////////////////////////////
+// block cipher layers
+////////////////////////////////////////////////////////////////////////
+// The state is replicated and cut to 64 bit, then run through CipherLayers sbox + perm rounds.
+if (CipherLayers > unsigned'(0)) begin : g_cipher_layers
+  logic [63:0] ciph_layer;
+  localparam int unsigned NumRepl = ((64+LfsrWidth)/LfsrWidth); // copies needed to fill 64 bit
+
+  always_comb begin : p_ciph_layer
+    automatic logic [63:0] tmp;
+    tmp = 64'({NumRepl{lfsr_q}});
+    for(int unsigned k = 0; k < CipherLayers; k++) begin
+      tmp = perm_layer(sbox4_layer(tmp));
+    end
+    ciph_layer = tmp;
+  end
+
+  // additional output reg after cipher; it resets to zero and only updates while en_i is high
+  if (CipherReg) begin : g_cipher_reg
+    logic [OutWidth-1:0] out_d, out_q;
+
+    assign out_d = (en_i) ? ciph_layer[OutWidth-1:0] : out_q;
+    assign out_o = out_q[OutWidth-1:0];
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+      if (!rst_ni) begin
+        out_q <= '0;
+      end else begin
+        out_q <= out_d;
+      end
+    end
+  // no outreg: the ciphered state drives the output combinationally
+  end else begin : g_no_out_reg
+    assign out_o  = ciph_layer[OutWidth-1:0];
+  end
+
+// no block cipher: the output is taken directly from the LFSR state
+end else begin : g_no_cipher_layers
+  assign out_o    = lfsr_q[OutWidth-1:0];
+end
+
+////////////////////////////////////////////////////////////////////////
+// assertions
+////////////////////////////////////////////////////////////////////////
+// Parameter checks at time zero plus a clocked check that the next LFSR state is never zero.
+// pragma translate_off
+initial begin
+  // these are the LUT limits
+  assert(OutWidth <= LfsrWidth) else
+    $fatal(1,"OutWidth must be smaller equal the LfsrWidth.");
+  assert(RstVal > unsigned'(0)) else
+    $fatal(1,"RstVal must be nonzero.");
+  assert((LfsrWidth >= $low(masks)) && (LfsrWidth <= $high(masks))) else
+    $fatal(1,"Unsupported LfsrWidth.");
+  assert(masks[LfsrWidth][LfsrWidth-1]) else
+    $fatal(1, "LFSR mask is not correct. The MSB must be 1." );
+  assert((CipherLayers > 0) && (LfsrWidth == 64) || (CipherLayers == 0)) else
+    $fatal(1, "Use additional cipher layers only in conjunction with an LFSR width of 64 bit." );
+end
+
+`ifndef VERILATOR
+  all_zero: assert property (
+    @(posedge clk_i) disable iff (!rst_ni) en_i |-> lfsr_d)
+      else $fatal(1,"Lfsr must not be all-zero.");
+`endif
+// pragma translate_on
+
+endmodule // lfsr
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_16bit.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_16bit.sv
new file mode 100644
index 0000000000..3fcf96e22e
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_16bit.sv
@@ -0,0 +1,67 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba, ETH Zurich
+// Date: 5.11.2018
+// Description: 16-bit LFSR
+
+// --------------
+// 16-bit LFSR
+// --------------
+// Left-shifting 16-bit register; the bit shifted in is the XNOR of state bits 15, 12, 5 and 1.
+// With XNOR feedback the all-ones state is the one that never changes, so the all-zero default
+// SEED is a valid start value. The low $clog2(WIDTH) state bits form the output.
+module lfsr_16bit #(
+    parameter logic [15:0] SEED  = 16'b0, // reset value of the shift register
+    parameter int unsigned WIDTH = 16     // number of one-hot output bits, at most 16
+)(
+    input  logic                      clk_i,
+    input  logic                      rst_ni,         // asynchronous reset, active low
+    input  logic                      en_i,           // shift by one position when high
+    output logic [WIDTH-1:0]          refill_way_oh,  // one-hot decode of refill_way_bin
+    output logic [$clog2(WIDTH)-1:0]  refill_way_bin  // low LOG_WIDTH bits of the state
+);
+
+    localparam int unsigned LOG_WIDTH = $clog2(WIDTH);
+
+    logic [15:0] shift_d, shift_q;
+
+    // next-state and output logic
+    always_comb begin
+        // feedback bit: XNOR of the taps
+        automatic logic shift_in;
+        shift_in = !(shift_q[15] ^ shift_q[12] ^ shift_q[5] ^ shift_q[1]);
+
+        shift_d = shift_q;
+
+        if (en_i)
+            shift_d = {shift_q[14:0], shift_in};
+        // NOTE(review): if WIDTH is not a power of two the index below may exceed WIDTH-1
+        // output assignment; both outputs use the same explicit slice of the state
+        refill_way_oh = 'b0;
+        refill_way_oh[shift_q[LOG_WIDTH-1:0]] = 1'b1;
+        refill_way_bin = shift_q[LOG_WIDTH-1:0];
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+        if(~rst_ni) begin
+            shift_q <= SEED;
+        end else begin
+            shift_q <= shift_d;
+        end
+    end
+
+    //pragma translate_off
+    initial begin
+        assert (WIDTH <= 16) else $fatal(1, "WIDTH needs to be at most 16 because of the 16-bit LFSR");
+    end
+    //pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_8bit.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_8bit.sv
new file mode 100644
index 0000000000..fbe5c748a1
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/lfsr_8bit.sv
@@ -0,0 +1,68 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Igor Loi - University of Bologna
+// Author: Florian Zaruba, ETH Zurich
+// Date: 12.11.2017
+// Description: 8-bit LFSR
+
+// --------------
+// 8-bit LFSR
+// --------------
+// Left-shifting 8-bit register; the bit shifted in is the XNOR of state bits 7, 3, 2 and 1.
+// With XNOR feedback the all-ones state is the one that never changes, so the all-zero default
+// SEED is a valid start value. The low $clog2(WIDTH) state bits form the output.
+module lfsr_8bit #(
+    parameter logic [7:0]  SEED  = 8'b0,  // reset value of the shift register
+    parameter int unsigned WIDTH = 8      // number of one-hot output bits, at most 8
+)(
+    input  logic                      clk_i,
+    input  logic                      rst_ni,         // asynchronous reset, active low
+    input  logic                      en_i,           // shift by one position when high
+    output logic [WIDTH-1:0]          refill_way_oh,  // one-hot decode of refill_way_bin
+    output logic [$clog2(WIDTH)-1:0]  refill_way_bin  // low LOG_WIDTH bits of the state
+);
+
+    localparam int unsigned LOG_WIDTH = $clog2(WIDTH);
+
+    logic [7:0] shift_d, shift_q;
+
+    // next-state and output logic
+    always_comb begin
+        // feedback bit: XNOR of the taps
+        automatic logic shift_in;
+        shift_in = !(shift_q[7] ^ shift_q[3] ^ shift_q[2] ^ shift_q[1]);
+
+        shift_d = shift_q;
+
+        if (en_i)
+            shift_d = {shift_q[6:0], shift_in};
+        // NOTE(review): if WIDTH is not a power of two the index below may exceed WIDTH-1
+        // output assignment; both outputs use the same explicit slice of the state
+        refill_way_oh = 'b0;
+        refill_way_oh[shift_q[LOG_WIDTH-1:0]] = 1'b1;
+        refill_way_bin = shift_q[LOG_WIDTH-1:0];
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+        if(~rst_ni) begin
+            shift_q <= SEED;
+        end else begin
+            shift_q <= shift_d;
+        end
+    end
+
+    //pragma translate_off
+    initial begin
+        assert (WIDTH <= 8) else $fatal(1, "WIDTH needs to be at most 8 because of the 8-bit LFSR");
+    end
+    //pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/lzc.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/lzc.sv
new file mode 100644
index 0000000000..4ebbb5f5ec
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/lzc.sv
@@ -0,0 +1,93 @@
+// Copyright (c) 2018 - 2019 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+
+/// A trailing zero counter / leading zero counter.
+/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
+/// Set MODE to 1 for leading zero counter  => cnt_o is the number of leading zeros  (from the MSB)
+/// If the input does not contain a one (i.e. it is all zeros), `empty_o` is asserted and `cnt_o`
+/// does not hold a valid count (it reads 6 in the 7 bit example below). For example:
+///   in_i = 000_0000, empty_o = 1, cnt_o = 6 (mode = 0)
+///   in_i = 000_0001, empty_o = 0, cnt_o = 0 (mode = 0)
+///   in_i = 000_1000, empty_o = 0, cnt_o = 3 (mode = 0)
+/// The count is found with a binary tree of $clog2(WIDTH) levels: every node forwards the index
+/// of its first child if that child has seen a one, and the index of its second child otherwise.
+
+module lzc #(
+  /// The width of the input vector.
+  parameter int unsigned WIDTH = 2,
+  parameter bit          MODE  = 1'b0 // 0 -> trailing zero, 1 -> leading zero
+) (
+  input  logic [WIDTH-1:0]         in_i,
+  output logic [$clog2(WIDTH)-1:0] cnt_o,
+  output logic                     empty_o // asserted if all bits in in_i are zero
+);
+
+  localparam int unsigned NUM_LEVELS = $clog2(WIDTH);
+
+  // pragma translate_off
+  initial begin
+    assert(WIDTH > 0) else $fatal(1, "input must be at least one bit wide");
+  end
+  // pragma translate_on
+  // index_lut[j] is the constant j; sel_nodes/index_nodes hold the tree, the root is node 0
+  logic [WIDTH-1:0][NUM_LEVELS-1:0]          index_lut;
+  logic [2**NUM_LEVELS-1:0]                  sel_nodes;
+  logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0]  index_nodes;
+
+  logic [WIDTH-1:0] in_tmp;
+
+  // reverse vector if required, so that the tree always counts from index 0 upwards
+  always_comb begin : flip_vector
+    for (int unsigned i = 0; i < WIDTH; i++) begin
+      in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
+    end
+  end
+
+  for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
+    assign index_lut[j] = NUM_LEVELS'(unsigned'(j));
+  end
+
+  for (genvar level = 0; unsigned'(level) < NUM_LEVELS; level++) begin : g_levels
+    if (unsigned'(level) == NUM_LEVELS-1) begin : g_last_level
+      for (genvar k = 0; k < 2**level; k++) begin : g_level
+        // if two successive indices are still in the vector...
+        if (unsigned'(k) * 2 < WIDTH-1) begin
+          assign sel_nodes[2**level-1+k]   = in_tmp[k*2] | in_tmp[k*2+1];
+          assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] :
+                                                                     index_lut[k*2+1];
+        end
+        // if only the first index is still in the vector...
+        if (unsigned'(k) * 2 == WIDTH-1) begin
+          assign sel_nodes[2**level-1+k]   = in_tmp[k*2];
+          assign index_nodes[2**level-1+k] = index_lut[k*2];
+        end
+        // if index is out of range
+        if (unsigned'(k) * 2 > WIDTH-1) begin
+          assign sel_nodes[2**level-1+k]   = 1'b0;
+          assign index_nodes[2**level-1+k] = '0;
+        end
+      end
+    end else begin
+      for (genvar l = 0; l < 2**level; l++) begin : g_level
+        assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
+        assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ? index_nodes[2**(level+1)-1+l*2] :
+                                                                                     index_nodes[2**(level+1)-1+l*2+1];
+      end
+    end
+  end
+  // without any tree level (NUM_LEVELS == 0) the count is zero and empty_o is derived from in_i
+  assign cnt_o   = NUM_LEVELS > unsigned'(0) ? index_nodes[0] : $clog2(WIDTH)'(0);
+  assign empty_o = NUM_LEVELS > unsigned'(0) ? ~sel_nodes[0]  : ~(|in_i);
+
+endmodule : lzc
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/mv_filter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/mv_filter.sv
new file mode 100644
index 0000000000..2254c0b059
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/mv_filter.sv
@@ -0,0 +1,55 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+
+module mv_filter #(
+    parameter int unsigned WIDTH     = 4,   // bit width of the sample counter
+    parameter int unsigned THRESHOLD = 10   // count at which the output gets set
+)(
+    input  logic clk_i,
+    input  logic rst_ni,    // asynchronous reset, active low
+    input  logic sample_i,  // d_i is only counted while this is high
+    input  logic clear_i,   // synchronous clear of counter and output
+    input  logic d_i,       // signal to be filtered
+    output logic q_o        // registered flag, set after the count reached THRESHOLD
+);
+    // Counts cycles with sample_i && d_i. Once the count has reached THRESHOLD the
+    // flag is raised and counting stops until clear_i (or reset) starts over.
+    logic [WIDTH-1:0] counter_q, counter_d;
+    logic             d, q, threshold_hit;
+
+    // Only the low WIDTH bits of THRESHOLD take part in the comparison.
+    assign threshold_hit = (counter_q >= THRESHOLD[WIDTH-1:0]);
+    assign q_o           = q;
+
+    always_comb begin
+        // default: hold the current state
+        counter_d = counter_q;
+        d         = q;
+        if (clear_i) begin
+            // the synchronous clear wins over flagging and counting
+            counter_d = '0;
+            d         = 1'b0;
+        end else if (threshold_hit) begin
+            d = 1'b1;
+        end else if (sample_i && d_i) begin
+            counter_d = counter_q + 1;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            {counter_q, q} <= '0;
+        end else begin
+            {counter_q, q} <= {counter_d, d};
+        end
+    end
+endmodule
\ No newline at end of file
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/onehot_to_bin.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/onehot_to_bin.sv
new file mode 100644
index 0000000000..35c513cd09
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/onehot_to_bin.sv
@@ -0,0 +1,39 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Francesco Conti <fconti@iis.ee.ethz.ch>
+
+module onehot_to_bin #(
+    parameter int unsigned ONEHOT_WIDTH = 16,  // width of the one-hot input
+    // Derived width of the binary output. Do Not Change
+    parameter int unsigned BIN_WIDTH   = $clog2(ONEHOT_WIDTH)
+)(
+    input  logic [ONEHOT_WIDTH-1:0] onehot,  // at most one bit may be set
+    output logic [BIN_WIDTH-1:0]    bin      // index of the set bit, 0 if none is set
+);
+    // Bit j of the output is the OR of all input bits whose index has bit j set.
+    for (genvar j = 0; j < BIN_WIDTH; j++) begin : jl
+        logic [ONEHOT_WIDTH-1:0] tmp_mask;  // tmp_mask[i] = bit j of index i
+        for (genvar i = 0; i < ONEHOT_WIDTH; i++) begin : il
+            logic [BIN_WIDTH-1:0] tmp_i;
+            assign tmp_i = i;
+            assign tmp_mask[i] = tmp_i[j];
+        end
+        assign bin[j] = |(tmp_mask & onehot);
+    end
+    // Sanity check of the input, evaluated once at time zero (simulation only).
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin
+        assert($onehot0(onehot)) else $fatal(1, "[onehot_to_bin] More than one bit set in the one-hot signal");
+    end
+`endif
+// pragma translate_on
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/plru_tree.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/plru_tree.sv
new file mode 100644
index 0000000000..28e0cba5b4
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/plru_tree.sv
@@ -0,0 +1,120 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: David Schaffenrath, TU Graz
+// Author: Florian Zaruba, ETH Zurich
+//
+// Description: Pseudo Least Recently Used Tree (PLRU)
+// See: https://en.wikipedia.org/wiki/Pseudo-LRU
+
+module plru_tree #(
+  parameter int unsigned ENTRIES = 16  // number of tracked elements, must be a power of two
+) (
+  input  logic               clk_i,
+  input  logic               rst_ni, // asynchronous reset, active low
+  input  logic [ENTRIES-1:0] used_i, // element i was used (one hot)
+  output logic [ENTRIES-1:0] plru_o  // element i is the least recently used (one hot)
+);
+
+    // Number of tree levels.
+    localparam LOG_ENTRIES = $clog2(ENTRIES);
+
+    // One bit per tree node; node n has the children 2n+1 and 2n+2:
+    // lvl0        0
+    //            / \
+    //           /   \
+    // lvl1     1     2
+    //         / \   / \
+    // lvl2   3   4 5   6
+    //       / \ /\/\  /\
+    //      ... ... ... ...
+    // A node bit of 1 points towards the upper half of the entries below it.
+    logic [2*(ENTRIES-1)-1:0] plru_tree_q, plru_tree_d;
+
+    // Next tree state: for every used entry, the nodes on its root-to-leaf path
+    // are set so that they point away from it. E.g. for 8 entries and a one-hot
+    // used_i the loop is semantically equivalent to the following pseudo-code:
+    // unique case (1'b1)
+    // used_i[7]: plru_tree_d[0, 2, 6] = {0, 0, 0};
+    // used_i[6]: plru_tree_d[0, 2, 6] = {0, 0, 1};
+    // used_i[5]: plru_tree_d[0, 2, 5] = {0, 1, 0};
+    // used_i[4]: plru_tree_d[0, 2, 5] = {0, 1, 1};
+    // used_i[3]: plru_tree_d[0, 1, 4] = {1, 0, 0};
+    // used_i[2]: plru_tree_d[0, 1, 4] = {1, 0, 1};
+    // used_i[1]: plru_tree_d[0, 1, 3] = {1, 1, 0};
+    // used_i[0]: plru_tree_d[0, 1, 3] = {1, 1, 1};
+    // default: begin /* No hit */ end
+    // endcase
+    always_comb begin : plru_replacement
+        plru_tree_d = plru_tree_q;
+        for (int unsigned entry = 0; entry < ENTRIES; entry++) begin
+            automatic int unsigned lvl_base, shamt, away_bit;
+            // only a used entry touches the tree
+            if (used_i[entry]) begin
+                for (int unsigned depth = 0; depth < LOG_ENTRIES; depth++) begin
+                    // index of the first node on this level
+                    lvl_base = $unsigned((2**depth)-1);
+                    // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                    shamt = LOG_ENTRIES - depth;
+                    // computed on a full 32 bit word, only bit 0 is used afterwards
+                    away_bit = ~((entry >> (shamt-1)) & 32'b1);
+                    plru_tree_d[lvl_base + (entry >> shamt)] = away_bit[0];
+                end
+            end
+        end
+    end
+
+    // Decode the registered tree into the one-hot victim: an entry is selected
+    // when every node on its path equals the corresponding bit of its index.
+    // For 8 entries this creates the following logic (pseudo-code):
+    // plru_o[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
+    // plru_o[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
+    // plru_o[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
+    // plru_o[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
+    // plru_o[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
+    // plru_o[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
+    // plru_o[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
+    // plru_o[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
+    always_comb begin : plru_decode
+        for (int unsigned entry = 0; entry < ENTRIES; entry += 1) begin
+            automatic logic sel;
+            automatic int unsigned node, shamt, path_bit;
+            sel = 1'b1;
+            for (int unsigned depth = 0; depth < LOG_ENTRIES; depth++) begin
+                // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                shamt    = LOG_ENTRIES - depth;
+                node     = $unsigned((2**depth)-1) + (entry >> shamt);
+                path_bit = (entry >> (shamt-1)) & 32'b1;
+                // the node has to match the path bit: AND in the node or its inverse
+                sel &= path_bit[0] ? plru_tree_q[node] : ~plru_tree_q[node];
+            end
+            plru_o[entry] = sel;
+        end
+    end
+
+    // Tree state register; after reset all nodes are 0, i.e. entry 0 is reported.
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            plru_tree_q <= '0;
+        end else begin
+            plru_tree_q <= plru_tree_d;
+        end
+    end
+
+    // Parameter sanity check (simulation only, skipped for Verilator).
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin
+        assert (ENTRIES == 2**LOG_ENTRIES) else $error("Entries must be a power of two");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/popcount.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/popcount.sv
new file mode 100644
index 0000000000..0e16453130
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/popcount.sv
@@ -0,0 +1,57 @@
+// Copyright (C) 2013-2018 ETH Zurich, University of Bologna
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Manuel Eggimann <meggimann@iis.ee.ethz.ch>
+
+// Description: This module calculates the hamming weight (number of ones) in
+// its input vector using a balanced binary adder tree. Recursive instantiation
+// is used to build the tree.  Any unsigned INPUT_WIDTH greater than or equal to 2 is
+// legal.  The module pads the signal internally to the next power of two.  The
+// output result width is ceil(log2(INPUT_WIDTH))+1.
+
+module popcount #(
+    parameter int unsigned INPUT_WIDTH = 256,
+    localparam POPCOUNT_WIDTH          = $clog2(INPUT_WIDTH)+1
+) (
+    input logic [INPUT_WIDTH-1:0]     data_i,
+    output logic [POPCOUNT_WIDTH-1:0] popcount_o
+);
+
+   // Width after rounding INPUT_WIDTH up to a power of two, and half of it
+   // (the input width of each child in the adder tree).
+   localparam int unsigned PADDED_WIDTH = 1 << $clog2(INPUT_WIDTH);
+   localparam int unsigned HALF_WIDTH   = PADDED_WIDTH / 2;
+
+   logic [PADDED_WIDTH-1:0]   data_padded;
+   logic [POPCOUNT_WIDTH-2:0] count_hi, count_lo;
+
+   // Zero-extend the input vector up to PADDED_WIDTH bits.
+   assign data_padded = PADDED_WIDTH'(data_i);
+
+   // Each node sums the population counts of the upper and lower halves.
+   assign popcount_o = count_hi + count_lo;
+
+   // Build the tree recursively: a two-bit node takes its operands straight
+   // from the input bits, every wider node instantiates two half-width children.
+   if (INPUT_WIDTH == 2) begin : leaf_node
+     assign count_hi = data_padded[1];
+     assign count_lo = data_padded[0];
+   end else begin : non_leaf_node
+     popcount #(.INPUT_WIDTH(HALF_WIDTH)) left_child (
+       .data_i     ( data_padded[PADDED_WIDTH-1:HALF_WIDTH] ),
+       .popcount_o ( count_hi                               )
+     );
+     popcount #(.INPUT_WIDTH(HALF_WIDTH)) right_child (
+       .data_i     ( data_padded[HALF_WIDTH-1:0] ),
+       .popcount_o ( count_lo                    )
+     );
+   end
+
+endmodule : popcount
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/rr_arb_tree.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/rr_arb_tree.sv
new file mode 100644
index 0000000000..dee5024962
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/rr_arb_tree.sv
@@ -0,0 +1,244 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 02.04.2019
+// Description: logarithmic arbitration tree with round robin arbitration scheme.
+//
+// The rr_arb_tree employs fair round robin arbitration - i.e. the priorities
+// rotate each cycle.
+//
+// The `LockIn` option prevents the arbiter from changing the arbitration
+// decision when the arbiter is disabled. I.e., the index of the first request
+// that wins the arbitration will be locked in case the destination is not
+// able to grant the request in the same cycle.
+//
+// The `ExtPrio` option allows to override the internal round robin counter via the
+// `rr_i` signal. This can be useful in case multiple arbiters need to have
+// rotating priorities that are operating in lock-step. If static priority arbitration
+// is needed, just connect `rr_i` to '0.
+//
+// If `AxiVldRdy` is set, the req/gnt signals are compliant with the AXI style vld/rdy
+// handshake. Namely, upstream vld (req) must not depend on rdy (gnt), as it can be deasserted
+// again even though vld is asserted. Enabling `AxiVldRdy` leads to a reduction of arbiter
+// delay and area.
+//
+
+module rr_arb_tree #(
+  parameter int unsigned NumIn      = 64,   // number of request inputs
+  parameter int unsigned DataWidth  = 32,   // width of the default DataType
+  parameter type         DataType   = logic [DataWidth-1:0], // payload type per input
+  parameter bit          ExtPrio    = 1'b0, // set to 1'b1 to enable
+  parameter bit          AxiVldRdy  = 1'b0, // treat req/gnt as vld/rdy
+  parameter bit          LockIn     = 1'b0  // set to 1'b1 to enable
+) (
+  input  logic                             clk_i,
+  input  logic                             rst_ni,
+  input  logic                             flush_i, // clears the arbiter state
+  input  logic [$clog2(NumIn)-1:0]         rr_i,    // external RR prio (needs to be enabled above)
+  // input requests and data
+  input  logic [NumIn-1:0]                 req_i,
+  /* verilator lint_off UNOPTFLAT */
+  output logic [NumIn-1:0]                 gnt_o,
+  /* verilator lint_on UNOPTFLAT */
+  input  DataType [NumIn-1:0]              data_i,
+  // arbitrated output
+  input  logic                             gnt_i,
+  output logic                             req_o,
+  output DataType                          data_o,
+  output logic [$clog2(NumIn)-1:0]         idx_o
+);
+  // single input: no arbitration needed, request/grant/data are passed straight through
+  if (NumIn == unsigned'(1)) begin
+    assign req_o    = req_i[0];
+    assign gnt_o[0] = gnt_i;
+    assign data_o   = data_i[0];
+    assign idx_o    = '0;
+  // non-degenerate cases
+  end else begin
+    localparam int unsigned NumLevels = $clog2(NumIn);
+
+    /* verilator lint_off UNOPTFLAT */
+    logic [2**NumLevels-2:0][NumLevels-1:0]  index_nodes; // used to propagate the indices
+    DataType [2**NumLevels-2:0]              data_nodes;  // used to propagate the data
+    logic [2**NumLevels-2:0]                 gnt_nodes;   // used to propagate the grant to masters
+    logic [2**NumLevels-2:0]                 req_nodes;   // used to propagate the requests to slave
+    /* lint_off */
+    logic [NumLevels-1:0]                    rr_q;        // current round robin priority
+    logic [NumIn-1:0]                        req_d;       // requests seen by the tree
+
+    // the final arbitration decision can be taken from the root of the tree
+    assign req_o        = req_nodes[0];
+    assign data_o       = data_nodes[0];
+    assign idx_o        = index_nodes[0];
+
+    if (ExtPrio) begin : gen_ext_rr
+      assign rr_q       = rr_i;
+      assign req_d      = req_i;
+    end else begin : gen_int_rr
+      logic [NumLevels-1:0] rr_d;
+
+      // lock arbiter decision in case we got at least one req and no acknowledge
+      if (LockIn) begin : gen_lock
+        logic  lock_d, lock_q;
+        logic [NumIn-1:0]     req_q;
+
+        assign lock_d     = req_o & ~gnt_i;
+        assign req_d      = (lock_q) ? req_q : req_i;
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg
+          if (!rst_ni) begin
+            lock_q <= '0;
+          end else begin
+            if (flush_i) begin
+              lock_q <= '0;
+            end else begin
+              lock_q <= lock_d;
+            end
+          end
+        end
+
+        // pragma translate_off
+        `ifndef VERILATOR
+          lock: assert property(
+            @(posedge clk_i) disable iff (!rst_ni) LockIn |-> req_o && !gnt_i |=> idx_o == $past(idx_o))
+              else $fatal (1, "Lock implies same arbiter decision in next cycle if output is not ready.");
+
+          logic [NumIn-1:0] req_tmp;
+          assign req_tmp = req_q & req_i;
+          lock_req: assert property(
+            @(posedge clk_i) disable iff (!rst_ni) LockIn |-> lock_d |=> req_tmp == req_q)
+              else $fatal (1, "It is disallowed to deassert unserved request signals when LockIn is enabled.");
+        `endif
+        // pragma translate_on
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_req_regs
+          if (!rst_ni) begin
+            req_q  <= '0;
+          end else begin
+            if (flush_i) begin
+              req_q  <= '0;
+            end else begin
+              req_q  <= req_d;
+            end
+          end
+        end
+      end else begin : gen_no_lock
+        assign req_d = req_i;
+      end
+
+      // advance the priority on every completed handshake, wrapping after NumIn-1
+      assign rr_d       = (gnt_i && req_o) ? ((rr_q == NumLevels'(NumIn-1)) ? '0 : rr_q + 1'b1) : rr_q;
+
+      always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs
+        if (!rst_ni) begin
+          rr_q   <= '0;
+        end else begin
+          if (flush_i) begin
+            rr_q   <= '0;
+          end else begin
+            rr_q   <= rr_d;
+          end
+        end
+      end
+    end
+
+    assign gnt_nodes[0] = gnt_i;
+
+    // arbiter tree
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels
+      for (genvar l = 0; l < 2**level; l++) begin : gen_level
+        // local select signal
+        logic sel;
+        // index calcs
+        localparam int unsigned idx0 = 2**level-1+l;// current node
+        localparam int unsigned idx1 = 2**(level+1)-1+l*2;// first of its two child nodes
+        //////////////////////////////////////////////////////////////
+        // uppermost level where data is fed in from the inputs
+        if (unsigned'(level) == NumLevels-1) begin : gen_first_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(l) * 2 < NumIn-1) begin
+            assign req_nodes[idx0]   = req_d[l*2] | req_d[l*2+1];
+
+            // arbitration: round robin
+            assign sel =  ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level];
+
+            assign index_nodes[idx0] = NumLevels'(sel);
+            assign data_nodes[idx0]  = (sel) ? data_i[l*2+1] : data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[idx0] & (AxiVldRdy | req_d[l*2])   & ~sel;
+            assign gnt_o[l*2+1]      = gnt_nodes[idx0] & (AxiVldRdy | req_d[l*2+1]) & sel;
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(l) * 2 == NumIn-1) begin
+            assign req_nodes[idx0]   = req_d[l*2];
+            assign index_nodes[idx0] = '0;// always zero in this case
+            assign data_nodes[idx0]  = data_i[l*2];
+            assign gnt_o[l*2]        = gnt_nodes[idx0] & (AxiVldRdy | req_d[l*2]);
+          end
+          // if index is out of range, fill up with zeros (will get pruned)
+          if (unsigned'(l) * 2 > NumIn-1) begin
+            assign req_nodes[idx0]   = 1'b0;
+            assign index_nodes[idx0] = '0; // index vector, not payload: plain zero fill
+            assign data_nodes[idx0]  = DataType'('0);
+          end
+        //////////////////////////////////////////////////////////////
+        // general case for other levels within the tree
+        end else begin : gen_other_levels
+          assign req_nodes[idx0]   = req_nodes[idx1] | req_nodes[idx1+1];
+
+          // arbitration: round robin
+          assign sel =  ~req_nodes[idx1] | req_nodes[idx1+1] & rr_q[NumLevels-1-level];
+
+          assign index_nodes[idx0] = (sel) ? NumLevels'({1'b1, index_nodes[idx1+1][NumLevels-unsigned'(level)-2:0]}) :
+                                             NumLevels'({1'b0, index_nodes[idx1][NumLevels-unsigned'(level)-2:0]});
+          assign data_nodes[idx0]  = (sel) ? data_nodes[idx1+1] : data_nodes[idx1];
+          assign gnt_nodes[idx1]   = gnt_nodes[idx0] & ~sel;
+          assign gnt_nodes[idx1+1] = gnt_nodes[idx0] & sel;
+        end
+        //////////////////////////////////////////////////////////////
+      end
+    end
+
+    // pragma translate_off
+    `ifndef VERILATOR
+    initial begin : p_assert
+      assert(NumIn)
+        else $fatal(1, "Input must be at least one element wide.");
+      assert(!(LockIn && ExtPrio))
+        else $fatal(1,"Cannot use LockIn feature together with external ExtPrio.");
+    end
+
+    hot_one : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) $onehot0(gnt_o))
+        else $fatal (1, "Grant signal must be hot1 or zero.");
+
+    gnt0 : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) |gnt_o |-> gnt_i)
+        else $fatal (1, "Grant out implies grant in.");
+
+    gnt1 : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) req_o |-> gnt_i |-> |gnt_o)
+        else $fatal (1, "Req out and grant in implies grant out.");
+
+    gnt_idx : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) req_o |->  gnt_i |-> gnt_o[idx_o])
+        else $fatal (1, "Idx_o / gnt_o do not match.");
+
+    req0 : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) |req_i |-> req_o)
+        else $fatal (1, "Req in implies req out.");
+
+    req1 : assert property(
+      @(posedge clk_i) disable iff (!rst_ni) req_o |-> |req_i)
+        else $fatal (1, "Req out implies req in.");
+    `endif
+    // pragma translate_on
+  end
+
+endmodule : rr_arb_tree
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen.sv
new file mode 100644
index 0000000000..a7dccc63b0
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen.sv
@@ -0,0 +1,30 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Davide Rossi <davide.rossi@unibo.it>
+
+module rstgen (
+    input  logic clk_i,
+    input  logic rst_ni,
+    input  logic test_mode_i,
+    output logic rst_no,
+    output logic init_no
+);
+
+    // rst_ni drives both reset inputs of the wrapped rstgen_bypass instance.
+    rstgen_bypass i_rstgen_bypass (
+        .clk_i,
+        .rst_ni,
+        .rst_test_mode_ni ( rst_ni ),
+        .test_mode_i,
+        .rst_no,
+        .init_no
+    );
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen_bypass.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen_bypass.sv
new file mode 100644
index 0000000000..fc1bcfe729
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/rstgen_bypass.sv
@@ -0,0 +1,54 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+// Description: This module is a reset synchronizer with a dedicated reset bypass pin for testmode reset.
+// Pro Tip: The wise Dr. Schaffner recommends at least 4 registers!
+
+module rstgen_bypass #(
+    parameter NumRegs = 4 // number of synchronizer registers, at least 1
+) (
+    input  logic clk_i,
+    input  logic rst_ni,            // asynchronous reset, active low
+    input  logic rst_test_mode_ni,  // reset used while test_mode_i is high
+    input  logic test_mode_i,       // selects the bypass path when high
+    output logic rst_no,
+    output logic init_no
+);
+
+    // reset actually applied to the synchronizer registers
+    logic rst_n;
+    // shift register: cleared by rst_n, then filled with ones from the LSB
+    logic [NumRegs-1:0] synch_regs_q;
+    // normal mode: outputs follow the last stage; test mode: bypass the registers
+    always_comb begin
+        if (test_mode_i == 1'b0) begin
+            rst_n   = rst_ni;
+            rst_no  = synch_regs_q[NumRegs-1];
+            init_no = synch_regs_q[NumRegs-1];
+        end else begin
+            rst_n   = rst_test_mode_ni;
+            rst_no  = rst_test_mode_ni;
+            init_no = 1'b1;
+        end
+    end
+    // The size cast keeps the low NumRegs bits, so NumRegs == 1 needs no [-1:0] slice.
+    always @(posedge clk_i or negedge rst_n) begin
+        if (~rst_n) begin
+            synch_regs_q <= 0;
+        end else begin
+            synch_regs_q <= NumRegs'({synch_regs_q, 1'b1});
+        end
+    end
+
+    initial begin : p_assertions
+        if (NumRegs < 1) $fatal(1, "At least one register is required.");
+    end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/serial_deglitch.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/serial_deglitch.sv
new file mode 100644
index 0000000000..22f55a5977
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/serial_deglitch.sv
@@ -0,0 +1,50 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba
+// Description: Deglitches a serial line by taking multiple samples until
+//              asserting the output high/low.
+
+module serial_deglitch #(
+    parameter int unsigned SIZE = 4
+)(
+    input  logic clk_i,    // clock
+    input  logic rst_ni,   // asynchronous reset active low
+    input  logic en_i,     // enable
+    input  logic d_i,      // serial data in
+    output logic q_o       // filtered data out
+);
+    // Saturating sample counter (0..SIZE), its next value, and the output register.
+    logic [SIZE-1:0] count_d, count_q;
+    logic q;
+
+    // Count up on high samples and down on low ones, saturating at both ends so
+    // the counter can neither wrap below zero nor get stuck once it reaches SIZE.
+    always_comb begin
+        count_d = count_q;
+        if (en_i && d_i && count_q != SIZE[SIZE-1:0]) begin
+            count_d = count_q + 1;
+        end else if (en_i && !d_i && count_q != '0) begin
+            count_d = count_q - 1;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            count_q <= '0;
+            q       <= 1'b0;
+        end else begin
+            count_q <= count_d;
+            if (count_d == SIZE[SIZE-1:0]) q <= 1'b1;
+            else if (count_d == '0)        q <= 1'b0;
+        end
+    end
+    assign q_o = q; // held in a register instead of an incomplete always_comb (latch)
+endmodule
\ No newline at end of file
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/shift_reg.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/shift_reg.sv
new file mode 100644
index 0000000000..cbfab0e7ac
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/shift_reg.sv
@@ -0,0 +1,53 @@
+
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: <zarubaf@iis.ee.ethz.ch>
+//
+// Description: Simple shift register for arbitrary depth and types
+
+module shift_reg #(
+    parameter type dtype         = logic,
+    parameter int unsigned Depth = 1
+)(
+    input  logic clk_i,    // Clock
+    input  logic rst_ni,   // Asynchronous reset active low
+    input  dtype d_i,
+    output dtype d_o
+);
+
+    // The implementation is selected at elaboration time from Depth.
+    // Depth == 0: purely combinational, the output is wired to the input.
+    if (Depth == 0) begin
+        assign d_o = d_i;
+    // Depth == 1: a single register stage drives the output directly.
+    end else if (Depth == 1) begin
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (!rst_ni) d_o <= '0;
+            else         d_o <= d_i;
+        end
+    // Depth >= 2: chain of Depth stages, d_i enters at index 0 and the
+    // output is taken from the last stage.
+    end else begin
+        dtype [Depth-1:0] stages_q;
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (!rst_ni) begin
+                stages_q <= '0;
+            end else begin
+                // move every entry one stage towards the output
+                stages_q <= {stages_q[Depth-2:0], d_i};
+            end
+        end
+
+        assign d_o = stages_q[Depth-1];
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/spill_register.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/spill_register.sv
new file mode 100644
index 0000000000..6a99c89e54
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/spill_register.sv
@@ -0,0 +1,89 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
+
+
+/// A register with handshakes that completely cuts any combinational paths
+/// between the input and output.
+module spill_register #(
+  parameter type T = logic
+)(
+  input  logic clk_i   ,
+  input  logic rst_ni  ,
+  input  logic valid_i ,
+  output logic ready_o ,
+  input  T     data_i  ,
+  output logic valid_o ,
+  input  logic ready_i ,
+  output T     data_o
+);
+
+  // The A register: captures data_i on an input handshake (a_fill) and is
+  // emptied (a_drain) towards the output or into the B register.
+  T a_data_q;
+  logic a_full_q;
+  logic a_fill, a_drain;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_data
+    if (!rst_ni)
+      a_data_q <= '0;
+    else if (a_fill)
+      a_data_q <= data_i;
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_full
+    if (!rst_ni)
+      a_full_q <= 0;
+    else if (a_fill || a_drain)
+      a_full_q <= a_fill;
+  end
+
+  // The B register: takes over the content of A while the output is stalled.
+  T b_data_q;
+  logic b_full_q;
+  logic b_fill, b_drain;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_data
+    if (!rst_ni)
+      b_data_q <= '0;
+    else if (b_fill)
+      b_data_q <= a_data_q;
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_full
+    if (!rst_ni)
+      b_full_q <= 0;
+    else if (b_fill || b_drain)
+      b_full_q <= b_fill;
+  end
+
+  // Fill the A register on an input handshake, which ready_o permits while at
+  // least one register is empty. Drain it whenever it is full and B is empty.
+  assign a_fill = valid_i && ready_o;
+  assign a_drain = a_full_q && !b_full_q;
+
+  // Fill the B register whenever the A register is drained, but the downstream
+  // circuit is not ready. Drain the B register whenever it is full and the
+  // downstream circuit is ready.
+  assign b_fill = a_drain && !ready_i;
+  assign b_drain = b_full_q && ready_i;
+
+  // We can accept input as long as at least one of the two registers is empty.
+  assign ready_o = !a_full_q || !b_full_q;
+
+  // The unit provides output as long as one of the registers is filled.
+  assign valid_o = a_full_q | b_full_q;
+
+  // Register B holds the older entry, so it is output before register A.
+  assign data_o = b_full_q ? b_data_q : a_data_q;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/sram.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/sram.sv
new file mode 100644
index 0000000000..fca1372bfe
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/sram.sv
@@ -0,0 +1,46 @@
+// Copyright 2017, 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Date: 13.10.2017
+// Description: SRAM Behavioral Model
+
+module sram #(
+    int unsigned DATA_WIDTH = 64,
+    int unsigned NUM_WORDS  = 1024
+)(
+   input  logic                          clk_i,
+
+   input  logic                          req_i,
+   input  logic                          we_i,
+   input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,
+   input  logic [DATA_WIDTH-1:0]         wdata_i,
+   input  logic [DATA_WIDTH-1:0]         be_i,
+   output logic [DATA_WIDTH-1:0]         rdata_o
+);
+    localparam ADDR_WIDTH = $clog2(NUM_WORDS);
+    logic [DATA_WIDTH-1:0] ram [NUM_WORDS-1:0];
+    logic [ADDR_WIDTH-1:0] raddr_q;
+
+    // Read data is taken from the address captured by the last read request.
+    assign rdata_o = ram[raddr_q];
+
+    // A read request stores the address; a write updates the bits enabled in be_i.
+    always_ff @(posedge clk_i) begin
+        if (req_i) begin
+            if (!we_i) begin
+                raddr_q <= addr_i;
+            end else begin
+                for (int bit_idx = 0; bit_idx < DATA_WIDTH; bit_idx++) begin
+                    if (be_i[bit_idx]) ram[addr_i][bit_idx] <= wdata_i[bit_idx];
+                end
+            end
+        end
+    end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter.sv
new file mode 100644
index 0000000000..c8ca2a8769
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter.sv
@@ -0,0 +1,49 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
+// handshaking with dependency rules as in AXI4) to a single output stream.  Once `oup_valid_o` is
+// asserted, `oup_data_o` remains invariant until the output handshake has occurred.  The
+// arbitration scheme is a fair round-robin tree, see `rr_arb_tree` for details.
+
+module stream_arbiter #(
+    parameter type      DATA_T = logic,   // Vivado requires a default value for type parameters.
+    parameter integer   N_INP = -1,       // Synopsys DC requires a default value for parameters.
+    parameter           ARBITER = "rr"    // "rr" or "prio"
+) (
+    input  logic              clk_i,
+    input  logic              rst_ni,
+
+    input  DATA_T [N_INP-1:0] inp_data_i,
+    input  logic  [N_INP-1:0] inp_valid_i,
+    output logic  [N_INP-1:0] inp_ready_o,
+
+    output DATA_T             oup_data_o,
+    output logic              oup_valid_o,
+    input  logic              oup_ready_i
+);
+
+  // Non-flushable variant: wraps stream_arbiter_flushable with flush_i tied low.
+  stream_arbiter_flushable #(
+    .DATA_T  ( DATA_T  ),
+    .N_INP   ( N_INP   ),
+    .ARBITER ( ARBITER )
+  ) i_arb (
+    .clk_i,
+    .rst_ni,
+    .flush_i     ( 1'b0        ),
+    .inp_data_i  ( inp_data_i  ),
+    .inp_valid_i ( inp_valid_i ),
+    .inp_ready_o ( inp_ready_o ),
+    .oup_data_o  ( oup_data_o  ),
+    .oup_valid_o,
+    .oup_ready_i
+  );
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter_flushable.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter_flushable.sv
new file mode 100644
index 0000000000..fd1411732a
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_arbiter_flushable.sv
@@ -0,0 +1,80 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
+// handshaking with dependency rules as in AXI4) to a single output stream.  Once `oup_valid_o` is
+// asserted, `oup_data_o` remains invariant until the output handshake has occurred.  The
+// arbitration scheme is fair round-robin tree, see `rr_arb_tree` for details.
+
+module stream_arbiter_flushable #(
+    parameter type      DATA_T = logic,   // Vivado requires a default value for type parameters.
+    parameter integer   N_INP = -1,       // Synopsys DC requires a default value for parameters.
+    parameter           ARBITER = "rr"    // "rr" or "prio"
+) (
+    input  logic              clk_i,
+    input  logic              rst_ni,
+    input  logic              flush_i,
+
+    input  DATA_T [N_INP-1:0] inp_data_i,
+    input  logic  [N_INP-1:0] inp_valid_i,
+    output logic  [N_INP-1:0] inp_ready_o,
+
+    output DATA_T             oup_data_o,
+    output logic              oup_valid_o,
+    input  logic              oup_ready_i
+);
+  // "rr": internal round-robin priority; LockIn holds a decision while the output stalls.
+  if (ARBITER == "rr") begin : gen_rr_arb
+    rr_arb_tree #(
+      .NumIn      (N_INP),
+      .DataType   (DATA_T),
+      .ExtPrio    (1'b0),
+      .AxiVldRdy  (1'b1),
+      .LockIn     (1'b1)
+    ) i_arbiter (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .rr_i   ('0),
+      .req_i  (inp_valid_i),
+      .gnt_o  (inp_ready_o),
+      .data_i (inp_data_i),
+      .gnt_i  (oup_ready_i),
+      .req_o  (oup_valid_o),
+      .data_o (oup_data_o),
+      .idx_o  ()
+    );
+  // "prio": rr_i tied to '0 replaces the internal counter, giving static priority.
+  end else if (ARBITER == "prio") begin : gen_prio_arb
+    rr_arb_tree #(
+      .NumIn      (N_INP),
+      .DataType   (DATA_T),
+      .ExtPrio    (1'b1),
+      .AxiVldRdy  (1'b1),
+      .LockIn     (1'b0)  // no effect with ExtPrio; 1'b1 trips rr_arb_tree's parameter assertion
+    ) i_arbiter (
+      .clk_i,
+      .rst_ni,
+      .flush_i,
+      .rr_i   ('0),
+      .req_i  (inp_valid_i),
+      .gnt_o  (inp_ready_o),
+      .data_i (inp_data_i),
+      .gnt_i  (oup_ready_i),
+      .req_o  (oup_valid_o),
+      .data_o (oup_data_o),
+      .idx_o  ()
+    );
+
+  end else begin : gen_arb_error
+    $fatal(1, "Invalid value for parameter 'ARBITER'!");
+  end
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_delay.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_delay.sv
new file mode 100644
index 0000000000..e0b6b01fb2
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_delay.sv
@@ -0,0 +1,132 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba, zarubaf@iis.ee.ethz.ch
+// Description: Delay (or randomize) AXI-like handshaking
+
+module stream_delay #(
+    parameter bit   StallRandom = 0,     // 1: delay value is taken from the lfsr_16bit instance
+    parameter int   FixedDelay  = 1,     // delay value used when StallRandom is 0
+    parameter type  payload_t  = logic   // payload type; the payload is forwarded unchanged
+)(
+    input  logic     clk_i,
+    input  logic     rst_ni,
+
+    input  payload_t payload_i,
+    output logic     ready_o,
+    input  logic     valid_i,
+
+    output payload_t payload_o,
+    input  logic     ready_i,
+    output logic     valid_o
+);
+
+    if (FixedDelay == 0 && !StallRandom) begin : pass_through
+        assign ready_o = ready_i;
+        assign valid_o = valid_i;
+        assign payload_o = payload_i;
+    end else begin
+        // Width of the delay counter, of its load value and of its count output.
+        localparam COUNTER_BITS = 4;
+
+        typedef enum logic [1:0] {
+            Idle, Valid, Ready
+        } state_e;
+
+        state_e state_d, state_q;
+
+        logic                    load;       // load counter_load into the counter
+        logic [COUNTER_BITS-1:0] count_out;  // sized by COUNTER_BITS like the counter itself
+        logic                    en;         // counter enable, set while in state Valid
+
+        logic [COUNTER_BITS-1:0] counter_load;
+
+        assign payload_o = payload_i;
+
+        always_comb begin
+            state_d = state_q;
+            valid_o = 1'b0;
+            ready_o = 1'b0;
+            load    = 1'b0;
+            en      = 1'b0;
+
+            unique case (state_q)
+                Idle: begin
+                    if (valid_i) begin
+                        load = 1'b1;
+                        state_d = Valid;
+                        // Just one cycle delay
+                        if (FixedDelay == 1 || (StallRandom && counter_load == 1)) begin
+                            state_d = Ready;
+                        end
+                        // Random delay of zero: offer the item downstream in this very cycle.
+                        if (StallRandom && counter_load == 0) begin
+                            valid_o = 1'b1;
+                            ready_o = ready_i;
+                            if (ready_i) state_d = Idle;
+                            else state_d = Ready;
+                        end
+                    end
+                end
+                Valid: begin
+                    en = 1'b1;
+                    if (count_out == 0) begin
+                        state_d = Ready;
+                    end
+                end
+                // Delay elapsed: expose the handshake until the downstream accepts.
+                Ready: begin
+                    valid_o = 1'b1;
+                    ready_o = ready_i;
+                    if (ready_i) state_d = Idle;
+                end
+                default : /* default */;
+            endcase
+
+        end
+        // Source of the delay value: LFSR output (stepped on every load) or the constant.
+        if (StallRandom) begin : random_stall
+            lfsr_16bit #(
+              .WIDTH ( 16 )
+            ) i_lfsr_16bit (
+                .clk_i          ( clk_i        ),
+                .rst_ni         ( rst_ni       ),
+                .en_i           ( load         ),
+                .refill_way_oh  (              ),
+                .refill_way_bin ( counter_load )
+            );
+        end else begin
+            assign counter_load = FixedDelay;
+        end
+        // NOTE(review): presumably a loadable down-counter; its source is not visible here.
+        counter #(
+            .WIDTH      ( COUNTER_BITS )
+        ) i_counter (
+            .clk_i      ( clk_i        ),
+            .rst_ni     ( rst_ni       ),
+            .clear_i    ( 1'b0         ),
+            .en_i       ( en           ),
+            .load_i     ( load         ),
+            .down_i     ( 1'b1         ),
+            .d_i        ( counter_load ),
+            .q_o        ( count_out    ),
+            .overflow_o (              )
+        );
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (~rst_ni) begin
+                state_q <= Idle;
+            end else begin
+                state_q <= state_d;
+            end
+        end
+    end
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_demux.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_demux.sv
new file mode 100644
index 0000000000..0d2fed2700
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_demux.sv
@@ -0,0 +1,37 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Stream demultiplexer: Connects the input stream (valid-ready) handshake to one of `N_OUP` output
+/// stream handshakes.
+///
+/// This module has no data ports because stream data does not need to be demultiplexed: the data of
+/// the input stream can just be applied at all output streams.
+
+module stream_demux #(
+  parameter integer N_OUP = 1,
+  /// Dependent parameters, DO NOT OVERRIDE!
+  localparam integer LOG_N_OUP = (N_OUP > 1) ? $clog2(N_OUP) : 1 // >= 1 bit: no [-1:0] port
+) (
+  input  logic                  inp_valid_i,
+  output logic                  inp_ready_o,
+  // Index of the output stream that is currently connected to the input stream.
+  input  logic  [LOG_N_OUP-1:0] oup_sel_i,
+
+  output logic  [N_OUP-1:0]     oup_valid_o,
+  input  logic  [N_OUP-1:0]     oup_ready_i
+);
+  // Only the selected output sees the input valid; all other valids stay low.
+  always_comb begin
+    oup_valid_o = '0;
+    oup_valid_o[oup_sel_i] = inp_valid_i;
+  end
+  assign inp_ready_o = oup_ready_i[oup_sel_i];
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_filter.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_filter.sv
new file mode 100644
index 0000000000..52a5835e77
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_filter.sv
@@ -0,0 +1,26 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream filter: If `drop_i` is `1`, signal `ready` to the upstream regardless of the downstream,
+// and do not propagate `valid` downstream.  Otherwise, connect upstream to downstream.
+module stream_filter (
+    input  logic valid_i,   // upstream valid
+    output logic ready_o,   // upstream ready
+    // While high, the upstream item is accepted and hidden from the downstream.
+    input  logic drop_i,
+    // Downstream side of the handshake.
+    output logic valid_o,
+    input  logic ready_i
+);
+    // Pass the handshake through unless dropping; otherwise swallow the item.
+    assign ready_o = (!drop_i) ? ready_i : 1'b1;
+    assign valid_o = (!drop_i) ? valid_i : 1'b0;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_fork.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_fork.sv
new file mode 100644
index 0000000000..aebb0f5d2b
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_fork.sv
@@ -0,0 +1,133 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Stream fork: Connects the input stream (ready-valid) handshake to *all* of `N_OUP` output stream
+// handshakes. For each input stream handshake, every output stream handshakes exactly once. The
+// input stream only handshakes when all output streams have handshaked, but the output streams do
+// not have to handshake simultaneously.
+//
+// This module has no data ports because stream data does not need to be forked: the data of the
+// input stream can just be applied at all output streams.
+
+module stream_fork #(
+    parameter int unsigned N_OUP = 0    // Synopsys DC requires a default value for parameters.
+) (
+    input  logic                clk_i,
+    input  logic                rst_ni,
+    input  logic                valid_i,
+    output logic                ready_o,
+    output logic [N_OUP-1:0]    valid_o,
+    input  logic [N_OUP-1:0]    ready_i
+);
+
+    typedef enum logic {READY, WAIT} state_t;
+
+    logic [N_OUP-1:0]   oup_ready,  // bit i is low only while output i still lacks its handshake
+                        all_ones;   // constant '1 of width N_OUP, used for the comparisons below
+
+    state_t inp_state_d, inp_state_q;
+
+    // Input control FSM
+    always_comb begin
+        // ready_o is assigned explicitly in every branch below, so no default is needed here.
+        inp_state_d = inp_state_q;
+
+        unique case (inp_state_q)
+            READY: begin
+                if (valid_i) begin
+                    if (valid_o == all_ones && ready_i == all_ones) begin
+                        // If handshake on all outputs, handshake on input.
+                        ready_o = 1'b1;
+                    end else begin
+                        ready_o = 1'b0;
+                        // Otherwise, wait for inputs that did not handshake yet.
+                        inp_state_d = WAIT;
+                    end
+                end else begin
+                    ready_o = 1'b0;
+                end
+            end
+            WAIT: begin
+                if (valid_i && oup_ready == all_ones) begin
+                    ready_o = 1'b1;
+                    inp_state_d = READY;
+                end else begin
+                    ready_o = 1'b0;
+                end
+            end
+            default: begin
+                inp_state_d = READY;
+                ready_o = 1'b0;
+            end
+        endcase
+    end
+
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+        if (!rst_ni) begin
+            inp_state_q <= READY;
+        end else begin
+            inp_state_q <= inp_state_d;
+        end
+    end
+
+    // Output control FSM
+    for (genvar i = 0; i < N_OUP; i++) begin: gen_oup_state
+        state_t oup_state_d, oup_state_q;
+        // READY: output i offers the item; WAIT: output i is done and waits for the input handshake.
+        always_comb begin
+            oup_ready[i]    = 1'b1;
+            valid_o[i]      = 1'b0;
+            oup_state_d     = oup_state_q;
+
+            unique case (oup_state_q)
+                READY: begin
+                    if (valid_i) begin
+                        valid_o[i] = 1'b1;
+                        if (ready_i[i]) begin   // Output handshake
+                            if (!ready_o) begin     // No input handshake yet
+                                oup_state_d = WAIT;
+                            end
+                        end else begin          // No output handshake
+                            oup_ready[i] = 1'b0;
+                        end
+                    end
+                end
+                WAIT: begin
+                    if (valid_i && ready_o) begin   // Input handshake
+                        oup_state_d = READY;
+                    end
+                end
+                default: begin
+                    oup_state_d = READY;
+                end
+            endcase
+        end
+
+        always_ff @(posedge clk_i, negedge rst_ni) begin
+            if (!rst_ni) begin
+                oup_state_q <= READY;
+            end else begin
+                oup_state_q <= oup_state_d;
+            end
+        end
+    end
+
+    assign all_ones = '1;   // Synthesis fix for Vivado, which does not correctly compute the width
+                            // of the '1 literal when assigned to a port of parametrized width.
+
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: p_assertions
+        assert (N_OUP >= 1) else $fatal(1, "Number of outputs must be at least 1!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_mux.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_mux.sv
new file mode 100644
index 0000000000..fe276075c0
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_mux.sv
@@ -0,0 +1,46 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Stream multiplexer: connects the output to one of `N_INP` data streams with valid-ready
+/// handshaking.
+
+module stream_mux #(
+  parameter type DATA_T = logic,  // Vivado requires a default value for type parameters.
+  parameter integer N_INP = 0,    // Synopsys DC requires a default value for value parameters.
+  /// Dependent parameters, DO NOT OVERRIDE!
+  localparam integer LOG_N_INP = (N_INP > 1) ? $clog2(N_INP) : 1 // >= 1 bit: no [-1:0] port
+) (
+  input  DATA_T [N_INP-1:0]     inp_data_i,
+  input  logic  [N_INP-1:0]     inp_valid_i,
+  output logic  [N_INP-1:0]     inp_ready_o,
+  // Index of the input stream that is currently connected to the output stream.
+  input  logic  [LOG_N_INP-1:0] inp_sel_i,
+
+  output DATA_T                 oup_data_o,
+  output logic                  oup_valid_o,
+  input  logic                  oup_ready_i
+);
+  // Only the selected input sees the output ready; all other inputs are stalled.
+  always_comb begin
+    inp_ready_o = '0;
+    inp_ready_o[inp_sel_i] = oup_ready_i;
+  end
+  assign oup_data_o   = inp_data_i[inp_sel_i];
+  assign oup_valid_o  = inp_valid_i[inp_sel_i];
+
+// pragma translate_off
+`ifndef VERILATOR
+  initial begin: p_assertions
+    assert (N_INP >= 1) else $fatal (1, "The number of inputs must be at least 1!");
+  end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/stream_register.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_register.sv
new file mode 100644
index 0000000000..e83228b361
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/stream_register.sv
@@ -0,0 +1,57 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+/// Register with a simple stream-like ready/valid handshake.
+/// This register does not cut combinatorial paths on all control signals; if you need a complete
+/// cut, use the `spill_register`.
+module stream_register #(
+    parameter type T = logic  // Vivado requires a default value for type parameters.
+) (
+    input  logic    clk_i,          // Clock
+    input  logic    rst_ni,         // Asynchronous active-low reset
+    input  logic    clr_i,          // Synchronous clear
+    input  logic    testmode_i,     // Test mode to bypass clock gating
+    // Input port
+    input  logic    valid_i,
+    output logic    ready_o,
+    input  T        data_i,
+    // Output port
+    output logic    valid_o,
+    input  logic    ready_i,
+    output T        data_o
+);
+
+    logic   fifo_empty,
+            fifo_full;
+    // The register is a depth-1, non-fall-through FIFO; clr_i drives its flush input.
+    fifo_v2 #(
+        .FALL_THROUGH   (1'b0),
+        .DATA_WIDTH     ($bits(T)),  // bit width of T; $size() queries an array dimension instead
+        .DEPTH          (1),
+        .dtype          (T)
+    ) i_fifo (
+        .clk_i          (clk_i),
+        .rst_ni         (rst_ni),
+        .flush_i        (clr_i),
+        .testmode_i     (testmode_i),
+        .full_o         (fifo_full),
+        .empty_o        (fifo_empty),
+        .alm_full_o     ( ),
+        .alm_empty_o    ( ),
+        .data_i         (data_i),
+        .push_i         (valid_i & ~fifo_full),   // accept only while there is room
+        .data_o         (data_o),
+        .pop_i          (ready_i & ~fifo_empty)   // release only while an item is stored
+    );
+
+    assign ready_o = ~fifo_full;
+    assign valid_o = ~fifo_empty;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/sync.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/sync.sv
new file mode 100644
index 0000000000..ff2ef5b5fc
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/sync.sv
@@ -0,0 +1,34 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module sync #(
+    parameter int unsigned STAGES = 2   // number of flip-flops in the chain
+) (
+    input  logic clk_i,
+    input  logic rst_ni,      // asynchronous, active-low reset: clears the whole chain
+    input  logic serial_i,    // sampled into reg_q[0] on every rising clk_i edge
+    output logic serial_o     // last flip-flop of the chain, reg_q[STAGES-1]
+);
+    // NOTE(review): reg_q[STAGES-2:0] below is out of range for STAGES == 1; use STAGES >= 2.
+   logic [STAGES-1:0] reg_q;
+
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+        if (!rst_ni) begin
+            reg_q <= 'h0;
+        end else begin
+            reg_q <= {reg_q[STAGES-2:0], serial_i};   // shift by one, newest sample in bit 0
+        end
+    end
+
+    assign serial_o = reg_q[STAGES-1];
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/sync_wedge.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/sync_wedge.sv
new file mode 100644
index 0000000000..58f1279808
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/sync_wedge.sv
@@ -0,0 +1,56 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Antonio Pullini <pullinia@iis.ee.ethz.ch>
+
+module sync_wedge #(
+    parameter int unsigned STAGES = 2   // forwarded to the sync instance below
+) (
+    input  logic clk_i,
+    input  logic rst_ni,
+    input  logic en_i,        // enables the sampling register (and the clock gate)
+    input  logic serial_i,
+    output logic r_edge_o,    // high while serial is 1 and the stored sample serial_q is 0
+    output logic f_edge_o,    // high while serial is 0 and the stored sample serial_q is 1
+    output logic serial_o     // stored sample serial_q
+);
+    logic clk;                // output clock of the clock-gating cell
+    logic serial, serial_q;   // output of the sync instance / previously stored sample of it
+
+    assign serial_o =  serial_q;
+    assign f_edge_o = (~serial) & serial_q;
+    assign r_edge_o =  serial & (~serial_q);
+
+    sync #(
+        .STAGES (STAGES)
+    ) i_sync (
+        .clk_i,
+        .rst_ni,
+        .serial_i,
+        .serial_o ( serial )
+    );
+    // NOTE(review): presumably gates clk_i with en_i; the cell is defined outside this file.
+    pulp_clock_gating i_pulp_clock_gating (
+        .clk_i,
+        .en_i,
+        .test_en_i ( 1'b0 ),
+        .clk_o     ( clk  )
+    );
+    // serial_q follows serial on the gated clock, and only while en_i is high.
+    always_ff @(posedge clk, negedge rst_ni) begin
+        if (!rst_ni) begin
+            serial_q <= 1'b0;
+        end else begin
+            if (en_i) begin
+                serial_q <= serial;
+            end
+        end
+    end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/common_cells/src/unread.sv b/vendor/pulp-platform/fpnew/src/common_cells/src/unread.sv
new file mode 100644
index 0000000000..80e7356237
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/common_cells/src/unread.sv
@@ -0,0 +1,21 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 29.10.2018
+// Description: Dummy circuit to mitigate Open Pin warnings
+
+/* verilator lint_off UNUSED */
+module unread (
+    input logic d_i   // signal to terminate; it is not read anywhere inside the module
+);
+    // Intentionally empty: the module contains no logic and has no outputs.
+endmodule
+/* verilator lint_on UNUSED */
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv b/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv
new file mode 100644
index 0000000000..9d54c79ed3
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_cast_multi.sv
@@ -0,0 +1,760 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_cast_multi #(
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig  = '1,
+  parameter fpnew_pkg::ifmt_logic_t  IntFmtConfig = '1,
+  // FPU configuration
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+  // Do not change
+  localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig),
+                                                     fpnew_pkg::max_int_width(IntFmtConfig)),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input  logic                   clk_i,
+  input  logic                   rst_ni,
+  // Input signals
+  input  logic [WIDTH-1:0]       operands_i, // 1 operand
+  input  logic [NUM_FORMATS-1:0] is_boxed_i, // 1 operand
+  input  fpnew_pkg::roundmode_e  rnd_mode_i,
+  input  fpnew_pkg::operation_e  op_i,
+  input  logic                   op_mod_i,
+  input  fpnew_pkg::fp_format_e  src_fmt_i,
+  input  fpnew_pkg::fp_format_e  dst_fmt_i,
+  input  fpnew_pkg::int_format_e int_fmt_i,
+  input  TagType                 tag_i,
+  input  AuxType                 aux_i,
+  // Input Handshake
+  input  logic                   in_valid_i,
+  output logic                   in_ready_o,
+  input  logic                   flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]       result_o,
+  output fpnew_pkg::status_t     status_o,
+  output logic                   extension_bit_o,
+  output TagType                 tag_o,
+  output AuxType                 aux_o,
+  // Output handshake
+  output logic                   out_valid_o,
+  input  logic                   out_ready_i,
+  // Indication of valid data in flight
+  output logic                   busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
+  localparam int unsigned MAX_INT_WIDTH   = fpnew_pkg::max_int_width(IntFmtConfig);
+
+  localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
+
+  localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
+  localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
+  localparam int unsigned SUPER_BIAS     = 2**(SUPER_EXP_BITS - 1) - 1;
+
+  // The internal mantissa includes normal bit or an entire integer
+  localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(SUPER_MAN_BITS + 1, MAX_INT_WIDTH);
+  // If needed, there will be a LZC for renormalization
+  localparam int unsigned LZC_RESULT_WIDTH = $clog2(INT_MAN_WIDTH);
+  // The internal exponent must be able to represent the smallest denormal input value as signed
+  // or the number of bits in an integer
+  localparam int unsigned INT_EXP_WIDTH = fpnew_pkg::maximum($clog2(MAX_INT_WIDTH),
+      fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SUPER_BIAS + SUPER_MAN_BITS))) + 1;
+  // Pipelines
+  localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+                          ? NumPipeRegs
+                          : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                             ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+                             : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 3) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Selected pipeline output signals as non-arrays
+  logic [WIDTH-1:0]       operands_q;
+  logic [NUM_FORMATS-1:0] is_boxed_q;
+  logic                   op_mod_q;
+  fpnew_pkg::fp_format_e  src_fmt_q;
+  fpnew_pkg::fp_format_e  dst_fmt_q;
+  fpnew_pkg::int_format_e int_fmt_q;
+
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                   [0:NUM_INP_REGS][WIDTH-1:0]       inp_pipe_operands_q;
+  logic                   [0:NUM_INP_REGS][NUM_FORMATS-1:0] inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e  [0:NUM_INP_REGS]                  inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e  [0:NUM_INP_REGS]                  inp_pipe_op_q;
+  logic                   [0:NUM_INP_REGS]                  inp_pipe_op_mod_q;
+  fpnew_pkg::fp_format_e  [0:NUM_INP_REGS]                  inp_pipe_src_fmt_q;
+  fpnew_pkg::fp_format_e  [0:NUM_INP_REGS]                  inp_pipe_dst_fmt_q;
+  fpnew_pkg::int_format_e [0:NUM_INP_REGS]                  inp_pipe_int_fmt_q;
+  TagType                 [0:NUM_INP_REGS]                  inp_pipe_tag_q;
+  AuxType                 [0:NUM_INP_REGS]                  inp_pipe_aux_q;
+  logic                   [0:NUM_INP_REGS]                  inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_src_fmt_q[0]  = src_fmt_i;
+  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
+  assign inp_pipe_int_fmt_q[0]  = int_fmt_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_src_fmt_q[i+1],  inp_pipe_src_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_dst_fmt_q[i+1],  inp_pipe_dst_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_int_fmt_q[i+1],  inp_pipe_int_fmt_q[i],  reg_ena, fpnew_pkg::int_format_e'(0))
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+  assign is_boxed_q = inp_pipe_is_boxed_q[NUM_INP_REGS];
+  assign op_mod_q   = inp_pipe_op_mod_q[NUM_INP_REGS];
+  assign src_fmt_q  = inp_pipe_src_fmt_q[NUM_INP_REGS];
+  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+  assign int_fmt_q  = inp_pipe_int_fmt_q[NUM_INP_REGS];
+
+  // -----------------
+  // Input processing
+  // -----------------
+  logic src_is_int, dst_is_int; // if 0, it's a float
+
+  assign src_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::I2F);
+  assign dst_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::F2I);
+
+  logic [INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
+
+  logic        [NUM_FORMATS-1:0]                    fmt_sign;
+  logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
+  logic        [NUM_FORMATS-1:0][INT_MAN_WIDTH-1:0] fmt_mantissa;
+  logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_shift_compensation; // for LZC
+
+  fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info;
+
+  logic [NUM_INT_FORMATS-1:0][INT_MAN_WIDTH-1:0] ifmt_input_val;
+  logic                                          int_sign;
+  logic [INT_MAN_WIDTH-1:0]                      int_value, int_mantissa;
+
+  // FP input initialization: per format, classify the operand and unpack sign/exponent/mantissa
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
+    // Per-format constants: total width, exponent width and mantissa width of format fmt
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      // Classify input: one operand, taken from the low FP_WIDTH bits of operands_q
+      fpnew_classifier #(
+        .FpFormat    ( fpnew_pkg::fp_format_e'(fmt) ),
+        .NumOperands ( 1                            )
+      ) i_fpnew_classifier (
+        .operands_i ( operands_q[FP_WIDTH-1:0] ),
+        .is_boxed_i ( is_boxed_q[fmt]          ),
+        .info_o     ( info[fmt]                )
+      );
+
+      assign fmt_sign[fmt]     = operands_q[FP_WIDTH-1]; // sign is the MSB of this format
+      assign fmt_exponent[fmt] = signed'({1'b0, operands_q[MAN_BITS+:EXP_BITS]}); // 0-extended
+      assign fmt_mantissa[fmt] = {info[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // implicit bit (normals only), zero pad
+      // Compensation for the difference in mantissa widths used for leading-zero count
+      assign fmt_shift_compensation[fmt] = signed'(INT_MAN_WIDTH - 1 - MAN_BITS);
+    end else begin : inactive_format
+      assign info[fmt]                   = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_sign[fmt]               = fpnew_pkg::DONT_CARE;             // format disabled
+      assign fmt_exponent[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_mantissa[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_shift_compensation[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  // Sign-extend INT input to the full INT_MAN_WIDTH, once per integer format
+  for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_sign_extend_int
+    // Width in bits of integer format ifmt
+    localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+
+    if (IntFmtConfig[ifmt]) begin : active_format // only active formats
+      always_comb begin : sign_ext_input
+        // fill every bit with the sign bit, forced to 0 when op_mod_q is set (unsigned input)
+        ifmt_input_val[ifmt]                = '{default: operands_q[INT_WIDTH-1] & ~op_mod_q};
+        ifmt_input_val[ifmt][INT_WIDTH-1:0] = operands_q[INT_WIDTH-1:0]; // then place the value
+      end
+    end else begin : inactive_format
+      assign ifmt_input_val[ifmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  // Construct input mantissa from integer
+  assign int_value    = ifmt_input_val[int_fmt_q];
+  assign int_sign     = int_value[INT_MAN_WIDTH-1] & ~op_mod_q; // only signed ints are negative
+  assign int_mantissa = int_sign ? unsigned'(-int_value) : int_value; // get magnitude of negative
+
+  // select mantissa with source format
+  assign encoded_mant = src_is_int ? int_mantissa : fmt_mantissa[src_fmt_q];
+
+  // --------------
+  // Normalization
+  // --------------
+  logic signed [INT_EXP_WIDTH-1:0] src_bias;      // src format bias
+  logic signed [INT_EXP_WIDTH-1:0] src_exp;       // src format exponent (biased)
+  logic signed [INT_EXP_WIDTH-1:0] src_subnormal; // src is subnormal
+  logic signed [INT_EXP_WIDTH-1:0] src_offset;    // src offset within mantissa
+
+  assign src_bias      = signed'(fpnew_pkg::bias(src_fmt_q));
+  assign src_exp       = fmt_exponent[src_fmt_q];
+  assign src_subnormal = signed'({1'b0, info[src_fmt_q].is_subnormal});
+  assign src_offset    = fmt_shift_compensation[src_fmt_q];
+
+  logic                            input_sign;   // input sign
+  logic signed [INT_EXP_WIDTH-1:0] input_exp;    // unbiased true exponent
+  logic        [INT_MAN_WIDTH-1:0] input_mant;   // normalized input mantissa
+  logic                            mant_is_zero; // for integer zeroes
+
+  logic signed [INT_EXP_WIDTH-1:0] fp_input_exp;
+  logic signed [INT_EXP_WIDTH-1:0] int_input_exp;
+
+  // Input mantissa needs to be normalized
+  logic [LZC_RESULT_WIDTH-1:0] renorm_shamt;     // renormalization shift amount
+  logic [LZC_RESULT_WIDTH:0]   renorm_shamt_sgn; // signed form for calculations
+
+  // Leading-zero counter is needed for renormalization
+  lzc #(
+    .WIDTH ( INT_MAN_WIDTH ),
+    .MODE  ( 1             ) // MODE = 1 counts leading zeroes
+  ) i_lzc (
+    .in_i    ( encoded_mant ),
+    .cnt_o   ( renorm_shamt ),
+    .empty_o ( mant_is_zero )
+  );
+  assign renorm_shamt_sgn = signed'({1'b0, renorm_shamt});
+
+  // Get the sign from the proper source
+  assign input_sign = src_is_int ? int_sign : fmt_sign[src_fmt_q];
+  // Realign input mantissa, append zeroes if destination is wider
+  assign input_mant = encoded_mant << renorm_shamt;
+  // Unbias exponent and compensate for shift
+  assign fp_input_exp  = signed'(src_exp + src_subnormal - src_bias -
+                                 renorm_shamt_sgn + src_offset); // compensate for shift
+  assign int_input_exp = signed'(INT_MAN_WIDTH - 1 - renorm_shamt_sgn);
+
+  assign input_exp     = src_is_int ? int_input_exp : fp_input_exp;
+
+  logic signed [INT_EXP_WIDTH-1:0] destination_exp;  // re-biased exponent for destination
+
+  // Rebias the exponent
+  assign destination_exp = input_exp + signed'(fpnew_pkg::bias(dst_fmt_q));
+
+  // ---------------
+  // Internal pipeline
+  // ---------------
+  // Pipeline output signals as non-arrays
+  logic                            input_sign_q;
+  logic signed [INT_EXP_WIDTH-1:0] input_exp_q;
+  logic [INT_MAN_WIDTH-1:0]        input_mant_q;
+  logic signed [INT_EXP_WIDTH-1:0] destination_exp_q;
+  logic                            src_is_int_q;
+  logic                            dst_is_int_q;
+  fpnew_pkg::fp_info_t             info_q;
+  logic                            mant_is_zero_q;
+  logic                            op_mod_q2;
+  fpnew_pkg::roundmode_e           rnd_mode_q;
+  fpnew_pkg::fp_format_e           src_fmt_q2;
+  fpnew_pkg::fp_format_e           dst_fmt_q2;
+  fpnew_pkg::int_format_e          int_fmt_q2;
+  // Internal pipeline signals, index i holds signal after i register stages
+
+
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_input_sign_q;
+  logic signed            [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q;
+  logic                   [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q;
+  logic signed            [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_dest_exp_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_src_is_int_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_dst_is_int_q;
+  fpnew_pkg::fp_info_t    [0:NUM_MID_REGS]                    mid_pipe_info_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_mant_zero_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_op_mod_q;
+  fpnew_pkg::roundmode_e  [0:NUM_MID_REGS]                    mid_pipe_rnd_mode_q;
+  fpnew_pkg::fp_format_e  [0:NUM_MID_REGS]                    mid_pipe_src_fmt_q;
+  fpnew_pkg::fp_format_e  [0:NUM_MID_REGS]                    mid_pipe_dst_fmt_q;
+  fpnew_pkg::int_format_e [0:NUM_MID_REGS]                    mid_pipe_int_fmt_q;
+  TagType                 [0:NUM_MID_REGS]                    mid_pipe_tag_q;
+  AuxType                 [0:NUM_MID_REGS]                    mid_pipe_aux_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_MID_REGS] mid_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from upstream logic
+  assign mid_pipe_input_sign_q[0] = input_sign;
+  assign mid_pipe_input_exp_q[0]  = input_exp;
+  assign mid_pipe_input_mant_q[0] = input_mant;
+  assign mid_pipe_dest_exp_q[0]   = destination_exp;
+  assign mid_pipe_src_is_int_q[0] = src_is_int;
+  assign mid_pipe_dst_is_int_q[0] = dst_is_int;
+  assign mid_pipe_info_q[0]       = info[src_fmt_q];
+  assign mid_pipe_mant_zero_q[0]  = mant_is_zero;
+  assign mid_pipe_op_mod_q[0]     = op_mod_q;
+  assign mid_pipe_rnd_mode_q[0]   = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign mid_pipe_src_fmt_q[0]    = src_fmt_q;
+  assign mid_pipe_dst_fmt_q[0]    = dst_fmt_q;
+  assign mid_pipe_int_fmt_q[0]    = int_fmt_q;
+  assign mid_pipe_tag_q[0]        = inp_pipe_tag_q[NUM_INP_REGS];
+  assign mid_pipe_aux_q[0]        = inp_pipe_aux_q[NUM_INP_REGS];
+  assign mid_pipe_valid_q[0]      = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to input pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+
+  // Generate the NUM_MID_REGS register stages between input processing and casting
+  for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0)
+    `FFL(mid_pipe_input_exp_q[i+1],  mid_pipe_input_exp_q[i],  reg_ena, '0)
+    `FFL(mid_pipe_input_mant_q[i+1], mid_pipe_input_mant_q[i], reg_ena, '0)
+    `FFL(mid_pipe_dest_exp_q[i+1],   mid_pipe_dest_exp_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_src_is_int_q[i+1], mid_pipe_src_is_int_q[i], reg_ena, '0)
+    `FFL(mid_pipe_dst_is_int_q[i+1], mid_pipe_dst_is_int_q[i], reg_ena, '0)
+    `FFL(mid_pipe_info_q[i+1],       mid_pipe_info_q[i],       reg_ena, '0)
+    `FFL(mid_pipe_mant_zero_q[i+1],  mid_pipe_mant_zero_q[i],  reg_ena, '0)
+    `FFL(mid_pipe_op_mod_q[i+1],     mid_pipe_op_mod_q[i],     reg_ena, '0)
+    `FFL(mid_pipe_rnd_mode_q[i+1],   mid_pipe_rnd_mode_q[i],   reg_ena, fpnew_pkg::RNE)
+    `FFL(mid_pipe_src_fmt_q[i+1],    mid_pipe_src_fmt_q[i],    reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(mid_pipe_dst_fmt_q[i+1],    mid_pipe_dst_fmt_q[i],    reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(mid_pipe_int_fmt_q[i+1],    mid_pipe_int_fmt_q[i],    reg_ena, fpnew_pkg::int_format_e'(0))
+    `FFL(mid_pipe_tag_q[i+1],        mid_pipe_tag_q[i],        reg_ena, TagType'('0))
+    `FFL(mid_pipe_aux_q[i+1],        mid_pipe_aux_q[i],        reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign input_sign_q      = mid_pipe_input_sign_q[NUM_MID_REGS];
+  assign input_exp_q       = mid_pipe_input_exp_q[NUM_MID_REGS];
+  assign input_mant_q      = mid_pipe_input_mant_q[NUM_MID_REGS];
+  assign destination_exp_q = mid_pipe_dest_exp_q[NUM_MID_REGS];
+  assign src_is_int_q      = mid_pipe_src_is_int_q[NUM_MID_REGS];
+  assign dst_is_int_q      = mid_pipe_dst_is_int_q[NUM_MID_REGS];
+  assign info_q            = mid_pipe_info_q[NUM_MID_REGS];
+  assign mant_is_zero_q    = mid_pipe_mant_zero_q[NUM_MID_REGS];
+  assign op_mod_q2         = mid_pipe_op_mod_q[NUM_MID_REGS];
+  assign rnd_mode_q        = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+  assign src_fmt_q2        = mid_pipe_src_fmt_q[NUM_MID_REGS];
+  assign dst_fmt_q2        = mid_pipe_dst_fmt_q[NUM_MID_REGS];
+  assign int_fmt_q2        = mid_pipe_int_fmt_q[NUM_MID_REGS];
+
+  // --------
+  // Casting
+  // --------
+  logic [INT_EXP_WIDTH-1:0] final_exp;        // after eventual adjustments
+
+  logic [2*INT_MAN_WIDTH:0]  preshift_mant;    // mantissa before final shift
+  logic [2*INT_MAN_WIDTH:0]  destination_mant; // mantissa from shifter, with rnd bit
+  logic [SUPER_MAN_BITS-1:0] final_mant;       // mantissa after adjustments
+  logic [MAX_INT_WIDTH-1:0]  final_int;        // integer shifted in position
+
+  logic [$clog2(INT_MAN_WIDTH+1)-1:0] denorm_shamt; // shift amount for denormalization
+
+  logic [1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
+  logic       of_before_round, uf_before_round;
+
+
+  // Perform adjustments to mantissa and exponent; also picks the final right-shift amount
+  always_comb begin : cast_value
+    // Default assignment
+    final_exp       = unsigned'(destination_exp_q); // take exponent as is, only look at lower bits
+    preshift_mant   = '0;  // initialize mantissa container with zeroes
+    denorm_shamt    = SUPER_MAN_BITS - fpnew_pkg::man_bits(dst_fmt_q2); // right of mantissa
+    of_before_round = 1'b0; // no overflow detected yet
+    uf_before_round = 1'b0; // no underflow detected yet
+
+    // Place mantissa to the left of the shifter
+    preshift_mant = input_mant_q << (INT_MAN_WIDTH + 1);
+
+    // Handle INT casts
+    if (dst_is_int_q) begin
+      // By default right shift mantissa to be an integer
+      denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q);
+      // overflow: when converting to unsigned the range is larger by one
+      if (input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin
+        denorm_shamt    = '0; // prevent shifting
+        of_before_round = 1'b1;
+      // underflow: exponent below -1, the whole mantissa ends up in the sticky bits
+      end else if (input_exp_q < -1) begin
+        denorm_shamt    = MAX_INT_WIDTH + 1; // all bits go to the sticky
+        uf_before_round = 1'b1;
+      end
+    // Handle FP over-/underflows
+    end else begin
+      // Overflow or infinities (for proper rounding); infinities only count for FP sources
+      if ((destination_exp_q >= signed'(2**fpnew_pkg::exp_bits(dst_fmt_q2))-1) ||
+          (~src_is_int_q && info_q.is_inf)) begin
+        final_exp       = unsigned'(2**fpnew_pkg::exp_bits(dst_fmt_q2)-2); // largest normal value
+        preshift_mant   = '1;                           // largest normal value and RS bits set
+        of_before_round = 1'b1;
+      // Denormalize underflowing values: re-biased exponent in [-man_bits, 0]
+      end else if (destination_exp_q < 1 &&
+                   destination_exp_q >= -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
+        final_exp       = '0; // denormal result
+        denorm_shamt    = unsigned'(denorm_shamt + 1 - destination_exp_q); // adjust right shifting
+        uf_before_round = 1'b1;
+      // Limit the shift to retain sticky bits: exponent is below -man_bits here
+      end else if (destination_exp_q < -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
+        final_exp       = '0; // denormal result
+        denorm_shamt    = unsigned'(denorm_shamt + 2 + fpnew_pkg::man_bits(dst_fmt_q2)); // to sticky
+        uf_before_round = 1'b1;
+      end
+    end
+  end
+
+  localparam NUM_FP_STICKY  = 2 * INT_MAN_WIDTH - SUPER_MAN_BITS - 1; // removed mantissa, 1. and R
+  localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R
+
+  // Mantissa adjustment shift
+  assign destination_mant = preshift_mant >> denorm_shamt;
+  // Extract final mantissa and round bit, discard the normal bit (for FP)
+  assign {final_mant, fp_round_sticky_bits[1]} =
+      destination_mant[2*INT_MAN_WIDTH-1-:SUPER_MAN_BITS+1];
+  assign {final_int, int_round_sticky_bits[1]} = destination_mant[2*INT_MAN_WIDTH-:MAX_INT_WIDTH+1];
+  // Collapse sticky bits
+  assign fp_round_sticky_bits[0]  = (| {destination_mant[NUM_FP_STICKY-1:0]});
+  assign int_round_sticky_bits[0] = (| {destination_mant[NUM_INT_STICKY-1:0]});
+
+  // select RS bits for destination operation
+  assign round_sticky_bits = dst_is_int_q ? int_round_sticky_bits : fp_round_sticky_bits;
+
+  // ----------------------------
+  // Rounding and classification
+  // ----------------------------
+  logic [WIDTH-1:0] pre_round_abs;  // absolute value of result before rnd
+  logic             of_after_round; // overflow
+  logic             uf_after_round; // underflow
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_pre_round_abs; // per format
+  logic [NUM_FORMATS-1:0]            fmt_of_after_round;
+  logic [NUM_FORMATS-1:0]            fmt_uf_after_round;
+
+  logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format
+
+  logic             rounded_sign;
+  logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding
+  logic             result_true_zero;
+
+  logic [WIDTH-1:0] rounded_int_res; // after possible inversion
+  logic             rounded_int_res_zero; // after rounding
+
+
+  // Pack exponent and mantissa into proper rounding form, once per FP format
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble
+    // Exponent and mantissa widths of format fmt
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : assemble_result
+        fmt_pre_round_abs[fmt] = {final_exp[EXP_BITS-1:0], final_mant[MAN_BITS-1:0]}; // 0-extend
+      end
+    end else begin : inactive_format
+      assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  // Sign-extend integer result to WIDTH bits, once per integer format
+  for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_res_sign_ext
+    // Width in bits of integer format ifmt
+    localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+
+    if (IntFmtConfig[ifmt]) begin : active_format
+      always_comb begin : assemble_result
+        // sign-extend result: fill with the format's MSB, then place the low INT_WIDTH bits
+        ifmt_pre_round_abs[ifmt]                = '{default: final_int[INT_WIDTH-1]};
+        ifmt_pre_round_abs[ifmt][INT_WIDTH-1:0] = final_int[INT_WIDTH-1:0];
+      end
+    end else begin : inactive_format
+      assign ifmt_pre_round_abs[ifmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  // Select output with destination format and operation
+  assign pre_round_abs = dst_is_int_q ? ifmt_pre_round_abs[int_fmt_q2] : fmt_pre_round_abs[dst_fmt_q2];
+
+  fpnew_rounding #(
+    .AbsWidth ( WIDTH )
+  ) i_fpnew_rounding (
+    .abs_value_i             ( pre_round_abs     ),
+    .sign_i                  ( input_sign_q      ), // source format
+    .round_sticky_bits_i     ( round_sticky_bits ),
+    .rnd_mode_i              ( rnd_mode_q        ),
+    .effective_subtraction_i ( 1'b0              ), // no operation happened
+    .abs_rounded_o           ( rounded_abs       ),
+    .sign_o                  ( rounded_sign      ),
+    .exact_zero_o            ( result_true_zero  )
+  );
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result;
+
+  // Detect overflows and inject sign, once per FP format
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject
+    // Total width, exponent width and mantissa width of format fmt
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : post_process
+        // detect of / uf by inspecting the exponent field of the rounded value
+        fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal
+        fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp.
+
+        // Assemble regular result, NaN-box short ones. Int zeroes need to be detected
+        fmt_result[fmt]               = '1;
+        fmt_result[fmt][FP_WIDTH-1:0] = src_is_int_q & mant_is_zero_q
+                                        ? '0
+                                        : {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
+      end
+    end else begin : inactive_format
+      assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_result[fmt]         = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Classification after rounding select by destination format
+  assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
+  assign of_after_round = fmt_of_after_round[dst_fmt_q2];
+
+  // Negative integer result needs to be brought into two's complement
+  assign rounded_int_res      = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs;
+  assign rounded_int_res_zero = (rounded_int_res == '0);
+
+  // -------------------------
+  // FP Special case handling
+  // -------------------------
+  logic [WIDTH-1:0]   fp_special_result;
+  fpnew_pkg::status_t fp_special_status;
+  logic               fp_result_is_special;
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result;
+
+  // Special result construction: per FP format, either a signed zero or a qNaN
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
+    // Total width, exponent width and mantissa width of format fmt
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1; // all-ones exponent
+    localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1); // only the mantissa MSB set
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : special_results
+        logic [FP_WIDTH-1:0] special_res;
+        special_res = info_q.is_zero
+                      ? input_sign_q << FP_WIDTH-1 // signed zero
+                      : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
+
+        // Initialize special result with ones (NaN-box)
+        fmt_special_result[fmt]               = '1;
+        fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
+      end
+    end else begin : inactive_format
+      assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  // Detect special case from source format, I2F casts don't produce a special result
+  assign fp_result_is_special = ~src_is_int_q & (info_q.is_zero |
+                                                 info_q.is_nan |
+                                                 ~info_q.is_boxed);
+
+  // Signalling input NaNs raise invalid flag, otherwise no flags set
+  assign fp_special_status = '{NV: info_q.is_signalling, default: 1'b0};
+
+  // Assemble result according to destination format
+  assign fp_special_result = fmt_special_result[dst_fmt_q2]; // destination format
+
+  // --------------------------
+  // INT Special case handling
+  // --------------------------
+  logic [WIDTH-1:0]   int_special_result;
+  fpnew_pkg::status_t int_special_status;
+  logic               int_result_is_special;
+
+  logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_special_result;
+
+  // Special result construction: per integer format, the saturated value for invalid casts
+  for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_special_results_int
+    // Width in bits of integer format ifmt
+    localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+
+    if (IntFmtConfig[ifmt]) begin : active_format
+      always_comb begin : special_results
+        automatic logic [INT_WIDTH-1:0] special_res;
+
+        // Default is overflow to positive max, which is 2**INT_WIDTH-1 or 2**(INT_WIDTH-1)-1
+        special_res[INT_WIDTH-2:0] = '1;       // alone yields 2**(INT_WIDTH-1)-1
+        special_res[INT_WIDTH-1]   = op_mod_q2; // for unsigned casts yields 2**INT_WIDTH-1
+
+        // Negative special case (except for nans) tie to -max or 0 by inverting all bits
+        if (input_sign_q && !info_q.is_nan)
+          special_res = ~special_res;
+
+        // Initialize special result with sign-extension, then place the INT_WIDTH-bit value
+        ifmt_special_result[ifmt]                = '{default: special_res[INT_WIDTH-1]};
+        ifmt_special_result[ifmt][INT_WIDTH-1:0] = special_res;
+      end
+    end else begin : inactive_format
+      assign ifmt_special_result[ifmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
+  assign int_result_is_special = info_q.is_nan | info_q.is_inf |
+                                 of_before_round | ~info_q.is_boxed |
+                                 (input_sign_q & op_mod_q2 & ~rounded_int_res_zero);
+
+  // All integer special cases are invalid
+  assign int_special_status = '{NV: 1'b1, default: 1'b0};
+
+  // Assemble result according to destination format
+  assign int_special_result = ifmt_special_result[int_fmt_q2]; // destination format
+
+  // -----------------
+  // Result selection
+  // -----------------
+  fpnew_pkg::status_t int_regular_status, fp_regular_status;
+
+  logic [WIDTH-1:0]   fp_result, int_result;
+  fpnew_pkg::status_t fp_status, int_status;
+
+  assign fp_regular_status.NV = src_is_int_q & (of_before_round | of_after_round); // overflow is invalid for I2F casts
+  assign fp_regular_status.DZ = 1'b0; // no divisions
+  assign fp_regular_status.OF = ~src_is_int_q & (~info_q.is_inf & (of_before_round | of_after_round)); // inf casts no OF
+  assign fp_regular_status.UF = uf_after_round & fp_regular_status.NX;
+  assign fp_regular_status.NX = src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f
+            : (| fp_round_sticky_bits) | (~info_q.is_inf & (of_before_round | of_after_round));
+  assign int_regular_status = '{NX: (| int_round_sticky_bits), default: 1'b0};
+
+  assign fp_result  = fp_result_is_special  ? fp_special_result  : fmt_result[dst_fmt_q2];
+  assign fp_status  = fp_result_is_special  ? fp_special_status  : fp_regular_status;
+  assign int_result = int_result_is_special ? int_special_result : rounded_int_res;
+  assign int_status = int_result_is_special ? int_special_status : int_regular_status;
+
+  // Final results for output pipeline
+  logic [WIDTH-1:0]   result_d;
+  fpnew_pkg::status_t status_d;
+  logic               extension_bit;
+
+  // Select output depending on special case detection
+  assign result_d = dst_is_int_q ? int_result : fp_result;
+  assign status_d = dst_is_int_q ? int_status : fp_status;
+
+  // MSB of int result decides extension, otherwise NaN box
+  assign extension_bit = dst_is_int_q ? int_result[WIDTH-1] : 1'b1;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_ext_bit_q;
+  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0]  = result_d;
+  assign out_pipe_status_q[0]  = status_d;
+  assign out_pipe_ext_bit_q[0] = extension_bit;
+  assign out_pipe_tag_q[0]     = mid_pipe_tag_q[NUM_MID_REGS];
+  assign out_pipe_aux_q[0]     = mid_pipe_aux_q[NUM_MID_REGS];
+  assign out_pipe_valid_q[0]   = mid_pipe_valid_q[NUM_MID_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+  // Generate the NUM_OUT_REGS register stages between result selection and the module outputs
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1],  out_pipe_result_q[i],  reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1],  out_pipe_status_q[i],  reg_ena, '0)
+    `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],     out_pipe_tag_q[i],     reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],     out_pipe_aux_q[i],     reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS];
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv b/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv
new file mode 100644
index 0000000000..5e4fab9304
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_classifier.sv
@@ -0,0 +1,72 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_classifier #( // combinational classifier for NumOperands values of one FP format
+  parameter fpnew_pkg::fp_format_e   FpFormat = fpnew_pkg::fp_format_e'(0),
+  parameter int unsigned             NumOperands = 1,
+  // Do not change: derived from FpFormat
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat)
+) (
+  input  logic                [NumOperands-1:0][WIDTH-1:0] operands_i, // raw operand bits
+  input  logic                [NumOperands-1:0]            is_boxed_i, // per-operand boxed flag
+  output fpnew_pkg::fp_info_t [NumOperands-1:0]            info_o      // per-operand class flags
+);
+
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+
+  // Type definition: view of one operand as sign / exponent / mantissa fields
+  typedef struct packed {
+    logic                sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // Iterate through all operands; each one is classified independently
+  for (genvar op = 0; op < int'(NumOperands); op++) begin : gen_num_values
+
+    fp_t value;
+    logic is_boxed;
+    logic is_normal;
+    logic is_inf;
+    logic is_nan;
+    logic is_signalling;
+    logic is_quiet;
+    logic is_zero;
+    logic is_subnormal;
+
+    // ---------------
+    // Classify Input
+    // ---------------
+    always_comb begin : classify_input
+      value         = operands_i[op];
+      is_boxed      = is_boxed_i[op];
+      is_normal     = is_boxed && (value.exponent != '0) && (value.exponent != '1); // exp not all-0/all-1
+      is_zero       = is_boxed && (value.exponent == '0) && (value.mantissa == '0);
+      is_subnormal  = is_boxed && (value.exponent == '0) && !is_zero; // zero exp, non-zero mantissa
+      is_inf        = is_boxed && ((value.exponent == '1) && (value.mantissa == '0));
+      is_nan        = !is_boxed || ((value.exponent == '1) && (value.mantissa != '0)); // unboxed counts as NaN
+      is_signalling = is_boxed && is_nan && (value.mantissa[MAN_BITS-1] == 1'b0); // mantissa MSB clear
+      is_quiet      = is_nan && !is_signalling; // includes unboxed inputs
+      // Assign output for current input
+      info_o[op].is_normal     = is_normal;
+      info_o[op].is_subnormal  = is_subnormal;
+      info_o[op].is_zero       = is_zero;
+      info_o[op].is_inf        = is_inf;
+      info_o[op].is_nan        = is_nan;
+      info_o[op].is_signalling = is_signalling;
+      info_o[op].is_quiet      = is_quiet;
+      info_o[op].is_boxed      = is_boxed;
+    end
+  end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv b/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
new file mode 100644
index 0000000000..1331f5feba
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_divsqrt_multi.sv
@@ -0,0 +1,340 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_divsqrt_multi #( // Handshaked multi-format DIV/SQRT wrapper around div_sqrt_top_mvp
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig  = '1, // format enables: sizes WIDTH, gates FP8 remap
+  // FPU configuration
+  parameter int unsigned             NumPipeRegs = 0,                // total number of pipeline regs
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::AFTER, // placement of those regs
+  parameter type                     TagType     = logic,            // passed from tag_i to tag_o
+  parameter type                     AuxType     = logic,            // passed from aux_i to aux_o
+  // Do not change
+  localparam int unsigned WIDTH       = fpnew_pkg::max_fp_width(FpFmtConfig),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input  logic                        clk_i,
+  input  logic                        rst_ni,
+  // Input signals
+  input  logic [1:0][WIDTH-1:0]       operands_i, // 2 operands
+  input  logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands (not referenced inside this module)
+  input  fpnew_pkg::roundmode_e       rnd_mode_i,
+  input  fpnew_pkg::operation_e       op_i,       // DIV divides, any other value starts a sqrt
+  input  fpnew_pkg::fp_format_e       dst_fmt_i,
+  input  TagType                      tag_i,
+  input  AuxType                      aux_i,
+  // Input Handshake
+  input  logic                        in_valid_i,
+  output logic                        in_ready_o,
+  input  logic                        flush_i,    // clears pipeline valids, kills the unit, FSM -> IDLE
+  // Output signals
+  output logic [WIDTH-1:0]            result_o,
+  output fpnew_pkg::status_t          status_o,
+  output logic                        extension_bit_o,
+  output TagType                      tag_o,
+  output AuxType                      aux_o,
+  // Output handshake
+  output logic                        out_valid_o,
+  input  logic                        out_ready_i,
+  // Indication of valid data in flight
+  output logic                        busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  // Pipelines: NumPipeRegs is split between the input side and the output side of the unit
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Selected pipeline output signals as non-arrays
+  logic [1:0][WIDTH-1:0] operands_q;
+  fpnew_pkg::roundmode_e rnd_mode_q;
+  fpnew_pkg::operation_e op_q;
+  fpnew_pkg::fp_format_e dst_fmt_q;
+  logic                  in_valid_q;
+
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][1:0][WIDTH-1:0]       inp_pipe_operands_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                       inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                       inp_pipe_op_q;
+  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                       inp_pipe_dst_fmt_q;
+  TagType                [0:NUM_INP_REGS]                       inp_pipe_tag_q;
+  AuxType                [0:NUM_INP_REGS]                       inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                       inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_dst_fmt_q[i+1],  inp_pipe_dst_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+  assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign op_q       = inp_pipe_op_q[NUM_INP_REGS];
+  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+  assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS];
+
+  // -----------------
+  // Input processing
+  // -----------------
+  logic [1:0]       divsqrt_fmt;      // format select code driven into the unit
+  logic [1:0][63:0] divsqrt_operands; // those are fixed to 64bit
+  logic             input_is_fp8;     // current operation is FP8 and FP8 is enabled
+
+  // Translate fpnew formats into divsqrt formats
+  always_comb begin : translate_fmt
+    unique case (dst_fmt_q)
+      fpnew_pkg::FP32:    divsqrt_fmt = 2'b00;
+      fpnew_pkg::FP64:    divsqrt_fmt = 2'b01;
+      fpnew_pkg::FP16:    divsqrt_fmt = 2'b10;
+      fpnew_pkg::FP16ALT: divsqrt_fmt = 2'b11;
+      default:            divsqrt_fmt = 2'b10; // maps also FP8 to FP16
+    endcase
+
+    // Only if FP8 is enabled
+    input_is_fp8 = FpFmtConfig[fpnew_pkg::FP8] & (dst_fmt_q == fpnew_pkg::FP8);
+
+    // If FP8 is supported, map it to an FP16 value (undone by the >> 8 on the result side)
+    divsqrt_operands[0] = input_is_fp8 ? operands_q[0] << 8 : operands_q[0];
+    divsqrt_operands[1] = input_is_fp8 ? operands_q[1] << 8 : operands_q[1];
+  end
+
+  // ------------
+  // Control FSM
+  // ------------
+  logic in_ready;               // input handshake with upstream
+  logic div_valid, sqrt_valid;  // input signalling with unit
+  logic unit_ready, unit_done;  // status signals from unit instance
+  logic op_starting;            // high in the cycle a new operation starts
+  logic out_valid, out_ready;   // output handshake with downstream
+  logic hold_result;            // whether to put result into hold register
+  logic data_is_held;           // data in hold register is valid
+  logic unit_busy;              // valid data in flight
+  // FSM states
+  typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
+  fsm_state_e state_q, state_d;
+
+  // Upstream ready comes from sanitization FSM
+  assign inp_pipe_ready[NUM_INP_REGS] = in_ready;
+
+  // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr.
+  assign div_valid   = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i;
+  assign sqrt_valid  = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i;
+  assign op_starting = div_valid | sqrt_valid;
+
+  // FSM to safely apply and receive data from DIVSQRT unit
+  always_comb begin : flag_fsm
+    // Default assignments
+    in_ready     = 1'b0;
+    out_valid    = 1'b0;
+    hold_result  = 1'b0;
+    data_is_held = 1'b0;
+    unit_busy    = 1'b0;
+    state_d      = state_q;
+
+    unique case (state_q)
+      // Waiting for work: ready must follow unit_ready, else an op is consumed while we stay IDLE
+      IDLE: begin
+        in_ready = unit_ready; // accept (and start) work only when the unit can take it
+        if (in_valid_q && unit_ready) begin // New work arrives
+          state_d = BUSY; // go into processing state
+        end
+      end
+      // Operation in progress
+      BUSY: begin
+        unit_busy = 1'b1; // data in flight
+        // If the unit is done with processing
+        if (unit_done) begin
+          out_valid = 1'b1; // try to commit result downstream
+          // If downstream accepts our result
+          if (out_ready) begin
+            state_d = IDLE; // we anticipate going back to idling..
+            if (in_valid_q && unit_ready) begin // ..unless new work comes in
+              in_ready = 1'b1; // we acknowledge the instruction
+              state_d  = BUSY; // and stay busy with it
+            end
+          // Otherwise if downstream is not ready for the result
+          end else begin
+            hold_result = 1'b1; // activate the hold register
+            state_d     = HOLD; // wait for the pipeline to take the data
+          end
+        end
+      end
+      // Waiting with valid result for downstream
+      HOLD: begin
+        unit_busy    = 1'b1; // data in flight
+        data_is_held = 1'b1; // data in hold register is valid
+        out_valid    = 1'b1; // try to commit result downstream
+        // If the result is accepted by downstream
+        if (out_ready) begin
+          state_d = IDLE; // go back to idle..
+          if (in_valid_q && unit_ready) begin // ..unless new work comes in
+            in_ready = 1'b1; // acknowledge the new transaction
+            state_d  = BUSY; // will be busy with the next instruction
+          end
+        end
+      end
+      // fall into idle state otherwise
+      default: state_d = IDLE;
+    endcase
+
+    // Flushing overrides the other actions
+    if (flush_i) begin
+      unit_busy = 1'b0; // data is invalidated
+      out_valid = 1'b0; // cancel any valid data
+      state_d   = IDLE; // go to default state
+    end
+  end
+
+  // FSM status register (asynch active low reset)
+  `FF(state_q, state_d, IDLE)
+
+  // Hold additional information while the operation is in progress
+  logic result_is_fp8_q;
+  TagType result_tag_q;
+  AuxType result_aux_q;
+
+  // Fill the registers every time a valid operation arrives (load FF, active low asynch rst)
+  `FFL(result_is_fp8_q, input_is_fp8,                 op_starting, '0)
+  `FFL(result_tag_q,    inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0)
+  `FFL(result_aux_q,    inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0)
+
+  // -----------------
+  // DIVSQRT instance
+  // -----------------
+  logic [63:0]        unit_result;
+  logic [WIDTH-1:0]   adjusted_result, held_result_q;
+  fpnew_pkg::status_t unit_status, held_status_q;
+
+  div_sqrt_top_mvp i_divsqrt_lei (
+   .Clk_CI           ( clk_i               ),
+   .Rst_RBI          ( rst_ni              ),
+   .Div_start_SI     ( div_valid           ),
+   .Sqrt_start_SI    ( sqrt_valid          ),
+   .Operand_a_DI     ( divsqrt_operands[0] ),
+   .Operand_b_DI     ( divsqrt_operands[1] ),
+   .RM_SI            ( rnd_mode_q          ),
+   .Precision_ctl_SI ( '0                  ),
+   .Format_sel_SI    ( divsqrt_fmt         ),
+   .Kill_SI          ( flush_i             ),
+   .Result_DO        ( unit_result         ),
+   .Fflags_SO        ( unit_status         ),
+   .Ready_SO         ( unit_ready          ),
+   .Done_SO          ( unit_done           )
+  );
+
+  // Adjust result width and fix FP8 (shift back what was shifted in; upper bits are truncated)
+  assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result;
+
+  // The Hold register (load, no reset)
+  `FFLNR(held_result_q, adjusted_result, hold_result, clk_i)
+  `FFLNR(held_status_q, unit_status,     hold_result, clk_i)
+
+  // --------------
+  // Output Select
+  // --------------
+  logic [WIDTH-1:0]   result_d;
+  fpnew_pkg::status_t status_d;
+  // Prioritize hold register data
+  assign result_d = data_is_held ? held_result_q : adjusted_result;
+  assign status_d = data_is_held ? held_status_q : unit_status;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
+  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0]    = result_tag_q;
+  assign out_pipe_aux_q[0]    = result_aux_q;
+  assign out_pipe_valid_q[0]  = out_valid;
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign out_ready = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); // incl. in_valid_i
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_fma.sv b/vendor/pulp-platform/fpnew/src/fpnew_fma.sv
new file mode 100644
index 0000000000..f9fa813bae
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_fma.sv
@@ -0,0 +1,673 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_fma #(
+  parameter fpnew_pkg::fp_format_e   FpFormat    = fpnew_pkg::fp_format_e'(0),
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
+) (
+  input logic                      clk_i,
+  input logic                      rst_ni,
+  // Input signals
+  input logic [2:0][WIDTH-1:0]     operands_i, // 3 operands
+  input logic [2:0]                is_boxed_i, // 3 operands
+  input fpnew_pkg::roundmode_e     rnd_mode_i,
+  input fpnew_pkg::operation_e     op_i,
+  input logic                      op_mod_i,
+  input TagType                    tag_i,
+  input AuxType                    aux_i,
+  // Input Handshake
+  input  logic                     in_valid_i,
+  output logic                     in_ready_o,
+  input  logic                     flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]         result_o,
+  output fpnew_pkg::status_t       status_o,
+  output logic                     extension_bit_o,
+  output TagType                   tag_o,
+  output AuxType                   aux_o,
+  // Output handshake
+  output logic                     out_valid_o,
+  input  logic                     out_ready_i,
+  // Indication of valid data in flight
+  output logic                     busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+  localparam int unsigned BIAS     = fpnew_pkg::bias(FpFormat);
+  // Precision bits 'p' include the implicit bit
+  localparam int unsigned PRECISION_BITS = MAN_BITS + 1;
+  // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
+  localparam int unsigned LOWER_SUM_WIDTH  = 2 * PRECISION_BITS + 3;
+  localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
+  // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
+  // datapath leakage. This is either given by the exponent bits or the width of the LZC result.
+  // In most reasonable FP formats the internal exponent will be wider than the LZC result.
+  localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH));
+  // Shift amount width: maximum internal mantissa size is 3p+3 bits
+  localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3);
+  // Pipelines
+  localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+                          ? NumPipeRegs
+                          : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                             ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+                             : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 3) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ----------------
+  // Type definition
+  // ----------------
+  typedef struct packed {
+    logic                sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q;
+  logic                  [0:NUM_INP_REGS][2:0]            inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                 inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                 inp_pipe_op_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_op_mod_q;
+  TagType                [0:NUM_INP_REGS]                 inp_pipe_tag_q;
+  AuxType                [0:NUM_INP_REGS]                 inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+
+  // -----------------
+  // Input processing
+  // -----------------
+  fpnew_pkg::fp_info_t [2:0] info_q;
+
+  // Classify input
+  fpnew_classifier #(
+    .FpFormat    ( FpFormat ),
+    .NumOperands ( 3        )
+    ) i_class_inputs (
+    .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
+    .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
+    .info_o     ( info_q                            )
+  );
+
+  fp_t                 operand_a, operand_b, operand_c;
+  fpnew_pkg::fp_info_t info_a,    info_b,    info_c;
+
+  // Operation selection and operand adjustment
+  // | \c op_q  | \c op_mod_q | Operation Adjustment
+  // |:--------:|:-----------:|---------------------
+  // | FMADD    | \c 0        | FMADD: none
+  // | FMADD    | \c 1        | FMSUB: Invert sign of operand C
+  // | FNMSUB   | \c 0        | FNMSUB: Invert sign of operand A
+  // | FNMSUB   | \c 1        | FNMADD: Invert sign of operands A and C
+  // | ADD      | \c 0        | ADD: Set operand A to +1.0
+  // | ADD      | \c 1        | SUB: Set operand A to +1.0, invert sign of operand C
+  // | MUL      | \c 0        | MUL: Set operand C to +0.0
+  // | *others* | \c -        | *invalid*
+  // \note \c op_mod_q always inverts the sign of the addend.
+  always_comb begin : op_select
+
+    // Default assignments - packing-order-agnostic
+    operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
+    operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
+    operand_c = inp_pipe_operands_q[NUM_INP_REGS][2];
+    info_a    = info_q[0];
+    info_b    = info_q[1];
+    info_c    = info_q[2];
+
+    // op_mod_q inverts sign of operand C
+    operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+
+    unique case (inp_pipe_op_q[NUM_INP_REGS])
+      fpnew_pkg::FMADD:  ; // do nothing
+      fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product
+      fpnew_pkg::ADD: begin // Set multiplicand to +1
+        operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0};
+        info_a    = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value.
+      end
+      fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN)
+        operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
+        info_c    = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value.
+      end
+      default: begin // propagate don't cares
+        operand_a  = '{default: fpnew_pkg::DONT_CARE};
+        operand_b  = '{default: fpnew_pkg::DONT_CARE};
+        operand_c  = '{default: fpnew_pkg::DONT_CARE};
+        info_a     = '{default: fpnew_pkg::DONT_CARE};
+        info_b     = '{default: fpnew_pkg::DONT_CARE};
+        info_c     = '{default: fpnew_pkg::DONT_CARE};
+      end
+    endcase
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  logic any_operand_inf;
+  logic any_operand_nan;
+  logic signalling_nan;
+  logic effective_subtraction;
+  logic tentative_sign;
+
+  // Reduction for special case handling
+  assign any_operand_inf = (| {info_a.is_inf,        info_b.is_inf,        info_c.is_inf});
+  assign any_operand_nan = (| {info_a.is_nan,        info_b.is_nan,        info_c.is_nan});
+  assign signalling_nan  = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
+  // Effective subtraction in FMA occurs when product and addend signs differ
+  assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
+  // The tentative sign of the FMA shall be the sign of the product
+  assign tentative_sign = operand_a.sign ^ operand_b.sign;
+
+  // ----------------------
+  // Special case handling
+  // ----------------------
+  fp_t                special_result;
+  fpnew_pkg::status_t special_status;
+  logic               result_is_special;
+
+  always_comb begin : special_cases
+    // Default assignments
+    special_result    = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN
+    special_status    = '0;
+    result_is_special = 1'b0;
+
+    // Handle potentially mixed nan & infinity input => important for the case where infinity and
+    // zero are multiplied and added to a qnan.
+    // RISC-V mandates raising the NV exception in these cases:
+    // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs)
+    if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
+      result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN
+      special_status.NV = 1'b1; // invalid operation
+    // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP
+    end else if (any_operand_nan) begin
+      result_is_special = 1'b1;           // bypass FMA, output is the canonical qNaN
+      special_status.NV = signalling_nan; // raise the invalid operation flag if signalling
+    // Special cases involving infinity
+    end else if (any_operand_inf) begin
+      result_is_special = 1'b1; // bypass FMA
+      // Effective addition of opposite infinities (±inf - ±inf) is invalid!
+      if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
+        special_status.NV = 1'b1; // invalid operation
+      // Handle cases where output will be inf because of inf product input
+      else if (info_a.is_inf || info_b.is_inf) begin
+        // Result is infinity with the sign of the product
+        special_result    = '{sign: operand_a.sign ^ operand_b.sign, exponent: '1, mantissa: '0};
+      // Handle cases where the addend is inf
+      end else if (info_c.is_inf) begin
+        // Result is infinity with sign of the addend (= operand_c)
+        special_result    = '{sign: operand_c.sign, exponent: '1, mantissa: '0};
+      end
+    end
+  end
+
+  // ---------------------------
+  // Initial exponent data path
+  // ---------------------------
+  logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
+  logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
+  logic signed [EXP_WIDTH-1:0] tentative_exponent;
+
+  // Zero-extend exponents into signed container - implicit width extension
+  assign exponent_a = signed'({1'b0, operand_a.exponent});
+  assign exponent_b = signed'({1'b0, operand_b.exponent});
+  assign exponent_c = signed'({1'b0, operand_c.exponent});
+
+  // Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx)
+  // with Ex the encoded exponent and nx the implicit bit. Internal exponents stay biased.
+  assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm
+  // Biased product exponent is the sum of encoded exponents minus the bias.
+  assign exponent_product = (info_a.is_zero || info_b.is_zero)
+                            ? 2 - signed'(BIAS) // in case the product is zero, set minimum exp.
+                            : signed'(exponent_a + info_a.is_subnormal
+                                      + exponent_b + info_b.is_subnormal
+                                      - signed'(BIAS));
+  // Exponent difference is the addend exponent minus the product exponent
+  assign exponent_difference = exponent_addend - exponent_product;
+  // The tentative exponent will be the larger of the product or addend exponent
+  assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
+
+  // Shift amount for addend based on exponents (unsigned as only right shifts)
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;
+
+  always_comb begin : addend_shift_amount
+    // Product-anchored case, saturated shift (addend is only in the sticky bit)
+    if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1))
+      addend_shamt = 3 * PRECISION_BITS + 4;
+    // Addend and product will have mutual bits to add
+    else if (exponent_difference <= signed'(PRECISION_BITS + 2))
+      addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
+    // Addend-anchored case, saturated shift (product is only in the sticky bit)
+    else
+      addend_shamt = 0;
+  end
+
+  // ------------------
+  // Product data path
+  // ------------------
+  logic [PRECISION_BITS-1:0]   mantissa_a, mantissa_b, mantissa_c;
+  logic [2*PRECISION_BITS-1:0] product;             // the p*p product is 2p bits wide
+  logic [3*PRECISION_BITS+3:0] product_shifted;     // addends are 3p+4 bit wide (including G/R)
+
+  // Prepend the implicit bit to each mantissa: 1 for normal operands, 0 otherwise
+  assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
+  assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
+  assign mantissa_c = {info_c.is_normal, operand_c.mantissa};
+
+  // Mantissa multiplier (a*b)
+  assign product = mantissa_a * mantissa_b;
+
+  // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky:
+  // | 000...000 | product | RS |
+  //  <-  p+2  -> <-  2p -> < 2>
+  assign product_shifted = product << 2; // constant left shift by 2 makes room for the R/S bits
+
+  // -----------------
+  // Addend data path
+  // -----------------
+  logic [3*PRECISION_BITS+3:0] addend_after_shift;  // upper 3p+4 bits are needed to go on
+  logic [PRECISION_BITS-1:0]   addend_sticky_bits;  // up to p bits of shifted addend are sticky
+  logic                        sticky_before_add;   // they are compressed into a single sticky bit
+  logic [3*PRECISION_BITS+3:0] addend_shifted;      // addends are 3p+4 bit wide (including G/R)
+  logic                        inject_carry_in;     // inject carry for subtractions if needed
+
+  // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits
+  // are shifted out and compressed into a sticky bit.
+  // BEFORE THE SHIFT:
+  // | mantissa_c | 000..000 |
+  //  <-    p   -> <- 3p+4 ->
+  // AFTER THE SHIFT:
+  // | 000..........000 | mantissa_c | 000...............0GR |  sticky bits  |
+  //  <- addend_shamt -> <-    p   -> <- 2p+4-addend_shamt -> <-  up to p  ->
+  assign {addend_after_shift, addend_sticky_bits} =
+      (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;
+
+  assign sticky_before_add     = (| addend_sticky_bits);
+  // assign addend_after_shift[0] = sticky_before_add;
+
+  // In case of a subtraction, the addend is inverted; the +1 carry-in is only injected if no sticky bits are set
+  assign addend_shifted  = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
+  assign inject_carry_in = effective_subtraction & ~sticky_before_add;
+
+  // ------
+  // Adder
+  // ------
+  logic [3*PRECISION_BITS+4:0] sum_raw;   // added one bit for the carry
+  logic                        sum_carry; // observe carry bit from sum for sign fixing
+  logic [3*PRECISION_BITS+3:0] sum;       // discard carry as sum won't overflow
+  logic                        final_sign;
+
+  // Mantissa adder (ab+c). In normal addition, it cannot overflow.
+  assign sum_raw = product_shifted + addend_shifted + inject_carry_in;
+  assign sum_carry = sum_raw[3*PRECISION_BITS+4];
+
+  // Complement negative sum (can only happen in subtraction -> overflows for positive results)
+  assign sum        = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;
+
+  // In case of a mispredicted subtraction result, do a sign flip (additions keep the tentative sign)
+  assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
+                      ? 1'b1
+                      : (effective_subtraction ? 1'b0 : tentative_sign);
+
+  // ---------------
+  // Internal pipeline
+  // ---------------
+  // Pipeline output signals as non-arrays
+  logic                          effective_subtraction_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_product_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_difference_q;
+  logic signed [EXP_WIDTH-1:0]   tentative_exponent_q;
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
+  logic                          sticky_before_add_q;
+  logic [3*PRECISION_BITS+3:0]   sum_q;
+  logic                          final_sign_q;
+  fpnew_pkg::roundmode_e         rnd_mode_q;
+  logic                          result_is_special_q;
+  fp_t                           special_result_q;
+  fpnew_pkg::status_t            special_status_q;
+  // Internal pipeline signals, index i holds signal after i register stages (index 0 is unregistered)
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_eff_sub_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_prod_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_diff_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_tent_exp_q;
+  logic                  [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_sticky_q;
+  logic                  [0:NUM_MID_REGS][3*PRECISION_BITS+3:0]   mid_pipe_sum_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_final_sign_q;
+  fpnew_pkg::roundmode_e [0:NUM_MID_REGS]                         mid_pipe_rnd_mode_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_res_is_spec_q;
+  fp_t                   [0:NUM_MID_REGS]                         mid_pipe_spec_res_q;
+  fpnew_pkg::status_t    [0:NUM_MID_REGS]                         mid_pipe_spec_stat_q;
+  TagType                [0:NUM_MID_REGS]                         mid_pipe_tag_q;
+  AuxType                [0:NUM_MID_REGS]                         mid_pipe_aux_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_MID_REGS] mid_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from upstream logic and the input pipeline
+  assign mid_pipe_eff_sub_q[0]     = effective_subtraction;
+  assign mid_pipe_exp_prod_q[0]    = exponent_product;
+  assign mid_pipe_exp_diff_q[0]    = exponent_difference;
+  assign mid_pipe_tent_exp_q[0]    = tentative_exponent;
+  assign mid_pipe_add_shamt_q[0]   = addend_shamt;
+  assign mid_pipe_sticky_q[0]      = sticky_before_add;
+  assign mid_pipe_sum_q[0]         = sum;
+  assign mid_pipe_final_sign_q[0]  = final_sign;
+  assign mid_pipe_rnd_mode_q[0]    = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign mid_pipe_res_is_spec_q[0] = result_is_special;
+  assign mid_pipe_spec_res_q[0]    = special_result;
+  assign mid_pipe_spec_stat_q[0]   = special_status;
+  assign mid_pipe_tag_q[0]         = inp_pipe_tag_q[NUM_INP_REGS];
+  assign mid_pipe_aux_q[0]         = inp_pipe_aux_q[NUM_INP_REGS];
+  assign mid_pipe_valid_q[0]       = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to input pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+
+  // Generate the register stages (none are generated when NUM_MID_REGS is 0)
+  for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(mid_pipe_eff_sub_q[i+1],     mid_pipe_eff_sub_q[i],     reg_ena, '0)
+    `FFL(mid_pipe_exp_prod_q[i+1],    mid_pipe_exp_prod_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_exp_diff_q[i+1],    mid_pipe_exp_diff_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_tent_exp_q[i+1],    mid_pipe_tent_exp_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_add_shamt_q[i+1],   mid_pipe_add_shamt_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_sticky_q[i+1],      mid_pipe_sticky_q[i],      reg_ena, '0)
+    `FFL(mid_pipe_sum_q[i+1],         mid_pipe_sum_q[i],         reg_ena, '0)
+    `FFL(mid_pipe_final_sign_q[i+1],  mid_pipe_final_sign_q[i],  reg_ena, '0)
+    `FFL(mid_pipe_rnd_mode_q[i+1],    mid_pipe_rnd_mode_q[i],    reg_ena, fpnew_pkg::RNE)
+    `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
+    `FFL(mid_pipe_spec_res_q[i+1],    mid_pipe_spec_res_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_spec_stat_q[i+1],   mid_pipe_spec_stat_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_tag_q[i+1],         mid_pipe_tag_q[i],         reg_ena, TagType'('0))
+    `FFL(mid_pipe_aux_q[i+1],         mid_pipe_aux_q[i],         reg_ena, AuxType'('0))
+  end
+  // Output stage: expose the last mid-pipeline stage (index NUM_MID_REGS) as plain signals for later use
+  assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
+  assign exponent_product_q      = mid_pipe_exp_prod_q[NUM_MID_REGS];
+  assign exponent_difference_q   = mid_pipe_exp_diff_q[NUM_MID_REGS];
+  assign tentative_exponent_q    = mid_pipe_tent_exp_q[NUM_MID_REGS];
+  assign addend_shamt_q          = mid_pipe_add_shamt_q[NUM_MID_REGS];
+  assign sticky_before_add_q     = mid_pipe_sticky_q[NUM_MID_REGS];
+  assign sum_q                   = mid_pipe_sum_q[NUM_MID_REGS];
+  assign final_sign_q            = mid_pipe_final_sign_q[NUM_MID_REGS];
+  assign rnd_mode_q              = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+  assign result_is_special_q     = mid_pipe_res_is_spec_q[NUM_MID_REGS];
+  assign special_result_q        = mid_pipe_spec_res_q[NUM_MID_REGS];
+  assign special_status_q        = mid_pipe_spec_stat_q[NUM_MID_REGS];
+
+  // --------------
+  // Normalization
+  // --------------
+  logic        [LOWER_SUM_WIDTH-1:0]  sum_lower;              // lower 2p+3 bits of sum are searched
+  logic        [LZC_RESULT_WIDTH-1:0] leading_zero_count;     // the number of leading zeroes
+  logic signed [LZC_RESULT_WIDTH:0]   leading_zero_count_sgn; // signed leading-zero count
+  logic                               lzc_zeroes;             // in case only zeroes found
+
+  logic        [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
+  logic signed [EXP_WIDTH-1:0]          normalized_exponent;
+
+  logic [3*PRECISION_BITS+4:0] sum_shifted;       // result after first normalization shift
+  logic [PRECISION_BITS:0]     final_mantissa;    // final mantissa before rounding with round bit
+  logic [2*PRECISION_BITS+2:0] sum_sticky_bits;   // remaining 2p+3 sticky bits after normalization
+  logic                        sticky_after_norm; // sticky bit after normalization
+
+  logic signed [EXP_WIDTH-1:0] final_exponent;
+
+  assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];
+
+  // Leading zero counter for cancellations (searches sum_lower only)
+  lzc #(
+    .WIDTH ( LOWER_SUM_WIDTH ),
+    .MODE  ( 1               ) // MODE = 1 counts leading zeroes
+  ) i_lzc (
+    .in_i    ( sum_lower          ),
+    .cnt_o   ( leading_zero_count ),
+    .empty_o ( lzc_zeroes         )
+  );
+
+  assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count}); // zero-extend so the signed count is non-negative
+
+  // Normalization shift amount based on exponents and LZC (unsigned as only left shifts)
+  always_comb begin : norm_shift_amount
+    // Product-anchored case or cancellations require LZC
+    if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
+      // Normal result (exponent after removing the leading zeroes is not negative, sum is not zero)
+      if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
+        // Undo initial product shift, remove the counted zeroes
+        norm_shamt          = PRECISION_BITS + 2 + leading_zero_count;
+        normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
+      // Subnormal result
+      end else begin
+        // Cap the shift distance to align mantissa with minimum exponent
+        norm_shamt          = unsigned'(signed'(PRECISION_BITS) + 2 + exponent_product_q);
+        normalized_exponent = 0; // subnormals encoded as 0
+      end
+    // Addend-anchored case: exponent difference > 0, and > 2 if subtracting
+    end else begin
+      norm_shamt          = addend_shamt_q; // Undo the initial shift
+      normalized_exponent = tentative_exponent_q;
+    end
+  end
+
+  // Do the large normalization shift
+  assign sum_shifted       = sum_q << norm_shamt;
+
+  // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left
+  // or right of the (non-carry) MSB of the sum.
+  always_comb begin : small_norm
+    // Default assignment, discarding carry bit
+    {final_mantissa, sum_sticky_bits} = sum_shifted;
+    final_exponent                    = normalized_exponent;
+
+    // The normalized sum has overflown, align right and fix exponent
+    if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit
+      {final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
+      final_exponent                    = normalized_exponent + 1;
+    // The normalized sum is normal, nothing to do
+    end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB
+      // do nothing
+    // The normalized sum is still denormal, align left - unless the result is already subnormal
+    end else if (normalized_exponent > 1) begin
+      {final_mantissa, sum_sticky_bits} = sum_shifted << 1;
+      final_exponent                    = normalized_exponent - 1;
+    // Otherwise we're denormal
+    end else begin
+      final_exponent = '0;
+    end
+  end
+
+  // Update the sticky bit with the shifted-out bits
+  assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
+
+  // ----------------------------
+  // Rounding and classification
+  // ----------------------------
+  logic                         pre_round_sign;
+  logic [EXP_BITS-1:0]          pre_round_exponent;
+  logic [MAN_BITS-1:0]          pre_round_mantissa;
+  logic [EXP_BITS+MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding
+  logic [1:0]                   round_sticky_bits;
+
+  logic of_before_round, of_after_round; // overflow
+  logic uf_before_round, uf_after_round; // underflow
+  logic result_zero;
+
+  logic                         rounded_sign;
+  logic [EXP_BITS+MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding
+
+  // Classification before round. RISC-V mandates checking underflow AFTER rounding!
+  assign of_before_round = final_exponent >= 2**(EXP_BITS)-1; // infinity exponent is all ones
+  assign uf_before_round = final_exponent == 0;               // exponent for subnormals capped to 0
+
+  // Assemble result before rounding. In case of overflow, the largest normal value is set.
+  assign pre_round_sign     = final_sign_q;
+  assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : unsigned'(final_exponent[EXP_BITS-1:0]);
+  assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[MAN_BITS:1]; // bit 0 is R bit
+  assign pre_round_abs      = {pre_round_exponent, pre_round_mantissa};
+
+  // In case of overflow, the round and sticky bits are set for proper rounding
+  assign round_sticky_bits  = (of_before_round) ? 2'b11 : {final_mantissa[0], sticky_after_norm};
+
+  // Perform the rounding
+  fpnew_rounding #(
+    .AbsWidth ( EXP_BITS + MAN_BITS )
+  ) i_fpnew_rounding (
+    .abs_value_i             ( pre_round_abs           ),
+    .sign_i                  ( pre_round_sign          ),
+    .round_sticky_bits_i     ( round_sticky_bits       ),
+    .rnd_mode_i              ( rnd_mode_q              ),
+    .effective_subtraction_i ( effective_subtraction_q ),
+    .abs_rounded_o           ( rounded_abs             ),
+    .sign_o                  ( rounded_sign            ),
+    .exact_zero_o            ( result_zero             )
+  );
+
+  // Classification after rounding, based on the exponent field of the rounded value
+  assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0
+  assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones
+
+  // -----------------
+  // Result selection
+  // -----------------
+  logic [WIDTH-1:0]     regular_result;
+  fpnew_pkg::status_t   regular_status;
+
+  // Assemble regular result and its status flags
+  assign regular_result    = {rounded_sign, rounded_abs};
+  assign regular_status.NV = 1'b0; // only valid cases are handled in regular path
+  assign regular_status.DZ = 1'b0; // no divisions
+  assign regular_status.OF = of_before_round | of_after_round;   // rounding can introduce overflow
+  assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF
+  assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
+
+  // Final results for output pipeline
+  fp_t                result_d;
+  fpnew_pkg::status_t status_d;
+
+  // Select output depending on special case detection (special path overrides the regular result)
+  assign result_d = result_is_special_q ? special_result_q : regular_result;
+  assign status_d = result_is_special_q ? special_status_q : regular_status;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  fp_t                [0:NUM_OUT_REGS] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
+  TagType             [0:NUM_OUT_REGS] out_pipe_tag_q;
+  AuxType             [0:NUM_OUT_REGS] out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS] out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from result selection and the mid pipeline
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0]    = mid_pipe_tag_q[NUM_MID_REGS];
+  assign out_pipe_aux_q[0]    = mid_pipe_aux_q[NUM_MID_REGS];
+  assign out_pipe_valid_q[0]  = mid_pipe_valid_q[NUM_MID_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+  // Generate the register stages (none are generated when NUM_OUT_REGS is 0)
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs from the last output-pipeline stage
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); // any valid bit in any pipe
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv b/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv
new file mode 100644
index 0000000000..6b52237fa9
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_fma_multi.sv
@@ -0,0 +1,820 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_fma_multi #(
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig = '1,
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+  // Do not change: derived from the parameters above
+  localparam int unsigned WIDTH       = fpnew_pkg::max_fp_width(FpFmtConfig),
+  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input  logic                        clk_i,
+  input  logic                        rst_ni,
+  // Input signals
+  input  logic [2:0][WIDTH-1:0]       operands_i, // 3 operands
+  input  logic [NUM_FORMATS-1:0][2:0] is_boxed_i, // is_boxed flags of the 3 operands, one set per format
+  input  fpnew_pkg::roundmode_e       rnd_mode_i,
+  input  fpnew_pkg::operation_e       op_i,
+  input  logic                        op_mod_i,
+  input  fpnew_pkg::fp_format_e       src_fmt_i, // format of the multiplicands
+  input  fpnew_pkg::fp_format_e       dst_fmt_i, // format of the addend and result
+  input  TagType                      tag_i,
+  input  AuxType                      aux_i,
+  // Input Handshake
+  input  logic                        in_valid_i,
+  output logic                        in_ready_o,
+  input  logic                        flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]            result_o,
+  output fpnew_pkg::status_t          status_o,
+  output logic                        extension_bit_o,
+  output TagType                      tag_o,
+  output AuxType                      aux_o,
+  // Output handshake
+  output logic                        out_valid_o,
+  input  logic                        out_ready_i,
+  // Indication of valid data in flight
+  output logic                        busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  // The super-format that can hold all formats
+  localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
+
+  localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
+  localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
+
+  // Precision bits 'p' include the implicit bit
+  localparam int unsigned PRECISION_BITS = SUPER_MAN_BITS + 1;
+  // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
+  localparam int unsigned LOWER_SUM_WIDTH  = 2 * PRECISION_BITS + 3;
+  localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
+  // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
+  // datapath leakage. This is either given by the exponent bits or the width of the LZC result.
+  // In most reasonable FP formats the internal exponent will be wider than the LZC result.
+  localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH);
+  // Shift amount width: internal mantissa is 3p+4 bits, so amounts 0..3p+4 need $clog2(3p+5) bits
+  localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5);
+  // Pipelines
+  localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
+                          ? NumPipeRegs
+                          : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                             ? ((NumPipeRegs + 2) / 3) // First to get distributed regs
+                             : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 3) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ----------------
+  // Type definition: FP value with super-format field widths (sign, exponent, mantissa)
+  // ----------------
+  typedef struct packed {
+    logic                      sign;
+    logic [SUPER_EXP_BITS-1:0] exponent;
+    logic [SUPER_MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Selected pipeline output signals as non-arrays
+  logic [2:0][WIDTH-1:0] operands_q;
+  fpnew_pkg::fp_format_e src_fmt_q;
+  fpnew_pkg::fp_format_e dst_fmt_q;
+
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][2:0][WIDTH-1:0]       inp_pipe_operands_q;
+  logic                  [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                       inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                       inp_pipe_op_q;
+  logic                  [0:NUM_INP_REGS]                       inp_pipe_op_mod_q;
+  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                       inp_pipe_src_fmt_q;
+  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                       inp_pipe_dst_fmt_q;
+  TagType                [0:NUM_INP_REGS]                       inp_pipe_tag_q;
+  AuxType                [0:NUM_INP_REGS]                       inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                       inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_src_fmt_q[0]  = src_fmt_i;
+  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages (none are generated when NUM_INP_REGS is 0)
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_src_fmt_q[i+1],  inp_pipe_src_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_dst_fmt_q[i+1],  inp_pipe_dst_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+  // Output stage: expose the last input-pipeline stage (index NUM_INP_REGS) as plain signals for later use
+  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
+  assign src_fmt_q  = inp_pipe_src_fmt_q[NUM_INP_REGS];
+  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
+
+  // -----------------
+  // Input processing
+  // -----------------
+  logic        [NUM_FORMATS-1:0][2:0]                     fmt_sign;
+  logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent;
+  logic        [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa;
+
+  fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2:0] info_q;
+
+  // FP Input initialization: per format, classify the operands and extract sign/exponent/mantissa
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      logic [2:0][FP_WIDTH-1:0] trimmed_ops;
+
+      // Classify input
+      fpnew_classifier #(
+        .FpFormat    ( fpnew_pkg::fp_format_e'(fmt) ),
+        .NumOperands ( 3                            )
+      ) i_fpnew_classifier (
+        .operands_i ( trimmed_ops                            ),
+        .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt] ),
+        .info_o     ( info_q[fmt]                            )
+      );
+      for (genvar op = 0; op < 3; op++) begin : gen_operands
+        assign trimmed_ops[op]       = operands_q[op][FP_WIDTH-1:0];
+        assign fmt_sign[fmt][op]     = operands_q[op][FP_WIDTH-1];
+        assign fmt_exponent[fmt][op] = signed'({1'b0, operands_q[op][MAN_BITS+:EXP_BITS]});
+        assign fmt_mantissa[fmt][op] = {info_q[fmt][op].is_normal, operands_q[op][MAN_BITS-1:0]} <<
+                                       (SUPER_MAN_BITS - MAN_BITS); // move to left of mantissa; NOTE(review): is_normal bit looks shifted out of the target - confirm
+      end
+    end else begin : inactive_format
+      assign info_q[fmt]                 = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_sign[fmt]               = fpnew_pkg::DONT_CARE;             // format disabled
+      assign fmt_exponent[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_mantissa[fmt]           = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  fp_t                 operand_a, operand_b, operand_c;
+  fpnew_pkg::fp_info_t info_a,    info_b,    info_c;
+
+  // Operation selection and operand adjustment
+  // | \c op_q  | \c op_mod_q | Operation Adjustment
+  // |:--------:|:-----------:|---------------------
+  // | FMADD    | \c 0        | FMADD: none
+  // | FMADD    | \c 1        | FMSUB: Invert sign of operand C
+  // | FNMSUB   | \c 0        | FNMSUB: Invert sign of operand A
+  // | FNMSUB   | \c 1        | FNMADD: Invert sign of operands A and C
+  // | ADD      | \c 0        | ADD: Set operand A to +1.0
+  // | ADD      | \c 1        | SUB: Set operand A to +1.0, invert sign of operand C
+  // | MUL      | \c 0        | MUL: Set operand C to -0.0
+  // | *others* | \c -        | *invalid*
+  // \note \c op_mod_q always inverts the sign of the addend.
+  always_comb begin : op_select
+
+    // Default assignments - packing-order-agnostic (A and B use the source format, C the destination format)
+    operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]};
+    operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]};
+    operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]};
+    info_a    = info_q[src_fmt_q][0];
+    info_b    = info_q[src_fmt_q][1];
+    info_c    = info_q[dst_fmt_q][2];
+
+    // op_mod_q inverts sign of operand C
+    operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+
+    unique case (inp_pipe_op_q[NUM_INP_REGS])
+      fpnew_pkg::FMADD:  ; // do nothing
+      fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product
+      fpnew_pkg::ADD: begin // Set multiplicand to +1
+        operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0};
+        info_a    = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value.
+      end
+      fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN)
+        operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
+        info_c    = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value.
+      end
+      default: begin // invalid operation: propagate don't cares
+        operand_a  = '{default: fpnew_pkg::DONT_CARE};
+        operand_b  = '{default: fpnew_pkg::DONT_CARE};
+        operand_c  = '{default: fpnew_pkg::DONT_CARE};
+        info_a     = '{default: fpnew_pkg::DONT_CARE};
+        info_b     = '{default: fpnew_pkg::DONT_CARE};
+        info_c     = '{default: fpnew_pkg::DONT_CARE};
+      end
+    endcase
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  logic any_operand_inf;       // at least one of the adjusted operands a, b, c is infinite
+  logic any_operand_nan;       // at least one of the adjusted operands a, b, c is a NaN
+  logic signalling_nan;        // at least one of the adjusted operands a, b, c is signalling
+  logic effective_subtraction; // signs of product (a*b) and addend (c) differ
+  logic tentative_sign;        // sign of the product, may be corrected after the addition
+
+  // OR-reduction of the per-operand class flags, consumed by the special case handling
+  assign any_operand_inf = (| {info_a.is_inf,        info_b.is_inf,        info_c.is_inf});
+  assign any_operand_nan = (| {info_a.is_nan,        info_b.is_nan,        info_c.is_nan});
+  assign signalling_nan  = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
+  // Effective subtraction in FMA occurs when product and addend signs differ
+  assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
+  // The tentative sign of the FMA shall be the sign of the product
+  assign tentative_sign = operand_a.sign ^ operand_b.sign;
+
+  // ----------------------
+  // Special case handling
+  // ----------------------
+  logic [WIDTH-1:0]   special_result;    // special result selected for the destination format
+  fpnew_pkg::status_t special_status;    // status flags accompanying the special result
+  logic               result_is_special; // bypass the regular data path with special_result
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0]    fmt_special_result;    // per format, NaN-boxed to WIDTH
+  fpnew_pkg::status_t [NUM_FORMATS-1:0] fmt_special_status;    // per format
+  logic [NUM_FORMATS-1:0]               fmt_result_is_special; // per format
+
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1;                // all-ones exponent
+    localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1);   // only the mantissa MSB set
+    localparam logic [MAN_BITS-1:0] ZERO_MANTISSA = '0;                // used to encode infinity
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : special_results
+        logic [FP_WIDTH-1:0] special_res;
+
+        // Default assignment
+        special_res                = {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
+        fmt_special_status[fmt]    = '0;
+        fmt_result_is_special[fmt] = 1'b0;
+
+        // Mixed NaN & infinity inputs: checked first so (inf * 0) + qNaN still raises NV.
+        // RISC-V mandates raising the NV exception in these cases:
+        // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs)
+        if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
+          fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN
+          fmt_special_status[fmt].NV = 1'b1; // invalid operation
+        // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP
+        end else if (any_operand_nan) begin
+          fmt_result_is_special[fmt] = 1'b1;           // bypass FMA, output is the canonical qNaN
+          fmt_special_status[fmt].NV = signalling_nan; // raise the invalid operation flag if signalling
+        // Special cases involving infinity
+        end else if (any_operand_inf) begin
+          fmt_result_is_special[fmt] = 1'b1; // bypass FMA
+          // Effective addition of opposite infinities (±inf - ±inf) is invalid!
+          if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
+            fmt_special_status[fmt].NV = 1'b1; // invalid operation, result stays the default qNaN
+          // Handle cases where output will be inf because of inf product input
+          else if (info_a.is_inf || info_b.is_inf) begin
+            // Result is infinity with the sign of the product
+            special_res = {operand_a.sign ^ operand_b.sign, QNAN_EXPONENT, ZERO_MANTISSA};
+          // Handle cases where the addend is inf
+          end else if (info_c.is_inf) begin
+            // Result is infinity with sign of the addend (= operand_c)
+            special_res = {operand_c.sign, QNAN_EXPONENT, ZERO_MANTISSA};
+          end
+        end
+        // Initialize special result with ones (NaN-box)
+        fmt_special_result[fmt]               = '1;
+        fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
+      end
+    end else begin : inactive_format // drive every per-format signal so none is left floating
+      assign fmt_special_result[fmt]    = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_special_status[fmt]    = '0;
+      assign fmt_result_is_special[fmt] = 1'b0;
+    end
+  end
+
+  // Special case flag, selected by destination format (identical for all active formats)
+  assign result_is_special = fmt_result_is_special[dst_fmt_q]; // they're all the same
+  // Signalling input NaNs raise invalid flag, otherwise no flags set
+  assign special_status = fmt_special_status[dst_fmt_q];
+  // Assemble result according to destination format
+  assign special_result = fmt_special_result[dst_fmt_q]; // destination format
+
+  // ---------------------------
+  // Initial exponent data path
+  // ---------------------------
+  logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
+  logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
+  logic signed [EXP_WIDTH-1:0] tentative_exponent;
+
+  // Zero-extend encoded exponents into the signed container (width extended implicitly on assign)
+  assign exponent_a = signed'({1'b0, operand_a.exponent});
+  assign exponent_b = signed'({1'b0, operand_b.exponent});
+  assign exponent_c = signed'({1'b0, operand_c.exponent});
+
+  // Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx)
+  // with Ex the encoded exponent and nx the implicit bit. Internal exponents are biased to dst fmt.
+  assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // +1 if not normal
+  // Biased product exponent is the sum of encoded exponents minus the bias.
+  assign exponent_product = (info_a.is_zero || info_b.is_zero) // in case the product is zero, set minimum exp.
+                            ? 2 - signed'(fpnew_pkg::bias(dst_fmt_q))
+                            : signed'(exponent_a + info_a.is_subnormal
+                                      + exponent_b + info_b.is_subnormal
+                                      - 2*signed'(fpnew_pkg::bias(src_fmt_q))
+                                      + signed'(fpnew_pkg::bias(dst_fmt_q))); // rebias for dst fmt
+  // Exponent difference is the addend exponent minus the product exponent
+  assign exponent_difference = exponent_addend - exponent_product;
+  // The tentative exponent will be the larger of the product or addend exponent
+  assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
+
+  // Right-shift amount for the addend, derived from exponent_difference (unsigned: right shifts only)
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;
+
+  always_comb begin : addend_shift_amount
+    // Product-anchored case, saturated shift (addend is only in the sticky bit)
+    if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1))
+      addend_shamt = 3 * PRECISION_BITS + 4;
+    // Addend and product will have mutual bits to add: shift by p + 3 - exponent_difference
+    else if (exponent_difference <= signed'(PRECISION_BITS + 2))
+      addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
+    // Addend-anchored case, saturated shift (product is only in the sticky bit)
+    else
+      addend_shamt = 0;
+  end
+
+  // ------------------
+  // Product data path
+  // ------------------
+  logic [PRECISION_BITS-1:0]   mantissa_a, mantissa_b, mantissa_c; // mantissae incl. implicit bit
+  logic [2*PRECISION_BITS-1:0] product;             // the p*p product is 2p bits wide
+  logic [3*PRECISION_BITS+3:0] product_shifted;     // addends are 3p+4 bit wide (including G/R)
+
+  // Prepend the implicit bit to the mantissae: 1 for normal numbers, 0 otherwise
+  assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
+  assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
+  assign mantissa_c = {info_c.is_normal, operand_c.mantissa};
+
+  // Mantissa multiplier (a*b)
+  assign product = mantissa_a * mantissa_b;
+
+  // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky:
+  // | 000...000 | product | RS |
+  //  <-  p+2  -> <-  2p -> < 2>
+  assign product_shifted = product << 2; // constant shift, zero-extended to 3p+4 bits
+
+  // -----------------
+  // Addend data path
+  // -----------------
+  logic [3*PRECISION_BITS+3:0] addend_after_shift;  // upper 3p+4 bits are needed to go on
+  logic [PRECISION_BITS-1:0]   addend_sticky_bits;  // up to p bit of shifted addend are sticky
+  logic                        sticky_before_add;   // they are compressed into a single sticky bit
+  logic [3*PRECISION_BITS+3:0] addend_shifted;      // addends are 3p+4 bit wide (including G/R)
+  logic                        inject_carry_in;     // inject carry for subtractions if needed
+
+  // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits are
+  // shifted out and compressed into a sticky bit.
+  // BEFORE THE SHIFT:
+  // | mantissa_c | 000..000 |
+  //  <-    p   -> <- 3p+4 ->
+  // AFTER THE SHIFT:
+  // | 000..........000 | mantissa_c | 000...............0GR |  sticky bits  |
+  //  <- addend_shamt -> <-    p   -> <- 2p+4-addend_shamt -> <-  up to p  ->
+  assign {addend_after_shift, addend_sticky_bits} =
+      (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;
+
+  assign sticky_before_add     = (| addend_sticky_bits); // any shifted-out addend bit was set
+
+  // In case of a subtraction, the addend is inverted
+  assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
+  assign inject_carry_in = effective_subtraction & ~sticky_before_add; // +1 only if no sticky bits
+
+  // ------
+  // Adder
+  // ------
+  logic [3*PRECISION_BITS+4:0] sum_raw;   // added one bit for the carry
+  logic                        sum_carry; // observe carry bit from sum for sign fixing
+  logic [3*PRECISION_BITS+3:0] sum;       // discard carry as sum won't overflow
+  logic                        final_sign;
+
+  // Mantissa adder (ab+c). In normal addition, it cannot overflow.
+  assign sum_raw = product_shifted + addend_shifted + inject_carry_in;
+  assign sum_carry = sum_raw[3*PRECISION_BITS+4];
+
+  // Complement negative sum (can only happen in subtraction -> overflows for positive results)
+  assign sum        = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;
+
+  // In case of a mispredicted subtraction result, do a sign flip: for subtractions the final sign
+  // is 1 if sum_carry equals tentative_sign and 0 otherwise; additions keep tentative_sign.
+  assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
+                      ? 1'b1
+                      : (effective_subtraction ? 1'b0 : tentative_sign);
+
+  // ------------------
+  // Internal pipeline
+  // ------------------
+  // Pipeline output signals as non-arrays
+  logic                          effective_subtraction_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_product_q;
+  logic signed [EXP_WIDTH-1:0]   exponent_difference_q;
+  logic signed [EXP_WIDTH-1:0]   tentative_exponent_q;
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
+  logic                          sticky_before_add_q;
+  logic [3*PRECISION_BITS+3:0]   sum_q;
+  logic                          final_sign_q;
+  fpnew_pkg::fp_format_e         dst_fmt_q2;
+  fpnew_pkg::roundmode_e         rnd_mode_q;
+  logic                          result_is_special_q;
+  fp_t                           special_result_q;
+  fpnew_pkg::status_t            special_status_q;
+  // Internal pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_eff_sub_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_prod_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_diff_q;
+  logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_tent_exp_q;
+  logic                  [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_sticky_q;
+  logic                  [0:NUM_MID_REGS][3*PRECISION_BITS+3:0]   mid_pipe_sum_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_final_sign_q;
+  fpnew_pkg::roundmode_e [0:NUM_MID_REGS]                         mid_pipe_rnd_mode_q;
+  fpnew_pkg::fp_format_e [0:NUM_MID_REGS]                         mid_pipe_dst_fmt_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_res_is_spec_q;
+  fp_t                   [0:NUM_MID_REGS]                         mid_pipe_spec_res_q;
+  fpnew_pkg::status_t    [0:NUM_MID_REGS]                         mid_pipe_spec_stat_q;
+  TagType                [0:NUM_MID_REGS]                         mid_pipe_tag_q;
+  AuxType                [0:NUM_MID_REGS]                         mid_pipe_aux_q;
+  logic                  [0:NUM_MID_REGS]                         mid_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_MID_REGS] mid_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from upstream logic
+  assign mid_pipe_eff_sub_q[0]     = effective_subtraction;
+  assign mid_pipe_exp_prod_q[0]    = exponent_product;
+  assign mid_pipe_exp_diff_q[0]    = exponent_difference;
+  assign mid_pipe_tent_exp_q[0]    = tentative_exponent;
+  assign mid_pipe_add_shamt_q[0]   = addend_shamt;
+  assign mid_pipe_sticky_q[0]      = sticky_before_add;
+  assign mid_pipe_sum_q[0]         = sum;
+  assign mid_pipe_final_sign_q[0]  = final_sign;
+  assign mid_pipe_rnd_mode_q[0]    = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign mid_pipe_dst_fmt_q[0]     = dst_fmt_q;
+  assign mid_pipe_res_is_spec_q[0] = result_is_special;
+  assign mid_pipe_spec_res_q[0]    = special_result;
+  assign mid_pipe_spec_stat_q[0]   = special_status;
+  assign mid_pipe_tag_q[0]         = inp_pipe_tag_q[NUM_INP_REGS];
+  assign mid_pipe_aux_q[0]         = inp_pipe_aux_q[NUM_INP_REGS];
+  assign mid_pipe_valid_q[0]       = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to input pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+
+  // Generate the register stages (none are generated if NUM_MID_REGS is 0)
+  for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(mid_pipe_eff_sub_q[i+1],     mid_pipe_eff_sub_q[i],     reg_ena, '0)
+    `FFL(mid_pipe_exp_prod_q[i+1],    mid_pipe_exp_prod_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_exp_diff_q[i+1],    mid_pipe_exp_diff_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_tent_exp_q[i+1],    mid_pipe_tent_exp_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_add_shamt_q[i+1],   mid_pipe_add_shamt_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_sticky_q[i+1],      mid_pipe_sticky_q[i],      reg_ena, '0)
+    `FFL(mid_pipe_sum_q[i+1],         mid_pipe_sum_q[i],         reg_ena, '0)
+    `FFL(mid_pipe_final_sign_q[i+1],  mid_pipe_final_sign_q[i],  reg_ena, '0)
+    `FFL(mid_pipe_rnd_mode_q[i+1],    mid_pipe_rnd_mode_q[i],    reg_ena, fpnew_pkg::RNE)
+    `FFL(mid_pipe_dst_fmt_q[i+1],     mid_pipe_dst_fmt_q[i],     reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
+    `FFL(mid_pipe_spec_res_q[i+1],    mid_pipe_spec_res_q[i],    reg_ena, '0)
+    `FFL(mid_pipe_spec_stat_q[i+1],   mid_pipe_spec_stat_q[i],   reg_ena, '0)
+    `FFL(mid_pipe_tag_q[i+1],         mid_pipe_tag_q[i],         reg_ena, TagType'('0))
+    `FFL(mid_pipe_aux_q[i+1],         mid_pipe_aux_q[i],         reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
+  assign exponent_product_q      = mid_pipe_exp_prod_q[NUM_MID_REGS];
+  assign exponent_difference_q   = mid_pipe_exp_diff_q[NUM_MID_REGS];
+  assign tentative_exponent_q    = mid_pipe_tent_exp_q[NUM_MID_REGS];
+  assign addend_shamt_q          = mid_pipe_add_shamt_q[NUM_MID_REGS];
+  assign sticky_before_add_q     = mid_pipe_sticky_q[NUM_MID_REGS];
+  assign sum_q                   = mid_pipe_sum_q[NUM_MID_REGS];
+  assign final_sign_q            = mid_pipe_final_sign_q[NUM_MID_REGS];
+  assign rnd_mode_q              = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+  assign dst_fmt_q2              = mid_pipe_dst_fmt_q[NUM_MID_REGS];
+  assign result_is_special_q     = mid_pipe_res_is_spec_q[NUM_MID_REGS];
+  assign special_result_q        = mid_pipe_spec_res_q[NUM_MID_REGS];
+  assign special_status_q        = mid_pipe_spec_stat_q[NUM_MID_REGS];
+
+  // --------------
+  // Normalization
+  // --------------
+  logic        [LOWER_SUM_WIDTH-1:0]  sum_lower;              // lower 2p+3 bits of sum are searched
+  logic        [LZC_RESULT_WIDTH-1:0] leading_zero_count;     // the number of leading zeroes
+  logic signed [LZC_RESULT_WIDTH:0]   leading_zero_count_sgn; // signed leading-zero count
+  logic                               lzc_zeroes;             // in case only zeroes found
+
+  logic        [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
+  logic signed [EXP_WIDTH-1:0]          normalized_exponent;
+
+  logic [3*PRECISION_BITS+4:0] sum_shifted;       // result after first normalization shift
+  logic [PRECISION_BITS:0]     final_mantissa;    // final mantissa before rounding with round bit
+  logic [2*PRECISION_BITS+2:0] sum_sticky_bits;   // remaining 2p+3 sticky bits after normalization
+  logic                        sticky_after_norm; // sticky bit after normalization
+
+  logic signed [EXP_WIDTH-1:0] final_exponent;
+
+  assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];
+
+  // Leading zero counter for cancellations, operates on the lower part of the registered sum
+  lzc #(
+    .WIDTH ( LOWER_SUM_WIDTH ),
+    .MODE  ( 1               ) // MODE = 1 counts leading zeroes
+  ) i_lzc (
+    .in_i    ( sum_lower          ),
+    .cnt_o   ( leading_zero_count ),
+    .empty_o ( lzc_zeroes         )
+  );
+
+  assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count}); // zero-extend, keep positive
+
+  // Normalization shift amount based on exponents and LZC (unsigned as only left shifts)
+  always_comb begin : norm_shift_amount
+    // Product-anchored case or cancellations require LZC
+    if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
+      // Normal result (biased exponent > 0 and not a zero)
+      if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
+        // Undo initial product shift, remove the counted zeroes
+        norm_shamt          = PRECISION_BITS + 2 + leading_zero_count;
+        normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
+      // Subnormal result
+      end else begin
+        // Cap the shift distance to align mantissa with minimum exponent
+        norm_shamt          = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q));
+        normalized_exponent = 0; // subnormals encoded as 0
+      end
+    // Addend-anchored case
+    end else begin
+      norm_shamt          = addend_shamt_q; // Undo the initial shift
+      normalized_exponent = tentative_exponent_q;
+    end
+  end
+
+  // Do the large normalization shift
+  assign sum_shifted       = sum_q << norm_shamt;
+
+  // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left
+  // or right of the (non-carry) MSB of the sum.
+  always_comb begin : small_norm
+    // Default assignment, discarding carry bit
+    {final_mantissa, sum_sticky_bits} = sum_shifted;
+    final_exponent                    = normalized_exponent;
+
+    // The normalized sum has overflown, align right and fix exponent
+    if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit
+      {final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
+      final_exponent                    = normalized_exponent + 1;
+    // The normalized sum is normal, nothing to do
+    end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB
+      // do nothing
+    // The normalized sum is still denormal, align left - only while the exponent is above 1
+    end else if (normalized_exponent > 1) begin
+      {final_mantissa, sum_sticky_bits} = sum_shifted << 1;
+      final_exponent                    = normalized_exponent - 1;
+    // Otherwise we're denormal
+    end else begin
+      final_exponent = '0;
+    end
+  end
+
+  // Update the sticky bit with the shifted-out bits
+  assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
+
+  // ----------------------------
+  // Rounding and classification
+  // ----------------------------
+  logic                                     pre_round_sign;
+  logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding
+  logic [1:0]                               round_sticky_bits; // {round bit, sticky bit}
+
+  logic of_before_round, of_after_round; // overflow
+  logic uf_before_round, uf_after_round; // underflow
+
+  logic [NUM_FORMATS-1:0][SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] fmt_pre_round_abs; // per format
+  logic [NUM_FORMATS-1:0][1:0]                               fmt_round_sticky_bits;
+
+  logic [NUM_FORMATS-1:0]                                    fmt_of_after_round;
+  logic [NUM_FORMATS-1:0]                                    fmt_uf_after_round;
+
+  logic                                     rounded_sign;
+  logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding
+  logic                                     result_zero;
+
+  // Classification before round. RISC-V mandates checking underflow AFTER rounding!
+  assign of_before_round = final_exponent >= 2**(fpnew_pkg::exp_bits(dst_fmt_q2))-1; // infinity exponent is all ones
+  assign uf_before_round = final_exponent == 0;               // exponent for subnormals capped to 0
+
+  // Pack exponent and mantissa into proper rounding form, once per format
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble
+    // Set up some constants
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    logic [EXP_BITS-1:0] pre_round_exponent;
+    logic [MAN_BITS-1:0] pre_round_mantissa;
+
+    if (FpFmtConfig[fmt]) begin : active_format
+
+      assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0];
+      assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[SUPER_MAN_BITS-:MAN_BITS];
+      // Assemble result before rounding. In case of overflow, the largest normal value is set.
+      assign fmt_pre_round_abs[fmt] = {pre_round_exponent, pre_round_mantissa}; // 0-extend
+
+      // Round bit is after mantissa (1 in case of overflow for rounding)
+      assign fmt_round_sticky_bits[fmt][1] = final_mantissa[SUPER_MAN_BITS-MAN_BITS] |
+                                             of_before_round;
+
+      // remaining bits in mantissa to sticky (1 in case of overflow for rounding)
+      if (MAN_BITS < SUPER_MAN_BITS) begin : narrow_sticky
+        assign fmt_round_sticky_bits[fmt][0] = (| final_mantissa[SUPER_MAN_BITS-MAN_BITS-1:0]) |
+                                               sticky_after_norm | of_before_round;
+      end else begin : normal_sticky
+        assign fmt_round_sticky_bits[fmt][0] = sticky_after_norm | of_before_round;
+      end
+    end else begin : inactive_format
+      assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_round_sticky_bits[fmt] = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Select sign and pre-rounding absolute value according to the (registered) destination format
+  assign pre_round_sign     = final_sign_q;
+  assign pre_round_abs      = fmt_pre_round_abs[dst_fmt_q2];
+
+  // In case of overflow, the round and sticky bits are set for proper rounding
+  assign round_sticky_bits  = fmt_round_sticky_bits[dst_fmt_q2];
+
+  // Perform the rounding on the widest (super-format) container shared by all formats
+  fpnew_rounding #(
+    .AbsWidth ( SUPER_EXP_BITS + SUPER_MAN_BITS )
+  ) i_fpnew_rounding (
+    .abs_value_i             ( pre_round_abs           ),
+    .sign_i                  ( pre_round_sign          ),
+    .round_sticky_bits_i     ( round_sticky_bits       ),
+    .rnd_mode_i              ( rnd_mode_q              ),
+    .effective_subtraction_i ( effective_subtraction_q ),
+    .abs_rounded_o           ( rounded_abs             ),
+    .sign_o                  ( rounded_sign            ),
+    .exact_zero_o            ( result_zero             )
+  );
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result; // per-format regular result, NaN-boxed to WIDTH
+
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : post_process
+        // detect of / uf from the exponent field of the rounded value
+        fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal
+        fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp.
+
+        // Assemble regular result, NaN-box formats narrower than WIDTH with ones.
+        fmt_result[fmt]               = '1;
+        fmt_result[fmt][FP_WIDTH-1:0] = {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
+      end
+    end else begin : inactive_format
+      assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_result[fmt]         = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Classification after rounding, selected by destination format
+  assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
+  assign of_after_round = fmt_of_after_round[dst_fmt_q2];
+
+
+  // -----------------
+  // Result selection
+  // -----------------
+  logic [WIDTH-1:0]     regular_result; // result of the regular (non-special) data path
+  fpnew_pkg::status_t   regular_status; // status flags of the regular data path
+
+  // Assemble regular result and its status flags
+  assign regular_result = fmt_result[dst_fmt_q2];
+  assign regular_status.NV = 1'b0; // only valid cases are handled in regular path
+  assign regular_status.DZ = 1'b0; // no divisions
+  assign regular_status.OF = of_before_round | of_after_round;   // rounding can introduce overflow
+  assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF
+  assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
+
+  // Final results for output pipeline
+  logic [WIDTH-1:0]   result_d;
+  fpnew_pkg::status_t status_d;
+
+  // Select output depending on special case detection (special path wins over regular path)
+  assign result_d = result_is_special_q ? special_result_q : regular_result;
+  assign status_d = result_is_special_q ? special_status_q : regular_status;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
+  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from the result selection and the mid pipe
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0]    = mid_pipe_tag_q[NUM_MID_REGS];
+  assign out_pipe_aux_q[0]    = mid_pipe_aux_q[NUM_MID_REGS];
+  assign out_pipe_valid_q[0]  = mid_pipe_valid_q[NUM_MID_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+  // Generate the register stages (none are generated if NUM_OUT_REGS is 0)
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); // any stage valid
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv b/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv
new file mode 100644
index 0000000000..9e485f9e9d
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_noncomp.sv
@@ -0,0 +1,404 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_noncomp #(
+  parameter fpnew_pkg::fp_format_e   FpFormat    = fpnew_pkg::fp_format_e'(0),
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
+) (
+  input logic                  clk_i,
+  input logic                  rst_ni,
+  // Input signals
+  input logic [1:0][WIDTH-1:0]     operands_i, // 2 operands
+  input logic [1:0]                is_boxed_i, // 2 operands
+  input fpnew_pkg::roundmode_e     rnd_mode_i,
+  input fpnew_pkg::operation_e     op_i,
+  input logic                      op_mod_i,
+  input TagType                    tag_i,
+  input AuxType                    aux_i,
+  // Input Handshake
+  input  logic                     in_valid_i,
+  output logic                     in_ready_o,
+  input  logic                     flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]         result_o,
+  output fpnew_pkg::status_t       status_o,
+  output logic                     extension_bit_o,
+  output fpnew_pkg::classmask_e    class_mask_o,
+  output logic                     is_class_o,
+  output TagType                   tag_o,
+  output AuxType                   aux_o,
+  // Output handshake
+  output logic                     out_valid_o,
+  input  logic                     out_ready_i,
+  // Indication of valid data in flight
+  output logic                     busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+  // Pipelines
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ----------------
+  // Type definition
+  // ----------------
+  typedef struct packed {
+    logic                sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Input pipeline signals, index i holds signal after i register stages
+  logic                  [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
+  logic                  [0:NUM_INP_REGS][1:0]            inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                 inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS]                 inp_pipe_op_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_op_mod_q;
+  TagType                [0:NUM_INP_REGS]                 inp_pipe_tag_q;
+  AuxType                [0:NUM_INP_REGS]                 inp_pipe_aux_q;
+  logic                  [0:NUM_INP_REGS]                 inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0]       = op_i;
+  assign inp_pipe_op_mod_q[0]   = op_mod_i;
+  assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_aux_q[0]      = aux_i;
+  assign inp_pipe_valid_q[0]    = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
+    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  fpnew_pkg::fp_info_t [1:0] info_q;
+
+  // Classify input
+  fpnew_classifier #(
+    .FpFormat    ( FpFormat ),
+    .NumOperands ( 2        )
+    ) i_class_a (
+    .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
+    .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
+    .info_o     ( info_q                            )
+  );
+
+  fp_t                 operand_a, operand_b;
+  fpnew_pkg::fp_info_t info_a,    info_b;
+
+  // Packing-order-agnostic assignments
+  assign operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
+  assign operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
+  assign info_a    = info_q[0];
+  assign info_b    = info_q[1];
+
+  logic any_operand_inf;
+  logic any_operand_nan;
+  logic signalling_nan;
+
+  // Reduction for special case handling
+  assign any_operand_inf = (| {info_a.is_inf,        info_b.is_inf});
+  assign any_operand_nan = (| {info_a.is_nan,        info_b.is_nan});
+  assign signalling_nan  = (| {info_a.is_signalling, info_b.is_signalling});
+
+  logic operands_equal, operand_a_smaller;
+
+  // Equal if bit-identical, or if both operands are classified as zero
+  assign operands_equal    = (operand_a == operand_b) || (info_a.is_zero && info_b.is_zero);
+  // Unsigned bit-pattern comparison, inverted if either operand has its sign bit set
+  assign operand_a_smaller = (operand_a < operand_b) ^ (operand_a.sign || operand_b.sign);
+
+  // ---------------
+  // Sign Injection
+  // ---------------
+  fp_t                sgnj_result;
+  fpnew_pkg::status_t sgnj_status;
+  logic               sgnj_extension_bit;
+
+  // Sign Injection - operation is encoded in rnd_mode_q:
+  // RNE = SGNJ, RTZ = SGNJN, RDN = SGNJX, RUP = Passthrough (no NaN-box check)
+  always_comb begin : sign_injections
+    logic sign_a, sign_b; // internal signs
+    // Default assignment
+    sgnj_result = operand_a; // result based on operand a
+
+    // NaN-boxing check will treat invalid inputs as canonical NaNs
+    if (!info_a.is_boxed) sgnj_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)};
+
+    // Internal signs are treated as positive in case of non-NaN-boxed values
+    sign_a = operand_a.sign & info_a.is_boxed;
+    sign_b = operand_b.sign & info_b.is_boxed;
+
+    // Do the sign injection based on rm field
+    unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+      fpnew_pkg::RNE: sgnj_result.sign = sign_b;          // SGNJ
+      fpnew_pkg::RTZ: sgnj_result.sign = ~sign_b;         // SGNJN
+      fpnew_pkg::RDN: sgnj_result.sign = sign_a ^ sign_b; // SGNJX
+      fpnew_pkg::RUP: sgnj_result      = operand_a;       // passthrough
+      default: sgnj_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+    endcase
+  end
+
+  assign sgnj_status = '0;        // sign injections never raise exceptions
+
+  // op_mod_q enables integer sign-extension of result (for storing to integer regfile)
+  assign sgnj_extension_bit = inp_pipe_op_mod_q[NUM_INP_REGS] ? sgnj_result.sign : 1'b1;
+
+  // ------------------
+  // Minimum / Maximum
+  // ------------------
+  fp_t                minmax_result;
+  fpnew_pkg::status_t minmax_status;
+  logic               minmax_extension_bit;
+
+  // Minimum/Maximum - operation is encoded in rnd_mode_q:
+  // RNE = MIN, RTZ = MAX
+  always_comb begin : min_max
+    // Default assignment
+    minmax_status = '0;
+
+    // Min/Max use quiet comparisons - only sNaN are invalid
+    minmax_status.NV = signalling_nan;
+
+    // Both NaN inputs cause a NaN output
+    if (info_a.is_nan && info_b.is_nan)
+      minmax_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN
+    // If one operand is NaN, the non-NaN operand is returned
+    else if (info_a.is_nan) minmax_result = operand_b;
+    else if (info_b.is_nan) minmax_result = operand_a;
+    // Otherwise decide according to the operation
+    else begin
+      unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+        fpnew_pkg::RNE: minmax_result = operand_a_smaller ? operand_a : operand_b; // MIN
+        fpnew_pkg::RTZ: minmax_result = operand_a_smaller ? operand_b : operand_a; // MAX
+        default: minmax_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+      endcase
+    end
+  end
+
+  assign minmax_extension_bit = 1'b1; // NaN-box as result is always a float value
+
+  // ------------
+  // Comparisons
+  // ------------
+  fp_t                cmp_result;
+  fpnew_pkg::status_t cmp_status;
+  logic               cmp_extension_bit;
+
+  // Comparisons - operation is encoded in rnd_mode_q:
+  // RNE = LE, RTZ = LT, RDN = EQ
+  // op_mod_q inverts boolean outputs
+  always_comb begin : comparisons
+    // Default assignment
+    cmp_result = '0; // false
+    cmp_status = '0; // no flags
+
+    // Signalling NaNs always compare as false and are illegal
+    if (signalling_nan) cmp_status.NV = 1'b1; // invalid operation
+    // Otherwise do comparisons
+    else begin
+      unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+        fpnew_pkg::RNE: begin // Less than or equal
+          if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
+          else cmp_result = (operand_a_smaller | operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        fpnew_pkg::RTZ: begin // Less than
+          if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
+          else cmp_result = (operand_a_smaller & ~operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        fpnew_pkg::RDN: begin // Equal
+          if (any_operand_nan) cmp_result = inp_pipe_op_mod_q[NUM_INP_REGS]; // NaN always not equal
+          else cmp_result = operands_equal ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+        end
+        default: cmp_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+      endcase
+    end
+  end
+
+  assign cmp_extension_bit = 1'b0; // Comparisons always produce booleans in integer registers
+
+  // ---------------
+  // Classification
+  // ---------------
+  fpnew_pkg::status_t    class_status;
+  logic                  class_extension_bit;
+  fpnew_pkg::classmask_e class_mask_d; // the result is actually here
+
+  // Classification - always return the classification mask on the dedicated port
+  always_comb begin : classify
+    if (info_a.is_normal) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGNORM    : fpnew_pkg::POSNORM;
+    end else if (info_a.is_subnormal) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGSUBNORM : fpnew_pkg::POSSUBNORM;
+    end else if (info_a.is_zero) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGZERO    : fpnew_pkg::POSZERO;
+    end else if (info_a.is_inf) begin
+      class_mask_d = operand_a.sign       ? fpnew_pkg::NEGINF     : fpnew_pkg::POSINF;
+    end else if (info_a.is_nan) begin
+      class_mask_d = info_a.is_signalling ? fpnew_pkg::SNAN       : fpnew_pkg::QNAN;
+    end else begin
+      class_mask_d = fpnew_pkg::QNAN; // default value
+    end
+  end
+
+  assign class_status        = '0;   // classification does not set flags
+  assign class_extension_bit = 1'b0; // classification always produces results in integer registers
+
+  // -----------------
+  // Result selection
+  // -----------------
+  fp_t                   result_d;
+  fpnew_pkg::status_t    status_d;
+  logic                  extension_bit_d;
+  logic                  is_class_d;
+
+  // Select result
+  always_comb begin : select_result
+    unique case (inp_pipe_op_q[NUM_INP_REGS])
+      fpnew_pkg::SGNJ: begin
+        result_d        = sgnj_result;
+        status_d        = sgnj_status;
+        extension_bit_d = sgnj_extension_bit;
+      end
+      fpnew_pkg::MINMAX: begin
+        result_d        = minmax_result;
+        status_d        = minmax_status;
+        extension_bit_d = minmax_extension_bit;
+      end
+      fpnew_pkg::CMP: begin
+        result_d        = cmp_result;
+        status_d        = cmp_status;
+        extension_bit_d = cmp_extension_bit;
+      end
+      fpnew_pkg::CLASSIFY: begin
+        result_d        = '{default: fpnew_pkg::DONT_CARE}; // unused
+        status_d        = class_status;
+        extension_bit_d = class_extension_bit;
+      end
+      default: begin
+        result_d        = '{default: fpnew_pkg::DONT_CARE}; // dont care
+        status_d        = '{default: fpnew_pkg::DONT_CARE}; // dont care
+        extension_bit_d = fpnew_pkg::DONT_CARE;             // dont care
+      end
+    endcase
+  end
+
+  assign is_class_d = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::CLASSIFY);
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  fp_t                   [0:NUM_OUT_REGS] out_pipe_result_q;
+  fpnew_pkg::status_t    [0:NUM_OUT_REGS] out_pipe_status_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_extension_bit_q;
+  fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_is_class_q;
+  TagType                [0:NUM_OUT_REGS] out_pipe_tag_q;
+  AuxType                [0:NUM_OUT_REGS] out_pipe_aux_q;
+  logic                  [0:NUM_OUT_REGS] out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element is the selected result plus tag/aux/valid of the input pipeline
+  assign out_pipe_result_q[0]        = result_d;
+  assign out_pipe_status_q[0]        = status_d;
+  assign out_pipe_extension_bit_q[0] = extension_bit_d;
+  assign out_pipe_class_mask_q[0]    = class_mask_d;
+  assign out_pipe_is_class_q[0]      = is_class_d;
+  assign out_pipe_tag_q[0]           = inp_pipe_tag_q[NUM_INP_REGS];
+  assign out_pipe_aux_q[0]           = inp_pipe_aux_q[NUM_INP_REGS];
+  assign out_pipe_valid_q[0]         = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal back to the last input pipeline stage
+  assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1],        out_pipe_result_q[i],        reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1],        out_pipe_status_q[i],        reg_ena, '0)
+    `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0)
+    `FFL(out_pipe_class_mask_q[i+1],    out_pipe_class_mask_q[i],    reg_ena, fpnew_pkg::QNAN)
+    `FFL(out_pipe_is_class_q[i+1],      out_pipe_is_class_q[i],      reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1],           out_pipe_tag_q[i],           reg_ena, TagType'('0))
+    `FFL(out_pipe_aux_q[i+1],           out_pipe_aux_q[i],           reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS];
+  assign class_mask_o    = out_pipe_class_mask_q[NUM_OUT_REGS];
+  assign is_class_o      = out_pipe_is_class_q[NUM_OUT_REGS];
+  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o          = (| {inp_pipe_valid_q, out_pipe_valid_q});
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
new file mode 100644
index 0000000000..637e85f614
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_block.sv
@@ -0,0 +1,230 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_opgroup_block #(
+  parameter fpnew_pkg::opgroup_e        OpGroup       = fpnew_pkg::ADDMUL,
+  // FPU configuration
+  parameter int unsigned                Width         = 32,
+  parameter logic                       EnableVectors = 1'b1,
+  parameter fpnew_pkg::fmt_logic_t      FpFmtMask     = '1,
+  parameter fpnew_pkg::ifmt_logic_t     IntFmtMask    = '1,
+  parameter fpnew_pkg::fmt_unsigned_t   FmtPipeRegs   = '{default: 0},
+  parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes  = '{default: fpnew_pkg::PARALLEL},
+  parameter fpnew_pkg::pipe_config_t    PipeConfig    = fpnew_pkg::BEFORE,
+  parameter type                        TagType       = logic,
+  // Do not change
+  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS,
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup)
+) (
+  input logic                                     clk_i,
+  input logic                                     rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0]       operands_i,
+  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e                    rnd_mode_i,
+  input fpnew_pkg::operation_e                    op_i,
+  input logic                                     op_mod_i,
+  input fpnew_pkg::fp_format_e                    src_fmt_i,
+  input fpnew_pkg::fp_format_e                    dst_fmt_i,
+  input fpnew_pkg::int_format_e                   int_fmt_i,
+  input logic                                     vectorial_op_i,
+  input TagType                                   tag_i,
+  // Input Handshake
+  input  logic                                    in_valid_i,
+  output logic                                    in_ready_o,
+  input  logic                                    flush_i,
+  // Output signals
+  output logic [Width-1:0]                        result_o,
+  output fpnew_pkg::status_t                      status_o,
+  output logic                                    extension_bit_o,
+  output TagType                                  tag_o,
+  // Output handshake
+  output logic                                    out_valid_o,
+  input  logic                                    out_ready_i,
+  // Indication of valid data in flight
+  output logic                                    busy_o
+);
+
+  // ----------------
+  // Type Definition
+  // ----------------
+  typedef struct packed {
+    logic [Width-1:0]   result;
+    fpnew_pkg::status_t status;
+    logic               ext_bit;
+    TagType             tag;
+  } output_t;
+
+  // Handshake signals for the slices
+  logic [NUM_FORMATS-1:0] fmt_in_ready, fmt_out_valid, fmt_out_ready, fmt_busy;
+  output_t [NUM_FORMATS-1:0] fmt_outputs;
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o = in_valid_i & fmt_in_ready[dst_fmt_i]; // Selected format's ready, gated by valid
+
+  // -------------------------
+  // Generate Parallel Slices
+  // -------------------------
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_parallel_slices
+    // Some constants for this format
+    localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask);
+    localparam logic IS_FIRST_MERGED =
+        fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes, FpFmtMask);
+
+    // Generate slice only if format is enabled in the mask and uses a PARALLEL unit
+    if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::PARALLEL)) begin : active_format
+
+      logic in_valid;
+
+      assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format
+
+      fpnew_opgroup_fmt_slice #(
+        .OpGroup       ( OpGroup                      ),
+        .FpFormat      ( fpnew_pkg::fp_format_e'(fmt) ),
+        .Width         ( Width                        ),
+        .EnableVectors ( EnableVectors                ),
+        .NumPipeRegs   ( FmtPipeRegs[fmt]             ),
+        .PipeConfig    ( PipeConfig                   ),
+        .TagType       ( TagType                      )
+      ) i_fmt_slice (
+        .clk_i,
+        .rst_ni,
+        .operands_i     ( operands_i               ),
+        .is_boxed_i     ( is_boxed_i[fmt]          ),
+        .rnd_mode_i,
+        .op_i,
+        .op_mod_i,
+        .vectorial_op_i,
+        .tag_i,
+        .in_valid_i     ( in_valid                 ),
+        .in_ready_o     ( fmt_in_ready[fmt]        ),
+        .flush_i,
+        .result_o       ( fmt_outputs[fmt].result  ),
+        .status_o       ( fmt_outputs[fmt].status  ),
+        .extension_bit_o( fmt_outputs[fmt].ext_bit ),
+        .tag_o          ( fmt_outputs[fmt].tag     ),
+        .out_valid_o    ( fmt_out_valid[fmt]       ),
+        .out_ready_i    ( fmt_out_ready[fmt]       ),
+        .busy_o         ( fmt_busy[fmt]            )
+      );
+    // If the format wants to use merged ops, tie off the dangling ones not used here
+    end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused
+
+      // Ready mirrors the entry picked by get_first_enabled_multi (driven by the merged slice)
+      assign fmt_in_ready[fmt]  = fmt_in_ready[fpnew_pkg::get_first_enabled_multi(FmtUnitTypes,
+                                                                                  FpFmtMask)];
+
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt]      = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag     = TagType'(fpnew_pkg::DONT_CARE);
+
+    // Tie off disabled formats
+    end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt
+      assign fmt_in_ready[fmt]  = 1'b0; // don't accept operations
+      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
+      assign fmt_busy[fmt]      = 1'b0; // never busy
+      // Outputs are don't care
+      assign fmt_outputs[fmt].result  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].status  = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
+      assign fmt_outputs[fmt].tag     = TagType'(fpnew_pkg::DONT_CARE);
+    end
+  end
+
+  // ----------------------
+  // Generate Merged Slice
+  // ----------------------
+  if (fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask)) begin : gen_merged_slice
+
+    localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
+    localparam REG = fpnew_pkg::get_num_regs_multi(FmtPipeRegs, FmtUnitTypes, FpFmtMask);
+
+    logic in_valid;
+
+    assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED); // MERGED only
+
+    fpnew_opgroup_multifmt_slice #(
+      .OpGroup       ( OpGroup          ),
+      .Width         ( Width            ),
+      .FpFmtConfig   ( FpFmtMask        ),
+      .IntFmtConfig  ( IntFmtMask       ),
+      .EnableVectors ( EnableVectors    ),
+      .NumPipeRegs   ( REG              ),
+      .PipeConfig    ( PipeConfig       ),
+      .TagType       ( TagType          )
+    ) i_multifmt_slice (
+      .clk_i,
+      .rst_ni,
+      .operands_i,
+      .is_boxed_i,
+      .rnd_mode_i,
+      .op_i,
+      .op_mod_i,
+      .src_fmt_i,
+      .dst_fmt_i,
+      .int_fmt_i,
+      .vectorial_op_i,
+      .tag_i,
+      .in_valid_i      ( in_valid                 ),
+      .in_ready_o      ( fmt_in_ready[FMT]        ),
+      .flush_i,
+      .result_o        ( fmt_outputs[FMT].result  ),
+      .status_o        ( fmt_outputs[FMT].status  ),
+      .extension_bit_o ( fmt_outputs[FMT].ext_bit ),
+      .tag_o           ( fmt_outputs[FMT].tag     ),
+      .out_valid_o     ( fmt_out_valid[FMT]       ),
+      .out_ready_i     ( fmt_out_ready[FMT]       ),
+      .busy_o          ( fmt_busy[FMT]            )
+    );
+
+  end
+
+  // ------------------
+  // Arbitrate Outputs
+  // ------------------
+  output_t arbiter_output;
+
+  // Round-Robin arbiter to decide which result to use
+  rr_arb_tree #(
+    .NumIn     ( NUM_FORMATS ),
+    .DataType  ( output_t    ),
+    .AxiVldRdy ( 1'b1        )
+  ) i_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i,
+    .rr_i   ( '0             ),
+    .req_i  ( fmt_out_valid  ),
+    .gnt_o  ( fmt_out_ready  ),
+    .data_i ( fmt_outputs    ),
+    .gnt_i  ( out_ready_i    ),
+    .req_o  ( out_valid_o    ),
+    .data_o ( arbiter_output ),
+    .idx_o  ( /* unused */   )
+  );
+
+  // Unpack output
+  assign result_o        = arbiter_output.result;
+  assign status_o        = arbiter_output.status;
+  assign extension_bit_o = arbiter_output.ext_bit;
+  assign tag_o           = arbiter_output.tag;
+
+  assign busy_o = (| fmt_busy);
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
new file mode 100644
index 0000000000..fda2a57f38
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_fmt_slice.sv
@@ -0,0 +1,276 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_opgroup_fmt_slice #( // single-format slice: generates one OpGroup unit per lane
+  parameter fpnew_pkg::opgroup_e     OpGroup       = fpnew_pkg::ADDMUL,
+  parameter fpnew_pkg::fp_format_e   FpFormat      = fpnew_pkg::fp_format_e'(0),
+  // FPU configuration
+  parameter int unsigned             Width         = 32,
+  parameter logic                    EnableVectors = 1'b1,
+  parameter int unsigned             NumPipeRegs   = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig    = fpnew_pkg::BEFORE,
+  parameter type                     TagType       = logic,
+  // Do not change: derived from OpGroup
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup)
+) (
+  input logic                               clk_i,
+  input logic                               rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
+  input logic [NUM_OPERANDS-1:0]            is_boxed_i,
+  input fpnew_pkg::roundmode_e              rnd_mode_i,
+  input fpnew_pkg::operation_e              op_i,
+  input logic                               op_mod_i,
+  input logic                               vectorial_op_i,
+  input TagType                             tag_i,
+  // Input Handshake
+  input  logic                              in_valid_i,
+  output logic                              in_ready_o,
+  input  logic                              flush_i,
+  // Output signals
+  output logic [Width-1:0]                  result_o,
+  output fpnew_pkg::status_t                status_o,
+  output logic                              extension_bit_o,
+  output TagType                            tag_o,
+  // Output handshake
+  output logic                              out_valid_o,
+  input  logic                              out_ready_i,
+  // Indication of valid data in flight
+  output logic                              busy_o
+);
+
+  localparam int unsigned FP_WIDTH  = fpnew_pkg::fp_width(FpFormat);
+  localparam int unsigned NUM_LANES = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors);
+
+
+  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
+  logic                 vectorial_op;
+
+  logic [NUM_LANES*FP_WIDTH-1:0] slice_result;
+  logic [Width-1:0]              slice_regular_result, slice_class_result, slice_vec_class_result;
+
+  fpnew_pkg::status_t    [NUM_LANES-1:0] lane_status;
+  logic                  [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used
+  fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask;
+  TagType                [NUM_LANES-1:0] lane_tags; // only the first one is actually used
+  logic                  [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // only [0] read, except lane_busy (OR-reduced)
+
+  logic result_is_vector, result_is_class;
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
+  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
+
+  // ---------------
+  // Generate Lanes
+  // ---------------
+  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
+    logic [FP_WIDTH-1:0] local_result; // lane-local results
+    logic                local_sign;
+
+    // Generate instances only if needed, lane 0 always generated
+    if ((lane == 0) || EnableVectors) begin : active_lane
+      logic in_valid, out_valid, out_ready; // lane-local handshake
+
+      logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands
+      logic [FP_WIDTH-1:0]                   op_result;      // lane-local results
+      fpnew_pkg::status_t                    op_status;
+
+      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+      // Slice out the operands for this lane
+      always_comb begin : prepare_input
+        for (int i = 0; i < int'(NUM_OPERANDS); i++) begin
+          local_operands[i] = operands_i[i][(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH];
+        end
+      end
+
+      // Instantiate the operation from the selected opgroup
+      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
+        fpnew_fma #(
+          .FpFormat    ( FpFormat    ),
+          .NumPipeRegs ( NumPipeRegs ),
+          .PipeConfig  ( PipeConfig  ),
+          .TagType     ( TagType     ),
+          .AuxType     ( logic       )
+        ) i_fma (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands               ),
+          .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .aux_i           ( vectorial_op         ), // Remember whether operation was vectorial
+          .in_valid_i      ( in_valid             ),
+          .in_ready_o      ( lane_in_ready[lane]  ),
+          .flush_i,
+          .result_o        ( op_result            ),
+          .status_o        ( op_status            ),
+          .extension_bit_o ( lane_ext_bit[lane]   ),
+          .tag_o           ( lane_tags[lane]      ),
+          .aux_o           ( lane_vectorial[lane] ),
+          .out_valid_o     ( out_valid            ),
+          .out_ready_i     ( out_ready            ),
+          .busy_o          ( lane_busy[lane]      )
+        );
+        assign lane_is_class[lane]   = 1'b0; // no classification result from this unit
+        assign lane_class_mask[lane] = fpnew_pkg::NEGINF; // tie-off; class result is never selected here
+      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance // instance below is commented out: nothing is generated
+        // fpnew_divsqrt #(
+        //   .FpFormat   (FpFormat),
+        //   .NumPipeRegs(NumPipeRegs),
+        //   .PipeConfig (PipeConfig),
+        //   .TagType    (TagType),
+        //   .AuxType    (logic)
+        // ) i_divsqrt (
+        //   .clk_i,
+        //   .rst_ni,
+        //   .operands_i      ( local_operands               ),
+        //   .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
+        //   .rnd_mode_i,
+        //   .op_i,
+        //   .op_mod_i,
+        //   .tag_i,
+        //   .aux_i           ( vectorial_op         ), // Remember whether operation was vectorial
+        //   .in_valid_i      ( in_valid             ),
+        //   .in_ready_o      ( lane_in_ready[lane]  ),
+        //   .flush_i,
+        //   .result_o        ( op_result            ),
+        //   .status_o        ( op_status            ),
+        //   .extension_bit_o ( lane_ext_bit[lane]   ),
+        //   .tag_o           ( lane_tags[lane]      ),
+        //   .aux_o           ( lane_vectorial[lane] ),
+        //   .out_valid_o     ( out_valid            ),
+        //   .out_ready_i     ( out_ready            ),
+        //   .busy_o          ( lane_busy[lane]      )
+        // );
+        // assign lane_is_class[lane] = 1'b0;
+      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
+        fpnew_noncomp #(
+          .FpFormat   (FpFormat),
+          .NumPipeRegs(NumPipeRegs),
+          .PipeConfig (PipeConfig),
+          .TagType    (TagType),
+          .AuxType    (logic)
+        ) i_noncomp (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands               ),
+          .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .tag_i,
+          .aux_i           ( vectorial_op          ), // Remember whether operation was vectorial
+          .in_valid_i      ( in_valid              ),
+          .in_ready_o      ( lane_in_ready[lane]   ),
+          .flush_i,
+          .result_o        ( op_result             ),
+          .status_o        ( op_status             ),
+          .extension_bit_o ( lane_ext_bit[lane]    ),
+          .class_mask_o    ( lane_class_mask[lane] ),
+          .is_class_o      ( lane_is_class[lane]   ),
+          .tag_o           ( lane_tags[lane]       ),
+          .aux_o           ( lane_vectorial[lane]  ),
+          .out_valid_o     ( out_valid             ),
+          .out_ready_i     ( out_ready             ),
+          .busy_o          ( lane_busy[lane]       )
+        );
+      end // ADD OTHER OPTIONS HERE
+
+      // Handshakes are only done if the lane is actually used
+      assign out_ready            = out_ready_i & ((lane == 0) | result_is_vector);
+      assign lane_out_valid[lane] = out_valid   & ((lane == 0) | result_is_vector);
+
+      // Properly NaN-box or sign-extend the slice result if not in use
+      assign local_result      = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
+      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
+
+    // Otherwise generate constant sign-extension
+    end else begin
+      assign lane_out_valid[lane] = 1'b0; // unused lane
+      assign lane_in_ready[lane]  = 1'b0; // unused lane
+      assign local_result         = '{default: lane_ext_bit[0]}; // sign-extend/nan box
+      assign lane_status[lane]    = '0;
+      assign lane_busy[lane]      = 1'b0;
+      assign lane_is_class[lane]  = 1'b0;
+    end
+
+    // Insert lane result into slice result
+    assign slice_result[(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH] = local_result;
+
+    // Create Classification results
+    if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size
+      assign local_sign = (lane_class_mask[lane] == fpnew_pkg::NEGINF ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM ||
+                           lane_class_mask[lane] == fpnew_pkg::NEGZERO);
+      // Write the current block segment
+      assign slice_vec_class_result[(lane+1)*8-1:lane*8] = {
+        local_sign,  // BIT 7
+        ~local_sign, // BIT 6
+        lane_class_mask[lane] == fpnew_pkg::QNAN, // BIT 5
+        lane_class_mask[lane] == fpnew_pkg::SNAN, // BIT 4
+        lane_class_mask[lane] == fpnew_pkg::POSZERO
+            || lane_class_mask[lane] == fpnew_pkg::NEGZERO, // BIT 3
+        lane_class_mask[lane] == fpnew_pkg::POSSUBNORM
+            || lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM, // BIT 2
+        lane_class_mask[lane] == fpnew_pkg::POSNORM
+            || lane_class_mask[lane] == fpnew_pkg::NEGNORM, // BIT 1
+        lane_class_mask[lane] == fpnew_pkg::POSINF
+            || lane_class_mask[lane] == fpnew_pkg::NEGINF // BIT 0
+      };
+    end
+  end
+
+  // ------------
+  // Output Side
+  // ------------
+  assign result_is_vector = lane_vectorial[0];
+  assign result_is_class  = lane_is_class[0];
+
+  assign slice_regular_result = $signed({extension_bit_o, slice_result}); // extension_bit_o acts as sign bit for widening
+  // Number of vec-class bits driven by the lanes: 8 per lane, limited to whole bytes within Width
+  localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 8 * (Width / 8) : NUM_LANES*8;
+
+  // Pad out unused vec_class bits
+  if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class
+    assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0;
+  end
+
+  // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1;
+  // Vectorial results use the packed 8-bit blocks, scalar results use the class mask of lane 0
+  assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0];
+
+  // Select the proper result
+  assign result_o = result_is_class ? slice_class_result : slice_regular_result;
+
+  assign extension_bit_o                              = lane_ext_bit[0]; // upper lanes unused
+  assign tag_o                                        = lane_tags[0];    // upper lanes unused
+  assign busy_o                                       = (| lane_busy);
+  assign out_valid_o                                  = lane_out_valid[0]; // upper lanes unused
+
+
+  // Collapse the lane status
+  always_comb begin : output_processing
+    // Collapse the status: OR together the status flags of all lanes
+    automatic fpnew_pkg::status_t temp_status;
+    temp_status = '0;
+    for (int i = 0; i < int'(NUM_LANES); i++)
+      temp_status |= lane_status[i];
+    status_o = temp_status;
+  end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
new file mode 100644
index 0000000000..14c1b4538f
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_opgroup_multifmt_slice.sv
@@ -0,0 +1,414 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+`include "common_cells/registers.svh"
+
+module fpnew_opgroup_multifmt_slice #( // multi-format slice: generates one multi-format OpGroup unit per lane
+  parameter fpnew_pkg::opgroup_e     OpGroup       = fpnew_pkg::CONV,
+  parameter int unsigned             Width         = 64,
+  // FPU configuration
+  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig   = '1,
+  parameter fpnew_pkg::ifmt_logic_t  IntFmtConfig  = '1,
+  parameter logic                    EnableVectors = 1'b1,
+  parameter int unsigned             NumPipeRegs   = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig    = fpnew_pkg::BEFORE,
+  parameter type                     TagType       = logic,
+  // Do not change: derived from OpGroup and fpnew_pkg
+  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
+  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS
+) (
+  input logic                                     clk_i,
+  input logic                                     rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][Width-1:0]       operands_i,
+  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
+  input fpnew_pkg::roundmode_e                    rnd_mode_i,
+  input fpnew_pkg::operation_e                    op_i,
+  input logic                                     op_mod_i,
+  input fpnew_pkg::fp_format_e                    src_fmt_i,
+  input fpnew_pkg::fp_format_e                    dst_fmt_i,
+  input fpnew_pkg::int_format_e                   int_fmt_i,
+  input logic                                     vectorial_op_i,
+  input TagType                                   tag_i,
+  // Input Handshake
+  input  logic                                    in_valid_i,
+  output logic                                    in_ready_o,
+  input  logic                                    flush_i,
+  // Output signals
+  output logic [Width-1:0]                        result_o,
+  output fpnew_pkg::status_t                      status_o,
+  output logic                                    extension_bit_o,
+  output TagType                                  tag_o,
+  // Output handshake
+  output logic                                    out_valid_o,
+  input  logic                                    out_ready_i,
+  // Indication of valid data in flight
+  output logic                                    busy_o
+);
+
+  localparam int unsigned MAX_FP_WIDTH   = fpnew_pkg::max_fp_width(FpFmtConfig);
+  localparam int unsigned MAX_INT_WIDTH  = fpnew_pkg::max_int_width(IntFmtConfig);
+  localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1);
+  localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
+  // We will send the format information along with the data
+  localparam int unsigned FMT_BITS =
+      fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS));
+  localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags
+
+  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
+  logic                 vectorial_op;
+  logic [FMT_BITS-1:0]  dst_fmt; // destination format to pass along with operation
+  logic [AUX_BITS-1:0]  aux_data;
+
+  // additional flags for CONV
+  logic       dst_fmt_is_int, dst_is_cpk;
+  logic [1:0] dst_vec_op; // info for vectorial results (for packing)
+  logic [2:0] target_aux_d, target_aux_q;
+  logic       is_up_cast, is_down_cast;
+
+  logic [NUM_FORMATS-1:0][Width-1:0]     fmt_slice_result;
+  logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result;
+  logic [Width-1:0]                      conv_slice_result; // declared only; not driven or read in this module
+
+
+  logic [Width-1:0] conv_target_d, conv_target_q; // vectorial conversions update a register
+
+  fpnew_pkg::status_t [NUM_LANES-1:0]   lane_status;
+  logic   [NUM_LANES-1:0]               lane_ext_bit; // only the first one is actually used
+  TagType [NUM_LANES-1:0]               lane_tags; // only the first one is actually used
+  logic   [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used
+  logic   [NUM_LANES-1:0]               lane_busy; // all lanes are OR-reduced into busy_o
+
+  logic                result_is_vector;
+  logic [FMT_BITS-1:0] result_fmt;
+  logic                result_fmt_is_int, result_is_cpk;
+  logic [1:0]          result_vec_op; // info for vectorial results (for packing)
+
+  // -----------
+  // Input Side
+  // -----------
+  assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
+  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
+
+  // Cast-and-Pack ops are encoded in operation and modifier
+  assign dst_fmt_is_int = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::F2I);
+  assign dst_is_cpk     = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::CPKAB ||
+                                                          op_i == fpnew_pkg::CPKCD);
+  assign dst_vec_op     = (OpGroup == fpnew_pkg::CONV) ? {(op_i == fpnew_pkg::CPKCD), op_mod_i} : '0;
+  // ^ ternary, not '&': a 1-bit compare ANDed with 2 bits is zero-extended and would clear bit 1
+  assign is_up_cast   = (fpnew_pkg::fp_width(dst_fmt_i) > fpnew_pkg::fp_width(src_fmt_i));
+  assign is_down_cast = (fpnew_pkg::fp_width(dst_fmt_i) < fpnew_pkg::fp_width(src_fmt_i));
+
+  // The destination format is the int format for F2I casts
+  assign dst_fmt    = dst_fmt_is_int ? int_fmt_i : dst_fmt_i;
+
+  // The data sent along consists of the vectorial flag and format bits
+  assign aux_data      = {dst_fmt_is_int, vectorial_op, dst_fmt};
+  assign target_aux_d  = {dst_vec_op, dst_is_cpk};
+
+  // CONV passes one operand for assembly after the unit: opC for cpk, opB for others
+  if (OpGroup == fpnew_pkg::CONV) begin : conv_target
+    assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1];
+  end
+
+  // For 2-operand units, prepare boxing info
+  logic [NUM_FORMATS-1:0]      is_boxed_1op;
+  logic [NUM_FORMATS-1:0][1:0] is_boxed_2op;
+
+  always_comb begin : boxed_2op
+    for (int fmt = 0; fmt < NUM_FORMATS; fmt++) begin
+      is_boxed_1op[fmt] = is_boxed_i[fmt][0];
+      is_boxed_2op[fmt] = is_boxed_i[fmt][1:0];
+    end
+  end
+
+  // ---------------
+  // Generate Lanes
+  // ---------------
+  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
+    localparam int unsigned LANE = unsigned'(lane); // unsigned to please the linter
+    // Get a mask of active formats for this lane
+    localparam fpnew_pkg::fmt_logic_t ACTIVE_FORMATS =
+        fpnew_pkg::get_lane_formats(Width, FpFmtConfig, LANE);
+    localparam fpnew_pkg::ifmt_logic_t ACTIVE_INT_FORMATS =
+        fpnew_pkg::get_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
+    localparam int unsigned MAX_WIDTH = fpnew_pkg::max_fp_width(ACTIVE_FORMATS);
+
+    // Cast-specific parameters
+    localparam fpnew_pkg::fmt_logic_t CONV_FORMATS =
+        fpnew_pkg::get_conv_lane_formats(Width, FpFmtConfig, LANE);
+    localparam fpnew_pkg::ifmt_logic_t CONV_INT_FORMATS =
+        fpnew_pkg::get_conv_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
+    localparam int unsigned CONV_WIDTH = fpnew_pkg::max_fp_width(CONV_FORMATS);
+
+    // Lane parameters from Opgroup
+    localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV)
+                                                     ? CONV_FORMATS : ACTIVE_FORMATS;
+    localparam int unsigned LANE_WIDTH = (OpGroup == fpnew_pkg::CONV) ? CONV_WIDTH : MAX_WIDTH;
+
+    logic [LANE_WIDTH-1:0] local_result; // lane-local results
+
+    // Generate instances only if needed, lane 0 always generated
+    if ((lane == 0) || EnableVectors) begin : active_lane
+      logic in_valid, out_valid, out_ready; // lane-local handshake
+
+      logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands;  // lane-local operands
+      logic [LANE_WIDTH-1:0]                   op_result;       // lane-local results
+      fpnew_pkg::status_t                      op_status;
+
+      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+
+      // Slice out the operands for this lane, upper bits are ignored in the unit
+      always_comb begin : prepare_input
+        for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin
+          local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i);
+        end
+
+        // override operand 0 for some conversions
+        if (OpGroup == fpnew_pkg::CONV) begin
+          // Source is an integer
+          if (op_i == fpnew_pkg::I2F) begin
+            local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::int_width(int_fmt_i);
+          // vectorial F2F up casts
+          end else if (op_i == fpnew_pkg::F2F) begin
+            if (vectorial_op && op_mod_i && is_up_cast) begin // up cast with upper half
+              local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::fp_width(src_fmt_i) +
+                                                   MAX_FP_WIDTH/2; // '+' binds tighter than '>>'
+            end
+          // CPK
+          end else if (dst_is_cpk) begin
+            if (lane == 1) begin
+              local_operands[0] = operands_i[1][LANE_WIDTH-1:0]; // using opB as second argument
+            end
+          end
+        end
+      end
+
+      // Instantiate the operation from the selected opgroup
+      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
+        fpnew_fma_multi #(
+          .FpFmtConfig ( LANE_FORMATS         ),
+          .NumPipeRegs ( NumPipeRegs          ),
+          .PipeConfig  ( PipeConfig           ),
+          .TagType     ( TagType              ),
+          .AuxType     ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_fma_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands  ),
+          .is_boxed_i,
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .src_fmt_i,
+          .dst_fmt_i,
+          .tag_i,
+          .aux_i           ( aux_data            ),
+          .in_valid_i      ( in_valid            ),
+          .in_ready_o      ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o        ( op_result           ),
+          .status_o        ( op_status           ),
+          .extension_bit_o ( lane_ext_bit[lane]  ),
+          .tag_o           ( lane_tags[lane]     ),
+          .aux_o           ( lane_aux[lane]      ),
+          .out_valid_o     ( out_valid           ),
+          .out_ready_i     ( out_ready           ),
+          .busy_o          ( lane_busy[lane]     )
+        );
+
+      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
+        fpnew_divsqrt_multi #(
+          .FpFmtConfig ( LANE_FORMATS         ),
+          .NumPipeRegs ( NumPipeRegs          ),
+          .PipeConfig  ( PipeConfig           ),
+          .TagType     ( TagType              ),
+          .AuxType     ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_divsqrt_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands[1:0] ), // 2 operands
+          .is_boxed_i      ( is_boxed_2op        ), // 2 operands
+          .rnd_mode_i,
+          .op_i,
+          .dst_fmt_i,
+          .tag_i,
+          .aux_i           ( aux_data            ),
+          .in_valid_i      ( in_valid            ),
+          .in_ready_o      ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o        ( op_result           ),
+          .status_o        ( op_status           ),
+          .extension_bit_o ( lane_ext_bit[lane]  ),
+          .tag_o           ( lane_tags[lane]     ),
+          .aux_o           ( lane_aux[lane]      ),
+          .out_valid_o     ( out_valid           ),
+          .out_ready_i     ( out_ready           ),
+          .busy_o          ( lane_busy[lane]     )
+        );
+      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
+        // no unit is instantiated for NONCOMP in this slice
+      end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance
+        fpnew_cast_multi #(
+          .FpFmtConfig  ( LANE_FORMATS         ),
+          .IntFmtConfig ( CONV_INT_FORMATS     ),
+          .NumPipeRegs  ( NumPipeRegs          ),
+          .PipeConfig   ( PipeConfig           ),
+          .TagType      ( TagType              ),
+          .AuxType      ( logic [AUX_BITS-1:0] )
+        ) i_fpnew_cast_multi (
+          .clk_i,
+          .rst_ni,
+          .operands_i      ( local_operands[0]   ),
+          .is_boxed_i      ( is_boxed_1op        ),
+          .rnd_mode_i,
+          .op_i,
+          .op_mod_i,
+          .src_fmt_i,
+          .dst_fmt_i,
+          .int_fmt_i,
+          .tag_i,
+          .aux_i           ( aux_data            ),
+          .in_valid_i      ( in_valid            ),
+          .in_ready_o      ( lane_in_ready[lane] ),
+          .flush_i,
+          .result_o        ( op_result           ),
+          .status_o        ( op_status           ),
+          .extension_bit_o ( lane_ext_bit[lane]  ),
+          .tag_o           ( lane_tags[lane]     ),
+          .aux_o           ( lane_aux[lane]      ),
+          .out_valid_o     ( out_valid           ),
+          .out_ready_i     ( out_ready           ),
+          .busy_o          ( lane_busy[lane]     )
+        );
+      end // ADD OTHER OPTIONS HERE
+
+      // Handshakes are only done if the lane is actually used
+      assign out_ready            = out_ready_i & ((lane == 0) | result_is_vector);
+      assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector);
+
+      // Properly NaN-box or sign-extend the slice result if not in use
+      assign local_result      = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
+      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
+
+    // Otherwise generate constant sign-extension
+    end else begin : inactive_lane
+      assign lane_out_valid[lane] = 1'b0; // unused lane
+      assign lane_in_ready[lane]  = 1'b0; // unused lane
+      assign local_result         = '{default: lane_ext_bit[0]}; // sign-extend/nan box
+      assign lane_status[lane]    = '0;
+      assign lane_busy[lane]      = 1'b0;
+    end
+
+    // Generate result packing depending on float format
+    for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : pack_fp_result
+      // Set up some constants
+      localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+      // only for active formats within the lane
+      if (ACTIVE_FORMATS[fmt])
+        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
+            local_result[FP_WIDTH-1:0];
+    end
+
+    // Generate result packing depending on integer format
+    if (OpGroup == fpnew_pkg::CONV) begin : int_results_enabled
+      for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : pack_int_result
+        // Set up some constants
+        localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+        if (ACTIVE_INT_FORMATS[ifmt])
+          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] =
+            local_result[INT_WIDTH-1:0];
+      end
+    end
+  end
+
+  // Extend slice result if needed
+  for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : extend_fp_result
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    if (NUM_LANES*FP_WIDTH < Width)
+      assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]};
+  end
+
+  // Mute int results if unused
+  for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled
+    if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result
+      assign ifmt_slice_result[ifmt] = '0;
+    end
+  end
+
+  // Bypass lanes with target operand for vectorial casts
+  if (OpGroup == fpnew_pkg::CONV) begin : target_regs
+    // Bypass pipeline signals, index i holds signal after i register stages
+    logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q;
+    logic [0:NumPipeRegs][2:0]       byp_pipe_aux_q;
+    logic [0:NumPipeRegs]            byp_pipe_valid_q;
+    // Ready signal is combinatorial for all stages
+    logic [0:NumPipeRegs] byp_pipe_ready;
+
+    // Input stage: First element of pipeline is taken from inputs
+    assign byp_pipe_target_q[0]  = conv_target_d;
+    assign byp_pipe_aux_q[0]     = target_aux_d;
+    assign byp_pipe_valid_q[0]   = in_valid_i & vectorial_op;
+    // Generate the register stages
+    for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline
+      // Internal register enable for this stage
+      logic reg_ena;
+      // Determine the ready signal of the current stage - advance the pipeline:
+      // 1. if the next stage is ready for our data
+      // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+      assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1];
+      // Valid: enabled by ready signal, synchronous clear with the flush signal
+      `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+      // Enable register if pipeline ready and a valid data item is present
+      assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i];
+      // Generate the pipeline registers within the stages, use enable-registers
+      `FFL(byp_pipe_target_q[i+1],  byp_pipe_target_q[i],  reg_ena, '0)
+      `FFL(byp_pipe_aux_q[i+1],     byp_pipe_aux_q[i],     reg_ena, '0)
+    end
+    // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+    assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector;
+    // Output stage: assign module outputs
+    assign conv_target_q = byp_pipe_target_q[NumPipeRegs];
+
+    // decode the aux data (result_vec_op / result_is_cpk are not read elsewhere in this module)
+    assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs];
+  end else begin : no_conv
+    assign {result_vec_op, result_is_cpk} = '0;
+  end
+
+  // ------------
+  // Output Side
+  // ------------
+  assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0];
+
+  assign result_o = result_fmt_is_int
+                    ? ifmt_slice_result[result_fmt]
+                    : fmt_slice_result[result_fmt];
+
+  assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones
+  assign tag_o           = lane_tags[0];    // don't care about upper ones
+  assign busy_o          = (| lane_busy);
+
+  assign out_valid_o     = lane_out_valid[0]; // don't care about upper ones
+
+  // Collapse the status
+  always_comb begin : output_processing
+    // Collapse the status: OR together the status flags of all lanes
+    automatic fpnew_pkg::status_t temp_status;
+    temp_status = '0;
+    for (int i = 0; i < int'(NUM_LANES); i++)
+      temp_status |= lane_status[i];
+    status_o = temp_status;
+  end
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv b/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
new file mode 100644
index 0000000000..6065054fad
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_pkg.sv
@@ -0,0 +1,484 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+package fpnew_pkg;
+
+  // ---------
+  // FP TYPES
+  // ---------
+  // | Enumerator | Format           | Width  | EXP_BITS | MAN_BITS
+  // |:----------:|------------------|-------:|:--------:|:--------:
+  // | FP32       | IEEE binary32    | 32 bit | 8        | 23
+  // | FP64       | IEEE binary64    | 64 bit | 11       | 52
+  // | FP16       | IEEE binary16    | 16 bit | 5        | 10
+  // | FP8        | binary8          |  8 bit | 5        | 2
+  // | FP16ALT    | binary16alt      | 16 bit | 8        | 7
+  // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+
+  // Encoding of an FP format, given by its exponent and mantissa field widths (see table above)
+  typedef struct packed {
+    int unsigned exp_bits; // number of exponent bits (EXP_BITS column)
+    int unsigned man_bits; // number of mantissa bits (MAN_BITS column)
+  } fp_encoding_t;
+
+  localparam int unsigned NUM_FP_FORMATS = 5; // change me to add formats
+  localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS); // width of an fp_format_e value
+
+  // FP formats: the enumerator value is also the index into FP_ENCODINGS and the format masks
+  typedef enum logic [FP_FORMAT_BITS-1:0] {
+    FP32    = 'd0, // IEEE binary32
+    FP64    = 'd1, // IEEE binary64
+    FP16    = 'd2, // IEEE binary16
+    FP8     = 'd3, // binary8
+    FP16ALT = 'd4  // binary16alt
+    // add new formats here
+  } fp_format_e;
+
+  // Encodings for supported FP formats as '{exp_bits, man_bits}, listed in fp_format_e order
+  localparam fp_encoding_t [0:NUM_FP_FORMATS-1] FP_ENCODINGS  = '{
+    '{8,  23}, // IEEE binary32 (single)
+    '{11, 52}, // IEEE binary64 (double)
+    '{5,  10}, // IEEE binary16 (half)
+    '{5,  2},  // custom binary8
+    '{8,  7}   // custom binary16alt
+    // add new formats here
+  };
+
+  typedef logic [0:NUM_FP_FORMATS-1]       fmt_logic_t;    // Logic indexed by FP format (for masks)
+  typedef logic [0:NUM_FP_FORMATS-1][31:0] fmt_unsigned_t; // Unsigned indexed by FP format
+  // Ascending [0:N-1] range: index 0 (FP32) is the leftmost (MSB) bit of a mask literal
+  localparam fmt_logic_t CPK_FORMATS = 5'b11000; // FP32 and FP64 can provide CPK only
+
+  // ---------
+  // INT TYPES
+  // ---------
+  // | Enumerator | Width  |
+  // |:----------:|-------:|
+  // | INT8       |  8 bit |
+  // | INT16      | 16 bit |
+  // | INT32      | 32 bit |
+  // | INT64      | 64 bit |
+  // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+
+  localparam int unsigned NUM_INT_FORMATS = 4; // change me to add formats
+  localparam int unsigned INT_FORMAT_BITS = $clog2(NUM_INT_FORMATS); // width of an int_format_e value
+
+  // Int formats: enumerators take the implicit values 0..3 in declaration order
+  typedef enum logic [INT_FORMAT_BITS-1:0] {
+    INT8,  //  8 bit
+    INT16, // 16 bit
+    INT32, // 32 bit
+    INT64  // 64 bit
+    // add new formats here
+  } int_format_e;
+
+  // Returns the width in bits of an INT format (INT8 -> 8, INT16 -> 16, INT32 -> 32, INT64 -> 64)
+  function automatic int unsigned int_width(int_format_e ifmt);
+    unique case (ifmt) // every int_format_e enumerator is listed; there is no default arm
+      INT8:  return 8;
+      INT16: return 16;
+      INT32: return 32;
+      INT64: return 64;
+    endcase
+  endfunction
+
+  typedef logic [0:NUM_INT_FORMATS-1] ifmt_logic_t; // Logic indexed by INT format (for masks)
+
+  // --------------
+  // FP OPERATIONS
+  // --------------
+  localparam int unsigned NUM_OPGROUPS = 4; // number of enumerators in opgroup_e
+
+  // Each FP operation belongs to an operation group
+  typedef enum logic [1:0] {
+    ADDMUL, DIVSQRT, NONCOMP, CONV // implicit values 0..3, used to index per-opgroup arrays
+  } opgroup_e;
+
+  localparam int unsigned OP_BITS = 4; // width of an operation_e value (15 of 16 codes are named)
+
+  typedef enum logic [OP_BITS-1:0] {
+    FMADD, FNMSUB, ADD, MUL,     // ADDMUL operation group
+    DIV, SQRT,                   // DIVSQRT operation group
+    SGNJ, MINMAX, CMP, CLASSIFY, // NONCOMP operation group
+    F2F, F2I, I2F, CPKAB, CPKCD  // CONV operation group
+  } operation_e;
+
+  // -------------------
+  // RISC-V FP-SPECIFIC
+  // -------------------
+  // Rounding modes (3-bit encodings as decoded by fpnew_rounding)
+  typedef enum logic [2:0] {
+    RNE = 3'b000, // Round to Nearest, ties to Even
+    RTZ = 3'b001, // Round towards Zero
+    RDN = 3'b010, // Round Down (towards -infinity)
+    RUP = 3'b011, // Round Up (towards +infinity)
+    RMM = 3'b100, // Round to Nearest, ties to Max Magnitude
+    DYN = 3'b111  // not a rounding decision itself: fpnew_rounding treats it via its default arm
+  } roundmode_e;
+
+  // Status flags; packed struct, so NV is the most significant bit and NX the least significant
+  typedef struct packed {
+    logic NV; // Invalid
+    logic DZ; // Divide by zero
+    logic OF; // Overflow
+    logic UF; // Underflow
+    logic NX; // Inexact
+  } status_t;
+
+  // Information about a floating point value
+  typedef struct packed {
+    logic is_normal;     // is the value normal
+    logic is_subnormal;  // is the value subnormal
+    logic is_zero;       // is the value zero
+    logic is_inf;        // is the value infinity
+    logic is_nan;        // is the value NaN
+    logic is_signalling; // is the value a signalling NaN
+    logic is_quiet;      // is the value a quiet NaN
+    logic is_boxed;      // is the value properly NaN-boxed (RISC-V specific)
+  } fp_info_t;
+
+  // Classification mask: one-hot code, exactly one of the ten bits is set per value class
+  typedef enum logic [9:0] {
+    NEGINF     = 10'b00_0000_0001,
+    NEGNORM    = 10'b00_0000_0010,
+    NEGSUBNORM = 10'b00_0000_0100,
+    NEGZERO    = 10'b00_0000_1000,
+    POSZERO    = 10'b00_0001_0000,
+    POSSUBNORM = 10'b00_0010_0000,
+    POSNORM    = 10'b00_0100_0000,
+    POSINF     = 10'b00_1000_0000,
+    SNAN       = 10'b01_0000_0000,
+    QNAN       = 10'b10_0000_0000
+  } classmask_e;
+
+  // ------------------
+  // FPU configuration
+  // ------------------
+  // Pipelining registers can be inserted (at elaboration time) into operational units
+  typedef enum logic [1:0] {
+    BEFORE,     // registers are inserted at the inputs of the unit
+    AFTER,      // registers are inserted at the outputs of the unit
+    INSIDE,     // registers are inserted at predetermined (suboptimal) locations in the unit
+    DISTRIBUTED // registers are evenly distributed, INSIDE >= AFTER >= BEFORE
+  } pipe_config_t;
+
+  // Arithmetic units can be arranged in parallel (per format), merged (multi-format) or not at all.
+  typedef enum logic [1:0] {
+    DISABLED, // arithmetic units are not generated
+    PARALLEL, // arithmetic units are generated in parallel slices, one for each format
+    MERGED    // arithmetic units are contained within a merged unit holding multiple formats
+  } unit_type_t;
+
+  // Array of unit types indexed by format (index = fp_format_e value)
+  typedef unit_type_t [0:NUM_FP_FORMATS-1] fmt_unit_types_t;
+
+  // Array of format-specific unit types by opgroup (index = opgroup_e value)
+  typedef fmt_unit_types_t [0:NUM_OPGROUPS-1] opgrp_fmt_unit_types_t;
+  // Array of format-specific unsigned values by opgroup (used for the PipeRegs setting)
+  typedef fmt_unsigned_t [0:NUM_OPGROUPS-1] opgrp_fmt_unsigned_t;
+
+  // FPU configuration: features
+  typedef struct packed {
+    int unsigned Width;         // operand and result width of fpnew_top in bits
+    logic        EnableVectors; // forwarded to every fpnew_opgroup_block
+    logic        EnableNanBox;  // enables the NaN-boxing check for formats narrower than Width
+    fmt_logic_t  FpFmtMask;     // enabled FP formats, bit index = fp_format_e value
+    ifmt_logic_t IntFmtMask;    // enabled INT formats, bit index = int_format_e value
+  } fpu_features_t;
+
+  localparam fpu_features_t RV64D = '{
+    Width:         64,
+    EnableVectors: 1'b0,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b11000, // FP32, FP64 (leftmost mask bit is index 0)
+    IntFmtMask:    4'b0011   // INT32, INT64
+  };
+
+  localparam fpu_features_t RV32D = '{
+    Width:         64,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b11000, // FP32, FP64
+    IntFmtMask:    4'b0010   // INT32
+  };
+
+  localparam fpu_features_t RV32F = '{
+    Width:         32,
+    EnableVectors: 1'b0,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b10000, // FP32
+    IntFmtMask:    4'b0010   // INT32
+  };
+
+  localparam fpu_features_t RV64D_Xsflt = '{
+    Width:         64,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b11111, // all five FP formats
+    IntFmtMask:    4'b1111   // all four INT formats
+  };
+
+  localparam fpu_features_t RV32F_Xsflt = '{
+    Width:         32,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b10111, // FP32, FP16, FP8, FP16ALT
+    IntFmtMask:    4'b1110   // INT8, INT16, INT32
+  };
+
+  localparam fpu_features_t RV32F_Xf16alt_Xfvec = '{
+    Width:         32,
+    EnableVectors: 1'b1,
+    EnableNanBox:  1'b1,
+    FpFmtMask:     5'b10001, // FP32, FP16ALT
+    IntFmtMask:    4'b0110   // INT16, INT32
+  };
+
+
+  // FPU configuration: implementation
+  typedef struct packed {
+    opgrp_fmt_unsigned_t   PipeRegs;   // unsigned value per opgroup and format
+    opgrp_fmt_unit_types_t UnitTypes;  // unit arrangement per opgroup and format
+    pipe_config_t          PipeConfig; // single setting shared by all opgroups
+  } fpu_implementation_t;
+
+  localparam fpu_implementation_t DEFAULT_NOREGS = '{
+    PipeRegs:   '{default: 0},
+    UnitTypes:  '{'{default: PARALLEL}, // ADDMUL
+                  '{default: MERGED},   // DIVSQRT
+                  '{default: PARALLEL}, // NONCOMP
+                  '{default: MERGED}},  // CONV
+    PipeConfig: BEFORE
+  };
+
+  localparam fpu_implementation_t DEFAULT_SNITCH = '{
+    PipeRegs:   '{default: 1},
+    UnitTypes:  '{'{default: PARALLEL}, // ADDMUL
+                  '{default: DISABLED}, // DIVSQRT
+                  '{default: PARALLEL}, // NONCOMP
+                  '{default: MERGED}},  // CONV
+    PipeConfig: BEFORE
+  };
+
+  // -----------------------
+  // Synthesis optimization
+  // -----------------------
+  localparam logic DONT_CARE = 1'b1; // the value to assign as don't care
+
+  // -------------------------
+  // General helper functions
+  // -------------------------
+  function automatic int minimum(int a, int b);
+    return (b > a) ? a : b; // signed compare; yields b when both are equal
+  endfunction
+
+  function automatic int maximum(int a, int b);
+    return (b < a) ? a : b; // signed compare; yields b when both are equal
+  endfunction
+
+  // -------------------------------------------
+  // Helper functions for FP formats and values
+  // -------------------------------------------
+  // Returns the total bit width of an FP format: sign bit + exponent bits + mantissa bits
+  function automatic int unsigned fp_width(fp_format_e fmt);
+    return 1 + FP_ENCODINGS[fmt].exp_bits + FP_ENCODINGS[fmt].man_bits;
+  endfunction
+
+  // Returns the bit width of the widest FP format enabled in cfg (0 if the mask is empty)
+  function automatic int unsigned max_fp_width(fmt_logic_t cfg);
+    automatic int unsigned widest = 0;
+    for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) begin
+      if (cfg[fmt]) widest = unsigned'(maximum(widest, fp_width(fp_format_e'(fmt))));
+    end
+    return widest;
+  endfunction
+
+  // Returns the bit width of the narrowest FP format enabled in cfg
+  function automatic int unsigned min_fp_width(fmt_logic_t cfg);
+    automatic int unsigned narrowest = max_fp_width(cfg); // start from the upper bound
+    for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) begin
+      if (cfg[fmt]) narrowest = unsigned'(minimum(narrowest, fp_width(fp_format_e'(fmt))));
+    end
+    return narrowest;
+  endfunction
+
+  // Returns the number of exponent bits for a format (looked up in FP_ENCODINGS)
+  function automatic int unsigned exp_bits(fp_format_e fmt);
+    return FP_ENCODINGS[fmt].exp_bits;
+  endfunction
+
+  // Returns the number of mantissa bits for a format (looked up in FP_ENCODINGS)
+  function automatic int unsigned man_bits(fp_format_e fmt);
+    return FP_ENCODINGS[fmt].man_bits;
+  endfunction
+
+  // Returns the bias value for a given format (as per IEEE 754-2008): 2^(exp_bits-1) - 1
+  function automatic int unsigned bias(fp_format_e fmt);
+    return unsigned'(2**(FP_ENCODINGS[fmt].exp_bits-1)-1); // symmetrical bias
+  endfunction
+
+  function automatic fp_encoding_t super_format(fmt_logic_t cfg);
+    // Encoding with the largest exp_bits and the largest man_bits over all formats enabled in cfg
+    automatic fp_encoding_t enc = '0;
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
+      if (cfg[i]) begin // inactive formats do not contribute
+        enc.exp_bits = unsigned'(maximum(enc.exp_bits, exp_bits(fp_format_e'(i))));
+        enc.man_bits = unsigned'(maximum(enc.man_bits, man_bits(fp_format_e'(i))));
+      end
+    end
+    return enc;
+  endfunction
+
+  // -------------------------------------------
+  // Helper functions for INT formats and values
+  // -------------------------------------------
+  // Returns the bit width of the widest INT format enabled in cfg (0 if none is enabled)
+  function automatic int unsigned max_int_width(ifmt_logic_t cfg);
+    automatic int unsigned res = 0;
+    for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin // unsigned like the bound
+      if (cfg[ifmt]) res = unsigned'(maximum(res, int_width(int_format_e'(ifmt))));
+    end
+    return res;
+  endfunction
+
+  // --------------------------------------------------
+  // Helper functions for operations and FPU structure
+  // --------------------------------------------------
+  // Returns the operation group of the given operation
+  function automatic opgroup_e get_opgroup(operation_e op);
+    unique case (op)
+      FMADD, FNMSUB, ADD, MUL:     return ADDMUL;
+      DIV, SQRT:                   return DIVSQRT;
+      SGNJ, MINMAX, CMP, CLASSIFY: return NONCOMP;
+      F2F, F2I, I2F, CPKAB, CPKCD: return CONV;
+      default:                     return NONCOMP; // any encoding not named in operation_e
+    endcase
+  endfunction
+
+  // Returns the number of operands consumed by an operation group
+  function automatic int unsigned num_operands(opgroup_e grp);
+    unique case (grp)
+      // three-operand groups; vectorial casts use 3 operands in CONV
+      ADDMUL, CONV:     return 3;
+      // two-operand groups
+      DIVSQRT, NONCOMP: return 2;
+      default:          return 0;
+    endcase
+  endfunction
+
+  // Returns the number of lanes for one format: a single lane unless vectors are enabled
+  function automatic int unsigned num_lanes(int unsigned width, fp_format_e fmt, logic vec);
+    return (!vec) ? 1 : width / fp_width(fmt); // with vectors: how many fmt-wide lanes fit in width
+  endfunction
+
+  // Returns the largest lane count in the FPU: set by the narrowest enabled format when vectorial
+  function automatic int unsigned max_num_lanes(int unsigned width, fmt_logic_t cfg, logic vec);
+    return (!vec) ? 1 : width / min_fp_width(cfg); // a single lane when vectors are disabled
+  endfunction
+
+  // Returns the mask of enabled FP formats that still have a lane at position lane_no
+  function automatic fmt_logic_t get_lane_formats(int unsigned width,
+                                                  fmt_logic_t cfg,
+                                                  int unsigned lane_no);
+    automatic fmt_logic_t mask;
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
+      mask[i] = cfg[i] & (lane_no < width / fp_width(fp_format_e'(i)));
+    end
+    return mask;
+  endfunction
+
+  // Returns the mask of enabled INT formats that match the width of an FP format in lane lane_no
+  function automatic ifmt_logic_t get_lane_int_formats(int unsigned width,
+                                                       fmt_logic_t cfg,
+                                                       ifmt_logic_t icfg,
+                                                       int unsigned lane_no);
+    automatic ifmt_logic_t imask;
+    automatic fmt_logic_t  fp_mask;
+    imask   = '0;
+    fp_mask = get_lane_formats(width, cfg, lane_no);
+    for (int unsigned i = 0; i < NUM_INT_FORMATS; i++) begin
+      for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+        if (int_width(int_format_e'(i)) == fp_width(fp_format_e'(f)))
+          imask[i] |= icfg[i] && fp_mask[f];
+      end
+    end
+    return imask;
+  endfunction
+
+  // Returns the mask of enabled FP formats in CONV lane lane_no; CPK formats keep lanes 0 and 1
+  function automatic fmt_logic_t get_conv_lane_formats(int unsigned width,
+                                                       fmt_logic_t cfg,
+                                                       int unsigned lane_no);
+    automatic fmt_logic_t mask;
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
+      mask[i] = cfg[i] && ((lane_no < width / fp_width(fp_format_e'(i))) ||
+                           ((lane_no < 2) && CPK_FORMATS[i]));
+    end
+    return mask;
+  endfunction
+
+  // Returns the mask of enabled INT formats matching the width of an FP format in a CONV lane
+  function automatic ifmt_logic_t get_conv_lane_int_formats(int unsigned width,
+                                                            fmt_logic_t cfg,
+                                                            ifmt_logic_t icfg,
+                                                            int unsigned lane_no);
+    automatic ifmt_logic_t imask;
+    automatic fmt_logic_t  fp_mask;
+    imask   = '0;
+    fp_mask = get_conv_lane_formats(width, cfg, lane_no);
+    for (int unsigned i = 0; i < NUM_INT_FORMATS; i++) begin
+      for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
+        if (int_width(int_format_e'(i)) == fp_width(fp_format_e'(f)))
+          imask[i] |= icfg[i] && fp_mask[f];
+      end
+    end
+    return imask;
+  endfunction
+
+  // Return whether any active format is set as MERGED
+  function automatic logic any_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
+      if (cfg[i] && types[i] == MERGED) return 1'b1;
+    end
+    return 1'b0; // reached only after the whole loop found no active MERGED format
+  endfunction
+
+  // Return whether fmt is the lowest-indexed active format that is set as MERGED
+  function automatic logic is_first_enabled_multi(fp_format_e fmt,
+                                                  fmt_unit_types_t types,
+                                                  fmt_logic_t cfg);
+    for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
+      if ((types[idx] == MERGED) && cfg[idx]) return (fmt == fp_format_e'(idx));
+    end
+    return 1'b0; // no active MERGED format at all
+  endfunction
+
+  // Returns the first format that is active and is set as MERGED (format index 0 if none is)
+  function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
+      if (cfg[i] && types[i] == MERGED) return fp_format_e'(i);
+    end
+    return fp_format_e'(0); // fallback, reached only when the loop found nothing
+  endfunction
+
+  // Returns the largest regs entry among the active formats set as MERGED (0 if there is none)
+  function automatic int unsigned get_num_regs_multi(fmt_unsigned_t regs,
+                                                     fmt_unit_types_t types,
+                                                     fmt_logic_t cfg);
+    automatic int unsigned res = 0;
+    for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
+      if (cfg[i] && types[i] == MERGED) res = unsigned'(maximum(res, regs[i]));
+    end
+    return res;
+  endfunction
+
+endpackage
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv b/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
new file mode 100644
index 0000000000..60f63bb702
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_rounding.sv
@@ -0,0 +1,72 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_rounding #(
+  parameter int unsigned AbsWidth=2 // Width of the absolute value, without sign bit
+) (
+  // Input value
+  input logic [AbsWidth-1:0]   abs_value_i,             // absolute value without sign
+  input logic                  sign_i,
+  // Rounding information
+  input logic [1:0]            round_sticky_bits_i,     // round and sticky bits {RS}
+  input fpnew_pkg::roundmode_e rnd_mode_i,
+  input logic                  effective_subtraction_i, // sign of inputs affects rounding of zeroes
+  // Output value
+  output logic [AbsWidth-1:0]  abs_rounded_o,           // absolute value without sign
+  output logic                 sign_o,
+  // Output classification
+  output logic                 exact_zero_o             // output is an exact zero
+);
+
+  logic round_up; // Rounding decision: 1 adds one unit to abs_value_i
+
+  // Take the rounding decision according to RISC-V spec
+  // RoundMode | Mnemonic | Meaning
+  // :--------:|:--------:|:-------
+  //    000    |   RNE    | Round to Nearest, ties to Even
+  //    001    |   RTZ    | Round towards Zero
+  //    010    |   RDN    | Round Down (towards -\infty)
+  //    011    |   RUP    | Round Up (towards \infty)
+  //    100    |   RMM    | Round to Nearest, ties to Max Magnitude
+  //  others   |          | *invalid*
+  always_comb begin : rounding_decision
+    unique case (rnd_mode_i)
+      fpnew_pkg::RNE: // Decide according to round/sticky bits
+        unique case (round_sticky_bits_i)
+          2'b00,
+          2'b01: round_up = 1'b0;           // < ulp/2 away, round down
+          2'b10: round_up = abs_value_i[0]; // = ulp/2 away, round towards even result
+          2'b11: round_up = 1'b1;           // > ulp/2 away, round up
+          default: round_up = fpnew_pkg::DONT_CARE; // all 2-bit binary values are listed above
+        endcase
+      fpnew_pkg::RTZ: round_up = 1'b0; // always round down
+      fpnew_pkg::RDN: round_up = (| round_sticky_bits_i) ? sign_i  : 1'b0; // to 0 if +, away if -
+      fpnew_pkg::RUP: round_up = (| round_sticky_bits_i) ? ~sign_i : 1'b0; // to 0 if -, away if +
+      fpnew_pkg::RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
+      default: round_up = fpnew_pkg::DONT_CARE; // invalid mode: package don't-care value
+    endcase
+  end
+
+  // Perform the rounding, exponent change and overflow to inf happens automagically
+  assign abs_rounded_o = abs_value_i + round_up;
+
+  // True zero result is a zero result without dirty round/sticky bits
+  assign exact_zero_o = (abs_value_i == '0) && (round_sticky_bits_i == '0);
+
+  // In case of effective subtraction (thus signs of addition operands must have differed) and a
+  // true zero result, the result sign is '-' in case of RDN and '+' for other modes.
+  assign sign_o = (exact_zero_o && effective_subtraction_i)
+                  ? (rnd_mode_i == fpnew_pkg::RDN)
+                  : sign_i;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpnew_top.sv b/vendor/pulp-platform/fpnew/src/fpnew_top.sv
new file mode 100644
index 0000000000..581f25fbbf
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpnew_top.sv
@@ -0,0 +1,172 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Stefan Mach <smach@iis.ee.ethz.ch>
+
+module fpnew_top #(
+  // FPU configuration
+  parameter fpnew_pkg::fpu_features_t       Features       = fpnew_pkg::RV64D_Xsflt,
+  parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS,
+  parameter type                            TagType        = logic,
+  // Do not change
+  localparam int unsigned WIDTH        = Features.Width,
+  localparam int unsigned NUM_OPERANDS = 3
+) (
+  input logic                               clk_i,
+  input logic                               rst_ni,
+  // Input signals
+  input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i,
+  input fpnew_pkg::roundmode_e              rnd_mode_i,
+  input fpnew_pkg::operation_e              op_i,
+  input logic                               op_mod_i,
+  input fpnew_pkg::fp_format_e              src_fmt_i,
+  input fpnew_pkg::fp_format_e              dst_fmt_i,
+  input fpnew_pkg::int_format_e             int_fmt_i,
+  input logic                               vectorial_op_i,
+  input TagType                             tag_i,
+  // Input Handshake
+  input  logic                              in_valid_i,
+  output logic                              in_ready_o,
+  input  logic                              flush_i,
+  // Output signals
+  output logic [WIDTH-1:0]                  result_o,
+  output fpnew_pkg::status_t                status_o,
+  output TagType                            tag_o,
+  // Output handshake
+  output logic                              out_valid_o,
+  input  logic                              out_ready_i,
+  // Indication of valid data in flight
+  output logic                              busy_o
+);
+
+  localparam int unsigned NUM_OPGROUPS = fpnew_pkg::NUM_OPGROUPS;
+  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS;
+
+  // ----------------
+  // Type Definition
+  // ----------------
+  typedef struct packed {
+    logic [WIDTH-1:0]   result;
+    fpnew_pkg::status_t status;
+    TagType             tag;
+  } output_t;
+
+  // Handshake signals for the blocks
+  logic [NUM_OPGROUPS-1:0] opgrp_in_ready, opgrp_out_valid, opgrp_out_ready, opgrp_ext, opgrp_busy;
+  output_t [NUM_OPGROUPS-1:0] opgrp_outputs;
+
+  logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed;
+
+  // -----------
+  // Input Side: ready comes from the opgroup selected by op_i and is gated with in_valid_i
+  // -----------
+  assign in_ready_o = in_valid_i & opgrp_in_ready[fpnew_pkg::get_opgroup(op_i)];
+
+  // NaN-boxing check: scalar operands need all bits above FP_WIDTH set, vectorial ones always pass
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_nanbox_check
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    // NaN boxing is only generated if it's enabled and needed
+    if (Features.EnableNanBox && (FP_WIDTH < WIDTH)) begin : check
+      for (genvar op = 0; op < int'(NUM_OPERANDS); op++) begin : operands
+        assign is_boxed[fmt][op] = (!vectorial_op_i)
+                                   ? operands_i[op][WIDTH-1:FP_WIDTH] == '1
+                                   : 1'b1;
+      end
+    end else begin : no_check
+      assign is_boxed[fmt] = '1;
+    end
+  end
+
+  // -------------------------
+  // Generate Operation Blocks
+  // -------------------------
+  for (genvar opgrp = 0; opgrp < int'(NUM_OPGROUPS); opgrp++) begin : gen_operation_groups
+    localparam int unsigned NUM_OPS = fpnew_pkg::num_operands(fpnew_pkg::opgroup_e'(opgrp));
+
+    logic in_valid;
+    logic [NUM_FORMATS-1:0][NUM_OPS-1:0] input_boxed;
+
+    assign in_valid = in_valid_i & (fpnew_pkg::get_opgroup(op_i) == fpnew_pkg::opgroup_e'(opgrp));
+
+    // slice out input boxing: keep only the flags of the NUM_OPS operands this opgroup uses
+    always_comb begin : slice_inputs
+      for (int unsigned fmt = 0; fmt < NUM_FORMATS; fmt++)
+        input_boxed[fmt] = is_boxed[fmt][NUM_OPS-1:0];
+    end
+
+    fpnew_opgroup_block #(
+      .OpGroup       ( fpnew_pkg::opgroup_e'(opgrp)    ),
+      .Width         ( WIDTH                           ),
+      .EnableVectors ( Features.EnableVectors          ),
+      .FpFmtMask     ( Features.FpFmtMask              ),
+      .IntFmtMask    ( Features.IntFmtMask             ),
+      .FmtPipeRegs   ( Implementation.PipeRegs[opgrp]  ),
+      .FmtUnitTypes  ( Implementation.UnitTypes[opgrp] ),
+      .PipeConfig    ( Implementation.PipeConfig       ),
+      .TagType       ( TagType                         )
+    ) i_opgroup_block (
+      .clk_i,
+      .rst_ni,
+      .operands_i      ( operands_i[NUM_OPS-1:0] ),
+      .is_boxed_i      ( input_boxed             ),
+      .rnd_mode_i,
+      .op_i,
+      .op_mod_i,
+      .src_fmt_i,
+      .dst_fmt_i,
+      .int_fmt_i,
+      .vectorial_op_i,
+      .tag_i,
+      .in_valid_i      ( in_valid              ),
+      .in_ready_o      ( opgrp_in_ready[opgrp] ),
+      .flush_i,
+      .result_o        ( opgrp_outputs[opgrp].result ),
+      .status_o        ( opgrp_outputs[opgrp].status ),
+      .extension_bit_o ( opgrp_ext[opgrp]            ), // collected here, not used in this module
+      .tag_o           ( opgrp_outputs[opgrp].tag    ),
+      .out_valid_o     ( opgrp_out_valid[opgrp]      ),
+      .out_ready_i     ( opgrp_out_ready[opgrp]      ),
+      .busy_o          ( opgrp_busy[opgrp]           )
+    );
+  end
+
+  // ------------------
+  // Arbitrate Outputs
+  // ------------------
+  output_t arbiter_output;
+
+  // Round-Robin arbiter to decide which result to use
+  rr_arb_tree #(
+    .NumIn     ( NUM_OPGROUPS ),
+    .DataType  ( output_t     ),
+    .AxiVldRdy ( 1'b1         )
+  ) i_arbiter (
+    .clk_i,
+    .rst_ni,
+    .flush_i,
+    .rr_i   ( '0             ),
+    .req_i  ( opgrp_out_valid ),
+    .gnt_o  ( opgrp_out_ready ),
+    .data_i ( opgrp_outputs   ),
+    .gnt_i  ( out_ready_i     ),
+    .req_o  ( out_valid_o     ),
+    .data_o ( arbiter_output  ),
+    .idx_o  ( /* unused */    )
+  );
+
+  // Unpack output (result, status and tag of the opgroup selected by the arbiter)
+  assign result_o        = arbiter_output.result;
+  assign status_o        = arbiter_output.status;
+  assign tag_o           = arbiter_output.tag;
+
+  assign busy_o = (| opgrp_busy);
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/LICENSE b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/LICENSE
new file mode 100644
index 0000000000..18e4f67692
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/LICENSE
@@ -0,0 +1,176 @@
+SOLDERPAD HARDWARE LICENSE version 0.51
+
+This license is based closely on the Apache License Version 2.0, but is not
+approved or endorsed by the Apache Foundation. A copy of the non-modified
+Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0.
+
+As this license is not currently OSI or FSF approved, the Licensor permits any
+Work licensed under this License, at the option of the Licensee, to be treated
+as licensed under the Apache License Version 2.0 (which is so approved).
+
+This License is licensed under the terms of this License and in particular
+clause 7 below (Disclaimer of Warranties) applies in relation to its use.
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the Rights owner or entity authorized by the Rights owner
+that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other entities
+that control, are controlled by, or are under common control with that entity.
+For the purposes of this definition, "control" means (i) the power, direct or
+indirect, to cause the direction or management of such entity, whether by
+contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+"Rights" means copyright and any similar right including design right (whether
+registered or unregistered), semiconductor topography (mask) rights and
+database rights (but excluding Patents and Trademarks).
+
+"Source" form shall mean the preferred form for making modifications, including
+but not limited to source code, net lists, board layouts, CAD files,
+documentation source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical transformation or
+translation of a Source form, including but not limited to compiled object
+code, generated documentation, the instantiation of a hardware design and
+conversions to other media types, including intermediate forms such as
+bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask
+works).
+
+"Work" shall mean the work of authorship, whether in Source form or other
+Object form, made available under the License, as indicated by a Rights notice
+that is included in or attached to the work (an example is provided in the
+Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form, that
+is based on (or derived from) the Work and for which the editorial revisions,
+annotations, elaborations, or other modifications represent, as a whole, an
+original work of authorship. For the purposes of this License, Derivative Works
+shall not include works that remain separable from, or merely link (or bind by
+name) or physically connect to or interoperate with the interfaces of, the Work
+and Derivative Works thereof.
+
+"Contribution" shall mean any design or work of authorship, including the
+original version of the Work and any modifications or additions to that Work or
+Derivative Works thereof, that is intentionally submitted to Licensor for
+inclusion in the Work by the Rights owner or by an individual or Legal Entity
+authorized to submit on behalf of the Rights owner. For the purposes of this
+definition, "submitted" means any form of electronic, verbal, or written
+communication sent to the Licensor or its representatives, including but not
+limited to communication on electronic mailing lists, source code control
+systems, and issue tracking systems that are managed by, or on behalf of, the
+Licensor for the purpose of discussing and improving the Work, but excluding
+communication that is conspicuously marked or otherwise designated in writing
+by the Rights owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
+of whom a Contribution has been received by Licensor and subsequently
+incorporated within the Work.
+
+2. Grant of License. Subject to the terms and conditions of this License, each
+Contributor hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable license under the Rights to reproduce,
+prepare Derivative Works of, publicly display, publicly perform, sublicense,
+and distribute the Work and such Derivative Works in Source or Object form and
+do anything in relation to the Work as if the Rights did not exist.
+
+3. Grant of Patent License. Subject to the terms and conditions of this
+License, each Contributor hereby grants to You a perpetual, worldwide,
+non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this
+section) patent license to make, have made, use, offer to sell, sell, import,
+and otherwise transfer the Work, where such license applies only to those
+patent claims licensable by such Contributor that are necessarily infringed by
+their Contribution(s) alone or by combination of their Contribution(s) with the
+Work to which such Contribution(s) was submitted. If You institute patent
+litigation against any entity (including a cross-claim or counterclaim in a
+lawsuit) alleging that the Work or a Contribution incorporated within the Work
+constitutes direct or contributory patent infringement, then any patent
+licenses granted to You under this License for that Work shall terminate as of
+the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the Work or
+Derivative Works thereof in any medium, with or without modifications, and in
+Source or Object form, provided that You meet the following conditions:
+
+    You must give any other recipients of the Work or Derivative Works a copy
+    of this License; and
+
+    You must cause any modified files to carry prominent notices stating that
+    You changed the files; and
+
+    You must retain, in the Source form of any Derivative Works that You
+    distribute, all copyright, patent, trademark, and attribution notices from
+    the Source form of the Work, excluding those notices that do not pertain to
+    any part of the Derivative Works; and
+
+    If the Work includes a "NOTICE" text file as part of its distribution, then
+    any Derivative Works that You distribute must include a readable copy of
+    the attribution notices contained within such NOTICE file, excluding those
+    notices that do not pertain to any part of the Derivative Works, in at
+    least one of the following places: within a NOTICE text file distributed as
+    part of the Derivative Works; within the Source form or documentation, if
+    provided along with the Derivative Works; or, within a display generated by
+    the Derivative Works, if and wherever such third-party notices normally
+    appear. The contents of the NOTICE file are for informational purposes only
+    and do not modify the License. You may add Your own attribution notices
+    within Derivative Works that You distribute, alongside or as an addendum to
+    the NOTICE text from the Work, provided that such additional attribution
+    notices cannot be construed as modifying the License. You may add Your own
+    copyright statement to Your modifications and may provide additional or
+    different license terms and conditions for use, reproduction, or
+    distribution of Your modifications, or for any such Derivative Works as a
+    whole, provided Your use, reproduction, and distribution of the Work
+    otherwise complies with the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise, any
+Contribution intentionally submitted for inclusion in the Work by You to the
+Licensor shall be under the terms and conditions of this License, without any
+additional terms or conditions. Notwithstanding the above, nothing herein shall
+supersede or modify the terms of any separate license agreement you may have
+executed with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade names,
+trademarks, service marks, or product names of the Licensor, except as required
+for reasonable and customary use in describing the origin of the Work and
+reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or agreed to in
+writing, Licensor provides the Work (and each Contributor provides its
+Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied, including, without limitation, any warranties
+or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any risks
+associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory, whether in
+tort (including negligence), contract, or otherwise, unless required by
+applicable law (such as deliberate and grossly negligent acts) or agreed to in
+writing, shall any Contributor be liable to You for damages, including any
+direct, indirect, special, incidental, or consequential damages of any
+character arising as a result of this License or out of the use or inability to
+use the Work (including but not limited to damages for loss of goodwill, work
+stoppage, computer failure or malfunction, or any and all other commercial
+damages or losses), even if such Contributor has been advised of the
+possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing the Work or
+Derivative Works thereof, You may choose to offer, and charge a fee for,
+acceptance of support, warranty, indemnity, or other liability obligations
+and/or rights consistent with this License. However, in accepting such
+obligations, You may act only on Your own behalf and on Your sole
+responsibility, not on behalf of any other Contributor, and only if You agree
+to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/.gitignore b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/.gitignore
new file mode 100644
index 0000000000..5c405f7b58
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/.gitignore
@@ -0,0 +1,2 @@
+*~
+*/*~
\ No newline at end of file
diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
new file mode 100644
index 0000000000..bda9c01fb7
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
@@ -0,0 +1,3413 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li                    lile@iis.ee.ethz.ch              //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    04/03/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    control_mvp.sv                                             //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    the control logic  of div and sqrt                         //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+// Revision Date:  13/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To fix some bug found in Control FSM                       //
+//                 when Iteration_unit_num_S  = 2'b10                         //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module control_mvp
+
+  (//Input
+   input logic                                        Clk_CI,
+   input logic                                        Rst_RBI,
+   input logic                                        Div_start_SI ,
+   input logic                                        Sqrt_start_SI,
+   input logic                                        Start_SI,
+   input logic                                        Kill_SI,
+   input logic                                        Special_case_SBI,
+   input logic                                        Special_case_dly_SBI,
+   input logic [C_PC-1:0]                             Precision_ctl_SI,
+   input logic [1:0]                                  Format_sel_SI,
+   input logic [C_MANT_FP64:0]                        Numerator_DI,
+   input logic [C_EXP_FP64:0]                         Exp_num_DI,
+   input logic [C_MANT_FP64:0]                        Denominator_DI,
+   input logic [C_EXP_FP64:0]                         Exp_den_DI,
+
+
+   output logic                                       Div_start_dly_SO ,
+   output logic                                       Sqrt_start_dly_SO,
+   output logic                                       Div_enable_SO,
+   output logic                                       Sqrt_enable_SO,
+
+
+   //To next stage
+   output logic                                       Full_precision_SO,
+   output logic                                       FP32_SO,
+   output logic                                       FP64_SO,
+   output logic                                       FP16_SO,
+   output logic                                       FP16ALT_SO,
+
+   output logic                                       Ready_SO,
+   output logic                                       Done_SO,
+
+   output logic [C_MANT_FP64+4:0]                     Mant_result_prenorm_DO,
+ //  output logic [3:0]                                 Round_bit_DO,
+   output logic [C_EXP_FP64+1:0]                      Exp_result_prenorm_DO
+ );
+
+   logic  [C_MANT_FP64+1+4:0]                         Partial_remainder_DN,Partial_remainder_DP; //58bits,r=q+2
+   logic  [C_MANT_FP64+4:0]                           Quotient_DP; //57bits
+   /////////////////////////////////////////////////////////////////////////////
+   // Assign Inputs                                                          //
+   /////////////////////////////////////////////////////////////////////////////
+   logic [C_MANT_FP64+1:0]                            Numerator_se_D;  //sign extension and hidden bit
+   logic [C_MANT_FP64+1:0]                            Denominator_se_D; //sign extension and hidden bit
+   logic [C_MANT_FP64+1:0]                            Denominator_se_DB;  //1's complement
+
+   assign  Numerator_se_D={1'b0,Numerator_DI}; //prepend a zero MSB to the numerator input
+
+   assign  Denominator_se_D={1'b0,Denominator_DI}; //prepend a zero MSB to the denominator input
+
+  always_comb // Denominator_se_DB: bitwise inverse of Denominator_se_D, truncated to the active format
+   begin
+     if(FP32_SO)
+       begin
+         Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} }; // invert the upper bits, force the remaining low bits to 0
+       end
+     else if(FP64_SO) begin
+         Denominator_se_DB=~Denominator_se_D; // full width inverted, nothing masked
+     end
+     else if(FP16_SO) begin
+         Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} }; // same scheme as FP32 with the FP16 width
+     end
+     else begin // none of FP32/FP64/FP16 is active, i.e. Format_sel_S==2'b11 (FP16ALT)
+         Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+     end
+   end
+
+
+   logic [C_MANT_FP64+1:0]                            Mant_D_sqrt_Norm;
+
+   assign Mant_D_sqrt_Norm=Exp_num_DI[0]?{1'b0,Numerator_DI}:{Numerator_DI,1'b0}; //for sqrt
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Format Selection                                                       //
+   /////////////////////////////////////////////////////////////////////////////
+   logic [1:0]                                      Format_sel_S; // registered copy of Format_sel_SI
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI) // asynchronous, active-low reset
+     begin
+        if(~Rst_RBI)
+          begin
+            Format_sel_S<='b0; // reset value decodes as FP32 below
+          end
+        else if(Start_SI&&Ready_SO) // capture the format only when a start coincides with Ready_SO
+          begin
+            Format_sel_S<=Format_sel_SI;
+          end
+        else
+          begin
+            Format_sel_S<=Format_sel_S; // hold the captured format otherwise
+          end
+    end
+
+   assign FP32_SO = (Format_sel_S==2'b00); // one-hot decode of the registered format
+   assign FP64_SO = (Format_sel_S==2'b01);
+   assign FP16_SO = (Format_sel_S==2'b10);
+   assign FP16ALT_SO = (Format_sel_S==2'b11);
+
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Precision Control                                                       //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic [C_PC-1:0]                                   Precision_ctl_S; // registered copy of Precision_ctl_SI
+   always_ff @(posedge Clk_CI, negedge Rst_RBI) // asynchronous, active-low reset
+     begin
+        if(~Rst_RBI)
+          begin
+            Precision_ctl_S<='b0; // reset value corresponds to full precision (see Full_precision_SO)
+          end
+        else if(Start_SI&&Ready_SO) // capture only when a start coincides with Ready_SO
+          begin
+            Precision_ctl_S<=Precision_ctl_SI;
+          end
+        else
+          begin
+            Precision_ctl_S<=Precision_ctl_S; // hold otherwise
+          end
+    end
+  assign Full_precision_SO = (Precision_ctl_S==6'h00); // a precision control value of zero selects full precision
+
+
+
+     logic [5:0]                                     State_ctl_S; // value selected by the case table below
+     logic [5:0]                                     State_Two_iteration_unit_S; // Precision_ctl_S with its LSB dropped
+     logic [5:0]                                     State_Four_iteration_unit_S; // Precision_ctl_S with its two LSBs dropped
+
+    assign State_Two_iteration_unit_S = Precision_ctl_S[C_PC-1:1];  //Two iteration units
+    assign State_Four_iteration_unit_S = Precision_ctl_S[C_PC-1:2];  //Four iteration units
+     always_comb // select State_ctl_S from the iteration-unit configuration, the registered format and the registered precision
+       begin
+         case(Iteration_unit_num_S) // NOTE(review): not declared in the visible part of this module; presumably comes from the imported defs_div_sqrt_mvp package - confirm
+//////////////////////one iteration unit, start///////////////////////////////////////
+           2'b00:  //one iteration unit
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h1b;  //24+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                   end
+                 2'b01: //FP64
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h38;  //53+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                   end
+                 2'b10: //FP16
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0e;  //11+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0b;  //8+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = Precision_ctl_S;
+                       end
+                  end
+                endcase
+              end
+//////////////////////one iteration unit, end///////////////////////////////////////
+
+//////////////////////two iteration units, start///////////////////////////////////////
+           2'b01:  //two iteration units
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0d;  //24+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                   end
+                 2'b01: //FP64
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h1b;  //53+3 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                   end
+                 2'b10: //FP16
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h06;  //11+3 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h05;  //8+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Two_iteration_unit_S;
+                       end
+                  end
+                endcase
+              end
+//////////////////////two iteration units, end///////////////////////////////////////
+
+//////////////////////three iteration units, start///////////////////////////////////////
+           2'b10:  //three iteration units
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h08;  //24+3 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       6'h09,6'h0a,6'h0b:
+                         begin
+                           State_ctl_S = 6'h03;
+                         end
+                       6'h0c,6'h0d,6'h0e:
+                         begin
+                           State_ctl_S = 6'h04;
+                         end
+                       6'h0f,6'h10,6'h11:
+                         begin
+                           State_ctl_S = 6'h05;
+                         end
+                       6'h12,6'h13,6'h14:
+                         begin
+                           State_ctl_S = 6'h06;
+                         end
+                       6'h15,6'h16,6'h17:
+                         begin
+                           State_ctl_S = 6'h07;
+                         end
+                       default: // every value not listed above (including 6'h01..6'h05) gets the same value as full precision
+                         begin
+                           State_ctl_S = 6'h08;  //24+3 more iterations for rounding bits
+                         end
+                     endcase
+                   end
+                 2'b01: //FP64
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h12;  //53+4 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       6'h09,6'h0a,6'h0b:
+                         begin
+                           State_ctl_S = 6'h03;
+                         end
+                       6'h0c,6'h0d,6'h0e:
+                         begin
+                           State_ctl_S = 6'h04;
+                         end
+                       6'h0f,6'h10,6'h11:
+                         begin
+                           State_ctl_S = 6'h05;
+                         end
+                       6'h12,6'h13,6'h14:
+                         begin
+                           State_ctl_S = 6'h06;
+                         end
+                       6'h15,6'h16,6'h17:
+                         begin
+                           State_ctl_S = 6'h07;
+                         end
+                       6'h18,6'h19,6'h1a:
+                         begin
+                           State_ctl_S = 6'h08;
+                         end
+                       6'h1b,6'h1c,6'h1d:
+                         begin
+                           State_ctl_S = 6'h09;
+                         end
+                       6'h1e,6'h1f,6'h20:
+                         begin
+                           State_ctl_S = 6'h0a;
+                         end
+                       6'h21,6'h22,6'h23:
+                         begin
+                           State_ctl_S = 6'h0b;
+                         end
+                       6'h24,6'h25,6'h26:
+                         begin
+                           State_ctl_S = 6'h0c;
+                         end
+                       6'h27,6'h28,6'h29:
+                         begin
+                           State_ctl_S = 6'h0d;
+                         end
+                       6'h2a,6'h2b,6'h2c:
+                         begin
+                           State_ctl_S = 6'h0e;
+                         end
+                       6'h2d,6'h2e,6'h2f:
+                         begin
+                           State_ctl_S = 6'h0f;
+                         end
+                       6'h30,6'h31,6'h32:
+                         begin
+                           State_ctl_S = 6'h10;
+                         end
+                       6'h33,6'h34,6'h35:
+                         begin
+                           State_ctl_S = 6'h11;
+                         end
+                       default: // every value not listed above (including 6'h01..6'h05) gets the same value as full precision
+                         begin
+                           State_ctl_S = 6'h12;  //53+4 more iterations for rounding bits
+                         end
+                     endcase
+                   end
+                 2'b10: //FP16
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h04;  //12+3 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       6'h09,6'h0a,6'h0b:
+                         begin
+                           State_ctl_S = 6'h03;
+                         end
+                       default: // every value not listed above (including 6'h01..6'h05) gets the same value as full precision
+                         begin
+                           State_ctl_S = 6'h04;  //12+3 more iterations for rounding bits
+                         end
+                     endcase
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     case(Precision_ctl_S)
+                       6'h00:
+                         begin
+                           State_ctl_S = 6'h03;  //8+4 more iterations for rounding bits
+                         end
+                       6'h06,6'h07,6'h08:
+                         begin
+                           State_ctl_S = 6'h02;
+                         end
+                       default: // every value not listed above (including 6'h01..6'h05) gets the same value as full precision
+                         begin
+                           State_ctl_S = 6'h03;  //8+4 more iterations for rounding bits
+                         end
+                     endcase
+                  end
+                endcase
+              end
+//////////////////////three iteration units, end///////////////////////////////////////
+
+//////////////////////four iteration units, start///////////////////////////////////////
+           2'b11:  //four iteration units
+             begin
+               case(Format_sel_S)
+                 2'b00: //FP32
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h06;  //24+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                   end
+                 2'b01: //FP64
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h0d;  //53+3 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                   end
+                 2'b10: //FP16
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h03;  //11+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                   end
+                 2'b11: //FP16ALT
+                   begin
+                     if(Full_precision_SO)
+                       begin
+                         State_ctl_S = 6'h02;  //8+4 more iterations for rounding bits
+                       end
+                     else
+                       begin
+                         State_ctl_S = State_Four_iteration_unit_S;
+                       end
+                  end
+                endcase
+              end
+//////////////////////four iteration units, end///////////////////////////////////////
+
+           endcase
+        end
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // control logic                                                           //
+   /////////////////////////////////////////////////////////////////////////////
+
+   // Registered marker of an accepted division request; it is also copied
+   // to Div_start_dly_SO.
+   logic Div_start_dly_S;
+
+   // The asynchronous active-low reset clears the flag. Otherwise it is set
+   // on a clock edge where Div_start_SI and Ready_SO are both high and is
+   // cleared on every other edge, so it is high only in the cycle that
+   // follows such an edge.
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if (~Rst_RBI)
+         Div_start_dly_S <= 1'b0;
+       else if (Div_start_SI && Ready_SO)
+         Div_start_dly_S <= 1'b1;
+       else
+         Div_start_dly_S <= 1'b0;
+     end
+
+   assign Div_start_dly_SO = Div_start_dly_S;
+
+  // Div_enable_SO flag. Priority: async reset, Kill_SI flush, accepted start
+  // (Div_start_SI with Ready_SO), then Done_SO; otherwise the value is held.
+  always_ff @(posedge Clk_CI, negedge Rst_RBI)
+    begin
+      if (~Rst_RBI)
+        Div_enable_SO <= 1'b0;
+      else if (Kill_SI)                    // synchronous clear on flush
+        Div_enable_SO <= 1'b0;
+      else if (Div_start_SI && Ready_SO)   // request accepted
+        Div_enable_SO <= 1'b1;
+      else if (Done_SO)                    // operation reported done
+        Div_enable_SO <= 1'b0;
+    end
+
+   // Registered marker of an accepted square-root request; it is also
+   // copied to Sqrt_start_dly_SO.
+   logic Sqrt_start_dly_S;
+
+   // The asynchronous active-low reset clears the flag. Otherwise it is set
+   // on a clock edge where Sqrt_start_SI and Ready_SO are both high and is
+   // cleared on every other edge, so it is high only in the following cycle.
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if (~Rst_RBI)
+         Sqrt_start_dly_S <= 1'b0;
+       else if (Sqrt_start_SI && Ready_SO)
+         Sqrt_start_dly_S <= 1'b1;
+       else
+         Sqrt_start_dly_S <= 1'b0;
+     end
+
+   assign Sqrt_start_dly_SO = Sqrt_start_dly_S;
+
+   // Sqrt_enable_SO flag. Priority: async reset, Kill_SI flush, accepted start
+   // (Sqrt_start_SI with Ready_SO), then Done_SO; otherwise the value is held.
+   always_ff @(posedge Clk_CI, negedge Rst_RBI) begin
+     if (~Rst_RBI)
+       Sqrt_enable_SO <= 1'b0;
+     else if (Kill_SI)                      // synchronous clear on flush
+       Sqrt_enable_SO <= 1'b0;
+     else if (Sqrt_start_SI && Ready_SO)    // request accepted
+       Sqrt_enable_SO <= 1'b1;
+     else if (Done_SO)                      // operation reported done
+       Sqrt_enable_SO <= 1'b0;
+   end
+
+   logic [5:0] Crtl_cnt_S;     // step counter of the control FSM
+   logic       Start_dly_S;    // delayed start of either operation
+   logic       Fsm_enable_S;   // counter may advance
+   logic       Final_state_S;  // counter equals State_ctl_S
+
+   assign Start_dly_S   = Div_start_dly_S | Sqrt_start_dly_S;
+   // Advance on a delayed start or while the counter is non-zero, but only
+   // if Kill_SI is low and Special_case_dly_SBI is high.
+   assign Fsm_enable_S  = ((Start_dly_S | (|Crtl_cnt_S)) && (~Kill_SI) && Special_case_dly_SBI);
+   assign Final_state_S = (Crtl_cnt_S == State_ctl_S);
+
+
+   // Control FSM: Crtl_cnt_S counts the steps of the running operation and
+   // is compared against State_ctl_S to form Final_state_S.
+   // Priority, highest first:
+   //   - the asynchronous active-low reset clears the counter;
+   //   - Final_state_S or Kill_SI clears it;
+   //   - while Fsm_enable_S is high it increments by one per clock;
+   //   - in every other case it is forced back to zero, so the count never
+   //     freezes at a non-zero value.
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if (~Rst_RBI)
+          Crtl_cnt_S <= '0;
+        else if (Final_state_S | Kill_SI)   // finished or flushed
+          Crtl_cnt_S <= '0;
+        else if (Fsm_enable_S)              // operation in flight
+          Crtl_cnt_S <= Crtl_cnt_S + 1;
+        else                                // idle
+          Crtl_cnt_S <= '0;
+     end // always_ff
+
+
+
+    // Done_SO generation; division and square root share this flag.
+    // It is a registered signal, so it reflects the conditions sampled at
+    // the previous clock edge:
+    //   - the asynchronous active-low reset clears it;
+    //   - when a start is accepted (Start_SI with Ready_SO), it is set
+    //     immediately if Special_case_SBI is low and cleared otherwise;
+    //   - else it is set when the step counter matches State_ctl_S
+    //     (Final_state_S);
+    //   - in every remaining case it is cleared; no branch holds the
+    //     previous value.
+    always_ff @(posedge Clk_CI, negedge Rst_RBI)
+      begin
+        if (~Rst_RBI)
+          Done_SO <= 1'b0;
+        else if (Start_SI && Ready_SO)
+          begin
+            if (~Special_case_SBI)
+              Done_SO <= 1'b1;
+            else
+              Done_SO <= 1'b0;
+          end
+        else if (Final_state_S)
+          Done_SO <= 1'b1;
+        else
+          Done_SO <= 1'b0;
+      end
+
+
+
+
+   // Ready_SO generation (registered).
+   // Priority, highest first:
+   //   - the asynchronous active-low reset sets it, so the unit comes out
+   //     of reset ready;
+   //   - when a start is accepted (Start_SI with Ready_SO) it stays high
+   //     if Special_case_SBI is low and drops otherwise;
+   //   - Final_state_S or Kill_SI sets it again;
+   //   - with none of these active the previous value is kept.
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if (~Rst_RBI)
+         Ready_SO <= 1'b1;
+       // a request is being accepted in this cycle
+       else if (Start_SI && Ready_SO)
+         begin
+           if (~Special_case_SBI)
+             Ready_SO <= 1'b1;
+           else
+             Ready_SO <= 1'b0;
+         end
+       // last step reached, or the operation was killed
+       else if (Final_state_S | Kill_SI)
+         Ready_SO <= 1'b1;
+       // nothing to do: hold
+       else
+         Ready_SO <= Ready_SO;
+     end
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b00, start  //
+   ////////////////////////////////////////////////////////////////////////////
+
+  logic                                    Qcnt_one_0;  // Qcnt_one_<k>: partial-quotient vectors for Iteration_unit_num_S = 2'b00
+  logic                                    Qcnt_one_1;  // Qcnt_one_<k> is k bits wide for k >= 1 (a single bit for k = 0)
+  logic [1:0]                              Qcnt_one_2;
+  logic [2:0]                              Qcnt_one_3;
+  logic [3:0]                              Qcnt_one_4;
+  logic [4:0]                              Qcnt_one_5;
+  logic [5:0]                              Qcnt_one_6;
+  logic [6:0]                              Qcnt_one_7;
+  logic [7:0]                              Qcnt_one_8;
+  logic [8:0]                              Qcnt_one_9;
+  logic [9:0]                              Qcnt_one_10;
+  logic [10:0]                             Qcnt_one_11;
+  logic [11:0]                             Qcnt_one_12;
+  logic [12:0]                             Qcnt_one_13;
+  logic [13:0]                             Qcnt_one_14;
+  logic [14:0]                             Qcnt_one_15;
+  logic [15:0]                             Qcnt_one_16;
+  logic [16:0]                             Qcnt_one_17;
+  logic [17:0]                             Qcnt_one_18;
+  logic [18:0]                             Qcnt_one_19;
+  logic [19:0]                             Qcnt_one_20;
+  logic [20:0]                             Qcnt_one_21;
+  logic [21:0]                             Qcnt_one_22;
+  logic [22:0]                             Qcnt_one_23;
+  logic [23:0]                             Qcnt_one_24;
+  logic [24:0]                             Qcnt_one_25;
+  logic [25:0]                             Qcnt_one_26;
+  logic [26:0]                             Qcnt_one_27;
+  logic [27:0]                             Qcnt_one_28;
+  logic [28:0]                             Qcnt_one_29;
+  logic [29:0]                             Qcnt_one_30;
+  logic [30:0]                             Qcnt_one_31;
+  logic [31:0]                             Qcnt_one_32;
+  logic [32:0]                             Qcnt_one_33;
+  logic [33:0]                             Qcnt_one_34;
+  logic [34:0]                             Qcnt_one_35;
+  logic [35:0]                             Qcnt_one_36;
+  logic [36:0]                             Qcnt_one_37;
+  logic [37:0]                             Qcnt_one_38;
+  logic [38:0]                             Qcnt_one_39;
+  logic [39:0]                             Qcnt_one_40;
+  logic [40:0]                             Qcnt_one_41;
+  logic [41:0]                             Qcnt_one_42;
+  logic [42:0]                             Qcnt_one_43;
+  logic [43:0]                             Qcnt_one_44;
+  logic [44:0]                             Qcnt_one_45;
+  logic [45:0]                             Qcnt_one_46;
+  logic [46:0]                             Qcnt_one_47;
+  logic [47:0]                             Qcnt_one_48;
+  logic [48:0]                             Qcnt_one_49;
+  logic [49:0]                             Qcnt_one_50;
+  logic [50:0]                             Qcnt_one_51;
+  logic [51:0]                             Qcnt_one_52;
+  logic [52:0]                             Qcnt_one_53;
+  logic [53:0]                             Qcnt_one_54;
+  logic [54:0]                             Qcnt_one_55;
+  logic [55:0]                             Qcnt_one_56;
+  logic [56:0]                             Qcnt_one_57;  // last index driven by the assign list further down
+  logic [57:0]                             Qcnt_one_58;  // NOTE(review): no driver for _58.._60 is visible in this part of the file - confirm they are used
+  logic [58:0]                             Qcnt_one_59;
+  logic [59:0]                             Qcnt_one_60;
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b00, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b01, start  //
+   ////////////////////////////////////////////////////////////////////////////
+  logic [1:0]                              Qcnt_two_0;  // Qcnt_two_<k>: partial-quotient vectors for Iteration_unit_num_S = 2'b01
+  logic [2:0]                              Qcnt_two_1;  // for k >= 1 the width is 2k+1 bits
+  logic [4:0]                              Qcnt_two_2;
+  logic [6:0]                              Qcnt_two_3;
+  logic [8:0]                              Qcnt_two_4;
+  logic [10:0]                             Qcnt_two_5;
+  logic [12:0]                             Qcnt_two_6;
+  logic [14:0]                             Qcnt_two_7;
+  logic [16:0]                             Qcnt_two_8;
+  logic [18:0]                             Qcnt_two_9;
+  logic [20:0]                             Qcnt_two_10;
+  logic [22:0]                             Qcnt_two_11;
+  logic [24:0]                             Qcnt_two_12;
+  logic [26:0]                             Qcnt_two_13;
+  logic [28:0]                             Qcnt_two_14;
+  logic [30:0]                             Qcnt_two_15;
+  logic [32:0]                             Qcnt_two_16;
+  logic [34:0]                             Qcnt_two_17;
+  logic [36:0]                             Qcnt_two_18;
+  logic [38:0]                             Qcnt_two_19;
+  logic [40:0]                             Qcnt_two_20;
+  logic [42:0]                             Qcnt_two_21;
+  logic [44:0]                             Qcnt_two_22;
+  logic [46:0]                             Qcnt_two_23;
+  logic [48:0]                             Qcnt_two_24;
+  logic [50:0]                             Qcnt_two_25;
+  logic [52:0]                             Qcnt_two_26;
+  logic [54:0]                             Qcnt_two_27;
+  logic [56:0]                             Qcnt_two_28;  // widest entry: 56 bits of Quotient_DP plus one new bit
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b01, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b10, start  //
+   ////////////////////////////////////////////////////////////////////////////
+  logic [2:0]                              Qcnt_three_0;  // Qcnt_three_<k>: partial-quotient vectors for Iteration_unit_num_S = 2'b10
+  logic [4:0]                              Qcnt_three_1;  // for k >= 1 the width is 3k+2 bits
+  logic [7:0]                              Qcnt_three_2;
+  logic [10:0]                             Qcnt_three_3;
+  logic [13:0]                             Qcnt_three_4;
+  logic [16:0]                             Qcnt_three_5;
+  logic [19:0]                             Qcnt_three_6;
+  logic [22:0]                             Qcnt_three_7;
+  logic [25:0]                             Qcnt_three_8;
+  logic [28:0]                             Qcnt_three_9;
+  logic [31:0]                             Qcnt_three_10;
+  logic [34:0]                             Qcnt_three_11;
+  logic [37:0]                             Qcnt_three_12;
+  logic [40:0]                             Qcnt_three_13;
+  logic [43:0]                             Qcnt_three_14;
+  logic [46:0]                             Qcnt_three_15;
+  logic [49:0]                             Qcnt_three_16;
+  logic [52:0]                             Qcnt_three_17;
+  logic [55:0]                             Qcnt_three_18;
+  logic [58:0]                             Qcnt_three_19;  // last index driven by the assign list further down
+  logic [61:0]                             Qcnt_three_20;  // NOTE(review): no driver for _20 is visible in this part of the file - confirm it is used
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b10, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b11, start  //
+   ////////////////////////////////////////////////////////////////////////////
+  logic [3:0]                              Qcnt_four_0;  // Qcnt_four_<k>: partial-quotient vectors for Iteration_unit_num_S = 2'b11
+  logic [6:0]                              Qcnt_four_1;  // for k >= 1 the width is 4k+3 bits
+  logic [10:0]                             Qcnt_four_2;
+  logic [14:0]                             Qcnt_four_3;
+  logic [18:0]                             Qcnt_four_4;
+  logic [22:0]                             Qcnt_four_5;
+  logic [26:0]                             Qcnt_four_6;
+  logic [30:0]                             Qcnt_four_7;
+  logic [34:0]                             Qcnt_four_8;
+  logic [38:0]                             Qcnt_four_9;
+  logic [42:0]                             Qcnt_four_10;
+  logic [46:0]                             Qcnt_four_11;
+  logic [50:0]                             Qcnt_four_12;
+  logic [54:0]                             Qcnt_four_13;
+  logic [58:0]                             Qcnt_four_14;  // widest entry: 56 bits of Quotient_DP plus three new bits
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Declarations for square root when Iteration_unit_num_S = 2'b11, end    //
+   ////////////////////////////////////////////////////////////////////////////
+
+
+
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R0,Sqrt_Q0,Q_sqrt0,Q_sqrt_com_0;  // Q_sqrt_com_<i> is driven with the bitwise inverse of Q_sqrt<i> in the Format_sel_S case further down
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R1,Sqrt_Q1,Q_sqrt1,Q_sqrt_com_1;
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R2,Sqrt_Q2,Q_sqrt2,Q_sqrt_com_2;
+   logic [C_MANT_FP64+1+4:0]                                      Sqrt_R3,Sqrt_Q3,Q_sqrt3,Q_sqrt_com_3,Sqrt_R4; //Sqrt_Q4;
+
+
+   logic [1:0]                                                    Sqrt_DI  [3:0];  // 2-bit entries; entry 0 is loaded with bit pairs of Mant_D_sqrt_Norm in the Crtl_cnt_S case further down
+   logic [1:0]                                                    Sqrt_DO  [3:0];  // NOTE(review): driver not visible in this part of the file
+   logic                                                          Sqrt_carry_DO;
+
+
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_a_D [3:0];  // NOTE(review): the Iteration_cell_* arrays presumably hold one entry per iteration unit (up to four) - confirm against the cell instances
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_b_D [3:0];
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_a_BMASK_D [3:0];
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_b_BMASK_D [3:0];
+  logic                                                           Iteration_cell_carry_D [3:0];  // entries 0..3 feed Sqrt_quotinent_S when Format_sel_S = 2'b01
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_sum_D [3:0];
+  logic  [C_MANT_FP64+1+4:0]                                      Iteration_cell_sum_AMASK_D [3:0];  // one inverted bit of each entry feeds Sqrt_quotinent_S for the other formats
+
+
+  logic [3:0]                                                     Sqrt_quotinent_S;  // bit 3 comes from iteration entry 0, bit 0 from entry 3
+
+
+   // Per-format selection of the new square-root quotient bits and of the
+   // inverted partial quotients. For FP64 the bits are the carries of the
+   // four iteration entries; for the narrower formats they are the inverted
+   // bit (mantissa width + 5) of each masked sum, with zero-extended inverses.
+   always_comb
+    begin
+      case (Format_sel_S)
+        2'b00: begin  // FP32
+            Sqrt_quotinent_S = {~Iteration_cell_sum_AMASK_D[0][C_MANT_FP32+5], ~Iteration_cell_sum_AMASK_D[1][C_MANT_FP32+5], ~Iteration_cell_sum_AMASK_D[2][C_MANT_FP32+5], ~Iteration_cell_sum_AMASK_D[3][C_MANT_FP32+5]};
+            Q_sqrt_com_0 = {{(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt0[C_MANT_FP32+5:0]};
+            Q_sqrt_com_1 = {{(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt1[C_MANT_FP32+5:0]};
+            Q_sqrt_com_2 = {{(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt2[C_MANT_FP32+5:0]};
+            Q_sqrt_com_3 = {{(C_MANT_FP64-C_MANT_FP32){1'b0}}, ~Q_sqrt3[C_MANT_FP32+5:0]};
+        end
+        2'b01: begin  // FP64
+            Sqrt_quotinent_S = {Iteration_cell_carry_D[0], Iteration_cell_carry_D[1], Iteration_cell_carry_D[2], Iteration_cell_carry_D[3]};
+            Q_sqrt_com_0 = ~Q_sqrt0;
+            Q_sqrt_com_1 = ~Q_sqrt1;
+            Q_sqrt_com_2 = ~Q_sqrt2;
+            Q_sqrt_com_3 = ~Q_sqrt3;
+        end
+        2'b10: begin  // FP16
+            Sqrt_quotinent_S = {~Iteration_cell_sum_AMASK_D[0][C_MANT_FP16+5], ~Iteration_cell_sum_AMASK_D[1][C_MANT_FP16+5], ~Iteration_cell_sum_AMASK_D[2][C_MANT_FP16+5], ~Iteration_cell_sum_AMASK_D[3][C_MANT_FP16+5]};
+            Q_sqrt_com_0 = {{(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt0[C_MANT_FP16+5:0]};
+            Q_sqrt_com_1 = {{(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt1[C_MANT_FP16+5:0]};
+            Q_sqrt_com_2 = {{(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt2[C_MANT_FP16+5:0]};
+            Q_sqrt_com_3 = {{(C_MANT_FP64-C_MANT_FP16){1'b0}}, ~Q_sqrt3[C_MANT_FP16+5:0]};
+        end
+        2'b11: begin  // FP16ALT
+            Sqrt_quotinent_S = {~Iteration_cell_sum_AMASK_D[0][C_MANT_FP16ALT+5], ~Iteration_cell_sum_AMASK_D[1][C_MANT_FP16ALT+5], ~Iteration_cell_sum_AMASK_D[2][C_MANT_FP16ALT+5], ~Iteration_cell_sum_AMASK_D[3][C_MANT_FP16ALT+5]};
+            Q_sqrt_com_0 = {{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt0[C_MANT_FP16ALT+5:0]};
+            Q_sqrt_com_1 = {{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt1[C_MANT_FP16ALT+5:0]};
+            Q_sqrt_com_2 = {{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt2[C_MANT_FP16ALT+5:0]};
+            Q_sqrt_com_3 = {{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}, ~Q_sqrt3[C_MANT_FP16ALT+5:0]};
+        end
+      endcase
+    end
+
+
+
+  assign Qcnt_one_0  = 1'b0;               // Qcnt_one_<k> = the k low bits of Quotient_DP
+  assign Qcnt_one_1  = Quotient_DP[0];     // (constant zero for k = 0)
+  assign Qcnt_one_2  = Quotient_DP[1:0];
+  assign Qcnt_one_3  = Quotient_DP[2:0];
+  assign Qcnt_one_4  = Quotient_DP[3:0];
+  assign Qcnt_one_5  = Quotient_DP[4:0];
+  assign Qcnt_one_6  = Quotient_DP[5:0];
+  assign Qcnt_one_7  = Quotient_DP[6:0];
+  assign Qcnt_one_8  = Quotient_DP[7:0];
+  assign Qcnt_one_9  = Quotient_DP[8:0];
+  assign Qcnt_one_10 = Quotient_DP[9:0];
+  assign Qcnt_one_11 = Quotient_DP[10:0];
+  assign Qcnt_one_12 = Quotient_DP[11:0];
+  assign Qcnt_one_13 = Quotient_DP[12:0];
+  assign Qcnt_one_14 = Quotient_DP[13:0];
+  assign Qcnt_one_15 = Quotient_DP[14:0];
+  assign Qcnt_one_16 = Quotient_DP[15:0];
+  assign Qcnt_one_17 = Quotient_DP[16:0];
+  assign Qcnt_one_18 = Quotient_DP[17:0];
+  assign Qcnt_one_19 = Quotient_DP[18:0];
+  assign Qcnt_one_20 = Quotient_DP[19:0];
+  assign Qcnt_one_21 = Quotient_DP[20:0];
+  assign Qcnt_one_22 = Quotient_DP[21:0];
+  assign Qcnt_one_23 = Quotient_DP[22:0];
+  assign Qcnt_one_24 = Quotient_DP[23:0];
+  assign Qcnt_one_25 = Quotient_DP[24:0];
+  assign Qcnt_one_26 = Quotient_DP[25:0];
+  assign Qcnt_one_27 = Quotient_DP[26:0];
+  assign Qcnt_one_28 = Quotient_DP[27:0];
+  assign Qcnt_one_29 = Quotient_DP[28:0];
+  assign Qcnt_one_30 = Quotient_DP[29:0];
+  assign Qcnt_one_31 = Quotient_DP[30:0];
+  assign Qcnt_one_32 = Quotient_DP[31:0];
+  assign Qcnt_one_33 = Quotient_DP[32:0];
+  assign Qcnt_one_34 = Quotient_DP[33:0];
+  assign Qcnt_one_35 = Quotient_DP[34:0];
+  assign Qcnt_one_36 = Quotient_DP[35:0];
+  assign Qcnt_one_37 = Quotient_DP[36:0];
+  assign Qcnt_one_38 = Quotient_DP[37:0];
+  assign Qcnt_one_39 = Quotient_DP[38:0];
+  assign Qcnt_one_40 = Quotient_DP[39:0];
+  assign Qcnt_one_41 = Quotient_DP[40:0];
+  assign Qcnt_one_42 = Quotient_DP[41:0];
+  assign Qcnt_one_43 = Quotient_DP[42:0];
+  assign Qcnt_one_44 = Quotient_DP[43:0];
+  assign Qcnt_one_45 = Quotient_DP[44:0];
+  assign Qcnt_one_46 = Quotient_DP[45:0];
+  assign Qcnt_one_47 = Quotient_DP[46:0];
+  assign Qcnt_one_48 = Quotient_DP[47:0];
+  assign Qcnt_one_49 = Quotient_DP[48:0];
+  assign Qcnt_one_50 = Quotient_DP[49:0];
+  assign Qcnt_one_51 = Quotient_DP[50:0];
+  assign Qcnt_one_52 = Quotient_DP[51:0];
+  assign Qcnt_one_53 = Quotient_DP[52:0];
+  assign Qcnt_one_54 = Quotient_DP[53:0];
+  assign Qcnt_one_55 = Quotient_DP[54:0];
+  assign Qcnt_one_56 = Quotient_DP[55:0];
+  assign Qcnt_one_57 = Quotient_DP[56:0];
+
+
+  assign Qcnt_two_0  = {1'b0,              Sqrt_quotinent_S[3]};  // Qcnt_two_<k> = {2k low bits of Quotient_DP, newest bit}
+  assign Qcnt_two_1  = {Quotient_DP[1:0],  Sqrt_quotinent_S[3]};
+  assign Qcnt_two_2  = {Quotient_DP[3:0],  Sqrt_quotinent_S[3]};
+  assign Qcnt_two_3  = {Quotient_DP[5:0],  Sqrt_quotinent_S[3]};
+  assign Qcnt_two_4  = {Quotient_DP[7:0],  Sqrt_quotinent_S[3]};
+  assign Qcnt_two_5  = {Quotient_DP[9:0],  Sqrt_quotinent_S[3]};
+  assign Qcnt_two_6  = {Quotient_DP[11:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_7  = {Quotient_DP[13:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_8  = {Quotient_DP[15:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_9  = {Quotient_DP[17:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_10 = {Quotient_DP[19:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_11 = {Quotient_DP[21:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_12 = {Quotient_DP[23:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_13 = {Quotient_DP[25:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_14 = {Quotient_DP[27:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_15 = {Quotient_DP[29:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_16 = {Quotient_DP[31:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_17 = {Quotient_DP[33:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_18 = {Quotient_DP[35:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_19 = {Quotient_DP[37:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_20 = {Quotient_DP[39:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_21 = {Quotient_DP[41:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_22 = {Quotient_DP[43:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_23 = {Quotient_DP[45:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_24 = {Quotient_DP[47:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_25 = {Quotient_DP[49:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_26 = {Quotient_DP[51:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_27 = {Quotient_DP[53:0], Sqrt_quotinent_S[3]};
+  assign Qcnt_two_28 = {Quotient_DP[55:0], Sqrt_quotinent_S[3]};
+
+
+  assign Qcnt_three_0  = {1'b0,              Sqrt_quotinent_S[3:2]};  // Qcnt_three_<k> = {3k low bits of Quotient_DP, two newest bits}
+  assign Qcnt_three_1  = {Quotient_DP[2:0],  Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_2  = {Quotient_DP[5:0],  Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_3  = {Quotient_DP[8:0],  Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_4  = {Quotient_DP[11:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_5  = {Quotient_DP[14:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_6  = {Quotient_DP[17:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_7  = {Quotient_DP[20:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_8  = {Quotient_DP[23:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_9  = {Quotient_DP[26:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_10 = {Quotient_DP[29:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_11 = {Quotient_DP[32:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_12 = {Quotient_DP[35:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_13 = {Quotient_DP[38:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_14 = {Quotient_DP[41:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_15 = {Quotient_DP[44:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_16 = {Quotient_DP[47:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_17 = {Quotient_DP[50:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_18 = {Quotient_DP[53:0], Sqrt_quotinent_S[3:2]};
+  assign Qcnt_three_19 = {Quotient_DP[56:0], Sqrt_quotinent_S[3:2]};
+
+
+  assign Qcnt_four_0  = {1'b0,              Sqrt_quotinent_S[3:1]};  // Qcnt_four_<k> = {4k low bits of Quotient_DP, three newest bits}
+  assign Qcnt_four_1  = {Quotient_DP[3:0],  Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_2  = {Quotient_DP[7:0],  Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_3  = {Quotient_DP[11:0], Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_4  = {Quotient_DP[15:0], Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_5  = {Quotient_DP[19:0], Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_6  = {Quotient_DP[23:0], Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_7  = {Quotient_DP[27:0], Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_8  = {Quotient_DP[31:0], Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_9  = {Quotient_DP[35:0], Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_10 = {Quotient_DP[39:0], Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_11 = {Quotient_DP[43:0], Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_12 = {Quotient_DP[47:0], Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_13 = {Quotient_DP[51:0], Sqrt_quotinent_S[3:1]};
+  assign Qcnt_four_14 = {Quotient_DP[55:0], Sqrt_quotinent_S[3:1]};
+
+
+
+
+  always_comb begin  // the intermediate operands for sqrt
+
+  case(Iteration_unit_num_S)
+    2'b00:
+      begin
+
+  /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b00, start       //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+
+
+        case(Crtl_cnt_S)
+
+          6'b000000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_0};
+              Sqrt_Q0=Q_sqrt_com_0;
+            end
+          6'b000001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_1};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_one_2};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+              Q_sqrt0={{(C_MANT_FP64+3){1'b0}},Qcnt_one_3};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+              Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_one_4};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+              Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_one_5};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+              Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_one_6};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b000111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+              Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_one_7};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+              Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_one_8};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+              Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_one_9};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+              Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_one_10};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+              Q_sqrt0={{(C_MANT_FP64-5){1'b0}},Qcnt_one_11};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+              Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_one_12};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+              Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_one_13};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+              Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_one_14};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b001111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+              Q_sqrt0={{(C_MANT_FP64-9){1'b0}},Qcnt_one_15};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+              Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_one_16};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+              Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_one_17};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+              Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_one_18};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+              Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_one_19};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+              Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_one_20};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+              Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_one_21};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+              Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_one_22};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b010111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+              Q_sqrt0={{(C_MANT_FP64-17){1'b0}},Qcnt_one_23};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+              Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_one_24};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+              Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_one_25};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+              Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_one_26};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-21){1'b0}},Qcnt_one_27};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_one_28};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_one_29};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_one_30};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b011111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_one_31};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_one_32};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_one_33};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_one_34};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-29){1'b0}},Qcnt_one_35};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_one_36};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_one_37};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_one_38};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b100111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-33){1'b0}},Qcnt_one_39};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_one_40};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_one_41};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_one_42};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_one_43};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_one_44};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_one_45};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_one_46};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b101111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-41){1'b0}},Qcnt_one_47};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_one_48};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_one_49};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_one_50};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-45){1'b0}},Qcnt_one_51};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_one_52};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_one_53};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_one_54};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b110111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_one_55};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+          6'b111000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_one_56};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+            end
+
+          default:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0='0;
+              Sqrt_Q0='0;
+            end
+        endcase
+      end
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b00, end         //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+    2'b01:
+      begin
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b01, start       //
+   /////////////////////////////////////////////////////////////////////////////
+        case(Crtl_cnt_S)
+
+          6'b000000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]};
+              Sqrt_Q0=Q_sqrt_com_0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_two_1[2:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+              Q_sqrt1={{(C_MANT_FP64+3){1'b0}},Qcnt_two_1[2:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+              Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_two_2[4:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+              Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_two_2[4:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+              Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_two_3[6:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+              Q_sqrt1={{(C_MANT_FP64-1){1'b0}},Qcnt_two_3[6:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+              Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_two_4[8:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+              Q_sqrt1={{(C_MANT_FP64-3){1'b0}},Qcnt_two_4[8:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+            6'b000101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+              Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_two_5[10:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+              Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_two_5[10:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+              Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_two_6[12:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+              Q_sqrt1={{(C_MANT_FP64-7){1'b0}},Qcnt_two_6[12:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b000111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+              Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_two_7[14:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+              Q_sqrt1={{(C_MANT_FP64-9){1'b0}},Qcnt_two_7[14:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+              Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_two_8[16:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+              Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_two_8[16:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+              Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_two_9[18:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+              Q_sqrt1={{(C_MANT_FP64-13){1'b0}},Qcnt_two_9[18:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+              Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_two_10[20:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+              Q_sqrt1={{(C_MANT_FP64-15){1'b0}},Qcnt_two_10[20:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+              Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_two_11[22:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+              Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_two_11[22:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+              Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_two_12[24:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+              Q_sqrt1={{(C_MANT_FP64-19){1'b0}},Qcnt_two_12[24:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+              Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_two_13[26:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-21){1'b0}},Qcnt_two_13[26:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_two_14[28:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_two_14[28:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b001111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_two_15[30:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-25){1'b0}},Qcnt_two_15[30:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_two_16[32:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-27){1'b0}},Qcnt_two_16[32:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_two_17[34:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_two_17[34:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_two_18[36:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-31){1'b0}},Qcnt_two_18[36:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_two_19[38:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-33){1'b0}},Qcnt_two_19[38:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_two_20[40:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_two_20[40:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_two_21[42:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-37){1'b0}},Qcnt_two_21[42:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_two_22[44:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-39){1'b0}},Qcnt_two_22[44:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b010111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_two_23[46:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_two_23[46:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_two_24[48:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-43){1'b0}},Qcnt_two_24[48:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_two_25[50:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-45){1'b0}},Qcnt_two_25[50:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_two_26[52:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_two_26[52:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_two_27[54:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-49){1'b0}},Qcnt_two_27[54:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          6'b011100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_two_28[56:1]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-51){1'b0}},Qcnt_two_28[56:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+          default:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]};
+              Sqrt_Q0=Q_sqrt_com_0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+            end
+
+        endcase
+      end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b01, end         //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+    2'b10:
+      begin
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b10, start       //
+   /////////////////////////////////////////////////////////////////////////////
+
+        case(Crtl_cnt_S)
+          6'b000000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]};
+              Sqrt_Q0=Q_sqrt_com_0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000001:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+              Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_three_1[4:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+              Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_three_1[4:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+              Q_sqrt2={{(C_MANT_FP64){1'b0}},Qcnt_three_1[4:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000010:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+              Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_three_2[7:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+              Q_sqrt1={{(C_MANT_FP64-2){1'b0}},Qcnt_three_2[7:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+              Q_sqrt2={{(C_MANT_FP64-3){1'b0}},Qcnt_three_2[7:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000011:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+              Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_three_3[10:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+              Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_three_3[10:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+              Q_sqrt2={{(C_MANT_FP64-6){1'b0}},Qcnt_three_3[10:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000100:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+              Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_three_4[13:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+              Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_three_4[13:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+              Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_three_4[13:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000101:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+              Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_three_5[16:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+              Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_three_5[16:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+              Q_sqrt2={{(C_MANT_FP64-12){1'b0}},Qcnt_three_5[16:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000110:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+              Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_three_6[19:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+              Q_sqrt1={{(C_MANT_FP64-14){1'b0}},Qcnt_three_6[19:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+              Q_sqrt2={{(C_MANT_FP64-15){1'b0}},Qcnt_three_6[19:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b000111:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+              Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_three_7[22:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+              Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_three_7[22:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+              Q_sqrt2={{(C_MANT_FP64-18){1'b0}},Qcnt_three_7[22:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001000:
+            begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+              Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_three_8[25:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+              Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_three_8[25:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+              Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_three_8[25:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_three_9[28:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_three_9[28:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-24){1'b0}},Qcnt_three_9[28:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_three_10[31:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-26){1'b0}},Qcnt_three_10[31:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-27){1'b0}},Qcnt_three_10[31:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001011:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_three_11[34:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_three_11[34:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-30){1'b0}},Qcnt_three_11[34:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001100:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_three_12[37:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_three_12[37:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_three_12[37:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001101:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_three_13[40:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_three_13[40:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-36){1'b0}},Qcnt_three_13[40:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001110:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_three_14[43:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-38){1'b0}},Qcnt_three_14[43:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-39){1'b0}},Qcnt_three_14[43:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b001111:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_three_15[46:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_three_15[46:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-42){1'b0}},Qcnt_three_15[46:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b010000:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_three_16[49:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_three_16[49:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_three_16[49:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b010001:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_three_17[52:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_three_17[52:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-48){1'b0}},Qcnt_three_17[52:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          6'b010010:
+            begin
+              Sqrt_DI[0]=2'b00;
+              Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_three_18[55:2]};
+              Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+              Sqrt_DI[1]=2'b00;
+              Q_sqrt1={{(C_MANT_FP64-50){1'b0}},Qcnt_three_18[55:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=2'b00;
+              Q_sqrt2={{(C_MANT_FP64-51){1'b0}},Qcnt_three_18[55:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+
+          default :
+              begin
+              Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+              Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]};
+              Sqrt_Q0=Q_sqrt_com_0;
+              Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+              Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]};
+              Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+              Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+              Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]};
+              Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+            end
+        endcase
+
+      end
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b10, end       //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+    2'b11:
+      begin
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b11, start       //
+   /////////////////////////////////////////////////////////////////////////////
+
+              case(Crtl_cnt_S)
+
+                6'b000000:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+                    Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]};
+                    Sqrt_Q0=Q_sqrt_com_0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+                    Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+                    Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+                    Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000001:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8];
+                    Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_four_1[6:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10];
+                    Q_sqrt1={{(C_MANT_FP64){1'b0}},Qcnt_four_1[6:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12];
+                    Q_sqrt2={{(C_MANT_FP64-1){1'b0}},Qcnt_four_1[6:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14];
+                    Q_sqrt3={{(C_MANT_FP64-2){1'b0}},Qcnt_four_1[6:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000010:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16];
+                    Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_four_2[10:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18];
+                    Q_sqrt1={{(C_MANT_FP64-4){1'b0}},Qcnt_four_2[10:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20];
+                    Q_sqrt2={{(C_MANT_FP64-5){1'b0}},Qcnt_four_2[10:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22];
+                    Q_sqrt3={{(C_MANT_FP64-6){1'b0}},Qcnt_four_2[10:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000011:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24];
+                    Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_four_3[14:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26];
+                    Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_four_3[14:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28];
+                    Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_four_3[14:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30];
+                    Q_sqrt3={{(C_MANT_FP64-10){1'b0}},Qcnt_four_3[14:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000100:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32];
+                    Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_four_4[18:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34];
+                    Q_sqrt1={{(C_MANT_FP64-12){1'b0}},Qcnt_four_4[18:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36];
+                    Q_sqrt2={{(C_MANT_FP64-13){1'b0}},Qcnt_four_4[18:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38];
+                    Q_sqrt3={{(C_MANT_FP64-14){1'b0}},Qcnt_four_4[18:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000101:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40];
+                    Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_four_5[22:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42];
+                    Q_sqrt1={{(C_MANT_FP64-16){1'b0}},Qcnt_four_5[22:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44];
+                    Q_sqrt2={{(C_MANT_FP64-17){1'b0}},Qcnt_four_5[22:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46];
+                    Q_sqrt3={{(C_MANT_FP64-18){1'b0}},Qcnt_four_5[22:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000110:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48];
+                    Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_four_6[26:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50];
+                    Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_four_6[26:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52];
+                    Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_four_6[26:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-22){1'b0}},Qcnt_four_6[26:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b000111:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_four_7[30:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-24){1'b0}},Qcnt_four_7[30:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-25){1'b0}},Qcnt_four_7[30:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-26){1'b0}},Qcnt_four_7[30:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001000:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_four_8[34:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-28){1'b0}},Qcnt_four_8[34:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-29){1'b0}},Qcnt_four_8[34:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-30){1'b0}},Qcnt_four_8[34:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001001:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_four_9[38:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_four_9[38:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_four_9[38:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-34){1'b0}},Qcnt_four_9[38:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001010:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_four_10[42:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-36){1'b0}},Qcnt_four_10[42:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-37){1'b0}},Qcnt_four_10[42:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-38){1'b0}},Qcnt_four_10[42:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001011:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_four_11[46:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-40){1'b0}},Qcnt_four_11[46:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-41){1'b0}},Qcnt_four_11[46:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-42){1'b0}},Qcnt_four_11[46:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001100:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_four_12[50:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_four_12[50:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_four_12[50:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-46){1'b0}},Qcnt_four_12[50:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                6'b001101:
+                  begin
+                    Sqrt_DI[0]=2'b00;
+                    Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_four_13[54:3]};
+                    Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0;
+                    Sqrt_DI[1]=2'b00;
+                    Q_sqrt1={{(C_MANT_FP64-48){1'b0}},Qcnt_four_13[54:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=2'b00;
+                    Q_sqrt2={{(C_MANT_FP64-49){1'b0}},Qcnt_four_13[54:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=2'b00;
+                    Q_sqrt3={{(C_MANT_FP64-50){1'b0}},Qcnt_four_13[54:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+
+                default:
+                  begin
+                    Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64];
+                    Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]};
+                    Sqrt_Q0=Q_sqrt_com_0;
+                    Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2];
+                    Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]};
+                    Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1;
+                    Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4];
+                    Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]};
+                    Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2;
+                    Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6];
+                    Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]};
+                    Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3;
+                  end
+              endcase
+            end
+      endcase
+   /////////////////////////////////////////////////////////////////////////////
+   // Operands for square root when Iteration_unit_num_S = 2'b11, end         //
+   /////////////////////////////////////////////////////////////////////////////
+ end
+
+
+
+  assign Sqrt_R0 = Sqrt_start_dly_S ? '0 : Partial_remainder_DP[C_MANT_FP64+5:0];  // all zeros while Sqrt_start_dly_S is set, else the registered partial remainder
+  assign Sqrt_R1 = {Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5], Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+2:0], Sqrt_DO[0]};  // masked sum of cell 0 (top bit, then low bits) with Sqrt_DO[0] appended
+  assign Sqrt_R2 = {Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5], Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+2:0], Sqrt_DO[1]};  // same packing for cell 1
+  assign Sqrt_R3 = {Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5], Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+2:0], Sqrt_DO[2]};  // same packing for cell 2
+  assign Sqrt_R4 = {Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+5], Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+2:0], Sqrt_DO[3]};  // same packing for cell 3
+
+  logic [C_MANT_FP64+5:0]                               Denominator_se_format_DB;  // format-adjusted denominator, selectable as the b operand of the iteration cells in division
+  // Built from Denominator_se_DB slices; bit C_MANT_FP64-C_MANT_<fmt>-1 (fmt = FP16ALT, FP16, FP32) is replaced by 1 when that format's *_SO flag is set.
+  assign Denominator_se_format_DB={Denominator_se_DB[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-1]},
+                                                         Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-1]},
+                                                         Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-1]},
+                                                         Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0} ;  // FP64_SO followed by three zero bits fills the four LSBs
+  // Operands for iteration cell_U0 (index 0)
+  logic [C_MANT_FP64+5:0]                           First_iteration_cell_div_a_D,First_iteration_cell_div_b_D;
+  logic                                             Sel_b_for_first_S;
+  // Division a operand. While Div_start_dly_S is set: Numerator_se_D, sliced like Denominator_se_format_DB (format bit forced to 1, FP64_SO and 3'b0 as LSBs).
+  // Otherwise: Partial_remainder_DP moved up by one bit, with Quotient_DP[0] inserted at the bit position of each format whose *_SO flag is set.
+  assign First_iteration_cell_div_a_D=(Div_start_dly_S)?{Numerator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-1]},
+                                                         Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16-1]},
+                                                         Numerator_se_D[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP32-1]},
+                                                         Numerator_se_D[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0}
+                                                        :{Partial_remainder_DP[C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                                         Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+2]},
+                                                         Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+2]},
+                                                         Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Quotient_DP[0],3'b0};
+  assign Sel_b_for_first_S=(Div_start_dly_S)?1'b1:Quotient_DP[0];  // sized literal keeps the ternary one bit wide; always 1 while Div_start_dly_S is set
+  assign First_iteration_cell_div_b_D=Sel_b_for_first_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};  // formatted denominator when selected, else Denominator_se_D padded with four zero LSBs
+  assign Iteration_cell_a_BMASK_D[0]=Sqrt_enable_SO?Sqrt_R0:{First_iteration_cell_div_a_D};  // cell 0 input a before masking: sqrt operand when Sqrt_enable_SO is set, else the division operand
+  assign Iteration_cell_b_BMASK_D[0]=Sqrt_enable_SO?Sqrt_Q0:{First_iteration_cell_div_b_D};  // cell 0 input b before masking, selected the same way
+
+
+
+  // Operands for iteration cell_U1 (index 1); the assigns are generated only when Iteration_unit_num_S is non-zero
+  logic [C_MANT_FP64+5:0]                          Sec_iteration_cell_div_a_D,Sec_iteration_cell_div_b_D;
+  logic                                            Sel_b_for_sec_S;
+  generate
+    if(|Iteration_unit_num_S)
+      begin
+        assign Sel_b_for_sec_S=~Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5];  // inverted top bit of cell 0's masked sum
+        assign Sec_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                           Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+2]},
+                                           Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+2]},
+                                           Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_sec_S,3'b0};  // cell 0's masked sum moved up one bit; Sel_b_for_sec_S inserted where a format's *_SO flag is set
+        assign Sec_iteration_cell_div_b_D=Sel_b_for_sec_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};  // formatted denominator when selected, else Denominator_se_D padded with four zero LSBs
+        assign Iteration_cell_a_BMASK_D[1]=Sqrt_enable_SO?Sqrt_R1:{Sec_iteration_cell_div_a_D};  // cell 1 input a before masking: sqrt operand when Sqrt_enable_SO is set
+        assign Iteration_cell_b_BMASK_D[1]=Sqrt_enable_SO?Sqrt_Q1:{Sec_iteration_cell_div_b_D};  // cell 1 input b before masking, selected the same way
+      end
+    endgenerate
+
+  // Operands for iteration cell_U2 (index 2); the assigns are generated only when Iteration_unit_num_S is 2'b10 or 2'b11
+  logic [C_MANT_FP64+5:0]                          Thi_iteration_cell_div_a_D,Thi_iteration_cell_div_b_D;
+  logic                                            Sel_b_for_thi_S;
+  generate
+    if((Iteration_unit_num_S==2'b10) | (Iteration_unit_num_S==2'b11))
+      begin
+        assign Sel_b_for_thi_S=~Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5];  // inverted top bit of cell 1's masked sum
+        assign Thi_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                           Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+2]},
+                                           Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+2]},
+                                           Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_thi_S,3'b0};  // cell 1's masked sum moved up one bit; Sel_b_for_thi_S inserted where a format's *_SO flag is set
+        assign Thi_iteration_cell_div_b_D=Sel_b_for_thi_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};  // formatted denominator when selected, else Denominator_se_D padded with four zero LSBs
+        assign Iteration_cell_a_BMASK_D[2]=Sqrt_enable_SO?Sqrt_R2:{Thi_iteration_cell_div_a_D};  // cell 2 input a before masking: sqrt operand when Sqrt_enable_SO is set
+        assign Iteration_cell_b_BMASK_D[2]=Sqrt_enable_SO?Sqrt_Q2:{Thi_iteration_cell_div_b_D};  // cell 2 input b before masking, selected the same way
+      end
+  endgenerate
+
+  // Operands for iteration cell_U3 (index 3); the assigns are generated only when Iteration_unit_num_S is 2'b11
+  logic [C_MANT_FP64+5:0]                          Fou_iteration_cell_div_a_D,Fou_iteration_cell_div_b_D;
+  logic                                            Sel_b_for_fou_S;
+
+  generate
+    if(Iteration_unit_num_S==2'b11)
+      begin
+        assign Sel_b_for_fou_S=~Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5];  // inverted top bit of cell 2's masked sum
+        assign Fou_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+2]},
+                                           Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+2]},
+                                           Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+2]},
+                                           Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_fou_S,3'b0};  // cell 2's masked sum moved up one bit; Sel_b_for_fou_S inserted where a format's *_SO flag is set
+        assign Fou_iteration_cell_div_b_D=Sel_b_for_fou_S?Denominator_se_format_DB:{Denominator_se_D,4'b0};  // formatted denominator when selected, else Denominator_se_D padded with four zero LSBs
+        assign Iteration_cell_a_BMASK_D[3]=Sqrt_enable_SO?Sqrt_R3:{Fou_iteration_cell_div_a_D};  // cell 3 input a before masking: sqrt operand when Sqrt_enable_SO is set
+        assign Iteration_cell_b_BMASK_D[3]=Sqrt_enable_SO?Sqrt_Q3:{Fou_iteration_cell_div_b_D};  // cell 3 input b before masking, selected the same way
+      end
+  endgenerate
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Masking Control                                                         //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+  logic [C_MANT_FP64+1+4:0]                          Mask_bits_ctl_S;  // per-bit mask ANDed into the iteration-cell operands and sums; kept for extension
+  // All ones leaves every bit unmasked. '1 fills the declared width, so the value follows C_MANT_FP64 instead of a hand-sized 58-bit literal.
+  assign Mask_bits_ctl_S = '1;   // Masking is not needed: the corresponding processing is handled by the code above
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Iteration Instances  with masking control                               //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+  logic                                             Div_enable_SI   [3:0];  // per-cell copy of Div_enable_SO
+  logic                                             Div_start_dly_SI   [3:0];  // per-cell copy of Div_start_dly_S
+  logic                                             Sqrt_enable_SI   [3:0];  // per-cell copy of Sqrt_enable_SO
+  generate
+    genvar i,j;
+      for (i=0; i <= Iteration_unit_num_S ; i++)  // one iteration cell per index 0..Iteration_unit_num_S
+        begin
+          for (j = 0; j <= C_MANT_FP64+5; j++) begin  // bit-by-bit AND of Mask_bits_ctl_S with operands a, b and with the cell's sum output
+              assign Iteration_cell_a_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_a_BMASK_D[i][j];
+              assign Iteration_cell_b_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_b_BMASK_D[i][j];
+              assign Iteration_cell_sum_AMASK_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_sum_D[i][j];
+          end
+          // Every cell receives the same three control signals; they are copied into index i and wired to the instance below.
+          assign  Div_enable_SI[i] = Div_enable_SO;
+          assign  Div_start_dly_SI[i] = Div_start_dly_S;
+          assign  Sqrt_enable_SI[i] = Sqrt_enable_SO;
+          iteration_div_sqrt_mvp #(C_MANT_FP64+6) iteration_div_sqrt
+          (
+          .A_DI                                    (Iteration_cell_a_D[i]            ),
+          .B_DI                                    (Iteration_cell_b_D[i]            ),
+          .Div_enable_SI                           (Div_enable_SI[i]                 ),
+          .Div_start_dly_SI                        (Div_start_dly_SI[i]              ),
+          .Sqrt_enable_SI                          (Sqrt_enable_SI[i]                ),
+          .D_DI                                    (Sqrt_DI[i]                       ),
+          .D_DO                                    (Sqrt_DO[i]                       ),
+          .Sum_DO                                  (Iteration_cell_sum_D[i]          ),
+          .Carry_out_DO                            (Iteration_cell_carry_D[i]        )
+         );
+
+        end
+
+  endgenerate
+
+
+
+  // Next value of the partial-remainder register.
+  //
+  // While Fsm_enable_S is low the register simply holds its value. When it is
+  // high, the new value comes from the last iteration cell in the chain, whose
+  // index equals Iteration_unit_num_S: the matching Sqrt_R<index+1> vector if
+  // Sqrt_enable_SO is set, otherwise that cell's masked sum.
+  //
+  always_comb
+    begin
+      // Default: hold the registered value.
+      Partial_remainder_DN = Partial_remainder_DP;
+
+      if (Fsm_enable_S)
+        begin
+          case (Iteration_unit_num_S)
+            // last cell is cell 0
+            2'b00:
+              Partial_remainder_DN = Sqrt_enable_SO ? Sqrt_R1 : Iteration_cell_sum_AMASK_D[0];
+
+            // last cell is cell 1
+            2'b01:
+              Partial_remainder_DN = Sqrt_enable_SO ? Sqrt_R2 : Iteration_cell_sum_AMASK_D[1];
+
+            // last cell is cell 2
+            2'b10:
+              Partial_remainder_DN = Sqrt_enable_SO ? Sqrt_R3 : Iteration_cell_sum_AMASK_D[2];
+
+            // last cell is cell 3
+            2'b11:
+              Partial_remainder_DN = Sqrt_enable_SO ? Sqrt_R4 : Iteration_cell_sum_AMASK_D[3];
+          endcase
+        end
+    end
+
+
+
+   // Partial-remainder register.
+   //
+   // Cleared asynchronously while Rst_RBI is low; otherwise it loads
+   // Partial_remainder_DN on every rising edge of Clk_CI.
+   always_ff @(posedge Clk_CI or negedge Rst_RBI)
+     begin
+        if (~Rst_RBI)
+          Partial_remainder_DP <= '0;
+        else
+          Partial_remainder_DP <= Partial_remainder_DN;
+     end
+
+   logic [C_MANT_FP64+4:0] Quotient_DN;  // next value of Quotient_DP
+
+  // Next value of the quotient register.
+  //
+  // While Fsm_enable_S is low the register holds. When it is high, Quotient_DP is
+  // shifted left by Iteration_unit_num_S+1 bits and the vacated low bits are
+  // filled with the upper bits of Sqrt_quotinent_S (Sqrt_enable_SO set) or with
+  // the carry-outs of the iteration cells, cell 0 in the most significant of the
+  // new positions (Sqrt_enable_SO clear).
+  always_comb
+    begin
+      // Default: hold the registered value.
+      Quotient_DN = Quotient_DP;
+
+      if (Fsm_enable_S)
+        begin
+          case (Iteration_unit_num_S)
+            2'b00:  // one new bit per step
+              Quotient_DN = Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+3:0], Sqrt_quotinent_S[3]}
+                                           : {Quotient_DP[C_MANT_FP64+3:0], Iteration_cell_carry_D[0]};
+
+            2'b01:  // two new bits per step
+              Quotient_DN = Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+2:0], Sqrt_quotinent_S[3:2]}
+                                           : {Quotient_DP[C_MANT_FP64+2:0], Iteration_cell_carry_D[0], Iteration_cell_carry_D[1]};
+
+            2'b10:  // three new bits per step
+              Quotient_DN = Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+1:0], Sqrt_quotinent_S[3:1]}
+                                           : {Quotient_DP[C_MANT_FP64+1:0], Iteration_cell_carry_D[0], Iteration_cell_carry_D[1], Iteration_cell_carry_D[2]};
+
+            2'b11:  // four new bits per step
+              Quotient_DN = Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64:0], Sqrt_quotinent_S}
+                                           : {Quotient_DP[C_MANT_FP64:0], Iteration_cell_carry_D[0], Iteration_cell_carry_D[1], Iteration_cell_carry_D[2], Iteration_cell_carry_D[3]};
+          endcase
+        end
+    end
+
+   // Quotient register: cleared asynchronously while Rst_RBI is low, otherwise
+   // loaded with Quotient_DN on every rising edge of Clk_CI.
+   always_ff @(posedge Clk_CI or negedge Rst_RBI)
+     begin
+        if (~Rst_RBI)
+          Quotient_DP <= '0;
+        else
+          Quotient_DP <= Quotient_DN;
+     end
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Precision Control for outputs                                          //
+   /////////////////////////////////////////////////////////////////////////////
+
+
+//////////////////////one iteration unit, start///////////////////////////////////////
+   generate // Output formatting of Quotient_DP for the one-iteration-unit configuration.
+     if(Iteration_unit_num_S==2'b00) // this branch is generated only for this Iteration_unit_num_S value
+       begin
+        always_comb // combinational: Mant_result_prenorm_DO is a function of Format_sel_S, Precision_ctl_S and Quotient_DP
+          begin
+            case (Format_sel_S) // outer select picks the mantissa-width constant; every right-hand side below totals C_MANT_FP64+5 bits
+              2'b00: // slices sized with C_MANT_FP32
+                begin
+                  case (Precision_ctl_S) // 6'h00 and unlisted values take the widest slice; 6'h17 down to 6'h07 each keep one quotient bit fewer and pad one more zero
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                    6'h17:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-1:0],{(C_MANT_FP64-C_MANT_FP32+4+1){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h15:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-5:0],{(C_MANT_FP64-C_MANT_FP32+4+5){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-7:0],{(C_MANT_FP64-C_MANT_FP32+4+7){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-11:0],{(C_MANT_FP64-C_MANT_FP32+4+11){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-13:0],{(C_MANT_FP64-C_MANT_FP32+4+13){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default : // any Precision_ctl_S value not listed above: same assignment as 6'h00
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+
+              2'b01: // slices sized with C_MANT_FP64; the widest slice needs no zero padding
+                begin
+                  case (Precision_ctl_S) // 6'h34 down to 6'h07 each keep one quotient bit fewer and pad one more zero
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                    6'h34:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64:0],{(4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h33:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h32:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h31:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h30:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-4:0],{(4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-6:0],{(4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h2a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-10:0],{(4+10){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h29:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h28:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-12:0],{(4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h27:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h26:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h25:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-16:0],{(4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h23:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h22:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-18:0],{(4+18){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h21:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h20:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-22:0],{(4+22){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-24:0],{(4+24){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h1a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h19:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-28:0],{(4+28){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h17:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-30:0],{(4+30){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h15:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-34:0],{(4+34){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-36:0],{(4+36){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-40:0],{(4+40){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-42:0],{(4+42){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default: // any Precision_ctl_S value not listed above: same assignment as 6'h00
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                  endcase
+                end
+
+              2'b10: // slices sized with C_MANT_FP16
+                begin
+                  case (Precision_ctl_S) // only 6'h0a down to 6'h07 select a narrowed slice
+                    6'b00: // 6-bit zero, same value as the 6'h00 labels used in the branches above
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4
+                      end
+                    6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16:0],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default : // any Precision_ctl_S value not listed above: same assignment as 6'b00
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+
+              2'b11: // slices sized with C_MANT_FP16ALT
+                begin
+
+                  case (Precision_ctl_S) // only 6'h07 selects a narrowed slice
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default : // any Precision_ctl_S value not listed above: same assignment as 6'b00
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+        end
+      endgenerate
+//////////////////////one iteration unit, end//////////////////////////////////////////
+
+//////////////////////two iteration units, start///////////////////////////////////////
+   generate // Output formatting of Quotient_DP for the two-iteration-unit configuration.
+     if(Iteration_unit_num_S==2'b01) // this branch is generated only for this Iteration_unit_num_S value
+       begin
+        always_comb // combinational: Mant_result_prenorm_DO is a function of Format_sel_S, Precision_ctl_S and Quotient_DP
+          begin
+            case (Format_sel_S) // outer select picks the mantissa-width constant; every right-hand side below totals C_MANT_FP64+5 bits
+              2'b00: // slices sized with C_MANT_FP32
+                begin
+                  case (Precision_ctl_S) // precision values are grouped in pairs; each pair shares one slice, two bits narrower than the previous pair
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                    6'h17,6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h15,6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default: // any Precision_ctl_S value not listed above: same assignment as 6'h00
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+              2'b01: // slices sized with C_MANT_FP64; the widest slice is one bit shorter than the output, so one zero is appended
+                begin
+                  case (Precision_ctl_S) // 6'h34 and 6'h07 stand alone; the values between them are grouped in pairs
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3
+                      end
+                    6'h34: // slice starts at bit 1, so Quotient_DP[0] is not forwarded
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h33,6'h32:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h31,6'h30:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2f,6'h2e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2d,6'h2c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2b,6'h2a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h29,6'h28:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h27,6'h26:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h25,6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h23,6'h22:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h21,6'h20:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1f,6'h1e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1d,6'h1c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1b,6'h1a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h19,6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h17,6'h16:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h15,6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default: // any Precision_ctl_S value not listed above: same assignment as 6'h00
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3
+                      end
+                  endcase
+                end
+
+              2'b10: // slices sized with C_MANT_FP16; the widest slice stops at C_MANT_FP16+3 and gets one extra pad zero
+                begin
+                  case (Precision_ctl_S) // only 6'h0a down to 6'h07 select a narrowed slice
+                    6'b00: // 6-bit zero, same value as the 6'h00 labels used in the branches above
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+3:0],{(C_MANT_FP64-C_MANT_FP16+1){1'b0}} }; //+3
+                      end
+                    6'h0a: // slice starts at bit 1, so Quotient_DP[0] is not forwarded
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default : // any Precision_ctl_S value not listed above: same assignment as 6'b00, as in every other format branch
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+3:0],{(C_MANT_FP64-C_MANT_FP16+1){1'b0}} }; //+3 (previously the +4 slice, which shifted the result by one bit relative to the 6'b00 branch)
+                      end
+                  endcase
+                end
+
+              2'b11: // slices sized with C_MANT_FP16ALT
+                begin
+
+                  case (Precision_ctl_S) // only 6'h07 selects a narrowed slice
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                    6'h07:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default : // any Precision_ctl_S value not listed above: same assignment as 6'b00
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+       end
+     endgenerate
+//////////////////////two iteration units, end//////////////////////////////////////////
+
+//////////////////////three iteration units, start///////////////////////////////////////
+   generate // Output formatting of Quotient_DP for the three-iteration-unit configuration.
+     if(Iteration_unit_num_S==2'b10) // this branch is generated only for this Iteration_unit_num_S value
+       begin
+        always_comb // combinational: Mant_result_prenorm_DO is a function of Format_sel_S, Precision_ctl_S and Quotient_DP
+          begin
+            case (Format_sel_S) // outer select picks the mantissa-width constant; every right-hand side below totals C_MANT_FP64+5 bits
+              2'b00: // slices sized with C_MANT_FP32; the widest slice stops at C_MANT_FP32+3 and gets one extra pad zero
+                begin
+                  case (Precision_ctl_S) // precision values are grouped in threes; each group shares one slice, three bits narrower than the previous group
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3
+                      end
+                    6'h17,6'h16,6'h15:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h14,6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10,6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h08,6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default: // any Precision_ctl_S value not listed above: same assignment as 6'h00
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3
+                      end
+                  endcase
+                end
+
+              2'b01: // slices sized with C_MANT_FP64; the widest slice needs no zero padding
+                begin
+                  case (Precision_ctl_S) // 6'h34,6'h33 form a group of two; the remaining values are grouped in threes
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                    6'h34,6'h33: // slice starts at bit 1, so Quotient_DP[0] is not forwarded
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h32,6'h31,6'h30:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2f,6'h2e,6'h2d:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2c,6'h2b,6'h2a:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h29,6'h28,6'h27:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h26,6'h25,6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h23,6'h22,6'h21:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h20,6'h1f,6'h1e:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1d,6'h1c,6'h1b:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1a,6'h19,6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h17,6'h16,6'h15:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h14,6'h13,6'h12:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h11,6'h10,6'h0f:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h08,6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default: // any Precision_ctl_S value not listed above: same assignment as 6'h00
+                      begin
+                        Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4
+                      end
+                  endcase
+                end
+
+              2'b10: // slices sized with C_MANT_FP16
+                begin
+                  case (Precision_ctl_S) // only 6'h0a down to 6'h06 select a narrowed slice
+                    6'b00: // 6-bit zero, same value as the 6'h00 labels used in the branches above
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4
+                      end
+                    6'h0a,6'h09: // slice starts at bit 1, so Quotient_DP[0] is not forwarded
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h08,6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default : // any Precision_ctl_S value not listed above: same assignment as 6'b00
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+
+              2'b11: // slices sized with C_MANT_FP16ALT
+                begin
+
+                  case (Precision_ctl_S) // only 6'h07 and 6'h06 select a narrowed slice
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                    6'h07,6'h06: // slice starts at bit 1, so Quotient_DP[0] is not forwarded
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+1:1],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default : // any Precision_ctl_S value not listed above: same assignment as 6'b00
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+        end
+      endgenerate
+//////////////////////three iteration units, end//////////////////////////////////////////
+
+//////////////////////four iteration units, start///////////////////////////////////////
+   generate  // Mant_result_prenorm_DO formatting for the four-iteration-unit configuration
+     if(Iteration_unit_num_S==2'b11)  // NOTE(review): defs_div_sqrt_mvp sets a localparam of this name to 2'b10; if that is the one in scope this branch is not elaborated - confirm
+       begin
+        always_comb  // each arm builds a (C_MANT_FP64+5)-bit value: a low slice of Quotient_DP followed by zero padding
+          begin
+            case (Format_sel_S)  // arms 2'b00/2'b01/2'b10/2'b11 use the FP32/FP64/FP16/FP16ALT mantissa constants
+              2'b00:
+                begin
+                  case (Precision_ctl_S)  // 6'h00 and unlisted values share the full slice; each listed group keeps (largest value in the group)+1 quotient bits
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                    6'h17,6'h16,6'h15,6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12,6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4
+                      end
+                  endcase
+                end
+              // FP64 arm: 6'h34 uses the same slice as 6'h00; every other listed group keeps (largest value in the group)+1 quotient bits.
+              2'b01:
+                begin
+                  case (Precision_ctl_S)  // slice shrinks by 4 bits and padding grows by 4 bits from one listed group to the next
+                    6'h00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3
+                      end
+                    6'h34:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h33,6'h32,6'h31,6'h30:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(5){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2f,6'h2e,6'h2d,6'h2c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(9){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h2b,6'h2a,6'h29,6'h28:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(13){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h27,6'h26,6'h25,6'h24:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(17){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h23,6'h22,6'h21,6'h20:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(21){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1f,6'h1e,6'h1d,6'h1c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(25){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h1b,6'h1a,6'h19,6'h18:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(29){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h17,6'h16,6'h15,6'h14:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(33){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h13,6'h12,6'h11,6'h10:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(37){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0f,6'h0e,6'h0d,6'h0c:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(41){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h0b,6'h0a,6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(45){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(49){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3
+                      end
+                  endcase
+                end
+              // FP16 arm: the 6'h0a..6'h08 group takes Quotient_DP[C_MANT_FP16+1:1], i.e. it drops quotient bit 0.
+              2'b10:
+                begin
+                  case (Precision_ctl_S)  // full slice is C_MANT_FP16+6 bits; shortened slices are 11 and 8 bits
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5
+                      end
+                    6'h0a,6'h09,6'h08:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1-4:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5
+                      end
+                  endcase
+                end
+              // FP16ALT arm: only 6'h07/6'h06 select a shortened slice.
+              2'b11:
+                begin
+                  // full slice is C_MANT_FP16ALT+5 bits; the shortened one is C_MANT_FP16ALT+1 bits
+                  case (Precision_ctl_S)
+                    6'b00:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                    6'h07,6'h06:
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1
+                      end
+                    default :
+                      begin
+                        Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4
+                      end
+                  endcase
+                end
+            endcase
+          end
+        end
+      endgenerate
+//////////////////////four iteration units, end///////////////////////////////////////
+
+
+
+
+
+// Resultant (pre-normalisation) exponent: next-state/registered pair and the three adder operands
+   logic [C_EXP_FP64+1:0] Exp_result_prenorm_DN;
+   logic [C_EXP_FP64+1:0] Exp_result_prenorm_DP;
+
+   logic [C_EXP_FP64+1:0] Exp_add_a_D;
+   logic [C_EXP_FP64+1:0] Exp_add_b_D;
+   logic [C_EXP_FP64+1:0] Exp_add_c_D;
+
+  // Exponent offsets of the currently selected format. Both are plain
+  // integers driven combinationally from Format_sel_S:
+  //   2'b00 -> FP32, 2'b01 -> FP64, 2'b10 -> FP16, 2'b11 -> FP16ALT
+  // All four 2-bit encodings are listed explicitly; there is no
+  // default arm.
+  integer C_BIAS_AONE;
+  integer C_HALF_BIAS;
+
+  always_comb
+    begin
+      case (Format_sel_S)
+        2'b00: begin  // FP32
+          C_BIAS_AONE = C_BIAS_AONE_FP32;     C_HALF_BIAS = C_HALF_BIAS_FP32;
+        end
+        2'b01: begin  // FP64
+          C_BIAS_AONE = C_BIAS_AONE_FP64;     C_HALF_BIAS = C_HALF_BIAS_FP64;
+        end
+        2'b10: begin  // FP16
+          C_BIAS_AONE = C_BIAS_AONE_FP16;     C_HALF_BIAS = C_HALF_BIAS_FP16;
+        end
+        2'b11: begin  // FP16ALT
+          C_BIAS_AONE = C_BIAS_AONE_FP16ALT;  C_HALF_BIAS = C_HALF_BIAS_FP16ALT;
+        end
+      endcase
+    end
+
+// Division:    exponent = (Exp_a_D-LZ1) - (Exp_b_D-LZ2) + BIAS
+// Square root: exponent = (Exp_a_D-LZ1)/2 + (Exp_a_D-LZ1)%2 + C_HALF_BIAS
+// (Exp_a_D-LZ1) and (Exp_b_D-LZ2) arrive here already adjusted for denormal inputs by the preprocess module.
+// The result is formed as the three-operand sum Exp_add_a_D + Exp_add_b_D + Exp_add_c_D; the operands are selected below.
+  assign Exp_add_a_D = {Sqrt_start_dly_S?{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64:1]}:{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI}}; // sqrt: Exp_num_DI[C_EXP_FP64:1] with bit C_EXP_FP64 replicated on top; otherwise: Exp_num_DI with bit C_EXP_FP64 replicated on top
+  assign Exp_add_b_D = {Sqrt_start_dly_S?{1'b0,{C_EXP_ZERO_FP64},Exp_num_DI[0]}:{~Exp_den_DI[C_EXP_FP64],~Exp_den_DI[C_EXP_FP64],~Exp_den_DI}}; // sqrt: Exp_num_DI[0] zero-extended (the %2 term); otherwise: bitwise inverse of Exp_den_DI, extended with its inverted top bit
+  assign Exp_add_c_D = {Div_start_dly_S?{{C_BIAS_AONE}}:{{C_HALF_BIAS}}}; // division: C_BIAS_AONE; otherwise: C_HALF_BIAS (integer values, truncated to the width of Exp_add_c_D)
+  assign Exp_result_prenorm_DN  = (Start_dly_S)?{Exp_add_a_D + Exp_add_b_D + Exp_add_c_D}:Exp_result_prenorm_DP; // new sum while Start_dly_S is set, otherwise hold the registered value
+// NOTE(review): ~Exp_den_DI + C_BIAS_AONE looks like a two's-complement subtract with the +1 folded into the "bias and one" constant - confirm.
+
+  always_ff @(posedge Clk_CI, negedge Rst_RBI) // exponent register with asynchronous reset on the falling edge of Rst_RBI
+   begin
+      if(~Rst_RBI)
+        begin
+          Exp_result_prenorm_DP <= '0; // cleared while Rst_RBI is low
+        end
+      else
+        begin
+          Exp_result_prenorm_DP<=  Exp_result_prenorm_DN;
+        end
+   end
+// The module output is the registered exponent, not the combinational next-state value.
+  assign Exp_result_prenorm_DO = Exp_result_prenorm_DP;
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
new file mode 100644
index 0000000000..b3f41fec61
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
@@ -0,0 +1,83 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// This file contains all div_sqrt_top_mvp parameters
+// Authors    : Lei Li  (lile@iis.ee.ethz.ch)
+
+package defs_div_sqrt_mvp;  // widths, encodings and per-format constants shared by the div/sqrt sources
+
+   // op command
+   localparam C_RM                  = 3;     // bit width of the rounding-mode ports ([C_RM-1:0])
+   localparam C_RM_NEAREST          = 3'h0;  // rounding-mode encodings, C_RM bits each
+   localparam C_RM_TRUNC            = 3'h1;
+   localparam C_RM_PLUSINF          = 3'h2;
+   localparam C_RM_MINUSINF         = 3'h3;
+   localparam C_PC                  = 6; // Precision Control: bit width of the Precision_ctl ports
+   localparam C_FS                  = 2; // Format Selection: bit width of the Format_sel ports
+   localparam C_IUNC                = 2; // Iteration Unit Number Control
+   localparam Iteration_unit_num_S  = 2'b10; // tested in generate-if conditions; 2'b11 selects the four-iteration-unit logic
+
+   // FP64 (64-bit operand: 1 sign + 11 exponent + 52 mantissa bits)
+   localparam C_OP_FP64             = 64;    // operand width
+   localparam C_MANT_FP64           = 52;    // mantissa width
+   localparam C_EXP_FP64            = 11;    // exponent width
+   localparam C_BIAS_FP64           = 1023;
+   localparam C_BIAS_AONE_FP64      = 11'h400;  // C_BIAS_FP64 + 1 (1024)
+   localparam C_HALF_BIAS_FP64      = 511;      // C_BIAS_FP64 / 2, rounded down
+   localparam C_EXP_ZERO_FP64       = 11'h000;
+   localparam C_EXP_ONE_FP64        = 13'h001; // 13 bits = C_EXP_FP64+2; original note: bit width is in agreement with the norm module
+   localparam C_EXP_INF_FP64        = 11'h7FF;  // all exponent bits set
+   localparam C_MANT_ZERO_FP64      = 52'h0;
+   localparam C_MANT_NAN_FP64       = 52'h8_0000_0000_0000;  // only the mantissa MSB set
+   localparam C_PZERO_FP64          = 64'h0000_0000_0000_0000;
+   localparam C_MZERO_FP64          = 64'h8000_0000_0000_0000;  // only the sign bit set
+   localparam C_QNAN_FP64           = 64'h7FF8_0000_0000_0000;  // sign 0, exponent all ones, mantissa MSB set
+
+   // FP32 (32-bit operand: 1 sign + 8 exponent + 23 mantissa bits)
+   localparam C_OP_FP32             = 32;    // operand width
+   localparam C_MANT_FP32           = 23;    // mantissa width
+   localparam C_EXP_FP32            = 8;     // exponent width
+   localparam C_BIAS_FP32           = 127;
+   localparam C_BIAS_AONE_FP32      = 8'h80;    // C_BIAS_FP32 + 1 (128)
+   localparam C_HALF_BIAS_FP32      = 63;       // C_BIAS_FP32 / 2, rounded down
+   localparam C_EXP_ZERO_FP32       = 8'h00;
+   localparam C_EXP_INF_FP32        = 8'hFF;    // all exponent bits set
+   localparam C_MANT_ZERO_FP32      = 23'h0;
+   localparam C_PZERO_FP32          = 32'h0000_0000;
+   localparam C_MZERO_FP32          = 32'h8000_0000;  // only the sign bit set
+   localparam C_QNAN_FP32           = 32'h7FC0_0000;  // sign 0, exponent all ones, mantissa MSB set
+
+   // FP16 (16-bit operand: 1 sign + 5 exponent + 10 mantissa bits)
+   localparam C_OP_FP16             = 16;    // operand width
+   localparam C_MANT_FP16           = 10;    // mantissa width
+   localparam C_EXP_FP16            = 5;     // exponent width
+   localparam C_BIAS_FP16           = 15;
+   localparam C_BIAS_AONE_FP16      = 5'h10;    // C_BIAS_FP16 + 1 (16)
+   localparam C_HALF_BIAS_FP16      = 7;        // C_BIAS_FP16 / 2, rounded down
+   localparam C_EXP_ZERO_FP16       = 5'h00;
+   localparam C_EXP_INF_FP16        = 5'h1F;    // all exponent bits set
+   localparam C_MANT_ZERO_FP16      = 10'h0;
+   localparam C_PZERO_FP16          = 16'h0000;
+   localparam C_MZERO_FP16          = 16'h8000;  // only the sign bit set
+   localparam C_QNAN_FP16           = 16'h7E00;  // sign 0, exponent all ones, mantissa MSB set
+
+   // FP16alt (16-bit operand: 1 sign + 8 exponent + 7 mantissa bits; no PZERO/MZERO constants are defined for it)
+   localparam C_OP_FP16ALT           = 16;   // operand width
+   localparam C_MANT_FP16ALT         = 7;    // mantissa width
+   localparam C_EXP_FP16ALT          = 8;    // exponent width (same as FP32)
+   localparam C_BIAS_FP16ALT         = 127;
+   localparam C_BIAS_AONE_FP16ALT    = 8'h80;   // C_BIAS_FP16ALT + 1 (128)
+   localparam C_HALF_BIAS_FP16ALT    = 63;      // C_BIAS_FP16ALT / 2, rounded down
+   localparam C_EXP_ZERO_FP16ALT     = 8'h00;
+   localparam C_EXP_INF_FP16ALT      = 8'hFF;   // all exponent bits set
+   localparam C_MANT_ZERO_FP16ALT    = 7'h0;
+   localparam C_QNAN_FP16ALT         = 16'h7FC0;  // sign 0, exponent all ones, mantissa MSB set
+
+endpackage : defs_div_sqrt_mvp
diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
new file mode 100644
index 0000000000..051bcc3ade
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
@@ -0,0 +1,232 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li -- lile@iis.ee.ethz.ch                              //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    20/04/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    div_sqrt_mvp_wrapper.sv                                    //
+// Project Name:   The shared divisor and square root                         //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    The wrapper of  div_sqrt_top_mvp                           //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module div_sqrt_mvp_wrapper  // div_sqrt_top_mvp with an optional input register stage and one or two output register stages
+#(
+   parameter   PrePipeline_depth_S             =        0,  // 1: register every input before div_sqrt_top_mvp; any other value: inputs are connected directly
+   parameter   PostPipeline_depth_S            =        2  // 2: two output register stages; any other value: a single output register stage
+)
+  (//Input
+   input logic                            Clk_CI,
+   input logic                            Rst_RBI,
+   input logic                            Div_start_SI,
+   input logic                            Sqrt_start_SI,
+
+   //Input Operands
+   input logic [C_OP_FP64-1:0]            Operand_a_DI,
+   input logic [C_OP_FP64-1:0]            Operand_b_DI,
+
+   // Input Control
+   input logic [C_RM-1:0]                 RM_SI,    //Rounding Mode
+   input logic [C_PC-1:0]                 Precision_ctl_SI, // Precision Control
+   input logic [C_FS-1:0]                 Format_sel_SI,  // Format Selection,
+   input logic                            Kill_SI,
+
+   //Output Result
+   output logic [C_OP_FP64-1:0]           Result_DO,
+
+   //Output-Flags
+   output logic [4:0]                     Fflags_SO,
+   output logic                           Ready_SO,
+   output logic                           Done_SO
+ );
+
+   // Registered copies of the inputs; they are only driven and used when PrePipeline_depth_S==1.
+   logic                                 Div_start_S_S,Sqrt_start_S_S;
+   logic [C_OP_FP64-1:0]                 Operand_a_S_D;
+   logic [C_OP_FP64-1:0]                 Operand_b_S_D;
+
+   // Input Control
+   logic [C_RM-1:0]                      RM_S_S;    //Rounding Mode
+   logic [C_PC-1:0]                      Precision_ctl_S_S; // Precision Control
+   logic [C_FS-1:0]                      Format_sel_S_S;  // Format Selection,
+   logic                                 Kill_S_S;
+
+  // Raw (unregistered) outputs of div_sqrt_top_mvp.
+  logic [C_OP_FP64-1:0]                  Result_D;
+  logic                                  Ready_S;
+  logic                                  Done_S;
+  logic [4:0]                            Fflags_S;
+
+  // Exactly one div_top_U0 instance is generated; the branches differ only in where its inputs come from.
+  generate
+    if(PrePipeline_depth_S==1)
+      begin
+         // Inputs come from the *_S_S / *_S_D registers below.
+         div_sqrt_top_mvp  div_top_U0  //for RTL
+
+          (//Input
+           .Clk_CI                 (Clk_CI),
+           .Rst_RBI                (Rst_RBI),
+           .Div_start_SI           (Div_start_S_S),
+           .Sqrt_start_SI          (Sqrt_start_S_S),
+           //Input Operands
+           .Operand_a_DI          (Operand_a_S_D),
+           .Operand_b_DI          (Operand_b_S_D),
+           .RM_SI                 (RM_S_S),    //Rounding Mode
+           .Precision_ctl_SI      (Precision_ctl_S_S),
+           .Format_sel_SI         (Format_sel_S_S),
+           .Kill_SI               (Kill_S_S),
+           .Result_DO             (Result_D),
+           .Fflags_SO             (Fflags_S),
+           .Ready_SO              (Ready_S),
+           .Done_SO               (Done_S)
+         );
+           // Input register stage: asynchronous reset on the falling edge of Rst_RBI, otherwise sample every input each cycle.
+           always_ff @(posedge Clk_CI, negedge Rst_RBI)
+             begin
+                if(~Rst_RBI)
+                  begin
+                    Div_start_S_S<='0;
+                    Sqrt_start_S_S<=1'b0;
+                    Operand_a_S_D<='0;
+                    Operand_b_S_D<='0;
+                    RM_S_S <='0;  // C_RM bits wide: use the fill literal rather than a 1-bit constant
+                    Precision_ctl_S_S<='0;
+                    Format_sel_S_S<='0;
+                    Kill_S_S<='0;
+                  end
+                else
+                  begin
+                    Div_start_S_S<=Div_start_SI;
+                    Sqrt_start_S_S<=Sqrt_start_SI;
+                    Operand_a_S_D<=Operand_a_DI;
+                    Operand_b_S_D<=Operand_b_DI;
+                    RM_S_S <=RM_SI;
+                    Precision_ctl_S_S<=Precision_ctl_SI;
+                    Format_sel_S_S<=Format_sel_SI;
+                    Kill_S_S<=Kill_SI;
+                  end
+            end
+     end
+     // Any PrePipeline_depth_S other than 1: the module inputs drive div_top_U0 directly.
+     else
+      begin
+          div_sqrt_top_mvp  div_top_U0  //for RTL
+          (//Input
+           .Clk_CI                 (Clk_CI),
+           .Rst_RBI                (Rst_RBI),
+           .Div_start_SI           (Div_start_SI),
+           .Sqrt_start_SI          (Sqrt_start_SI),
+           //Input Operands
+           .Operand_a_DI          (Operand_a_DI),
+           .Operand_b_DI          (Operand_b_DI),
+           .RM_SI                 (RM_SI),    //Rounding Mode
+           .Precision_ctl_SI      (Precision_ctl_SI),
+           .Format_sel_SI         (Format_sel_SI),
+           .Kill_SI               (Kill_SI),
+           .Result_DO             (Result_D),
+           .Fflags_SO             (Fflags_S),
+           .Ready_SO              (Ready_S),
+           .Done_SO               (Done_S)
+         );
+      end
+  endgenerate
+
+   /////////////////////////////////////////////////////////////////////////////
+   // First Stage of Outputs (always present, independent of the parameters)
+   /////////////////////////////////////////////////////////////////////////////
+  logic [C_OP_FP64-1:0]         Result_dly_S_D;
+  logic                         Ready_dly_S_S;
+  logic                         Done_dly_S_S;
+  logic [4:0]                   Fflags_dly_S_S;
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Result_dly_S_D<='0;
+            Ready_dly_S_S<=1'b0;
+            Done_dly_S_S<=1'b0;
+            Fflags_dly_S_S<='0;  // 5 bits wide: use the fill literal rather than a 1-bit constant
+          end
+        else
+          begin
+            Result_dly_S_D<=Result_D;
+            Ready_dly_S_S<=Ready_S;
+            Done_dly_S_S<=Done_S;
+            Fflags_dly_S_S<=Fflags_S;
+          end
+    end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Second Stage of Outputs (registers are driven only when PostPipeline_depth_S==2)
+   /////////////////////////////////////////////////////////////////////////////
+
+  logic [C_OP_FP64-1:0]         Result_dly_D_D;
+  logic                         Ready_dly_D_S;
+  logic                         Done_dly_D_S;
+  logic [4:0]                   Fflags_dly_D_S;
+  generate
+    if(PostPipeline_depth_S==2)
+      begin
+        always_ff @(posedge Clk_CI, negedge Rst_RBI)
+          begin
+            if(~Rst_RBI)
+              begin
+                Result_dly_D_D<='0;
+                Ready_dly_D_S<=1'b0;
+                Done_dly_D_S<=1'b0;
+                Fflags_dly_D_S<='0;  // 5 bits wide: use the fill literal rather than a 1-bit constant
+              end
+           else
+             begin
+               Result_dly_D_D<=Result_dly_S_D;
+               Ready_dly_D_S<=Ready_dly_S_S;
+               Done_dly_D_S<=Done_dly_S_S;
+               Fflags_dly_D_S<=Fflags_dly_S_S;
+             end
+          end
+        assign  Result_DO = Result_dly_D_D;
+        assign  Ready_SO  = Ready_dly_D_S;
+        assign  Done_SO  = Done_dly_D_S;
+        assign  Fflags_SO=Fflags_dly_D_S;
+       end
+     // Any PostPipeline_depth_S other than 2: the outputs come straight from the first stage.
+     else
+       begin
+         assign  Result_DO = Result_dly_S_D;
+         assign  Ready_SO  = Ready_dly_S_S;
+         assign  Done_SO   = Done_dly_S_S;
+         assign  Fflags_SO  = Fflags_dly_S_S;
+       end
+
+   endgenerate
+
+endmodule //
diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
new file mode 100644
index 0000000000..3af6081b7f
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
@@ -0,0 +1,180 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li -- lile@iis.ee.ethz.ch                              //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    03/03/2018                                                 //
+// Design Name:    div_sqrt_top_mvp                                           //
+// Module Name:    div_sqrt_top_mvp.sv                                        //
+// Project Name:   The shared divisor and square root                         //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    The top of div and sqrt                                    //
+//                                                                            //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module div_sqrt_top_mvp
+  (
+   // Clock and reset, forwarded to preprocess_U0 and nrbd_nrsc_U0
+   input  logic                   Clk_CI,
+   input  logic                   Rst_RBI,
+   // Start requests, forwarded to preprocess_U0 and nrbd_nrsc_U0
+   input  logic                   Div_start_SI,
+   input  logic                   Sqrt_start_SI,
+   // Operands
+   input  logic [C_OP_FP64-1:0]   Operand_a_DI,
+   input  logic [C_OP_FP64-1:0]   Operand_b_DI,
+   // Control
+   input  logic [C_RM-1:0]        RM_SI,             // Rounding Mode
+   input  logic [C_PC-1:0]        Precision_ctl_SI,  // Precision Control
+   input  logic [C_FS-1:0]        Format_sel_SI,     // Format Selection
+   input  logic                   Kill_SI,
+   // Result
+   output logic [C_OP_FP64-1:0]   Result_DO,
+   // Flags and handshake
+   output logic [4:0]             Fflags_SO,
+   output logic                   Ready_SO,
+   output logic                   Done_SO
+  );
+
+  // preprocess_U0 outputs consumed by nrbd_nrsc_U0
+  logic [C_EXP_FP64:0]      Exp_a_D, Exp_b_D;
+  logic [C_MANT_FP64:0]     Mant_a_D, Mant_b_D;
+  logic                     Start_S;
+  logic                     Special_case_SB, Special_case_dly_SB;
+
+  // preprocess_U0 outputs consumed by fpu_norm_U0
+  logic                     Sign_z_D;
+  logic [C_RM-1:0]          RM_dly_S;
+  logic                     Inf_a_S, Inf_b_S;
+  logic                     Zero_a_S, Zero_b_S;
+  logic                     NaN_a_S, NaN_b_S;
+  logic                     SNaN_S;
+
+  // nrbd_nrsc_U0 outputs consumed by fpu_norm_U0
+  logic [C_EXP_FP64+1:0]    Exp_z_D;
+  logic [C_MANT_FP64+4:0]   Mant_z_D;
+  logic                     Div_enable_S, Sqrt_enable_S;
+  logic                     Full_precision_S;
+  logic                     FP32_S, FP64_S, FP16_S, FP16ALT_S;
+
+  // preprocess_mvp: takes the raw operands, rounding mode and format selection.
+  // Its Ready_SI input reads back this module's own Ready_SO (driven by nrbd_nrsc_U0).
+  preprocess_mvp preprocess_U0
+    (
+     // from the module ports
+     .Clk_CI               (Clk_CI),
+     .Rst_RBI              (Rst_RBI),
+     .Div_start_SI         (Div_start_SI),
+     .Sqrt_start_SI        (Sqrt_start_SI),
+     .Operand_a_DI         (Operand_a_DI),
+     .Operand_b_DI         (Operand_b_DI),
+     .RM_SI                (RM_SI),
+     .Format_sel_SI        (Format_sel_SI),
+     // fed back from nrbd_nrsc_U0
+     .Ready_SI             (Ready_SO),
+     // towards nrbd_nrsc_U0
+     .Start_SO             (Start_S),
+     .Exp_a_DO_norm        (Exp_a_D),
+     .Exp_b_DO_norm        (Exp_b_D),
+     .Mant_a_DO_norm       (Mant_a_D),
+     .Mant_b_DO_norm       (Mant_b_D),
+     .Special_case_SBO     (Special_case_SB),
+     .Special_case_dly_SBO (Special_case_dly_SB),
+     // towards fpu_norm_U0
+     .RM_dly_SO            (RM_dly_S),
+     .Sign_z_DO            (Sign_z_D),
+     .Inf_a_SO             (Inf_a_S),
+     .Inf_b_SO             (Inf_b_S),
+     .Zero_a_SO            (Zero_a_S),
+     .Zero_b_SO            (Zero_b_S),
+     .NaN_a_SO             (NaN_a_S),
+     .NaN_b_SO             (NaN_b_S),
+     .SNaN_SO              (SNaN_S)
+    );
+
+  // nrbd_nrsc_mvp: receives the exponent/mantissa fields from preprocess_U0 together
+  // with the start, kill, precision and format controls; drives Ready_SO and Done_SO.
+  nrbd_nrsc_mvp nrbd_nrsc_U0
+    (
+     // from the module ports
+     .Clk_CI               (Clk_CI),
+     .Rst_RBI              (Rst_RBI),
+     .Div_start_SI         (Div_start_SI),
+     .Sqrt_start_SI        (Sqrt_start_SI),
+     .Kill_SI              (Kill_SI),
+     .Precision_ctl_SI     (Precision_ctl_SI),
+     .Format_sel_SI        (Format_sel_SI),
+     // from preprocess_U0
+     .Start_SI             (Start_S),
+     .Special_case_SBI     (Special_case_SB),
+     .Special_case_dly_SBI (Special_case_dly_SB),
+     .Exp_a_DI             (Exp_a_D),
+     .Exp_b_DI             (Exp_b_D),
+     .Mant_a_DI            (Mant_a_D),
+     .Mant_b_DI            (Mant_b_D),
+     // towards fpu_norm_U0
+     .Div_enable_SO        (Div_enable_S),
+     .Sqrt_enable_SO       (Sqrt_enable_S),
+     .Full_precision_SO    (Full_precision_S),
+     .FP32_SO              (FP32_S),
+     .FP64_SO              (FP64_S),
+     .FP16_SO              (FP16_S),
+     .FP16ALT_SO           (FP16ALT_S),
+     .Exp_z_DO             (Exp_z_D),
+     .Mant_z_DO            (Mant_z_D),
+     // to the module ports
+     .Ready_SO             (Ready_SO),
+     .Done_SO              (Done_SO)
+    );
+
+  // norm_div_sqrt_mvp: has no clock or reset connection here; drives Result_DO and Fflags_SO.
+  norm_div_sqrt_mvp fpu_norm_U0
+    (
+     // from nrbd_nrsc_U0
+     .Mant_in_DI           (Mant_z_D),
+     .Exp_in_DI            (Exp_z_D),
+     .Div_enable_SI        (Div_enable_S),
+     .Sqrt_enable_SI       (Sqrt_enable_S),
+     .Full_precision_SI    (Full_precision_S),
+     .FP32_SI              (FP32_S),
+     .FP64_SI              (FP64_S),
+     .FP16_SI              (FP16_S),
+     .FP16ALT_SI           (FP16ALT_S),
+     // from preprocess_U0
+     .Sign_in_DI           (Sign_z_D),
+     .Inf_a_SI             (Inf_a_S),
+     .Inf_b_SI             (Inf_b_S),
+     .Zero_a_SI            (Zero_a_S),
+     .Zero_b_SI            (Zero_b_S),
+     .NaN_a_SI             (NaN_a_S),
+     .NaN_b_SI             (NaN_b_S),
+     .SNaN_SI              (SNaN_S),
+     .RM_SI                (RM_dly_S),
+     // to the module ports
+     .Result_DO            (Result_DO),
+     .Fflags_SO            (Fflags_SO)  //{NV,DZ,OF,UF,NX}
+    );
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
new file mode 100644
index 0000000000..0c645e6ebe
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li                  lile@iis.ee.ethz.ch                //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    12/01/2017                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    iteration_div_sqrt_mvp                                     //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    iteration unit for div and sqrt                            //
+//                                                                            //
+//                                                                            //
+// Revision:        03/14/2018                                                //
+//                  For div_sqrt_mvp                                          //
+////////////////////////////////////////////////////////////////////////////////
+
+module iteration_div_sqrt_mvp
+#(
+   parameter   WIDTH=25   // operand / sum width in bits
+)
+  (
+   // One adder step: Sum = A + B + carry-in, plus a 2-bit D_DI -> D_DO transform.
+   input logic [WIDTH-1:0]      A_DI,              // first addend
+   input logic [WIDTH-1:0]      B_DI,              // second addend
+   input logic                  Div_enable_SI,     // when set, the carry-in is forced to zero
+   input logic                  Div_start_dly_SI,  // not read inside this module
+   input logic                  Sqrt_enable_SI,    // when set (and Div_enable_SI clear), D_DI may raise the carry-in
+   input logic [1:0]            D_DI,              // source of D_DO and of the carry-in
+   // Results
+   output logic [1:0]           D_DO,              // {XNOR of both D_DI bits, inverted D_DI[0]}
+   output logic [WIDTH-1:0]     Sum_DO,            // low WIDTH bits of A_DI + B_DI + carry-in
+   output logic                 Carry_out_DO       // bit WIDTH (carry-out) of that same sum
+    );
+
+   logic                        d_nonzero;   // at least one bit of D_DI is set
+   logic                        carry_in;    // carry injected into the adder
+   logic [WIDTH:0]              sum_ext;     // adder result, one bit wider to keep the carry-out
+
+   assign D_DO         = {D_DI[1] ~^ D_DI[0], ~D_DI[0]};
+   assign d_nonzero    = |D_DI;
+   assign carry_in     = ~Div_enable_SI & Sqrt_enable_SI & d_nonzero;
+   assign sum_ext      = A_DI + B_DI + carry_in;
+   assign Carry_out_DO = sum_ext[WIDTH];
+   assign Sum_DO       = sum_ext[WIDTH-1:0];
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
new file mode 100644
index 0000000000..29ef52a24a
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
@@ -0,0 +1,484 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li    lile@iis.ee.ethz.ch                              //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    09/03/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    norm_div_sqrt_mvp.sv                                       //
+// Project Name:                                                              //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:    Floating point Normalizer/Rounding unit                    //
+//                 Since this module is designed as combinational logic,      //
+//                 arbitrary register stages can be added in the wrapper      //
+//                 module to meet different frequency targets.                //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan                     //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module norm_div_sqrt_mvp
+  (//Inputs
+   input logic [C_MANT_FP64+4:0]                Mant_in_DI,  // Include the needed 4-bit for rounding and hidden bit
+   input logic signed [C_EXP_FP64+1:0]          Exp_in_DI,   // pre-normalization exponent; its MSB is tested as the "negative" indicator
+   input logic                                  Sign_in_DI,  // result sign (forced to 0 on the NaN results)
+   input logic                                  Div_enable_SI,
+   input logic                                  Sqrt_enable_SI,
+   input logic                                  Inf_a_SI,
+   input logic                                  Inf_b_SI,
+   input logic                                  Zero_a_SI,
+   input logic                                  Zero_b_SI,
+   input logic                                  NaN_a_SI,
+   input logic                                  NaN_b_SI,
+   input logic                                  SNaN_SI,     // copied to the NV flag when either operand is NaN
+   input logic [C_RM-1:0]                       RM_SI,       // rounding mode, matched against the C_RM_* constants
+   input logic                                  Full_precision_SI, // 1: emit the rounded result; 0: emit the unrounded one and force NX
+   input logic                                  FP32_SI,     // format selects, tested in the order FP32, FP64, FP16, else FP16ALT
+   input logic                                  FP64_SI,
+   input logic                                  FP16_SI,
+   input logic                                  FP16ALT_SI,
+   //Outputs
+   output logic [C_EXP_FP64+C_MANT_FP64:0]      Result_DO,   // {sign, exponent, mantissa}; narrower formats are padded with leading ones
+   output logic [4:0]                           Fflags_SO //{NV,DZ,OF,UF,NX}
+   );
+   // Purely combinational. A priority chain first selects mantissa, exponent and flags;
+   // the mantissa is then rounded per RM_SI and packed into Result_DO for the chosen format.
+   logic                                        Sign_res_D;
+
+   logic                                        NV_OP_S;     // invalid operation      -> Fflags_SO[4]
+   logic                                        Exp_OF_S;    // overflow               -> Fflags_SO[3]
+   logic                                        Exp_UF_S;    // underflow              -> Fflags_SO[2]
+   logic                                        Div_Zero_S;  // division by zero       -> Fflags_SO[3] is OF; DZ is Fflags_SO[3+0]? no: DZ -> Fflags_SO[3]
+   logic                                        In_Exact_S;  // inexact                -> Fflags_SO[0]
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Normalization                                                           //
+   /////////////////////////////////////////////////////////////////////////////
+   logic [C_MANT_FP64:0]                        Mant_res_norm_D;
+   logic [C_EXP_FP64-1:0]                       Exp_res_norm_D;
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Right shift operations for negative exponents                           //
+   /////////////////////////////////////////////////////////////////////////////
+
+  logic  [C_EXP_FP64+1:0]                       Exp_Max_RS_FP64_D;
+  logic  [C_EXP_FP32+1:0]                       Exp_Max_RS_FP32_D;
+  logic  [C_EXP_FP16+1:0]                       Exp_Max_RS_FP16_D;
+  logic  [C_EXP_FP16ALT+1:0]                    Exp_Max_RS_FP16ALT_D;
+  // The MSB of each sum below is the carry out of the per-format add; it is tested in the negative-exponent branch.
+  assign Exp_Max_RS_FP64_D=Exp_in_DI[C_EXP_FP64:0]+C_MANT_FP64+1; // to check exponent after (C_MANT_FP64+1)-bit >> when Exp_in_DI is negative
+  assign Exp_Max_RS_FP32_D=Exp_in_DI[C_EXP_FP32:0]+C_MANT_FP32+1; // to check exponent after (C_MANT_FP32+1)-bit >> when Exp_in_DI is negative
+  assign Exp_Max_RS_FP16_D=Exp_in_DI[C_EXP_FP16:0]+C_MANT_FP16+1; // to check exponent after (C_MANT_FP16+1)-bit >> when Exp_in_DI is negative
+  assign Exp_Max_RS_FP16ALT_D=Exp_in_DI[C_EXP_FP16ALT:0]+C_MANT_FP16ALT+1; // to check exponent after (C_MANT_FP16ALT+1)-bit >> when Exp_in_DI is negative
+  logic  [C_EXP_FP64+1:0]                       Num_RS_D;
+  assign Num_RS_D=~Exp_in_DI+1+1;            // = 1 - Exp_in_DI: right shifts (RS) needed to form a denormal; only consumed when Exp_in_DI is negative
+  logic  [C_MANT_FP64:0]                        Mant_RS_D;
+  logic  [C_MANT_FP64+4:0]                      Mant_forsticky_D;
+  assign  {Mant_RS_D,Mant_forsticky_D} ={Mant_in_DI,{(C_MANT_FP64+1){1'b0}} } >>(Num_RS_D); // shifted mantissa on top, shifted-out bits below
+// Exp_in_DI - 1, used when the mantissa is taken one bit lower (0.1XX form)
+  logic [C_EXP_FP64+1:0]                        Exp_subOne_D;
+  assign Exp_subOne_D = Exp_in_DI -1;
+
+   //normalization
+   logic [1:0]                                  Mant_lower_D;
+   logic                                        Mant_sticky_bit_D;
+   logic [C_MANT_FP64+4:0]                      Mant_forround_D;
+
+   always_comb
+     begin
+       // Priority chain: the first matching case decides result mantissa/exponent/sign and the flags.
+       if(NaN_a_SI)  //  if a is NaN, return NaN
+         begin
+           Div_Zero_S=1'b0;
+           Exp_OF_S=1'b0;
+           Exp_UF_S=1'b0;
+           Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+           Exp_res_norm_D='1;
+           Mant_forround_D='0;
+           Sign_res_D=1'b0;
+           NV_OP_S = SNaN_SI;
+         end
+
+      else if(NaN_b_SI)   //if b is NaN, return NaN
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b0;
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+          Exp_res_norm_D='1;
+          Mant_forround_D='0;
+          Sign_res_D=1'b0;
+          NV_OP_S = SNaN_SI;
+        end
+
+      else if(Inf_a_SI)
+        begin
+          if(Div_enable_SI&&Inf_b_SI)                     //Inf/Inf, return NaN
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b0;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=1'b0;
+              NV_OP_S = 1'b1;
+            end
+          else if (Sqrt_enable_SI && Sign_in_DI) begin // catch sqrt(-inf)
+            Div_Zero_S=1'b0;
+            Exp_OF_S=1'b0;
+            Exp_UF_S=1'b0;
+            Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+            Exp_res_norm_D='1;
+            Mant_forround_D='0;
+            Sign_res_D=1'b0;
+            NV_OP_S = 1'b1;
+          end else begin // remaining cases with a == Inf: result is signed infinity
+            Div_Zero_S=1'b0;
+            Exp_OF_S=1'b1;   // NOTE(review): OF is raised for an infinite operand -- confirm intended
+            Exp_UF_S=1'b0;
+            Mant_res_norm_D= '0;
+            Exp_res_norm_D='1;
+            Mant_forround_D='0;
+            Sign_res_D=Sign_in_DI;
+            NV_OP_S = 1'b0;
+          end
+        end
+
+      else if(Div_enable_SI&&Inf_b_SI)   // division by infinity: result is signed zero
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b1;   // NOTE(review): OF is raised although the result is zero -- confirm intended
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D= '0;
+          Exp_res_norm_D='0;
+          Mant_forround_D='0;
+          Sign_res_D=Sign_in_DI;
+          NV_OP_S = 1'b0;
+        end
+
+     else if(Zero_a_SI)
+       begin
+         if(Div_enable_SI&&Zero_b_SI)   // 0/0, return NaN (NV and DZ are both raised here)
+           begin
+              Div_Zero_S=1'b1;
+              Exp_OF_S=1'b0;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=1'b0;
+              NV_OP_S = 1'b1;
+           end
+         else                           // a is zero, b is not (or sqrt): signed zero
+           begin
+             Div_Zero_S=1'b0;
+             Exp_OF_S=1'b0;
+             Exp_UF_S=1'b0;
+             Mant_res_norm_D='0;
+             Exp_res_norm_D='0;
+             Mant_forround_D='0;
+             Sign_res_D=Sign_in_DI;
+             NV_OP_S = 1'b0;
+           end
+       end
+
+     else  if(Div_enable_SI&&(Zero_b_SI))  //div Zero: signed infinity with DZ
+       begin
+         Div_Zero_S=1'b1;
+         Exp_OF_S=1'b0;
+         Exp_UF_S=1'b0;
+         Mant_res_norm_D='0;
+         Exp_res_norm_D='1;
+         Mant_forround_D='0;
+         Sign_res_D=Sign_in_DI;
+         NV_OP_S = 1'b0;
+       end
+
+      else if(Sign_in_DI&&Sqrt_enable_SI)   //sqrt(-a), return NaN with NV
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b0;
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
+          Exp_res_norm_D='1;
+          Mant_forround_D='0;
+          Sign_res_D=1'b0;
+          NV_OP_S = 1'b1;
+        end
+
+     else if((Exp_in_DI[C_EXP_FP64:0]=='0))
+       begin
+         if(Mant_in_DI!='0)       //Exp=0, Mant!=0, it is denormal
+           begin
+             Div_Zero_S=1'b0;
+             Exp_OF_S=1'b0;
+             Exp_UF_S=1'b1;
+             Mant_res_norm_D={1'b0,Mant_in_DI[C_MANT_FP64+4:5]};   // mantissa moved one bit to the right
+             Exp_res_norm_D='0;
+             Mant_forround_D={Mant_in_DI[4:0],{(C_MANT_FP64){1'b0}} };
+             Sign_res_D=Sign_in_DI;
+             NV_OP_S = 1'b0;
+           end
+         else                 // Zero
+           begin
+             Div_Zero_S=1'b0;
+             Exp_OF_S=1'b0;
+             Exp_UF_S=1'b0;
+             Mant_res_norm_D='0;
+             Exp_res_norm_D='0;
+             Mant_forround_D='0;
+             Sign_res_D=Sign_in_DI;
+             NV_OP_S = 1'b0;
+           end
+        end
+
+      else if((Exp_in_DI[C_EXP_FP64:0]==C_EXP_ONE_FP64)&&(~Mant_in_DI[C_MANT_FP64+4]))  //denormal
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b0;
+          Exp_UF_S=1'b1;
+          Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+4:4];
+          Exp_res_norm_D='0;
+          Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
+          Sign_res_D=Sign_in_DI;
+          NV_OP_S = 1'b0;
+        end
+
+      else if(Exp_in_DI[C_EXP_FP64+1])    //minus              //consider format
+        begin
+          if(((~Exp_Max_RS_FP32_D[C_EXP_FP32+1])&&FP32_SI) | ((~Exp_Max_RS_FP64_D[C_EXP_FP64+1])&&FP64_SI) | ((~Exp_Max_RS_FP16_D[C_EXP_FP16+1])&&FP16_SI) | ((~Exp_Max_RS_FP16ALT_D[C_EXP_FP16ALT+1])&&FP16ALT_SI) )    //EXP<0 even after the maximum RS
+            begin   // NOTE(review): this branch returns zero and raises OF (not UF) -- confirm intended
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b1;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D='0;
+              Exp_res_norm_D='0;
+              Mant_forround_D='0;
+              Sign_res_D=Sign_in_DI;
+              NV_OP_S = 1'b0;
+            end
+          else                    //denormal
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b0;
+              Exp_UF_S=1'b1;
+              Mant_res_norm_D={Mant_RS_D[C_MANT_FP64:0]};
+              Exp_res_norm_D='0;
+              Mant_forround_D={Mant_forsticky_D[C_MANT_FP64+4:0]};   // bits shifted out below Mant_RS_D
+              Sign_res_D=Sign_in_DI;
+              NV_OP_S = 1'b0;
+            end
+        end
+
+      else if( (Exp_in_DI[C_EXP_FP32]&&FP32_SI) | (Exp_in_DI[C_EXP_FP64]&&FP64_SI) | (Exp_in_DI[C_EXP_FP16]&&FP16_SI) | (Exp_in_DI[C_EXP_FP16ALT]&&FP16ALT_SI) )            //OF
+        begin
+          Div_Zero_S=1'b0;
+          Exp_OF_S=1'b1;
+          Exp_UF_S=1'b0;
+          Mant_res_norm_D='0;
+          Exp_res_norm_D='1;
+          Mant_forround_D='0;
+          Sign_res_D=Sign_in_DI;
+          NV_OP_S = 1'b0;
+        end
+
+      else if( ((Exp_in_DI[C_EXP_FP32-1:0]=='1)&&FP32_SI) | ((Exp_in_DI[C_EXP_FP64-1:0]=='1)&&FP64_SI) |  ((Exp_in_DI[C_EXP_FP16-1:0]=='1)&&FP16_SI) | ((Exp_in_DI[C_EXP_FP16ALT-1:0]=='1)&&FP16ALT_SI) )//255
+        begin
+          if(~Mant_in_DI[C_MANT_FP64+4]) // MSB=0
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b0;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3];
+              Exp_res_norm_D=Exp_subOne_D;
+              Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
+              Sign_res_D=Sign_in_DI;
+              NV_OP_S = 1'b0;
+            end
+          else if(Mant_in_DI!='0)         // MSB=1 (always true here): overflow to infinity
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b1;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D= '0;
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=Sign_in_DI;
+              NV_OP_S = 1'b0;
+            end
+          else                         //infinity (same outputs as the branch above)
+            begin
+              Div_Zero_S=1'b0;
+              Exp_OF_S=1'b1;
+              Exp_UF_S=1'b0;
+              Mant_res_norm_D= '0;
+              Exp_res_norm_D='1;
+              Mant_forround_D='0;
+              Sign_res_D=Sign_in_DI;
+              NV_OP_S = 1'b0;
+            end
+         end
+
+      else if(Mant_in_DI[C_MANT_FP64+4])  //normal numbers with 1.XXX
+        begin
+           Div_Zero_S=1'b0;
+           Exp_OF_S=1'b0;
+           Exp_UF_S=1'b0;
+           Mant_res_norm_D= Mant_in_DI[C_MANT_FP64+4:4];
+           Exp_res_norm_D=Exp_in_DI[C_EXP_FP64-1:0];
+           Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
+           Sign_res_D=Sign_in_DI;
+           NV_OP_S = 1'b0;
+        end
+
+      else                                   //normal numbers with 0.1XX
+         begin
+           Div_Zero_S=1'b0;
+           Exp_OF_S=1'b0;
+           Exp_UF_S=1'b0;
+           Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3];
+           Exp_res_norm_D=Exp_subOne_D;
+           Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
+           Sign_res_D=Sign_in_DI;
+           NV_OP_S = 1'b0;
+         end
+
+     end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Rounding enable only for full precision (Full_precision_SI==1'b1)       //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic [C_MANT_FP64:0]                   Mant_upper_D;         // kept mantissa bits, low bits zeroed for narrow formats
+   logic [C_MANT_FP64+1:0]                 Mant_upperRounded_D;  // one bit wider to catch the rounding carry
+   logic                                   Mant_roundUp_S;
+   logic                                   Mant_rounded_S;       // any discarded bit is set
+
+  always_comb //determine which bits for Mant_lower_D and Mant_sticky_bit_D
+    begin
+      if(FP32_SI)
+        begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-1:C_MANT_FP64-C_MANT_FP32-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-3:0];
+        end
+      else if(FP64_SI)
+        begin
+          Mant_upper_D = Mant_res_norm_D[C_MANT_FP64:0];
+          Mant_lower_D = Mant_forround_D[C_MANT_FP64+4:C_MANT_FP64+3];
+          Mant_sticky_bit_D = | Mant_forround_D[C_MANT_FP64+3:0];
+        end
+      else if(FP16_SI)
+        begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-1:C_MANT_FP64-C_MANT_FP16-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-3:0];   // all discarded bits, as for FP32
+        end
+      else  //FP16ALT
+      begin
+          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
+          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-1:C_MANT_FP64-C_MANT_FP16ALT-2];
+          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-3:0];   // all discarded bits, as for FP32
+      end
+    end
+
+   assign Mant_rounded_S = (|(Mant_lower_D))| Mant_sticky_bit_D;
+   // The FP16/FP16ALT sticky reductions run down to bit 0 (previously they stopped at bit 30):
+   // the denormal path right-shifts the mantissa by Num_RS_D, so set bits can land below
+   // bit 30 and must still count as discarded. Assumes unused low mantissa bits are zero
+   // for the narrow formats -- TODO confirm against the producer of Mant_in_DI.
+   always_comb //determine whether to round up or not
+     begin
+        Mant_roundUp_S = 1'b0;
+        case (RM_SI)
+          C_RM_NEAREST :   // round up above the half-way point, or at it when the kept LSB is 1 (ties to even)
+            Mant_roundUp_S = Mant_lower_D[1] && ((Mant_lower_D[0] | Mant_sticky_bit_D )| ( (FP32_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP32]) | (FP64_SI&&Mant_upper_D[0]) | (FP16_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16]) | (FP16ALT_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16ALT]) ) );
+          C_RM_TRUNC   :
+            Mant_roundUp_S = 0;
+          C_RM_PLUSINF :   // round the magnitude up only for positive inexact results
+            Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI;
+          C_RM_MINUSINF:   // round the magnitude up only for negative inexact results
+            Mant_roundUp_S = Mant_rounded_S & Sign_in_DI;
+          default          :
+            Mant_roundUp_S = 0;
+        endcase // case (RM_SI)
+     end // always_comb begin
+
+  logic                                 Mant_renorm_S;         // rounding carried out of the mantissa
+  logic  [C_MANT_FP64:0]                Mant_roundUp_Vector_S; // for all the formats
+  // One-hot increment placed at the LSB position of the selected format (bit 0 for FP64).
+  assign Mant_roundUp_Vector_S={7'h0,(FP16ALT_SI&&Mant_roundUp_S),2'h0,(FP16_SI&&Mant_roundUp_S),12'h0,(FP32_SI&&Mant_roundUp_S),28'h0,(FP64_SI&&Mant_roundUp_S)};
+
+
+  assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_Vector_S;
+  assign Mant_renorm_S       = Mant_upperRounded_D[C_MANT_FP64+1];
+
+  /////////////////////////////////////////////////////////////////////////////
+  // Renormalization for Rounding                                           //
+  /////////////////////////////////////////////////////////////////////////////
+  logic [C_MANT_FP64-1:0]               Mant_res_round_D;
+  logic [C_EXP_FP64-1:0]                Exp_res_round_D;
+
+  // On a rounding carry, take the mantissa one bit higher and increment the exponent.
+  assign Mant_res_round_D = (Mant_renorm_S)?Mant_upperRounded_D[C_MANT_FP64:1]:Mant_upperRounded_D[C_MANT_FP64-1:0]; // including the process of the hidden bit
+  assign Exp_res_round_D  = Exp_res_norm_D+Mant_renorm_S;
+
+  /////////////////////////////////////////////////////////////////////////////
+  //  Output Assignments                                                     //
+  /////////////////////////////////////////////////////////////////////////////
+  logic [C_MANT_FP64-1:0]               Mant_before_format_ctl_D;
+  logic [C_EXP_FP64-1:0]                Exp_before_format_ctl_D;
+  assign Mant_before_format_ctl_D = Full_precision_SI ? Mant_res_round_D : Mant_res_norm_D[C_MANT_FP64-1:0]; // explicit slice: the top bit of Mant_res_norm_D is dropped
+  assign Exp_before_format_ctl_D = Full_precision_SI ? Exp_res_round_D : Exp_res_norm_D;
+
+  always_comb    //NaN Boxing
+    begin  // narrower formats occupy the low bits; the bits above them are filled with ones
+      if(FP32_SI)
+          begin
+            Result_DO ={32'hffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP32-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP32]};
+          end
+       else if(FP64_SI)
+          begin
+            Result_DO ={Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP64-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:0]};
+          end
+      else if(FP16_SI)
+          begin
+            Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16]};
+          end
+      else
+          begin
+            Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16ALT-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16ALT]};
+          end
+    end
+
+assign In_Exact_S = (~Full_precision_SI) | Mant_rounded_S; // NX is forced whenever Full_precision_SI is 0
+assign Fflags_SO = {NV_OP_S,Div_Zero_S,Exp_OF_S,Exp_UF_S,In_Exact_S}; //{NV,DZ,OF,UF,NX}
+
+endmodule // norm_div_sqrt_mvp
diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
new file mode 100644
index 0000000000..62bd147f61
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
@@ -0,0 +1,104 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:      Lei Li      lile@iis.ee.ethz.ch                            //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    10/04/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    nrbd_nrsc_mvp.sv                                           //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:   non-restoring binary divider / square root                  //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module nrbd_nrsc_mvp
+  // Structural shell: every port is wired unchanged to a single control_mvp instance.
+  (
+   input logic                                 Clk_CI,
+   input logic                                 Rst_RBI,
+   input logic                                 Div_start_SI,
+   input logic                                 Sqrt_start_SI,
+   input logic                                 Start_SI,
+   input logic                                 Kill_SI,
+   input logic                                 Special_case_SBI,
+   input logic                                 Special_case_dly_SBI,
+   input logic [C_PC-1:0]                      Precision_ctl_SI,
+   input logic [1:0]                           Format_sel_SI,
+   input logic [C_MANT_FP64:0]                 Mant_a_DI,   // drives control_U0.Numerator_DI
+   input logic [C_MANT_FP64:0]                 Mant_b_DI,   // drives control_U0.Denominator_DI
+   input logic [C_EXP_FP64:0]                  Exp_a_DI,    // drives control_U0.Exp_num_DI
+   input logic [C_EXP_FP64:0]                  Exp_b_DI,    // drives control_U0.Exp_den_DI
+   // Outputs, all driven directly by control_U0
+   output logic                                Div_enable_SO,
+   output logic                                Sqrt_enable_SO,
+   // Outputs below share their port name with the control_U0 port that drives them
+   output logic                                Full_precision_SO,
+   output logic                                FP32_SO,
+   output logic                                FP64_SO,
+   output logic                                FP16_SO,
+   output logic                                FP16ALT_SO,
+   output logic                                Ready_SO,
+   output logic                                Done_SO,
+   output logic [C_MANT_FP64+4:0]              Mant_z_DO,   // from control_U0.Mant_result_prenorm_DO
+   output logic [C_EXP_FP64+1:0]               Exp_z_DO     // from control_U0.Exp_result_prenorm_DO
+  );
+
+  // Delayed-start outputs of control_U0; connected here but not used by this module.
+  logic                                        Div_start_dly_S;
+  logic                                        Sqrt_start_dly_S;
+
+  control_mvp control_U0 (
+    .Clk_CI                 ( Clk_CI               ),
+    .Rst_RBI                ( Rst_RBI              ),
+    .Div_start_SI           ( Div_start_SI         ),
+    .Sqrt_start_SI          ( Sqrt_start_SI        ),
+    .Start_SI               ( Start_SI             ),
+    .Kill_SI                ( Kill_SI              ),
+    .Special_case_SBI       ( Special_case_SBI     ),
+    .Special_case_dly_SBI   ( Special_case_dly_SBI ),
+    .Precision_ctl_SI       ( Precision_ctl_SI     ),
+    .Format_sel_SI          ( Format_sel_SI        ),
+    .Numerator_DI           ( Mant_a_DI            ),
+    .Exp_num_DI             ( Exp_a_DI             ),
+    .Denominator_DI         ( Mant_b_DI            ),
+    .Exp_den_DI             ( Exp_b_DI             ),
+    .Div_start_dly_SO       ( Div_start_dly_S      ),
+    .Sqrt_start_dly_SO      ( Sqrt_start_dly_S     ),
+    .Div_enable_SO          ( Div_enable_SO        ),
+    .Sqrt_enable_SO         ( Sqrt_enable_SO       ),
+    .Full_precision_SO      ( Full_precision_SO    ),
+    .FP32_SO                ( FP32_SO              ),
+    .FP64_SO                ( FP64_SO              ),
+    .FP16_SO                ( FP16_SO              ),
+    .FP16ALT_SO             ( FP16ALT_SO           ),
+    .Ready_SO               ( Ready_SO             ),
+    .Done_SO                ( Done_SO              ),
+    .Mant_result_prenorm_DO ( Mant_z_DO            ),
+    .Exp_result_prenorm_DO  ( Exp_z_DO             )
+  );
+
+
+
+endmodule
diff --git a/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
new file mode 100644
index 0000000000..9e0d25f38f
--- /dev/null
+++ b/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
@@ -0,0 +1,425 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////
+// Company:        IIS @ ETHZ - Federal Institute of Technology               //
+//                                                                            //
+// Engineers:                Lei Li  //lile@iis.ee.ethz.ch                    //
+//                                                                            //
+// Additional contributions by:                                               //
+//                                                                            //
+//                                                                            //
+//                                                                            //
+// Create Date:    01/03/2018                                                 //
+// Design Name:    FPU                                                        //
+// Module Name:    preprocess_mvp.sv                                          //
+// Project Name:   Private FPU                                                //
+// Language:       SystemVerilog                                              //
+//                                                                            //
+// Description:           decode and data preparation                         //
+//                                                                            //
+// Revision Date:  12/04/2018                                                 //
+//                 Lei Li                                                     //
+//                 To address some requirements by Stefan and add low power   //
+//                 control for special cases                                  //
+//                                                                            //
+//                                                                            //
+////////////////////////////////////////////////////////////////////////////////
+
+import defs_div_sqrt_mvp::*;
+
+module preprocess_mvp  // Decodes operands, flags special values and pre-normalizes mantissa/exponent
+  (
+   input logic                   Clk_CI,         // Clock; registers update on the rising edge
+   input logic                   Rst_RBI,        // Asynchronous reset, active low
+   input logic                   Div_start_SI,   // Division start request
+   input logic                   Sqrt_start_SI,  // Square-root start request
+   input logic                   Ready_SI,       // New operands are captured only while this is high
+   // Input operands (narrower formats are read from the low-order bits)
+   input logic [C_OP_FP64-1:0]   Operand_a_DI,
+   input logic [C_OP_FP64-1:0]   Operand_b_DI,
+   input logic [C_RM-1:0]        RM_SI,    // Rounding mode
+   input logic [C_FS-1:0]        Format_sel_SI,  // Format selection: 00=FP32, 01=FP64, 10=FP16, 11=FP16ALT
+
+   // to control
+   output logic                  Start_SO,        // Div_start_SI | Sqrt_start_SI, not gated by Ready_SI
+   output logic [C_EXP_FP64:0]   Exp_a_DO_norm,   // Registered exponent of a after normalization
+   output logic [C_EXP_FP64:0]   Exp_b_DO_norm,   // Registered exponent of b after normalization
+   output logic [C_MANT_FP64:0]  Mant_a_DO_norm,  // Registered mantissa of a (hidden bit included), shifted left by the LOD_Ua count
+   output logic [C_MANT_FP64:0]  Mant_b_DO_norm,  // Registered mantissa of b (hidden bit included), shifted left by the LOD_Ub count
+
+   output logic [C_RM-1:0]       RM_dly_SO,       // Rounding mode captured at operation start
+
+   output logic                  Sign_z_DO,       // Registered result sign
+   output logic                  Inf_a_SO,        // Registered flag: a has the Inf/NaN exponent and a zero fraction
+   output logic                  Inf_b_SO,        // Registered flag: b has the Inf/NaN exponent and a zero fraction
+   output logic                  Zero_a_SO,       // Registered flag: a has a zero exponent and a zero fraction
+   output logic                  Zero_b_SO,       // Registered flag: b has a zero exponent and a zero fraction
+   output logic                  NaN_a_SO,        // Registered flag: a has the Inf/NaN exponent and a non-zero fraction
+   output logic                  NaN_b_SO,        // Registered flag: b has the Inf/NaN exponent and a non-zero fraction
+   output logic                  SNaN_SO,         // Registered flag: a or b is a NaN whose fraction MSB is clear
+   output logic                  Special_case_SBO,     // 1 only in a start cycle with no special-case operand
+   output logic                  Special_case_dly_SBO  // Special_case_SBO captured at start and held until the next start
+   );
+
+   // Hidden bits (1 when the exponent field is non-zero)
+   logic                         Hb_a_D;
+   logic                         Hb_b_D;
+
+   logic [C_EXP_FP64-1:0]        Exp_a_D;
+   logic [C_EXP_FP64-1:0]        Exp_b_D;
+   logic [C_MANT_FP64-1:0]       Mant_a_NonH_D;
+   logic [C_MANT_FP64-1:0]       Mant_b_NonH_D;
+   logic [C_MANT_FP64:0]         Mant_a_D;
+   logic [C_MANT_FP64:0]         Mant_b_D;
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Disassemble operands
+   /////////////////////////////////////////////////////////////////////////////
+   logic                      Sign_a_D,Sign_b_D;
+   logic                      Start_S;
+
+     always_comb
+       begin
+         case(Format_sel_SI)
+           2'b00:  // FP32
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP32-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP32-1];
+               Exp_a_D  = {3'h0, Operand_a_DI[C_OP_FP32-2:C_MANT_FP32]};  // zero-extend to the FP64 exponent width
+               Exp_b_D  = {3'h0, Operand_b_DI[C_OP_FP32-2:C_MANT_FP32]};
+               Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP32-1:0],29'h0};  // left-align the fraction, pad with zeros
+               Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP32-1:0],29'h0};
+             end
+           2'b01:  // FP64
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP64-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP64-1];
+               Exp_a_D  = Operand_a_DI[C_OP_FP64-2:C_MANT_FP64];
+               Exp_b_D  = Operand_b_DI[C_OP_FP64-2:C_MANT_FP64];
+               Mant_a_NonH_D = Operand_a_DI[C_MANT_FP64-1:0];
+               Mant_b_NonH_D = Operand_b_DI[C_MANT_FP64-1:0];
+             end
+           2'b10:  // FP16
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP16-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP16-1];
+               Exp_a_D  = {6'h00, Operand_a_DI[C_OP_FP16-2:C_MANT_FP16]};
+               Exp_b_D  = {6'h00, Operand_b_DI[C_OP_FP16-2:C_MANT_FP16]};
+               Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16-1:0],42'h0};
+               Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16-1:0],42'h0};
+             end
+           2'b11:  // FP16ALT
+             begin
+               Sign_a_D = Operand_a_DI[C_OP_FP16ALT-1];
+               Sign_b_D = Operand_b_DI[C_OP_FP16ALT-1];
+               Exp_a_D  = {3'h0, Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
+               Exp_b_D  = {3'h0, Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
+               Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16ALT-1:0],45'h0};
+               Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16ALT-1:0],45'h0};
+             end
+           endcase
+       end
+
+   // Full mantissa = hidden bit followed by the left-aligned fraction
+   assign Mant_a_D = {Hb_a_D,Mant_a_NonH_D};
+   assign Mant_b_D = {Hb_b_D,Mant_b_NonH_D};
+
+   assign Hb_a_D = | Exp_a_D; // hidden bit: 1 unless the exponent field is all zeros
+   assign Hb_b_D = | Exp_b_D; // hidden bit: 1 unless the exponent field is all zeros
+
+   assign Start_S= Div_start_SI | Sqrt_start_SI;
+
+
+
+   /////////////////////////////////////////////////////////////////////////////
+   // preliminary checks for infinite/zero/NaN operands                       //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic               Mant_a_prenorm_zero_S;
+   logic               Mant_b_prenorm_zero_S;
+
+   logic               Exp_a_prenorm_zero_S;
+   logic               Exp_b_prenorm_zero_S;
+   assign Exp_a_prenorm_zero_S = ~Hb_a_D;
+   assign Exp_b_prenorm_zero_S = ~Hb_b_D;
+
+   logic               Exp_a_prenorm_Inf_NaN_S;
+   logic               Exp_b_prenorm_Inf_NaN_S;
+
+   logic               Mant_a_prenorm_QNaN_S;
+   logic               Mant_a_prenorm_SNaN_S;
+   logic               Mant_b_prenorm_QNaN_S;
+   logic               Mant_b_prenorm_SNaN_S;
+   // Fraction pattern checks; the *_QNaN_S signals are not referenced elsewhere in this module
+   assign Mant_a_prenorm_QNaN_S=Mant_a_NonH_D[C_MANT_FP64-1]&&(~(|Mant_a_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_a_prenorm_SNaN_S=(~Mant_a_NonH_D[C_MANT_FP64-1])&&((|Mant_a_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_b_prenorm_QNaN_S=Mant_b_NonH_D[C_MANT_FP64-1]&&(~(|Mant_b_NonH_D[C_MANT_FP64-2:0]));
+   assign Mant_b_prenorm_SNaN_S=(~Mant_b_NonH_D[C_MANT_FP64-1])&&((|Mant_b_NonH_D[C_MANT_FP64-2:0]));
+   // Per-format tests on the raw operand fields: fraction equal to zero, exponent equal to C_EXP_INF_*
+     always_comb
+       begin
+         case(Format_sel_SI)
+           2'b00:  // FP32
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
+             end
+           2'b01:  // FP64
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
+             end
+           2'b10:  // FP16
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
+             end
+           2'b11:  // FP16ALT
+             begin
+               Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
+               Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
+               Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
+               Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
+             end
+           endcase
+       end
+
+
+
+   // Special-value flags: *_SN is the next-state value, *_SP the registered value
+   logic               Zero_a_SN,Zero_a_SP;
+   logic               Zero_b_SN,Zero_b_SP;
+   logic               Inf_a_SN,Inf_a_SP;
+   logic               Inf_b_SN,Inf_b_SP;
+   logic               NaN_a_SN,NaN_a_SP;
+   logic               NaN_b_SN,NaN_b_SP;
+   logic               SNaN_SN,SNaN_SP;
+   // Flags are re-evaluated when an operation starts (Start_S && Ready_SI); otherwise they hold
+   assign Zero_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_zero_S&&Mant_a_prenorm_zero_S):Zero_a_SP;
+   assign Zero_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_zero_S&&Mant_b_prenorm_zero_S):Zero_b_SP;
+   assign Inf_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&Mant_a_prenorm_zero_S):Inf_a_SP;
+   assign Inf_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&Mant_b_prenorm_zero_S):Inf_b_SP;
+   assign NaN_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&(~Mant_a_prenorm_zero_S)):NaN_a_SP;
+   assign NaN_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&(~Mant_b_prenorm_zero_S)):NaN_b_SP;
+   assign SNaN_SN = (Start_S&&Ready_SI) ? ((Mant_a_prenorm_SNaN_S&&NaN_a_SN) | (Mant_b_prenorm_SNaN_S&&NaN_b_SN)) : SNaN_SP;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Zero_a_SP <='0;
+            Zero_b_SP <='0;
+            Inf_a_SP <='0;
+            Inf_b_SP <='0;
+            NaN_a_SP <='0;
+            NaN_b_SP <='0;
+            SNaN_SP <= '0;
+          end
+        else
+         begin
+           Inf_a_SP <=Inf_a_SN;
+           Inf_b_SP <=Inf_b_SN;
+           Zero_a_SP <=Zero_a_SN;
+           Zero_b_SP <=Zero_b_SN;
+           NaN_a_SP <=NaN_a_SN;
+           NaN_b_SP <=NaN_b_SN;
+           SNaN_SP <= SNaN_SN;
+         end
+      end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Low power control
+   /////////////////////////////////////////////////////////////////////////////
+   // Division: any Zero/Inf/NaN flag on a or b is special. Square root: Zero/Inf/NaN on a, or a negative sign.
+   assign Special_case_SBO=(~{(Div_start_SI)?(Zero_a_SN | Zero_b_SN |  Inf_a_SN | Inf_b_SN | NaN_a_SN | NaN_b_SN): (Zero_a_SN | Inf_a_SN | NaN_a_SN | Sign_a_D) })&&(Start_S&&Ready_SI);
+
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+          begin
+            Special_case_dly_SBO <= '0;
+          end
+       else if((Start_S&&Ready_SI))  // capture at operation start
+         begin
+            Special_case_dly_SBO <= Special_case_SBO;
+         end
+       else if(Special_case_dly_SBO)  // otherwise keep the stored value (1 stays 1)
+         begin
+         Special_case_dly_SBO <= 1'b1;
+         end
+      else  // ... and 0 stays 0
+         begin
+            Special_case_dly_SBO <= '0;
+         end
+    end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Delay sign for normalization and round                                  //
+   /////////////////////////////////////////////////////////////////////////////
+
+   logic                   Sign_z_DN;
+   logic                   Sign_z_DP;
+
+   always_comb
+     begin
+       if(Div_start_SI&&Ready_SI)
+           Sign_z_DN = Sign_a_D ^ Sign_b_D;  // division: XOR of the operand signs
+       else if(Sqrt_start_SI&&Ready_SI)
+           Sign_z_DN = Sign_a_D;  // square root: sign of a
+       else
+           Sign_z_DN = Sign_z_DP;  // no new operation: hold
+    end
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+          begin
+            Sign_z_DP <= '0;
+          end
+       else
+         begin
+            Sign_z_DP <= Sign_z_DN;
+         end
+    end
+   // Rounding mode is captured at operation start and held otherwise
+   logic [C_RM-1:0]                  RM_DN;
+   logic [C_RM-1:0]                  RM_DP;
+
+   always_comb
+     begin
+       if(Start_S&&Ready_SI)
+           RM_DN = RM_SI;
+       else
+           RM_DN = RM_DP;
+    end
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+       if(~Rst_RBI)
+          begin
+            RM_DP <= '0;
+          end
+       else
+         begin
+            RM_DP <= RM_DN;
+         end
+    end
+   assign RM_dly_SO = RM_DP;
+
+   logic [5:0]                  Mant_leadingOne_a, Mant_leadingOne_b;
+   logic                        Mant_zero_S_a,Mant_zero_S_b;
+   // lzc is defined outside this file; MODE=1 presumably selects leading-zero counting - TODO confirm
+  lzc #(
+    .WIDTH ( C_MANT_FP64+1 ),
+    .MODE  ( 1             )
+  ) LOD_Ua (
+    .in_i    ( Mant_a_D          ),
+    .cnt_o   ( Mant_leadingOne_a ),  // used as the left-shift amount for Mant_a_D
+    .empty_o ( Mant_zero_S_a     )   // not referenced elsewhere in this module
+  );
+
+   logic [C_MANT_FP64:0]            Mant_a_norm_DN,Mant_a_norm_DP;
+
+   assign  Mant_a_norm_DN = ((Start_S&&Ready_SI))?(Mant_a_D<<(Mant_leadingOne_a)):Mant_a_norm_DP;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Mant_a_norm_DP <= '0;
+          end
+        else
+          begin
+            Mant_a_norm_DP<=Mant_a_norm_DN;
+          end
+     end
+
+   logic [C_EXP_FP64:0]            Exp_a_norm_DN,Exp_a_norm_DP;
+   assign  Exp_a_norm_DN = ((Start_S&&Ready_SI))?(Exp_a_D-Mant_leadingOne_a+(|Mant_leadingOne_a)):Exp_a_norm_DP;  // Denormal handling: subtract the shift amount, add 1 when a shift occurred
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Exp_a_norm_DP <= '0;
+          end
+        else
+          begin
+            Exp_a_norm_DP<=Exp_a_norm_DN;
+          end
+     end
+
+  lzc #(
+    .WIDTH ( C_MANT_FP64+1 ),
+    .MODE  ( 1             )
+  ) LOD_Ub (
+    .in_i    ( Mant_b_D          ),
+    .cnt_o   ( Mant_leadingOne_b ),  // used as the left-shift amount for Mant_b_D
+    .empty_o ( Mant_zero_S_b     )   // not referenced elsewhere in this module
+  );
+
+
+   logic [C_MANT_FP64:0]            Mant_b_norm_DN,Mant_b_norm_DP;
+
+   assign  Mant_b_norm_DN = ((Start_S&&Ready_SI))?(Mant_b_D<<(Mant_leadingOne_b)):Mant_b_norm_DP;
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Mant_b_norm_DP <= '0;
+          end
+        else
+          begin
+            Mant_b_norm_DP<=Mant_b_norm_DN;
+          end
+     end
+
+   logic [C_EXP_FP64:0]            Exp_b_norm_DN,Exp_b_norm_DP;
+   assign  Exp_b_norm_DN = ((Start_S&&Ready_SI))?(Exp_b_D-Mant_leadingOne_b+(|Mant_leadingOne_b)):Exp_b_norm_DP; // Denormal handling: subtract the shift amount, add 1 when a shift occurred
+
+   always_ff @(posedge Clk_CI, negedge Rst_RBI)
+     begin
+        if(~Rst_RBI)
+          begin
+            Exp_b_norm_DP <= '0;
+          end
+        else
+          begin
+            Exp_b_norm_DP<=Exp_b_norm_DN;
+          end
+     end
+
+   /////////////////////////////////////////////////////////////////////////////
+   // Output assignments                                                      //
+   /////////////////////////////////////////////////////////////////////////////
+
+   assign Start_SO=Start_S;
+   assign Exp_a_DO_norm=Exp_a_norm_DP;
+   assign Exp_b_DO_norm=Exp_b_norm_DP;
+   assign Mant_a_DO_norm=Mant_a_norm_DP;
+   assign Mant_b_DO_norm=Mant_b_norm_DP;
+   assign Sign_z_DO=Sign_z_DP;
+   assign Inf_a_SO=Inf_a_SP;
+   assign Inf_b_SO=Inf_b_SP;
+   assign Zero_a_SO=Zero_a_SP;
+   assign Zero_b_SO=Zero_b_SP;
+   assign NaN_a_SO=NaN_a_SP;
+   assign NaN_b_SO=NaN_b_SP;
+   assign SNaN_SO=SNaN_SP;
+
+endmodule
diff --git a/vendor/pulp-platform_common_cells.lock.hjson b/vendor/pulp-platform_common_cells.lock.hjson
new file mode 100644
index 0000000000..d50d829b6e
--- /dev/null
+++ b/vendor/pulp-platform_common_cells.lock.hjson
@@ -0,0 +1,14 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// This file is generated by the util/vendor script. Please do not modify it
+// manually.
+
+{
+  upstream:
+  {
+    url: https://github.com/pulp-platform/common_cells.git
+    rev: dc555643226419b7a602f0aa39d449545ea4c1f2
+  }
+}
diff --git a/vendor/pulp-platform_common_cells.vendor.hjson b/vendor/pulp-platform_common_cells.vendor.hjson
new file mode 100644
index 0000000000..38ed41bff5
--- /dev/null
+++ b/vendor/pulp-platform_common_cells.vendor.hjson
@@ -0,0 +1,40 @@
+// -*- coding: utf-8 -*-
+// Copyright (C) 2022 Thales DIS France SAS
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com)
+
+{
+    // Name of the project
+    name: "pulp_common_cells",
+
+    // Target directory: relative to the directory containing this file.
+    target_dir: "pulp-platform/common_cells",
+
+    // Upstream repository
+    upstream: {
+        // URL
+        url: "https://github.com/pulp-platform/common_cells.git",
+        // revision
+        rev: "v1.23.0",
+    }
+
+    //Patch dir for local changes
+    patch_dir: "patches/pulp-platform/common_cells",
+
+    // Exclusions from upstream content
+    exclude_from_upstream: [
+        ".github",
+        ".gitlab-ci.yml",
+        ".travis.yml",
+        "Bender.yml",
+        "ci",
+        "common_cells.core",
+        "formal",
+        "ips_list.yml",
+        "lint",
+        "Makefile",
+        "src_files.yml",
+        "test",
+    ]
+}
+
diff --git a/vendor/pulp-platform_common_cells_fpu.lock.hjson b/vendor/pulp-platform_common_cells_fpu.lock.hjson
new file mode 100644
index 0000000000..423b594804
--- /dev/null
+++ b/vendor/pulp-platform_common_cells_fpu.lock.hjson
@@ -0,0 +1,14 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// This file is generated by the util/vendor script. Please do not modify it
+// manually.
+
+{
+  upstream:
+  {
+    url: https://github.com/pulp-platform/common_cells.git
+    rev: 790f2385c01c83022474eede55809666209216e3
+  }
+}
diff --git a/vendor/pulp-platform_common_cells_fpu.vendor.hjson b/vendor/pulp-platform_common_cells_fpu.vendor.hjson
new file mode 100644
index 0000000000..0744a98985
--- /dev/null
+++ b/vendor/pulp-platform_common_cells_fpu.vendor.hjson
@@ -0,0 +1,41 @@
+// -*- coding: utf-8 -*-
+// Copyright (C) 2022 Thales DIS France SAS
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com)
+
+{
+    // Name of the project
+    name: "pulp_common_cells_fpu",
+
+    // Target directory: relative to the directory containing this file.
+    target_dir: "pulp-platform/fpnew/src/common_cells",
+
+    // Upstream repository
+    upstream: {
+        // URL
+        url: "https://github.com/pulp-platform/common_cells.git",
+        // revision
+        rev: "v1.13.1",
+    }
+
+    //Patch dir for local changes
+    patch_dir: "patches/pulp-platform/common_cells_fpu",
+
+    // Exclusions from upstream content
+    exclude_from_upstream: [
+        ".git",
+        ".github",
+        ".gitlab-ci.yml",
+        ".travis.yml",
+        "Bender.yml",
+        "ci",
+        "common_cells.core",
+        "formal",
+        "ips_list.yml",
+        "lint",
+        "Makefile",
+        "src_files.yml",
+        "test",
+    ]
+}
+
diff --git a/vendor/pulp-platform_fpnew.lock.hjson b/vendor/pulp-platform_fpnew.lock.hjson
new file mode 100644
index 0000000000..f112f6bac2
--- /dev/null
+++ b/vendor/pulp-platform_fpnew.lock.hjson
@@ -0,0 +1,14 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// This file is generated by the util/vendor script. Please do not modify it
+// manually.
+
+{
+  upstream:
+  {
+    url: https://github.com/pulp-platform/fpnew.git
+    rev: 79f75e0a0fdab6ebc3840a14077c39f4934321fe
+  }
+}
diff --git a/vendor/pulp-platform_fpnew.vendor.hjson b/vendor/pulp-platform_fpnew.vendor.hjson
new file mode 100644
index 0000000000..374768c064
--- /dev/null
+++ b/vendor/pulp-platform_fpnew.vendor.hjson
@@ -0,0 +1,34 @@
+// -*- coding: utf-8 -*-
+// Copyright (C) 2022 Thales DIS France SAS
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com)
+
+{
+    // Name of the project
+    name: "pulp_fpnew",
+
+    // Target directory: relative to the directory containing this file.
+    target_dir: "pulp-platform/fpnew",
+
+    // Upstream repository
+    upstream: {
+        // URL
+        url: "https://github.com/pulp-platform/fpnew.git",
+        // revision
+        rev: "v0.6.2",
+    }
+
+    // Patch dir for local changes
+    patch_dir: "patches/pulp-platform/fpnew",
+
+    // Exclusions from upstream content
+    exclude_from_upstream: [
+        ".gitmodules",
+        "Bender.yml",
+        "docs",
+        "ips_list.yml",
+        "src_files.yml",
+        "tb",
+    ]
+}
+
diff --git a/vendor/pulp-platform_fpu_div_sqrt_mvp.lock.hjson b/vendor/pulp-platform_fpu_div_sqrt_mvp.lock.hjson
new file mode 100644
index 0000000000..4a9de30727
--- /dev/null
+++ b/vendor/pulp-platform_fpu_div_sqrt_mvp.lock.hjson
@@ -0,0 +1,14 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// This file is generated by the util/vendor script. Please do not modify it
+// manually.
+
+{
+  upstream:
+  {
+    url: https://github.com/pulp-platform/fpu_div_sqrt_mvp.git
+    rev: 83a601f97934ed5e06d737b9c80d98b08867c5fa
+  }
+}
diff --git a/vendor/pulp-platform_fpu_div_sqrt_mvp.vendor.hjson b/vendor/pulp-platform_fpu_div_sqrt_mvp.vendor.hjson
new file mode 100644
index 0000000000..ecdc904c47
--- /dev/null
+++ b/vendor/pulp-platform_fpu_div_sqrt_mvp.vendor.hjson
@@ -0,0 +1,32 @@
+// -*- coding: utf-8 -*-
+// Copyright (C) 2022 Thales DIS France SAS
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com)
+
+{
+    // Name of the project
+    name: "pulp_fpu_div_sqrt_mvp",
+
+    // Target directory: relative to the directory containing this file.
+    target_dir: "pulp-platform/fpnew/src/fpu_div_sqrt_mvp",
+
+    // Upstream repository
+    upstream: {
+        // URL
+        url: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git",
+        // revision
+        rev: "v1.0.3",
+    }
+
+    //Patch dir for local changes
+    patch_dir: "patches/pulp-platform/fpu_div_sqrt_mvp",
+
+    // Exclusions from upstream content
+    exclude_from_upstream: [
+        ".git",
+        "Bender.yml",
+        "document",
+        "src_files.yml",
+    ]
+}
+