From cc187048f6595e66078b6f1f7c14855fede3da09 Mon Sep 17 00:00:00 2001 From: Duck Deux Date: Thu, 31 Oct 2024 00:01:59 -0700 Subject: [PATCH] Add behavior to handle unmapped muxes without constant inputs better --- .../arch_lang/circuit_model_examples.rst | 2 + .../fpga_bitstream_commands.rst | 4 + .../openfpga_bitstream_command_template.h | 5 + .../src/base/openfpga_bitstream_template.h | 4 +- .../fpga_bitstream/build_device_bitstream.cpp | 8 +- .../fpga_bitstream/build_device_bitstream.h | 1 + .../fpga_bitstream/build_grid_bitstream.cpp | 81 ++++++++---- .../src/fpga_bitstream/build_grid_bitstream.h | 4 +- .../build_routing_bitstream.cpp | 119 ++++++++++++------ .../fpga_bitstream/build_routing_bitstream.h | 1 + 10 files changed, 169 insertions(+), 60 deletions(-) diff --git a/docs/source/manual/arch_lang/circuit_model_examples.rst b/docs/source/manual/arch_lang/circuit_model_examples.rst index 5bc928c6c9..78fb673243 100644 --- a/docs/source/manual/arch_lang/circuit_model_examples.rst +++ b/docs/source/manual/arch_lang/circuit_model_examples.rst @@ -525,6 +525,8 @@ Template - ``structure="tree|multi_level|one_level"`` Specify the multiplexer structure for a multiplexer. The structure option is only valid for SRAM-based multiplexers. For RRAM-based multiplexers, currently we only support the one_level structure +.. _mux_const_input_option: + - ``num_level=""`` Specify the number of levels when ``multi_level`` structure is selected. - ``add_const_input="true|false"`` Specify if an extra input should be added to the multiplexer circuits. For example, an 4-input multiplexer will be turned to a 5-input multiplexer. The extra input will be wired to a constant value, which can be specified through the XML syntax ``const_input_val``. 
diff --git a/docs/source/manual/openfpga_shell/openfpga_commands/fpga_bitstream_commands.rst b/docs/source/manual/openfpga_shell/openfpga_commands/fpga_bitstream_commands.rst index a35f4d9efd..489926ca13 100644 --- a/docs/source/manual/openfpga_shell/openfpga_commands/fpga_bitstream_commands.rst +++ b/docs/source/manual/openfpga_shell/openfpga_commands/fpga_bitstream_commands.rst @@ -51,6 +51,10 @@ build_architecture_bitstream Output the fabric-independent bitstream to an XML file. See details at :ref:`file_formats_architecture_bitstream`. + .. option:: --prefer_unused_mux_input + + Try to connect unmapped mux outputs to unmapped inputs. Only effective if there is no constant input to muxes (see :ref:`add_const_input <mux_const_input_option>`). This option aims to reduce power consumption by preventing unnecessary switching of unmapped mux outputs. + .. option:: --no_time_stamp Do not print time stamp in bitstream files diff --git a/openfpga/src/base/openfpga_bitstream_command_template.h b/openfpga/src/base/openfpga_bitstream_command_template.h index f80176514d..18d2c3541b 100644 --- a/openfpga/src/base/openfpga_bitstream_command_template.h +++ b/openfpga/src/base/openfpga_bitstream_command_template.h @@ -74,6 +74,11 @@ ShellCommandId add_build_arch_bitstream_command_template( "read_file", false, "file path to read the bitstream database"); shell_cmd.set_option_require_value(opt_read_file, openfpga::OPT_STRING); + /* Add an option '--prefer_unused_mux_input' */ + shell_cmd.add_option( + "prefer_unused_mux_input", false, + "Try to connect unmapped mux outputs to unmapped inputs"); + /* Add an option '--no_time_stamp' */ shell_cmd.add_option("no_time_stamp", false, "Do not print time stamp in output files"); diff --git a/openfpga/src/base/openfpga_bitstream_template.h b/openfpga/src/base/openfpga_bitstream_template.h index af5bf1ae7c..7adfa31262 100644 --- a/openfpga/src/base/openfpga_bitstream_template.h +++ b/openfpga/src/base/openfpga_bitstream_template.h @@ -39,13 +39,15 @@ int 
fpga_bitstream_template(T& openfpga_ctx, const Command& cmd, CommandOptionId opt_no_time_stamp = cmd.option("no_time_stamp"); CommandOptionId opt_write_file = cmd.option("write_file"); CommandOptionId opt_read_file = cmd.option("read_file"); + CommandOptionId opt_prefer_unused = cmd.option("prefer_unused_mux_input"); if (true == cmd_context.option_enable(cmd, opt_read_file)) { openfpga_ctx.mutable_bitstream_manager() = read_xml_architecture_bitstream( cmd_context.option_value(cmd, opt_read_file).c_str()); } else { openfpga_ctx.mutable_bitstream_manager() = build_device_bitstream( - g_vpr_ctx, openfpga_ctx, cmd_context.option_enable(cmd, opt_verbose)); + g_vpr_ctx, openfpga_ctx, cmd_context.option_enable(cmd, opt_prefer_unused), + cmd_context.option_enable(cmd, opt_verbose)); } overwrite_bitstream(openfpga_ctx.mutable_bitstream_manager(), diff --git a/openfpga/src/fpga_bitstream/build_device_bitstream.cpp b/openfpga/src/fpga_bitstream/build_device_bitstream.cpp index a3fb264321..cb8b687382 100644 --- a/openfpga/src/fpga_bitstream/build_device_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/build_device_bitstream.cpp @@ -149,6 +149,7 @@ static size_t rec_estimate_device_bitstream_num_bits( *******************************************************************/ BitstreamManager build_device_bitstream(const VprContext& vpr_ctx, const OpenfpgaContext& openfpga_ctx, + const bool& prefer_unused_mux_input, const bool& verbose) { std::string timer_message = std::string("\nBuild fabric-independent bitstream for implementation '") + @@ -218,7 +219,9 @@ BitstreamManager build_device_bitstream(const VprContext& vpr_ctx, openfpga_ctx.vpr_device_annotation(), openfpga_ctx.vpr_clustering_annotation(), openfpga_ctx.vpr_placement_annotation(), - openfpga_ctx.vpr_bitstream_annotation(), verbose); + openfpga_ctx.vpr_bitstream_annotation(), + prefer_unused_mux_input, + verbose); VTR_LOGV(verbose, "Done\n"); /* Create bitstream from routing architectures */ @@ -229,7 +232,8 @@ 
BitstreamManager build_device_bitstream(const VprContext& vpr_ctx, openfpga_ctx.arch().circuit_lib, openfpga_ctx.mux_lib(), vpr_ctx.atom(), openfpga_ctx.vpr_device_annotation(), openfpga_ctx.vpr_routing_annotation(), vpr_ctx.device().rr_graph, openfpga_ctx.device_rr_gsb(), - openfpga_ctx.flow_manager().compress_routing(), verbose); + openfpga_ctx.flow_manager().compress_routing(), + prefer_unused_mux_input, verbose); VTR_LOGV(verbose, "Done\n"); diff --git a/openfpga/src/fpga_bitstream/build_device_bitstream.h b/openfpga/src/fpga_bitstream/build_device_bitstream.h index 3bddf92115..a7e1637a66 100644 --- a/openfpga/src/fpga_bitstream/build_device_bitstream.h +++ b/openfpga/src/fpga_bitstream/build_device_bitstream.h @@ -18,6 +18,7 @@ namespace openfpga { BitstreamManager build_device_bitstream(const VprContext& vpr_ctx, const OpenfpgaContext& openfpga_ctx, + const bool& prefer_unused_mux_input, const bool& verbose); } /* end namespace openfpga */ diff --git a/openfpga/src/fpga_bitstream/build_grid_bitstream.cpp b/openfpga/src/fpga_bitstream/build_grid_bitstream.cpp index cbc5e99b16..41af370b76 100644 --- a/openfpga/src/fpga_bitstream/build_grid_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/build_grid_bitstream.cpp @@ -185,7 +185,9 @@ static void build_physical_block_pin_interc_bitstream( const AtomContext& atom_ctx, const VprDeviceAnnotation& device_annotation, const VprBitstreamAnnotation& bitstream_annotation, const PhysicalPb& physical_pb, t_pb_graph_pin* des_pb_graph_pin, - t_mode* physical_mode, const bool& verbose) { + t_mode* physical_mode, + const bool& prefer_unused_mux_input, + const bool& verbose) { /* Identify the number of fan-in (Consider interconnection edges of only * selected mode) */ t_interconnect* cur_interc = @@ -225,16 +227,46 @@ static void build_physical_block_pin_interc_bitstream( * - if des pb is not valid, this is an unmapped pb, we can set a default * path_id * - There is no net mapped to des_pb_graph_pin we use default path id - * - 
There is a net mapped to des_pin_graph_pin: we find the path id + * - There is a net mapped to des_pb_graph_pin: we find the path id */ const PhysicalPbId& des_pb_id = physical_pb.find_pb(des_pb_graph_pin->parent_node); size_t mux_input_pin_id = 0; - if (true != physical_pb.valid_pb_id(des_pb_id)) { + if (true != physical_pb.valid_pb_id(des_pb_id)) { /* Unmapped pb */ mux_input_pin_id = DEFAULT_PATH_ID; } else if (AtomNetId::INVALID() == physical_pb.pb_graph_pin_atom_net( des_pb_id, des_pb_graph_pin)) { - mux_input_pin_id = DEFAULT_PATH_ID; + /* Unmapped output */ + if(false == circuit_lib.mux_add_const_input(mux_model) && + prefer_unused_mux_input){ + /* No constant input and fix flag is set + * Select the first unmapped input */ + auto pin_inputs = pb_graph_pin_inputs(des_pb_graph_pin, cur_interc); + size_t pin_id; + for (pin_id = 0; pin_id < pin_inputs.size(); pin_id++) { + auto src_pb_graph_pin = pin_inputs[pin_id]; + const PhysicalPbId& src_pb_id = + physical_pb.find_pb(src_pb_graph_pin->parent_node); + if (!physical_pb.valid_pb_id(src_pb_id)) { + mux_input_pin_id = pin_id; + break; + } + } + /* Couldn't find an unmapped input, use default path ID */ + if (pin_id == pin_inputs.size()) { + VTR_LOGV_WARN(verbose, + "At PhysicalPbId=%d: output is unmapped but all inputs are mapped\n", + des_pb_id); + mux_input_pin_id = DEFAULT_PATH_ID; + } + /* or the first input was already unmapped, use default path ID */ + if(mux_input_pin_id == 0) { + mux_input_pin_id = DEFAULT_PATH_ID; + } + } else { + /* We have constant input, use the default path ID */ + mux_input_pin_id = DEFAULT_PATH_ID; + } } else { output_net = physical_pb.pb_graph_pin_atom_net(des_pb_id, des_pb_graph_pin); @@ -385,7 +417,7 @@ static void build_physical_block_interc_port_bitstream( const VprBitstreamAnnotation& bitstream_annotation, t_pb_graph_node* physical_pb_graph_node, const PhysicalPb& physical_pb, const e_circuit_pb_port_type& pb_port_type, t_mode* physical_mode, - const bool& verbose) { + const 
bool& prefer_unused_mux_input, const bool& verbose) { switch (pb_port_type) { case CIRCUIT_PB_PORT_INPUT: for (int iport = 0; iport < physical_pb_graph_node->num_input_ports; @@ -398,7 +430,7 @@ static void build_physical_block_interc_port_bitstream( circuit_lib, mux_lib, atom_ctx, device_annotation, bitstream_annotation, physical_pb, &(physical_pb_graph_node->input_pins[iport][ipin]), physical_mode, - verbose); + prefer_unused_mux_input, verbose); } } break; @@ -413,7 +445,7 @@ static void build_physical_block_interc_port_bitstream( circuit_lib, mux_lib, atom_ctx, device_annotation, bitstream_annotation, physical_pb, &(physical_pb_graph_node->output_pins[iport][ipin]), physical_mode, - verbose); + prefer_unused_mux_input, verbose); } } break; @@ -428,7 +460,7 @@ static void build_physical_block_interc_port_bitstream( circuit_lib, mux_lib, atom_ctx, device_annotation, bitstream_annotation, physical_pb, &(physical_pb_graph_node->clock_pins[iport][ipin]), physical_mode, - verbose); + prefer_unused_mux_input, verbose); } } break; @@ -451,7 +483,8 @@ static void build_physical_block_interc_bitstream( const AtomContext& atom_ctx, const VprDeviceAnnotation& device_annotation, const VprBitstreamAnnotation& bitstream_annotation, t_pb_graph_node* physical_pb_graph_node, const PhysicalPb& physical_pb, - t_mode* physical_mode, const bool& verbose) { + t_mode* physical_mode, const bool& prefer_unused_mux_input, + const bool& verbose) { /* Check if the pb_graph node is valid or not */ if (nullptr == physical_pb_graph_node) { VTR_LOGF_ERROR(__FILE__, __LINE__, "Invalid physical_pb_graph_node.\n"); @@ -472,7 +505,8 @@ static void build_physical_block_interc_bitstream( bitstream_manager, grouped_mem_inst_scoreboard, parent_configurable_block, module_manager, module_name_map, circuit_lib, mux_lib, atom_ctx, device_annotation, bitstream_annotation, physical_pb_graph_node, - physical_pb, CIRCUIT_PB_PORT_OUTPUT, physical_mode, verbose); + physical_pb, CIRCUIT_PB_PORT_OUTPUT, 
physical_mode, + prefer_unused_mux_input, verbose); /* We check input_pins of child_pb_graph_node and its the input_edges * Iterate over the interconnections between inputs of physical_pb_graph_node @@ -496,14 +530,14 @@ static void build_physical_block_interc_bitstream( parent_configurable_block, module_manager, module_name_map, circuit_lib, mux_lib, atom_ctx, device_annotation, bitstream_annotation, child_pb_graph_node, physical_pb, CIRCUIT_PB_PORT_INPUT, physical_mode, - verbose); + prefer_unused_mux_input, verbose); /* For clock pins, we should do the same work */ build_physical_block_interc_port_bitstream( bitstream_manager, grouped_mem_inst_scoreboard, parent_configurable_block, module_manager, module_name_map, circuit_lib, mux_lib, atom_ctx, device_annotation, bitstream_annotation, child_pb_graph_node, physical_pb, CIRCUIT_PB_PORT_CLOCK, physical_mode, - verbose); + prefer_unused_mux_input, verbose); } } } @@ -712,7 +746,7 @@ static void rec_build_physical_block_bitstream( const VprBitstreamAnnotation& bitstream_annotation, const e_side& border_side, const PhysicalPb& physical_pb, const PhysicalPbId& pb_id, t_pb_graph_node* physical_pb_graph_node, const size_t& pb_graph_node_index, - const bool& verbose) { + const bool& prefer_unused_mux_input, const bool& verbose) { /* Get the physical pb_type that is linked to the pb_graph node */ t_pb_type* physical_pb_type = physical_pb_graph_node->pb_type; @@ -773,7 +807,7 @@ static void rec_build_physical_block_bitstream( child_pb, &(physical_pb_graph_node ->child_pb_graph_nodes[physical_mode->index][ipb][jpb]), - jpb, verbose); + jpb, prefer_unused_mux_input, verbose); } } } @@ -817,7 +851,7 @@ static void rec_build_physical_block_bitstream( bitstream_manager, grouped_mem_inst_scoreboard, pb_configurable_block, module_manager, module_name_map, circuit_lib, mux_lib, atom_ctx, device_annotation, bitstream_annotation, physical_pb_graph_node, - physical_pb, physical_mode, verbose); + physical_pb, physical_mode, 
prefer_unused_mux_input, verbose); } /******************************************************************** @@ -836,7 +870,8 @@ static void build_physical_block_bitstream( const VprPlacementAnnotation& place_annotation, const VprBitstreamAnnotation& bitstream_annotation, const DeviceGrid& grids, const size_t& layer, const vtr::Point& grid_coord, - const e_side& border_side, const bool& verbose) { + const e_side& border_side, const bool& prefer_unused_mux_input, + const bool& verbose) { /* Create a block for the grid in bitstream manager */ t_physical_tile_type_ptr grid_type = grids.get_physical_type( t_physical_tile_loc(grid_coord.x(), grid_coord.y(), layer)); @@ -931,7 +966,7 @@ static void build_physical_block_bitstream( grid_configurable_block, module_manager, module_name_map, circuit_lib, mux_lib, atom_ctx, device_annotation, bitstream_annotation, border_side, PhysicalPb(), PhysicalPbId::INVALID(), - lb_type->pb_graph_head, z, verbose); + lb_type->pb_graph_head, z, prefer_unused_mux_input, verbose); } else { const PhysicalPb& phy_pb = cluster_annotation.physical_pb( place_annotation.grid_blocks(grid_coord)[z]); @@ -946,7 +981,7 @@ static void build_physical_block_bitstream( bitstream_manager, grouped_mem_inst_scoreboard, grid_configurable_block, module_manager, module_name_map, circuit_lib, mux_lib, atom_ctx, device_annotation, bitstream_annotation, - border_side, phy_pb, top_pb_id, pb_graph_head, z, verbose); + border_side, phy_pb, top_pb_id, pb_graph_head, z, prefer_unused_mux_input, verbose); } } } @@ -966,7 +1001,9 @@ void build_grid_bitstream( const AtomContext& atom_ctx, const VprDeviceAnnotation& device_annotation, const VprClusteringAnnotation& cluster_annotation, const VprPlacementAnnotation& place_annotation, - const VprBitstreamAnnotation& bitstream_annotation, const bool& verbose) { + const VprBitstreamAnnotation& bitstream_annotation, + const bool& prefer_unused_mux_input, + const bool& verbose) { VTR_LOGV(verbose, "Generating bitstream for core 
grids..."); /* Generate bitstream for the core logic block one by one */ @@ -1007,7 +1044,8 @@ void build_grid_bitstream( bitstream_manager, parent_block, module_manager, module_name_map, fabric_tile, curr_tile, circuit_lib, mux_lib, atom_ctx, device_annotation, cluster_annotation, place_annotation, - bitstream_annotation, grids, layer, grid_coord, NUM_2D_SIDES, verbose); + bitstream_annotation, grids, layer, grid_coord, NUM_2D_SIDES, + prefer_unused_mux_input, verbose); } } VTR_LOGV(verbose, "Done\n"); @@ -1055,7 +1093,8 @@ void build_grid_bitstream( bitstream_manager, parent_block, module_manager, module_name_map, fabric_tile, curr_tile, circuit_lib, mux_lib, atom_ctx, device_annotation, cluster_annotation, place_annotation, - bitstream_annotation, grids, layer, io_coordinate, io_side, verbose); + bitstream_annotation, grids, layer, io_coordinate, io_side, + prefer_unused_mux_input, verbose); } } VTR_LOGV(verbose, "Done\n"); diff --git a/openfpga/src/fpga_bitstream/build_grid_bitstream.h b/openfpga/src/fpga_bitstream/build_grid_bitstream.h index 36b297c987..ebdabf4c1a 100644 --- a/openfpga/src/fpga_bitstream/build_grid_bitstream.h +++ b/openfpga/src/fpga_bitstream/build_grid_bitstream.h @@ -34,7 +34,9 @@ void build_grid_bitstream( const AtomContext& atom_ctx, const VprDeviceAnnotation& device_annotation, const VprClusteringAnnotation& cluster_annotation, const VprPlacementAnnotation& place_annotation, - const VprBitstreamAnnotation& bitstream_annotation, const bool& verbose); + const VprBitstreamAnnotation& bitstream_annotation, + const bool& prefer_unused_mux_input, + const bool& verbose); } /* end namespace openfpga */ diff --git a/openfpga/src/fpga_bitstream/build_routing_bitstream.cpp b/openfpga/src/fpga_bitstream/build_routing_bitstream.cpp index dd2cc73296..6dcc7cd138 100644 --- a/openfpga/src/fpga_bitstream/build_routing_bitstream.cpp +++ b/openfpga/src/fpga_bitstream/build_routing_bitstream.cpp @@ -38,7 +38,8 @@ static void 
build_switch_block_mux_bitstream( const RRGraphView& rr_graph, const RRNodeId& cur_rr_node, const std::vector& drive_rr_nodes, const AtomContext& atom_ctx, const VprDeviceAnnotation& device_annotation, - const VprRoutingAnnotation& routing_annotation, const bool& verbose) { + const VprRoutingAnnotation& routing_annotation, + const bool& prefer_unused_mux_input, const bool& verbose) { /* Check current rr_node is CHANX or CHANY*/ VTR_ASSERT((CHANX == rr_graph.node_type(cur_rr_node)) || (CHANY == rr_graph.node_type(cur_rr_node))); @@ -46,6 +47,14 @@ static void build_switch_block_mux_bitstream( /* Find the input size of the implementation of a routing multiplexer */ size_t datapath_mux_size = drive_rr_nodes.size(); + /* Find the circuit model id of the mux, we need its design technology which + * matters the bitstream generation */ + std::vector driver_switches = + get_rr_graph_driver_switches(rr_graph, cur_rr_node); + VTR_ASSERT(1 == driver_switches.size()); + CircuitModelId mux_model = + device_annotation.rr_switch_circuit_model(driver_switches[0]); + /* Cache input and output nets */ std::vector input_nets; ClusterNetId output_net = routing_annotation.rr_node_net(cur_rr_node); @@ -78,21 +87,34 @@ static void build_switch_block_mux_bitstream( break; } } - } + } else if (false == circuit_lib.mux_add_const_input(mux_model) && prefer_unused_mux_input) { + /* If + * 1. output net is INVALID (unmapped) + * 2. and we don't have a constant input, + * 3. 
and the prefer_unused_mux_input flag is on, + * then find an unmapped input and connect it to the output net */ + for (size_t inode = 0; inode < drive_rr_nodes.size(); inode++) { + if (input_nets[inode] == ClusterNetId::INVALID()) { + path_id = inode; + break; + } + } + /* Warn if all inputs were mapped */ + if (path_id == DEFAULT_PATH_ID) { + VTR_LOGV_WARN(verbose, + "At RRNodeId = %d: output is unmapped but all inputs are mapped?", cur_rr_node); + } + /* If the first input was already unmapped, set path id to default (for compatibility purposes) */ + if (path_id == 0) { + path_id = DEFAULT_PATH_ID; + } + } /* Keep default path id if output is unmapped but somehow all inputs are mapped */ /* Ensure that our path id makes sense! */ VTR_ASSERT( (DEFAULT_PATH_ID == path_id) || ((DEFAULT_PATH_ID < path_id) && (path_id < (int)datapath_mux_size))); - /* Find the circuit model id of the mux, we need its design technology which - * matters the bitstream generation */ - std::vector driver_switches = - get_rr_graph_driver_switches(rr_graph, cur_rr_node); - VTR_ASSERT(1 == driver_switches.size()); - CircuitModelId mux_model = - device_annotation.rr_switch_circuit_model(driver_switches[0]); - /* Generate bitstream depend on both technology and structure of this MUX */ std::vector mux_bitstream = build_mux_bitstream( circuit_lib, mux_model, mux_lib, datapath_mux_size, path_id); @@ -164,7 +186,8 @@ static void build_switch_block_interc_bitstream( const RRGraphView& rr_graph, const AtomContext& atom_ctx, const VprDeviceAnnotation& device_annotation, const VprRoutingAnnotation& routing_annotation, const RRGSB& rr_gsb, - const e_side& chan_side, const size_t& chan_node_id, const bool& verbose) { + const e_side& chan_side, const size_t& chan_node_id, + const bool& prefer_unused_mux_input, const bool& verbose) { std::vector driver_rr_nodes; /* Get the node */ @@ -200,7 +223,7 @@ static void build_switch_block_interc_bitstream( build_switch_block_mux_bitstream( bitstream_manager, 
mux_mem_block, module_manager, module_name_map, circuit_lib, mux_lib, rr_graph, cur_rr_node, driver_rr_nodes, atom_ctx, - device_annotation, routing_annotation, verbose); + device_annotation, routing_annotation, prefer_unused_mux_input, verbose); } /*Nothing should be done else*/ } @@ -221,7 +244,7 @@ static void build_switch_block_bitstream( const CircuitLibrary& circuit_lib, const MuxLibrary& mux_lib, const AtomContext& atom_ctx, const VprDeviceAnnotation& device_annotation, const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph, - const RRGSB& rr_gsb, const bool& verbose) { + const RRGSB& rr_gsb, const bool& prefer_unused_mux_input, const bool& verbose) { /* Iterate over all the multiplexers */ for (size_t side = 0; side < rr_gsb.get_num_sides(); ++side) { SideManager side_manager(side); @@ -239,7 +262,7 @@ static void build_switch_block_bitstream( build_switch_block_interc_bitstream( bitstream_manager, sb_config_block, module_manager, module_name_map, circuit_lib, mux_lib, rr_graph, atom_ctx, device_annotation, - routing_annotation, rr_gsb, side_manager.get_side(), itrack, verbose); + routing_annotation, rr_gsb, side_manager.get_side(), itrack, prefer_unused_mux_input, verbose); } } } @@ -258,13 +281,21 @@ static void build_connection_block_mux_bitstream( const AtomContext& atom_ctx, const VprDeviceAnnotation& device_annotation, const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph, const RRGSB& rr_gsb, const e_side& cb_ipin_side, const size_t& ipin_index, - const bool& verbose) { + const bool& prefer_unused_mux_input, const bool& verbose) { RRNodeId src_rr_node = rr_gsb.get_ipin_node(cb_ipin_side, ipin_index); /* Find drive_rr_nodes*/ std::vector driver_rr_edges = rr_gsb.get_ipin_node_in_edges(rr_graph, cb_ipin_side, ipin_index); size_t datapath_mux_size = driver_rr_edges.size(); + /* Find the circuit model id of the mux, we need its design technology which + * matters the bitstream generation */ + std::vector 
driver_switches = + get_rr_graph_driver_switches(rr_graph, src_rr_node); + VTR_ASSERT(1 == driver_switches.size()); + CircuitModelId mux_model = + device_annotation.rr_switch_circuit_model(driver_switches[0]); + /* Cache input and output nets */ std::vector input_nets; ClusterNetId output_net = routing_annotation.rr_node_net(src_rr_node); @@ -295,21 +326,35 @@ static void build_connection_block_mux_bitstream( } edge_index++; } - } + } else if (false == circuit_lib.mux_add_const_input(mux_model) && prefer_unused_mux_input){ + /* If + * 1. output net is INVALID (unmapped) + * 2. and we don't have a constant input, + * 3. and the prefer_unused_mux_input flag is on, + * then find an unmapped input and connect it to the output net */ + for (int iedge = driver_rr_edges.size() - 1; iedge >= 0; --iedge){ + RREdgeId edge = driver_rr_edges[iedge]; + RRNodeId driver_node = rr_graph.edge_src_node(edge); + if (routing_annotation.rr_node_net(driver_node) == ClusterNetId::INVALID()){ + path_id = iedge; + break; + } + } + /* Warn if all inputs are mapped */ + if(path_id == -1){ + VTR_LOG_ERROR("At RRNodeId = %d: output is unmapped but all inputs are mapped?", src_rr_node); + } + /* If the last input was already unmapped, set path id to default (for compatibility purposes) */ + if((size_t)path_id == driver_rr_edges.size() - 1){ + path_id = DEFAULT_PATH_ID; + } + } /* Keep default path id if output is unmapped but somehow all inputs are mapped */ /* Ensure that our path id makes sense! 
*/ VTR_ASSERT( (DEFAULT_PATH_ID == path_id) || ((DEFAULT_PATH_ID < path_id) && (path_id < (int)datapath_mux_size))); - /* Find the circuit model id of the mux, we need its design technology which - * matters the bitstream generation */ - std::vector driver_switches = - get_rr_graph_driver_switches(rr_graph, src_rr_node); - VTR_ASSERT(1 == driver_switches.size()); - CircuitModelId mux_model = - device_annotation.rr_switch_circuit_model(driver_switches[0]); - /* Generate bitstream depend on both technology and structure of this MUX */ std::vector mux_bitstream = build_mux_bitstream( circuit_lib, mux_model, mux_lib, datapath_mux_size, path_id); @@ -382,7 +427,7 @@ static void build_connection_block_interc_bitstream( const AtomContext& atom_ctx, const VprDeviceAnnotation& device_annotation, const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph, const RRGSB& rr_gsb, const e_side& cb_ipin_side, const size_t& ipin_index, - const bool& verbose) { + const bool& prefer_unused_mux_input, const bool& verbose) { RRNodeId src_rr_node = rr_gsb.get_ipin_node(cb_ipin_side, ipin_index); VTR_LOGV(verbose, "\tGenerating bitstream for IPIN '%lu'. 
Details: %s\n", @@ -414,7 +459,7 @@ static void build_connection_block_interc_bitstream( build_connection_block_mux_bitstream( bitstream_manager, mux_mem_block, module_manager, module_name_map, circuit_lib, mux_lib, atom_ctx, device_annotation, routing_annotation, - rr_graph, rr_gsb, cb_ipin_side, ipin_index, verbose); + rr_graph, rr_gsb, cb_ipin_side, ipin_index, prefer_unused_mux_input, verbose); } /*Nothing should be done else*/ } @@ -436,7 +481,8 @@ static void build_connection_block_bitstream( const CircuitLibrary& circuit_lib, const MuxLibrary& mux_lib, const AtomContext& atom_ctx, const VprDeviceAnnotation& device_annotation, const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph, - const RRGSB& rr_gsb, const t_rr_type& cb_type, const bool& verbose) { + const RRGSB& rr_gsb, const t_rr_type& cb_type, + const bool& prefer_unused_mux_input, const bool& verbose) { /* Find routing multiplexers on the sides of a Connection block where IPIN * nodes locate */ std::vector cb_sides = rr_gsb.get_cb_ipin_sides(cb_type); @@ -451,7 +497,8 @@ static void build_connection_block_bitstream( build_connection_block_interc_bitstream( bitstream_manager, cb_configurable_block, module_manager, module_name_map, circuit_lib, mux_lib, atom_ctx, device_annotation, - routing_annotation, rr_graph, rr_gsb, cb_ipin_side, inode, verbose); + routing_annotation, rr_graph, rr_gsb, cb_ipin_side, inode, + prefer_unused_mux_input, verbose); } } } @@ -467,8 +514,9 @@ static void build_connection_block_bitstreams( const MuxLibrary& mux_lib, const AtomContext& atom_ctx, const VprDeviceAnnotation& device_annotation, const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph, - const DeviceRRGSB& device_rr_gsb, const bool& compact_routing_hierarchy, - const t_rr_type& cb_type, const bool& verbose) { + const DeviceRRGSB& device_rr_gsb, bool compact_routing_hierarchy, + const t_rr_type& cb_type, const bool& prefer_unused_mux_input, + const bool& verbose) { 
vtr::Point cb_range = device_rr_gsb.get_gsb_range(); for (size_t ix = 0; ix < cb_range.x(); ++ix) { @@ -592,7 +640,8 @@ static void build_connection_block_bitstreams( build_connection_block_bitstream( bitstream_manager, cb_configurable_block, module_manager, module_name_map, circuit_lib, mux_lib, atom_ctx, device_annotation, - routing_annotation, rr_graph, rr_gsb, cb_type, verbose); + routing_annotation, rr_graph, rr_gsb, cb_type, + prefer_unused_mux_input, verbose); VTR_LOGV(verbose, "\tDone\n"); } @@ -614,7 +663,7 @@ void build_routing_bitstream( const VprDeviceAnnotation& device_annotation, const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph, const DeviceRRGSB& device_rr_gsb, const bool& compact_routing_hierarchy, - const bool& verbose) { + const bool& prefer_unused_mux_input, const bool& verbose) { /* Generate bitstream for each switch blocks * To organize the bitstream in blocks, we create a block for each switch * block and give names which are same as they are in top-level module @@ -724,7 +773,7 @@ void build_routing_bitstream( build_switch_block_bitstream( bitstream_manager, sb_configurable_block, module_manager, module_name_map, circuit_lib, mux_lib, atom_ctx, device_annotation, - routing_annotation, rr_graph, rr_gsb, verbose); + routing_annotation, rr_graph, rr_gsb, prefer_unused_mux_input, verbose); VTR_LOGV(verbose, "\tDone\n"); } @@ -742,7 +791,7 @@ void build_routing_bitstream( bitstream_manager, top_configurable_block, module_manager, module_name_map, fabric_tile, circuit_lib, mux_lib, atom_ctx, device_annotation, routing_annotation, rr_graph, device_rr_gsb, compact_routing_hierarchy, - CHANX, verbose); + CHANX, prefer_unused_mux_input, verbose); VTR_LOG("Done\n"); VTR_LOG("Generating bitstream for Y-direction Connection blocks ..."); @@ -751,7 +800,7 @@ void build_routing_bitstream( bitstream_manager, top_configurable_block, module_manager, module_name_map, fabric_tile, circuit_lib, mux_lib, atom_ctx, device_annotation, 
routing_annotation, rr_graph, device_rr_gsb, compact_routing_hierarchy, - CHANY, verbose); + CHANY, prefer_unused_mux_input, verbose); VTR_LOG("Done\n"); } diff --git a/openfpga/src/fpga_bitstream/build_routing_bitstream.h b/openfpga/src/fpga_bitstream/build_routing_bitstream.h index c3af671bd8..9f4683fc7b 100644 --- a/openfpga/src/fpga_bitstream/build_routing_bitstream.h +++ b/openfpga/src/fpga_bitstream/build_routing_bitstream.h @@ -36,6 +36,7 @@ void build_routing_bitstream( const VprDeviceAnnotation& device_annotation, const VprRoutingAnnotation& routing_annotation, const RRGraphView& rr_graph, const DeviceRRGSB& device_rr_gsb, const bool& compact_routing_hierarchy, + const bool& prefer_unused_mux_input, const bool& verbose); } /* end namespace openfpga */