Skip to content

Commit

Permalink
[hardware] Make AXI channels stable when valid is asserted
Browse files Browse the repository at this point in the history
The last_o signal in the store unit is still an exception.
This can be solved in the future. However, the AXI cut in the
vlsu should make Ara's VLSU fully compliant.
  • Loading branch information
mp-17 committed Feb 26, 2024
1 parent 0f69b47 commit 70c8ba7
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 138 deletions.
199 changes: 103 additions & 96 deletions hardware/src/vlsu/addrgen.sv
Original file line number Diff line number Diff line change
Expand Up @@ -556,13 +556,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
// implementation we can incur deadlocks
if (axi_addrgen_queue_empty || (axi_addrgen_req_o.is_load && axi_addrgen_q.is_load) ||
(~axi_addrgen_req_o.is_load && ~axi_addrgen_q.is_load)) begin
// Immediately assert valid_o
if (axi_addrgen_q.is_load) begin
axi_ar_valid_o = 1'b1;
end else begin
axi_aw_valid_o = 1'b1;
end
if (!axi_addrgen_queue_full && axi_ax_ready) begin
if (!axi_addrgen_queue_full) begin
if (axi_addrgen_q.is_burst) begin

/////////////////////////
Expand Down Expand Up @@ -592,6 +586,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
burst : BURST_INCR,
default: '0
};
axi_ar_valid_o = 1'b1;
end
// AW Channel
else begin
Expand All @@ -605,54 +600,57 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
burst : BURST_INCR,
default: '0
};
axi_aw_valid_o = 1'b1;
end

// Send this request to the load/store units
axi_addrgen_queue = '{
addr : axi_addrgen_q.addr,
len : burst_length - 1,
size : eff_axi_dw_log_q,
is_load: axi_addrgen_q.is_load
};
axi_addrgen_queue_push = 1'b1;

// Account for the requested operands
axi_addrgen_d.len = axi_addrgen_q.len -
((aligned_end_addr_q[11:0] - axi_addrgen_q.addr[11:0] + 1)
>> int'(axi_addrgen_q.vew));
if (axi_addrgen_q.len <
((aligned_end_addr_q[11:0] - axi_addrgen_q.addr[11:0] + 1)
>> int'(axi_addrgen_q.vew)))
axi_addrgen_d.len = 0;
axi_addrgen_d.addr = aligned_next_start_addr_q;

// Finished generating AXI requests
if (axi_addrgen_d.len == 0) begin
addrgen_req_ready = 1'b1;
axi_addrgen_state_d = AXI_ADDRGEN_IDLE;
end
if (axi_ax_ready) begin
axi_addrgen_queue = '{
addr : axi_addrgen_q.addr,
len : burst_length - 1,
size : eff_axi_dw_log_q,
is_load: axi_addrgen_q.is_load
};
axi_addrgen_queue_push = 1'b1;

// Calculate the addresses for the next iteration
// The start address is found by aligning the original request address by the width of
// the memory interface. In our case, we have it already.
aligned_start_addr_d = axi_addrgen_d.addr;
// The final address can be found similarly.
// How many B we requested? No more than (256 << burst_size)
if (axi_addrgen_d.len << int'(axi_addrgen_q.vew) >= (256 << eff_axi_dw_log_q)) begin
aligned_next_start_addr_d =
aligned_addr(aligned_start_addr_d + (256 << eff_axi_dw_log_q), eff_axi_dw_log_q);
aligned_end_addr_d = aligned_next_start_addr_d - 1;
end else begin
aligned_next_start_addr_d =
aligned_addr(aligned_start_addr_d + (axi_addrgen_d.len << int'(axi_addrgen_q.vew))
- 1, eff_axi_dw_log_q) + eff_axi_dw_q;
aligned_end_addr_d = aligned_next_start_addr_d - 1;
end
// But since AXI requests are aligned in 4 KiB pages, aligned_end_addr must be in the
// same page as aligned_start_addr
if (aligned_start_addr_d[AxiAddrWidth-1:12] != aligned_end_addr_d[AxiAddrWidth-1:12]) begin
aligned_end_addr_d = {aligned_start_addr_d[AxiAddrWidth-1:12], 12'hFFF};
aligned_next_start_addr_d = { next_2page_msb_d, 12'h000};
// Account for the requested operands
axi_addrgen_d.len = axi_addrgen_q.len -
((aligned_end_addr_q[11:0] - axi_addrgen_q.addr[11:0] + 1)
>> int'(axi_addrgen_q.vew));
if (axi_addrgen_q.len <
((aligned_end_addr_q[11:0] - axi_addrgen_q.addr[11:0] + 1)
>> int'(axi_addrgen_q.vew)))
axi_addrgen_d.len = 0;
axi_addrgen_d.addr = aligned_next_start_addr_q;

// Finished generating AXI requests
if (axi_addrgen_d.len == 0) begin
addrgen_req_ready = 1'b1;
axi_addrgen_state_d = AXI_ADDRGEN_IDLE;
end

// Calculate the addresses for the next iteration
// The start address is found by aligning the original request address by the width of
// the memory interface. In our case, we have it already.
aligned_start_addr_d = axi_addrgen_d.addr;
// The final address can be found similarly.
// How many B we requested? No more than (256 << burst_size)
if (axi_addrgen_d.len << int'(axi_addrgen_q.vew) >= (256 << eff_axi_dw_log_q)) begin
aligned_next_start_addr_d =
aligned_addr(aligned_start_addr_d + (256 << eff_axi_dw_log_q), eff_axi_dw_log_q);
aligned_end_addr_d = aligned_next_start_addr_d - 1;
end else begin
aligned_next_start_addr_d =
aligned_addr(aligned_start_addr_d + (axi_addrgen_d.len << int'(axi_addrgen_q.vew))
- 1, eff_axi_dw_log_q) + eff_axi_dw_q;
aligned_end_addr_d = aligned_next_start_addr_d - 1;
end
// But since AXI requests are aligned in 4 KiB pages, aligned_end_addr must be in the
// same page as aligned_start_addr
if (aligned_start_addr_d[AxiAddrWidth-1:12] != aligned_end_addr_d[AxiAddrWidth-1:12]) begin
aligned_end_addr_d = {aligned_start_addr_d[AxiAddrWidth-1:12], 12'hFFF};
aligned_next_start_addr_d = { next_2page_msb_d, 12'h000};
end
end
end else if (state_q != ADDRGEN_IDX_OP) begin

Expand All @@ -670,6 +668,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
burst : BURST_INCR,
default: '0
};
axi_ar_valid_o = 1'b1;
end
// AW Channel
else begin
Expand All @@ -681,26 +680,29 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
burst : BURST_INCR,
default: '0
};
axi_aw_valid_o = 1'b1;
end

// Send this request to the load/store units
axi_addrgen_queue = '{
addr : axi_addrgen_q.addr,
size : axi_addrgen_q.vew,
len : 0,
is_load: axi_addrgen_q.is_load
};
axi_addrgen_queue_push = 1'b1;

// Account for the requested operands
axi_addrgen_d.len = axi_addrgen_q.len - 1;
// Calculate the addresses for the next iteration, adding the correct stride
axi_addrgen_d.addr = axi_addrgen_q.addr + axi_addrgen_q.stride;

// Finished generating AXI requests
if (axi_addrgen_d.len == 0) begin
addrgen_req_ready = 1'b1;
axi_addrgen_state_d = AXI_ADDRGEN_IDLE;
if (axi_ax_ready) begin
// Send this request to the load/store units
axi_addrgen_queue = '{
addr : axi_addrgen_q.addr,
size : axi_addrgen_q.vew,
len : 0,
is_load: axi_addrgen_q.is_load
};
axi_addrgen_queue_push = 1'b1;

// Account for the requested operands
axi_addrgen_d.len = axi_addrgen_q.len - 1;
// Calculate the addresses for the next iteration, adding the correct stride
axi_addrgen_d.addr = axi_addrgen_q.addr + axi_addrgen_q.stride;

// Finished generating AXI requests
if (axi_addrgen_d.len == 0) begin
addrgen_req_ready = 1'b1;
axi_addrgen_state_d = AXI_ADDRGEN_IDLE;
end
end
end else begin

Expand All @@ -709,8 +711,6 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
//////////////////////

if (idx_addr_valid_q) begin
// We consumed a word
idx_addr_ready_d = 1'b1;

// AR Channel
if (axi_addrgen_q.is_load) begin
Expand All @@ -722,6 +722,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
burst : BURST_INCR,
default: '0
};
axi_ar_valid_o = 1'b1;
end
// AW Channel
else begin
Expand All @@ -733,34 +734,40 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
burst : BURST_INCR,
default: '0
};
axi_aw_valid_o = 1'b1;
end

// Send this request to the load/store units
axi_addrgen_queue = '{
addr : idx_final_addr_q,
size : axi_addrgen_q.vew,
len : 0,
is_load: axi_addrgen_q.is_load
};
axi_addrgen_queue_push = 1'b1;

// Account for the requested operands
axi_addrgen_d.len = axi_addrgen_q.len - 1;

// Check if the address does generate an exception
if (is_addr_error(idx_final_addr_q, axi_addrgen_q.vew)) begin
// Generate an error
idx_op_error_d = 1'b1;
// Forward next vstart info to the dispatcher
addrgen_error_vl_d = addrgen_req.len - axi_addrgen_q.len - 1;
addrgen_req_ready = 1'b1;
axi_addrgen_state_d = AXI_ADDRGEN_IDLE;
end
if (axi_ax_ready) begin
// We consumed a word
idx_addr_ready_d = 1'b1;

// Finished generating AXI requests
if (axi_addrgen_d.len == 0) begin
addrgen_req_ready = 1'b1;
axi_addrgen_state_d = AXI_ADDRGEN_IDLE;
// Send this request to the load/store units
axi_addrgen_queue = '{
addr : idx_final_addr_q,
size : axi_addrgen_q.vew,
len : 0,
is_load: axi_addrgen_q.is_load
};
axi_addrgen_queue_push = 1'b1;

// Account for the requested operands
axi_addrgen_d.len = axi_addrgen_q.len - 1;

// Check if the address does generate an exception
if (is_addr_error(idx_final_addr_q, axi_addrgen_q.vew)) begin
// Generate an error
idx_op_error_d = 1'b1;
// Forward next vstart info to the dispatcher
addrgen_error_vl_d = addrgen_req.len - axi_addrgen_q.len - 1;
addrgen_req_ready = 1'b1;
axi_addrgen_state_d = AXI_ADDRGEN_IDLE;
end

// Finished generating AXI requests
if (axi_addrgen_d.len == 0) begin
addrgen_req_ready = 1'b1;
axi_addrgen_state_d = AXI_ADDRGEN_IDLE;
end
end
end
end
Expand Down
84 changes: 42 additions & 42 deletions hardware/src/vlsu/vstu.sv
Original file line number Diff line number Diff line change
Expand Up @@ -207,53 +207,53 @@ module vstu import ara_pkg::*; import rvv_pkg::*; #(

if (vinsn_issue_valid && &stu_operand_valid && (vinsn_issue_q.vm || (|mask_valid_i)) &&
axi_addrgen_req_valid_i && !axi_addrgen_req_i.is_load) begin
// We have a W beat to send

// Bytes valid in the current W beat
automatic shortint unsigned lower_byte = beat_lower_byte(axi_addrgen_req_i.addr,
axi_addrgen_req_i.size, axi_addrgen_req_i.len, BURST_INCR, AxiDataWidth/8, len_q);
automatic shortint unsigned upper_byte = beat_upper_byte(axi_addrgen_req_i.addr,
axi_addrgen_req_i.size, axi_addrgen_req_i.len, BURST_INCR, AxiDataWidth/8, len_q);

// Account for the issued bytes
// How many bytes are valid in this VRF word
automatic vlen_t vrf_valid_bytes = NrLanes * 8 - vrf_pnt_q;
// How many bytes are valid in this instruction
automatic vlen_t vinsn_valid_bytes = issue_cnt_q - vrf_pnt_q;
// How many bytes are valid in this AXI word
automatic vlen_t axi_valid_bytes = upper_byte - lower_byte + 1;

// How many bytes are we committing?
automatic logic [idx_width(DataWidth*NrLanes/8):0] valid_bytes;
valid_bytes = issue_cnt_q < NrLanes * 8 ? vinsn_valid_bytes : vrf_valid_bytes;
valid_bytes = valid_bytes < axi_valid_bytes ? valid_bytes : axi_valid_bytes;

// Copy data from the operands into the W channel
for (int axi_byte = 0; axi_byte < AxiDataWidth/8; axi_byte++) begin
// Is this byte a valid byte in the W beat?
if (axi_byte >= lower_byte && axi_byte <= upper_byte) begin
// Map axi_byte to the corresponding byte in the VRF word (sequential)
automatic int vrf_seq_byte = axi_byte - lower_byte + vrf_pnt_q;
// And then shuffle it
automatic int vrf_byte = shuffle_index(vrf_seq_byte, NrLanes, vinsn_issue_q.eew_vs1);

// Is this byte a valid byte in the VRF word?
if (vrf_seq_byte < issue_cnt_q) begin
// At which lane, and what is the byte offset in that lane, of the byte vrf_byte?
automatic int vrf_lane = vrf_byte >> 3;
automatic int vrf_offset = vrf_byte[2:0];

// Copy data
axi_w_o.data[8*axi_byte +: 8] = stu_operand[vrf_lane][8*vrf_offset +: 8];
axi_w_o.strb[axi_byte] = vinsn_issue_q.vm || mask_i[vrf_lane][vrf_offset];
end
end
end
// Send the W beat
axi_w_valid_o = 1'b1;

if (axi_w_ready_i) begin
// Bytes valid in the current W beat
automatic shortint unsigned lower_byte = beat_lower_byte(axi_addrgen_req_i.addr,
axi_addrgen_req_i.size, axi_addrgen_req_i.len, BURST_INCR, AxiDataWidth/8, len_q);
automatic shortint unsigned upper_byte = beat_upper_byte(axi_addrgen_req_i.addr,
axi_addrgen_req_i.size, axi_addrgen_req_i.len, BURST_INCR, AxiDataWidth/8, len_q);

// Account for the issued bytes
// How many bytes are valid in this VRF word
automatic vlen_t vrf_valid_bytes = NrLanes * 8 - vrf_pnt_q;
// How many bytes are valid in this instruction
automatic vlen_t vinsn_valid_bytes = issue_cnt_q - vrf_pnt_q;
// How many bytes are valid in this AXI word
automatic vlen_t axi_valid_bytes = upper_byte - lower_byte + 1;

// How many bytes are we committing?
automatic logic [idx_width(DataWidth*NrLanes/8):0] valid_bytes;
valid_bytes = issue_cnt_q < NrLanes * 8 ? vinsn_valid_bytes : vrf_valid_bytes;
valid_bytes = valid_bytes < axi_valid_bytes ? valid_bytes : axi_valid_bytes;

vrf_pnt_d = vrf_pnt_q + valid_bytes;

// Copy data from the operands into the W channel
for (int axi_byte = 0; axi_byte < AxiDataWidth/8; axi_byte++) begin
// Is this byte a valid byte in the W beat?
if (axi_byte >= lower_byte && axi_byte <= upper_byte) begin
// Map axi_byte to the corresponding byte in the VRF word (sequential)
automatic int vrf_seq_byte = axi_byte - lower_byte + vrf_pnt_q;
// And then shuffle it
automatic int vrf_byte = shuffle_index(vrf_seq_byte, NrLanes, vinsn_issue_q.eew_vs1);

// Is this byte a valid byte in the VRF word?
if (vrf_seq_byte < issue_cnt_q) begin
// At which lane, and what is the byte offset in that lane, of the byte vrf_byte?
automatic int vrf_lane = vrf_byte >> 3;
automatic int vrf_offset = vrf_byte[2:0];

// Copy data
axi_w_o.data[8*axi_byte +: 8] = stu_operand[vrf_lane][8*vrf_offset +: 8];
axi_w_o.strb[axi_byte] = vinsn_issue_q.vm || mask_i[vrf_lane][vrf_offset];
end
end
end

// Account for the beat we sent
len_d = len_q + 1;
// We wrote all the beats for this AW burst
Expand Down

0 comments on commit 70c8ba7

Please sign in to comment.