Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sw: Add SD write support and flashing utility #45

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
3 changes: 2 additions & 1 deletion hw/bootrom/cheshire_bootrom.S
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,11 @@ _boot:
// If main returns, we end up here
.global _exit
_exit:
// Save the return value to scratch register 2, try `ebreak`, then wait forever.
// The return value in a0 is stored as (a0 << 1) | 1, so bit 0 of the stored word is always set.
// NOTE(review): bit 0 presumably marks "program exited" for an observer polling the register,
// with the return code in the upper bits; confirm against the host-side tooling.
slli a0, a0, 1
ori a0, a0, 1
la t0, __base_regs
sw a0, 8(t0) // regs.SCRATCH[2]
// Trap once via `ebreak`, then park the hart in a wfi loop that never exits
ebreak
1: wfi
j 1b
3 changes: 1 addition & 2 deletions hw/bootrom/cheshire_bootrom.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@
//
// Nicole Narr <[email protected]>
// Christopher Reinwardt <[email protected]>
// Paul Scheffler <paulsc@student.ethz.ch>
// Paul Scheffler <paulsc@iis.ee.ethz.ch>

#include <stdint.h>
#include "util.h"
#include "params.h"
#include "regs/cheshire.h"
#include "regs/serial_link.h"
#include "spi_host_regs.h"
#include "dif/clint.h"
#include "hal/i2c_24fc1025.h"
Expand Down
3,496 changes: 1,748 additions & 1,748 deletions hw/bootrom/cheshire_bootrom.sv

Large diffs are not rendered by default.

96 changes: 96 additions & 0 deletions sw/boot/flash.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Paul Scheffler <[email protected]>
//
// Boot disk flasher for Cheshire; writes a contiguous disk segment to a boot target disk.
// This program can be preloaded and invoked repeatedly to write multiple segments.

#include <stdint.h>
#include "util.h"
#include "params.h"
#include "regs/cheshire.h"
#include "spi_host_regs.h"
#include "dif/clint.h"
#include "hal/i2c_24fc1025.h"
#include "hal/spi_s25fs512s.h"
#include "hal/spi_sdcard.h"
#include "hal/uart_debug.h"
#include "gpt.h"
#include "printf.h"

// Write `len` 512B blocks from the buffer at `img_base` to the SD card (SPI mode), starting at
// card block `sector`. `core_freq` is handed to the driver init; `rtc_freq` sizes the power-up
// wait. Returns the driver's status code (the caller treats any nonzero value as an error).
int flash_spi_sdcard(uint64_t core_freq, uint64_t rtc_freq, void *img_base, uint64_t sector,
                     uint64_t len) {
    // RTC ticks making up the 1ms power-up period, rounded up by one extra tick to be sure
    const uint64_t powerup_ticks = (1000 * rtc_freq) / (1000 * 1000) + 1;
    // Describe the SD card as attached to the SPI host
    spi_sdcard_t sdcard = {
        .spi_freq = 24 * 1000 * 1000,             // 24MHz (maximum is 25MHz)
        .csid = 0,
        .csid_dummy = SPI_HOST_PARAM_NUM_C_S - 1  // Last physical CS is designated dummy
    };
    // Bring up the device; CHECK_CALL presumably returns early with the callee's error code
    CHECK_CALL(spi_sdcard_init(&sdcard, core_freq))
    // Let the power-up period elapse before issuing further commands
    clint_spin_until(powerup_ticks);
    // Write the blocks; the final argument (compute_crc = 1) requests a CRC16 per block
    int status = spi_sdcard_write_blocks(&sdcard, img_base, sector, len, 1);
    return status;
}

// Write the buffer at `img_base` to the S25FS512S SPI flash via `spi_s25fs512s_single_flash`,
// forwarding `sector` and `len` unchanged. `core_freq` bounds the SPI clock and is handed to the
// driver init; `rtc_freq` sizes the power-up wait. Returns the driver's status code (the caller
// treats any nonzero value as an error).
int flash_spi_s25fs512s(uint64_t core_freq, uint64_t rtc_freq, void *img_base, uint64_t sector,
                        uint64_t len) {
    // RTC ticks for a 350us wait (covers t_PU = 300us), rounded up by one extra tick to be sure
    const uint64_t powerup_ticks = (350 * rtc_freq) / (1000 * 1000) + 1;
    // Describe the flash as attached to the SPI host
    spi_s25fs512s_t flash = {
        .spi_freq = MIN(40 * 1000 * 1000, core_freq / 4),  // Up to quarter core freq or 40MHz
        .csid = 1
    };
    // Bring up the device; CHECK_CALL presumably returns early with the callee's error code
    CHECK_CALL(spi_s25fs512s_init(&flash, core_freq))
    // Let the power-up period elapse before issuing further commands
    clint_spin_until(powerup_ticks);
    // Write sectors
    int status = spi_s25fs512s_single_flash(&flash, img_base, sector, len);
    return status;
}

int flash_i2c_24fc1025(uint64_t core_freq, void *img_base, uint64_t sector, uint64_t len) {
// Initialize device handle
dif_i2c_t i2c;
CHECK_CALL(i2c_24fc1025_init(&i2c, core_freq))
// Write sectors
return i2c_24fc1025_write(&i2c, img_base, sector, 512 * len);
}

// Flasher entry point: reads its arguments from the Cheshire scratch registers, writes the
// requested disk segment to the chosen boot target, and reports the outcome over printf.
//
// Scratch register arguments (each 32 bits wide):
//   SCRATCH[0]: target disk (1 = SD card over SPI, 2 = S25FS512S SPI flash, 3 = 24FC1025 EEPROM)
//   SCRATCH[1]: address of the image buffer to write
//   SCRATCH[2]: first sector on the target disk
//   SCRATCH[3]: length, forwarded to the chosen flash routine
//
// Returns 0 on success, -1 for an unknown target, or the nonzero code of the flash routine.
int main(void) {
    int ret = -1;
    // Read reference frequency and compute core frequency
    uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET);
    uint64_t core_freq = clint_get_core_freq(rtc_freq, 2500);
    // Get arguments from scratch registers
    volatile uint32_t *scratch = reg32(&__base_regs, CHESHIRE_SCRATCH_0_REG_OFFSET);
    uint64_t target = scratch[0];
    void *img_base = (void *)(uintptr_t)scratch[1];
    uint64_t sector = scratch[2];
    uint64_t len = scratch[3];
    // Announce the operation. Every argument originates from a 32-bit register, so narrowing to
    // `unsigned` loses nothing (assuming a 32-bit `unsigned`) and makes each argument match its
    // conversion specifier; passing 64-bit values or pointers for %d/%x is undefined behavior.
    printf("[FLASH] Write buffer at 0x%x of length %u to target %u, sector %u ... ",
           (unsigned)(uintptr_t)img_base, (unsigned)len, (unsigned)target, (unsigned)sector);
    // Flash chosen disk
    switch (target) {
    case 1: {
        ret = flash_spi_sdcard(core_freq, rtc_freq, img_base, sector, len);
        break;
    }
    case 2: {
        ret = flash_spi_s25fs512s(core_freq, rtc_freq, img_base, sector, len);
        break;
    }
    case 3: {
        ret = flash_i2c_24fc1025(core_freq, img_base, sector, len);
        break;
    }
    default: {
        // Unknown target: nothing is written
        ret = -1;
        break;
    }
    }
    // Report the outcome; any nonzero code is an error
    if (ret) {
        printf("ERROR (%d)\r\n", ret);
    } else {
        printf("OK\r\n");
    }
    return ret;
}
8 changes: 6 additions & 2 deletions sw/include/hal/spi_sdcard.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ typedef struct {
static const uint64_t __spi_sdcard_init_clock = 200000;

// How many cycles to wait for a non-yielding R1b response
static const uint64_t __spi_sdcard_r1b_timeout = 10000;
static const uint64_t __spi_sdcard_r1b_timeout = 100000;

// How many cycles to wait for another data block
static const uint64_t __spi_sdcard_data_timeout = 10000;
static const uint64_t __spi_sdcard_data_timeout = 100000;

// Sets up only this device; other functions may be used with own setup if requirements are met.
// This assumes the power-up period of 1ms will be elapsed *before* issuing further commands.
Expand All @@ -35,3 +35,7 @@ int spi_sdcard_init(spi_sdcard_t *handle, uint64_t core_freq);
int spi_sdcard_read_checkcrc(void *priv, void *buf, uint64_t addr, uint64_t len);

int spi_sdcard_read_ignorecrc(void *priv, void *buf, uint64_t addr, uint64_t len);

// Transfer whole 512B blocks, aligned on the SD card. CRC must be computed if enabled at the time.
int spi_sdcard_write_blocks(spi_sdcard_t *handle, void *buf, uint64_t block, uint64_t len,
int compute_crc);
63 changes: 51 additions & 12 deletions sw/lib/hal/spi_sdcard.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ static inline int __spi_sdcard_csfree(spi_sdcard_t *handle) {
static inline int __spi_sdcard_cmd(spi_sdcard_t *handle, uint64_t cmd, spi_sdcard_resp_t resp,
uint8_t *rdata, int csaat) {
uint8_t rdummy;
// Start CS phase by sending 6-byte command
CHECK_CALL(__spi_sdcard_xfer_csaat(handle, NULL, &cmd, 6))
// If a command is provided, Start CS phase by sending 6-byte command or a 1-byte stop tran
if (cmd) CHECK_CALL(__spi_sdcard_xfer_csaat(handle, NULL, &cmd, cmd == 0xFD ? 1 : 6))
// If we sent CMD12, discard stuff byte after command
if ((cmd & 0xFF) == 0x4C) CHECK_CALL(__spi_sdcard_xfer_csaat(handle, &rdummy, NULL, 1))
// Poll bytes until we get a response (i.e. most significant bit low)
Expand Down Expand Up @@ -236,27 +236,27 @@ uint64_t __spi_sdcard_build_cmd(uint8_t opcode, uint32_t arg) {

// Transfer aligned 512B blocks. We write only part of the first & last block using a swap buffer.
// If the requested transfers are aligned, this buffer may be left unallocated (i.e. NULL).
int __spi_sdcard_read_blocks(spi_sdcard_t *handle, void *buf, uint64_t block, uint64_t len,
uint8_t *block_swap, uint64_t first_offs, uint64_t last_len,
int check_crc) {
uint8_t rxdummy = 0xAA;
static int __spi_sdcard_read_blocks(spi_sdcard_t *handle, void *buf, uint64_t block, uint64_t len,
uint8_t *block_swap, uint64_t first_offs, uint64_t last_len,
int check_crc) {
uint8_t rx;
// Check if no transfer
if (len == 0) return 0;
// CMD17 for single block transfer, CMD18 for multiple
uint64_t cmd = __spi_sdcard_build_cmd((len > 1) ? 0x52 : 0x51, block);
// TODO: handle CRC error for prior commands here?
CHECK_CALL(__spi_sdcard_cmd(handle, cmd, kSpiSdcardRespR1, &rxdummy, 1))
// Align target buffer with bock boundaries
CHECK_CALL(__spi_sdcard_cmd(handle, cmd, kSpiSdcardRespR1, &rx, 1))
// Align target buffer with block boundaries
buf -= first_offs;
// Read blocks
for (uint64_t b = 0; b < len; ++b) {
// Poll bytes until we get a token
int timeout = __spi_sdcard_data_timeout;
do CHECK_CALL(__spi_sdcard_xfer_csaat(handle, &rxdummy, NULL, 1))
while (--timeout && rxdummy == 0xFF);
do CHECK_CALL(__spi_sdcard_xfer_csaat(handle, &rx, NULL, 1))
while (--timeout && rx == 0xFF);
if (timeout == 0) return 0x19;
// Quit on unexpected tokens
if (rxdummy != 0xFE) return 0x20;
if (rx != 0xFE) return 0x20;
// Read block in chunks of at most FIFO size
int first_block = (b == 0 && first_offs != 0);
int last_block = (b == len - 1 && last_len != 512);
Expand Down Expand Up @@ -287,7 +287,7 @@ int __spi_sdcard_read_blocks(spi_sdcard_t *handle, void *buf, uint64_t block, ui
}

// Read any alignment abstracted through blocks with or without CRC
int __spi_sdcard_read(void *priv, void *buf, uint64_t addr, uint64_t len, int check_crc) {
static int __spi_sdcard_read(void *priv, void *buf, uint64_t addr, uint64_t len, int check_crc) {
// Allocate swap buffer
uint8_t swap[512];
// Handle block alignment
Expand All @@ -309,3 +309,42 @@ int spi_sdcard_read_checkcrc(void *priv, void *buf, uint64_t addr, uint64_t len)
int spi_sdcard_read_ignorecrc(void *priv, void *buf, uint64_t addr, uint64_t len) {
return __spi_sdcard_read(priv, buf, addr, len, 0);
}

// Write `len` whole 512B blocks from `buf` to the SD card, starting at card block `block`.
// A single block is written with CMD24; several blocks use CMD25 and are terminated with a
// stop-tran token. If `compute_crc` is nonzero, each block is followed by its CRC16; otherwise
// a zero CRC is sent. Returns 0 for an empty transfer, 0x22 if a block's data response is not
// 'data accepted', otherwise the status of the underlying transfers (CHECK_CALL presumably
// returns early with the first nonzero code).
int spi_sdcard_write_blocks(spi_sdcard_t *handle, void *buf, uint64_t block, uint64_t len,
                            int compute_crc) {
    // Check if no transfer
    if (len == 0) return 0;
    const int multi_block = (len > 1);
    // Blocks are written in chunks of at most FIFO size
    // NOTE(review): sized from the RX depth although this is a transmit path; confirm intended
    const uint64_t max_chunk = 4 * SPI_HOST_PARAM_RX_DEPTH;
    // TODO: in case of CMD25, issue ACMD23 for pre-erase here
    // CMD24 for single block transfer, CMD25 for multiple; each has its own start token
    uint8_t opcode;
    uint8_t start_token;
    if (multi_block) {
        opcode = 0x59;
        start_token = 0xFC;
    } else {
        opcode = 0x58;
        start_token = 0xFE;
    }
    uint64_t cmd = __spi_sdcard_build_cmd(opcode, block);
    uint8_t resp;
    // TODO: handle CRC error for prior commands here?
    CHECK_CALL(__spi_sdcard_cmd(handle, cmd, kSpiSdcardRespR1, &resp, 1))
    // Insert safety dummy byte
    CHECK_CALL(__spi_sdcard_xfer_csaat(handle, NULL, NULL, 1))
    // Write blocks, advancing the source by one block per iteration
    uint8_t *src = (uint8_t *)buf;
    for (uint64_t remaining = len; remaining != 0; --remaining, src += 512) {
        // Send token
        CHECK_CALL(__spi_sdcard_xfer_csaat(handle, NULL, &start_token, 1))
        // Send the block payload chunk by chunk
        uint64_t pos = 0;
        while (pos < 512) {
            uint64_t chunk_len = MIN(max_chunk, 512 - pos);
            CHECK_CALL(__spi_sdcard_xfer_csaat(handle, NULL, src + pos, chunk_len))
            pos += max_chunk;
        }
        // Write CRC16 of block (zero if not computed)
        uint16_t crc = 0;
        if (compute_crc) crc = __spi_sdcard_crc16(src, 512);
        CHECK_CALL(__spi_sdcard_xfer_csaat(handle, NULL, &crc, 2))
        // Get and check data response (should be 'data accepted')
        CHECK_CALL(__spi_sdcard_xfer_csaat(handle, &resp, NULL, 1))
        if ((resp & 0x1F) != 0x05) return 0x22;
        // Wait until no longer busy (no command is sent, only the R1b wait is performed)
        CHECK_CALL(__spi_sdcard_cmd(handle, 0, kSpiSdcardRespR1b, &resp, 1))
    }
    // A single-block transfer only needs to detach
    if (!multi_block) return __spi_sdcard_csfree(handle);
    // A multi-block transfer sends stop tran to end the transaction and detach
    return __spi_sdcard_cmd(handle, 0xFD, kSpiSdcardRespR1b, &resp, 0);
}
3 changes: 1 addition & 2 deletions sw/tests/helloworld.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
//
// Nicole Narr <[email protected]>
// Christopher Reinwardt <[email protected]>
//
// Simple payload to test bootmodes
// Paul Scheffler <[email protected]>

#include "regs/cheshire.h"
#include "dif/clint.h"
Expand Down
15 changes: 11 additions & 4 deletions target/xilinx/constraints/genesys2.xdc
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,11 @@ set_output_delay -max -clock $soc_clk [expr { $SOC_TCK * 0.35 }] [get_ports vga*
# SPIM #
########

set_input_delay -min -clock $soc_clk [expr { 0.10 * $SOC_TCK }] [get_ports {sd_d_* sd_cd_i}]
set_input_delay -max -clock $soc_clk [expr { 0.35 * $SOC_TCK }] [get_ports {sd_d_* sd_cd_i}]
set_input_delay -min -clock $soc_clk [expr { 0.10 * $SOC_TCK }] [get_ports {sd_d_* sd_cd_i spih_sd_*}]
set_input_delay -max -clock $soc_clk [expr { 0.35 * $SOC_TCK }] [get_ports {sd_d_* sd_cd_i spih_sd_*}]
# TODO: fix this by raising it back up...
set_output_delay -min -clock $soc_clk [expr { 0.020 * $SOC_TCK }] [get_ports {sd_d_* sd_*_o}]
set_output_delay -max -clock $soc_clk [expr { 0.063 * $SOC_TCK }] [get_ports {sd_d_* sd_*_o}]
set_output_delay -min -clock $soc_clk [expr { 0.020 * $SOC_TCK }] [get_ports {sd_d_* sd_*_o spih_sd_* spih_csb_o}]
set_output_delay -max -clock $soc_clk [expr { 0.063 * $SOC_TCK }] [get_ports {sd_d_* sd_*_o spih_sd_* spih_csb_o}]

#######
# I2C #
Expand Down Expand Up @@ -150,6 +150,13 @@ set_property -dict { PACKAGE_PIN T30 IOSTANDARD LVCMOS33 } [get_ports { sd_d_i
set_property -dict { PACKAGE_PIN AE24 IOSTANDARD LVCMOS33 } [get_ports { sd_reset_o }]; #IO_L12N_T1_MRCC_12 Sch=sd_reset
set_property -dict { PACKAGE_PIN R28 IOSTANDARD LVCMOS33 } [get_ports { sd_sclk_o }]; #IO_L11P_T1_SRCC_14 Sch=sd_sclk

# QSPI
set_property -dict { PACKAGE_PIN U19 IOSTANDARD LVCMOS33 } [get_ports { spih_csb_o }]; #IO_L6P_T0_FCS_B_14 Sch=qspi_csn
set_property -dict { PACKAGE_PIN P24 IOSTANDARD LVCMOS33 } [get_ports { spih_sd_io[0] }]; #IO_L1P_T0_D00_MOSI_14 Sch=qspi_d[0]
set_property -dict { PACKAGE_PIN R25 IOSTANDARD LVCMOS33 } [get_ports { spih_sd_io[1] }]; #IO_L1N_T0_D01_DIN_14 Sch=qspi_d[1]
set_property -dict { PACKAGE_PIN R20 IOSTANDARD LVCMOS33 } [get_ports { spih_sd_io[2] }]; #IO_L2P_T0_D02_14 Sch=qspi_d[2]
set_property -dict { PACKAGE_PIN R21 IOSTANDARD LVCMOS33 } [get_ports { spih_sd_io[3] }]; #IO_L2N_T0_D03_14 Sch=qspi_d[3]

# VGA Connector
set_property -dict { PACKAGE_PIN AH20 IOSTANDARD LVCMOS33 } [get_ports { vga_blue_o[0] }]; #IO_L22N_T3_12 Sch=vga_b[3]
set_property -dict { PACKAGE_PIN AG20 IOSTANDARD LVCMOS33 } [get_ports { vga_blue_o[1] }]; #IO_L22P_T3_12 Sch=vga_b[4]
Expand Down
1 change: 1 addition & 0 deletions target/xilinx/scripts/common.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
set bpart(genesys2) "digilentinc.com:genesys2:part0:1.1"
set fpart(genesys2) "xc7k325tffg900-2"
set hwdev(genesys2) "xc7k325t_0"
set cfgmp(genesys2) "s25fl256sxxxxxx0-spi-x1_x2_x4"

# vcu128 board params
set bpart(vcu128) "xilinx.com:vcu128:part0:1.0"
Expand Down
1 change: 1 addition & 0 deletions target/xilinx/scripts/util/flash.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ set hw_cfgmem [get_property PROGRAM.HW_CFGMEM $hw_device]
# Create image for and configure memory depending on board
# TODO: add bitstream flashing for genesys2
switch $board {
genesys2 -
vcu128 {
set mcs ${project_root}/image.mcs
write_cfgmem -force -format mcs -size 256 -interface SPIx4 \
Expand Down
72 changes: 71 additions & 1 deletion target/xilinx/src/cheshire_top_xilinx.sv
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,17 @@ module cheshire_top_xilinx import cheshire_pkg::*; (
output logic [4:0] vga_blue_o,
`endif

`ifdef USE_QSPI
`ifndef USE_STARTUPE3
`ifndef USE_STARTUPE2
// If a STARTUPE2 is present, this is wired there.
output wire qspi_sck_o,
`endif
output wire qspi_csb_o,
inout wire [3:0] qspi_sd_io,
`endif
`endif

`ifdef USE_DDR4
`DDR4_INTF
`endif
Expand Down Expand Up @@ -320,7 +331,66 @@ module cheshire_top_xilinx import cheshire_pkg::*; (
.USRDONETS ( 1'b1 )
);
`else
// TODO: off-chip QSPI interface
`ifdef USE_STARTUPE2
// define output SPI clock locally here in this case
wire spih_sck_o;
(*keep="TRUE"*)
STARTUPE2 #(
.PROG_USR("FALSE"),
.SIM_CCLK_FREQ(0.0)
) i_startupe2 (
.CFGCLK ( ),
.CFGMCLK ( ),
.EOS ( ),
.PREQ ( ),
.CLK ( 1'b0 ),
.GSR ( 1'b0 ),
.GTS ( 1'b0 ),
.KEYCLEARB ( 1'b0 ),
.PACK ( 1'b0 ),
.USRCCLKO ( spih_sck_o ),
.USRCCLKTS ( 1'b0 ),
.USRDONEO ( 1'b0 ),
.USRDONETS ( 1'b0 )
);
`endif
IOBUF #(
.DRIVE ( 12 ),
.IBUF_LOW_PWR ( "FALSE" ),
.IOSTANDARD ( "DEFAULT" ),
.SLEW ( "FAST" )
) i_scl_iobuf (
.O ( ),
.IO ( spih_sck_o ),
.I ( spi_sck_soc ),
.T ( ~spi_sck_en )
);

IOBUF #(
.DRIVE ( 12 ),
.IBUF_LOW_PWR ( "FALSE" ),
.IOSTANDARD ( "DEFAULT" ),
.SLEW ( "FAST" )
) i_scl_iobuf (
.O ( ),
.IO ( spih_csb_o ),
.I ( spi_cs_soc [1] ),
.T ( ~spi_cs_en [1] )
);

for (genvar i = 0; i < 4; ++i) : gen_qspi_iobufs
IOBUF #(
.DRIVE ( 12 ),
.IBUF_LOW_PWR ( "FALSE" ),
.IOSTANDARD ( "DEFAULT" ),
.SLEW ( "FAST" )
) i_scl_iobuf (
.O ( spi_sd_soc_in [i] ),
.IO ( spih_sd_io [i] ),
.I ( spi_sd_soc_out [i] ),
.T ( ~spi_sd_en [i] )
);
end
`endif
`endif

Expand Down
2 changes: 2 additions & 0 deletions target/xilinx/src/phy_definitions.svh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
`define USE_I2C
`define USE_VGA
`define USE_USB
`define USE_STARTUPE2
`define USE_QSPI
`endif

`ifdef TARGET_ZCU102
Expand Down
Loading
Loading