Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add XIP cache maintenance API (fixes #2005) #2013

Merged
merged 4 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
* \cond hardware_uart \defgroup hardware_uart hardware_uart \endcond
* \cond hardware_vreg \defgroup hardware_vreg hardware_vreg \endcond
* \cond hardware_watchdog \defgroup hardware_watchdog hardware_watchdog \endcond
* \cond hardware_xip_cache \defgroup hardware_xip_cache hardware_xip_cache \endcond
* \cond hardware_xosc \defgroup hardware_xosc hardware_xosc \endcond
* \cond hardware_powman hardware_powman
* \cond hardware_hazard3 hardware_hazard3
Expand Down
1 change: 1 addition & 0 deletions src/cmake/rp2_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ pico_add_subdirectory(rp2_common/hardware_timer)
pico_add_subdirectory(rp2_common/hardware_uart)
pico_add_subdirectory(rp2_common/hardware_vreg)
pico_add_subdirectory(rp2_common/hardware_watchdog)
pico_add_subdirectory(rp2_common/hardware_xip_cache)
pico_add_subdirectory(rp2_common/hardware_xosc)

if (PICO_RP2350 OR PICO_COMBINED_DOCS)
Expand Down
1 change: 1 addition & 0 deletions src/rp2_common/hardware_flash/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pico_simple_hardware_target(flash)
pico_mirrored_target_link_libraries(hardware_flash INTERFACE pico_bootrom)
pico_mirrored_target_link_libraries(hardware_flash INTERFACE hardware_xip_cache)
6 changes: 6 additions & 0 deletions src/rp2_common/hardware_flash/flash.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#else
#include "hardware/structs/qmi.h"
#endif
#include "hardware/xip_cache.h"

#define FLASH_BLOCK_ERASE_CMD 0xd8

Expand Down Expand Up @@ -84,6 +85,8 @@ void __no_inline_not_in_flash_func(flash_range_erase)(uint32_t flash_offs, size_
rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE);
assert(connect_internal_flash_func && flash_exit_xip_func && flash_range_erase_func && flash_flush_cache_func);
flash_init_boot2_copyout();
// Commit any pending writes to external RAM, to avoid losing them in the subsequent flush:
xip_cache_clean_all();

// No flash accesses after this point
__compiler_memory_barrier();
Expand Down Expand Up @@ -112,6 +115,7 @@ void __no_inline_not_in_flash_func(flash_range_program)(uint32_t flash_offs, con
rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE);
assert(connect_internal_flash_func && flash_exit_xip_func && flash_range_program_func && flash_flush_cache_func);
flash_init_boot2_copyout();
xip_cache_clean_all();

__compiler_memory_barrier();

Expand Down Expand Up @@ -152,6 +156,8 @@ void __no_inline_not_in_flash_func(flash_do_cmd)(const uint8_t *txbuf, uint8_t *
rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE);
assert(connect_internal_flash_func && flash_exit_xip_func && flash_flush_cache_func);
flash_init_boot2_copyout();
xip_cache_clean_all();

__compiler_memory_barrier();
connect_internal_flash_func();
flash_exit_xip_func();
Expand Down
3 changes: 3 additions & 0 deletions src/rp2_common/hardware_xip_cache/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pico_simple_hardware_target(xip_cache)

pico_mirrored_target_link_libraries(hardware_xip_cache INTERFACE hardware_sync)
210 changes: 210 additions & 0 deletions src/rp2_common/hardware_xip_cache/include/hardware/xip_cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
/*
* Copyright (c) 2024 Raspberry Pi Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/

#ifndef _HARDWARE_XIP_CACHE_H
#define _HARDWARE_XIP_CACHE_H

#include "pico.h"
#include "hardware/regs/addressmap.h"

/** \file xip_cache.h
* \defgroup hardware_xip_cache hardware_xip_cache
*
* \brief Low-level cache maintenance operations for the XIP cache
*
* These functions apply some maintenance operation to either the entire cache contents, or a range
* of offsets within the downstream address space. Offsets start from 0 (indicating the first byte
* of flash), so pointers should have XIP_BASE subtracted before passing into one of these
* functions.
*
* \if rp2040-specific
* The only valid cache maintenance operation on RP2040 is "invalidate", which tells the cache to
* forget everything it knows about some address. This is necessary after a programming operation,
* because the cache does not automatically know about any serial programming operations performed
* on the external flash device, and could return stale data.
* \endif
*
* \if rp2350-specific
* On RP2350, the three types of operation are:
*
* * Invalidate: tell the cache to forget everything it knows about some address. The next access to
* that address will fetch from downstream memory.
*
* * Clean: if the addressed cache line contains data not yet written to external memory, then write
* that data out now, and mark the line as "clean" (i.e. not containing uncommitted write data)
*
* * Pin: mark an address as always being resident in the cache. This persists until the line is
* invalidated, and can be used to allocate part of the cache for cache-as-SRAM use.
*
* When using both external flash and external RAM (e.g. PSRAM), a simple way to maintain coherence
* over flash programming operations is to:
*
* 1. Clean the entire cache (e.g. using xip_cache_clean_all())
*
* 2. Erase + program the flash using serial SPI commands
*
* 3. Invalidate ("flush") the entire cache (e.g. using xip_cache_invalidate_all())
*
* The invalidate ensures the programming is visible to subsequent reads. The clean ensures that the
* invalidate does not discard any cached PSRAM write data.
*
* \endif
*
*/

// PICO_CONFIG: PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE, Enable/disable assertions in the hardware_xip_cache module, type=bool, default=0, group=hardware_xip_cache
#ifndef PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE
#define PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE 0
#endif

// Size of one cache line in bytes. This is the alignment/size granule required by the range-based
// maintenance functions (except xip_cache_invalidate_range() on RP2040, which works in 4-byte units).
#define XIP_CACHE_LINE_SIZE _u(8)

// Size of the XIP cache in bytes (16 kiB)
#define XIP_CACHE_SIZE (_u(16) * _u(1024))

// Size in bytes of the offset space accepted by the range-based functions: offsets passed to them
// are checked against this bound. 16 MiB on RP2040; the whole XIP_BASE..XIP_END window elsewhere.
#if PICO_RP2040
#define XIP_CACHE_ADDRESS_SPACE_SIZE (_u(16) * _u(1024) * _u(1024))
#else
#define XIP_CACHE_ADDRESS_SPACE_SIZE (XIP_END - XIP_BASE)
#endif

// A read-only cache never requires cleaning (you can still call the functions, they are just no-ops)
#if PICO_RP2040
#define XIP_CACHE_IS_READ_ONLY 1
#else
#define XIP_CACHE_IS_READ_ONLY 0
#endif

#ifndef __ASSEMBLER__

#ifdef __cplusplus
extern "C" {
#endif

/*! \brief Invalidate the cache for the entire XIP address space
* \ingroup hardware_xip_cache
*
* Invalidation ensures that subsequent reads will fetch data from the downstream memory, rather
* than using (potentially stale) cached data.
*
* This function is faster than calling xip_cache_invalidate_range() for the entire address space,
* because it iterates over cachelines instead of addresses.
*
* @note Any pending write data held in the cache is lost: you can force the cache to commit these
* writes first, by calling xip_cache_clean_all()
*
* @note Unlike flash_flush_cache(), this function affects *only* the cache line state.
* flash_flush_cache() calls a ROM API which can have other effects on some platforms, like
* cleaning up the bootrom's QSPI GPIO setup on RP2040. Prefer this function for general cache
* maintenance use, and prefer flash_flush_cache in sequences of ROM flash API calls.
*/
void xip_cache_invalidate_all(void);

/*! \brief Invalidate a range of offsets within the XIP address space
* \ingroup hardware_xip_cache
*
* \param start_offset The first offset to be invalidated. Offset 0 means the first byte of XIP
* memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function.
* Must be 4-byte-aligned on RP2040. Must be aligned to the start of a cache line
* (XIP_CACHE_LINE_SIZE) on other platforms.
*
* \param size_bytes The number of bytes to invalidate. Must be a multiple of 4 bytes on RP2040.
* Must be a multiple of XIP_CACHE_LINE_SIZE on other platforms.
*
* Invalidation ensures that subsequent reads will fetch data from the downstream memory, rather
* than using (potentially stale) cached data.
*
* @note Any pending write data held in the cache is lost: you can force the cache to commit these
* writes first, by calling xip_cache_clean_range() with the same parameters. Generally this is
* not necessary because invalidation is used with flash (write-behind via programming), and
* cleaning is used with PSRAM (writing through the cache).
*
*/
void xip_cache_invalidate_range(uintptr_t start_offset, uintptr_t size_bytes);

#if !XIP_CACHE_IS_READ_ONLY

/*! \brief Clean the cache for the entire XIP address space
* \ingroup hardware_xip_cache
*
* This causes the cache to write out all pending write data to the downstream memory. For example,
* when suspending the system with state retained in external PSRAM, this ensures all data has made
* it out to external PSRAM before powering down.
*
* This function is faster than calling xip_cache_clean_range() for the entire address space,
* because it iterates over cachelines instead of addresses.
*
* \if rp2040-specific
* On RP2040 this is a no-op, as the XIP cache is read-only. This is indicated by the
* XIP_CACHE_IS_READ_ONLY macro.
* \endif
*
* \if rp2350-specific
* On RP2350, due to the workaround applied for RP2350-E11, this function also effectively
* invalidates all cache lines after cleaning them. The next access to each line will miss. Avoid
* this by calling xip_cache_clean_range() which does not suffer this issue.
* \endif
*
*/
void xip_cache_clean_all(void);

/*! \brief Clean a range of offsets within the XIP address space
* \ingroup hardware_xip_cache
*
* This causes the cache to write out pending write data at these offsets to the downstream memory.
*
* \if rp2040-specific
* On RP2040 this is a no-op, as the XIP cache is read-only. This is indicated by the
* XIP_CACHE_IS_READ_ONLY macro.
* \endif
*
* \param start_offset The first offset to be cleaned. Offset 0 means the first byte of XIP
* memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function.
* Must be aligned to the start of a cache line (XIP_CACHE_LINE_SIZE).
*
* \param size_bytes The number of bytes to clean. Must be a multiple of XIP_CACHE_LINE_SIZE.
*/
void xip_cache_clean_range(uintptr_t start_offset, uintptr_t size_bytes);

#else
// The cache is read-only in this configuration, so cleaning has nothing to do. Empty inline stubs
// keep callers compiling unchanged without generating a call to an empty function.
static inline void xip_cache_clean_all(void) {
}

static inline void xip_cache_clean_range(uintptr_t start_offset, uintptr_t size_bytes) {
    // Both parameters are intentionally unused
    (void)size_bytes;
    (void)start_offset;
}
#endif

#if !PICO_RP2040

/*! \brief Pin a range of offsets within the XIP address space
* \ingroup hardware_xip_cache
*
* Pinning a line at an address allocates the line exclusively for use at that address. This means
* that all subsequent accesses to that address will hit the cache, and will not go to downstream
* memory. This persists until one of two things happens:
*
* * The line is invalidated, e.g. via xip_cache_invalidate_all()
*
* * The same line is pinned at a different address (note lines are selected by address modulo
* XIP_CACHE_SIZE)
*
* \param start_offset The first offset to be pinned. Offset 0 means the first byte of XIP
* memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function.
* Must be aligned to the start of a cache line (XIP_CACHE_LINE_SIZE).
*
* \param size_bytes The number of bytes to pin. Must be a multiple of XIP_CACHE_LINE_SIZE.
*
*/
void xip_cache_pin_range(uintptr_t start_offset, uintptr_t size_bytes);
#endif

#ifdef __cplusplus
}
#endif

#endif // !__ASSEMBLER__

#endif // !_HARDWARE_XIP_CACHE_H
109 changes: 109 additions & 0 deletions src/rp2_common/hardware_xip_cache/xip_cache.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#include "hardware/xip_cache.h"
#include "hardware/structs/xip.h"
// For barrier macros:
#include "hardware/sync.h"

// Implementation-private constants (exporting these would create a compatibility headache as they
// don't exist on all platforms; all of these operations are exposed through APIs anyways)

#if !PICO_RP2040
// Maintenance operation codes. xip_cache_maintain() adds the code to the (cache-line-aligned)
// address it writes inside the XIP_MAINTENANCE_BASE window, and validates it against
// XIP_CACHE_OP_MAX.
typedef enum {
// Used by xip_cache_invalidate_all()
XIP_CACHE_INVALIDATE_BY_SET_WAY = 0,
// Used by xip_cache_clean_all()
XIP_CACHE_CLEAN_BY_SET_WAY = 1,
// Used by xip_cache_invalidate_range()
XIP_CACHE_INVALIDATE_BY_ADDRESS = 2,
// Used by xip_cache_clean_range()
XIP_CACHE_CLEAN_BY_ADDRESS = 3,
// Used by xip_cache_pin_range()
XIP_CACHE_PIN_AT_ADDRESS = 7,
// Highest valid code; used for parameter validation
XIP_CACHE_OP_MAX = 7
} cache_op_t;
#endif

// Used to ensure subsequent accesses observe the new state of the maintained cache lines.
// Issued once, after the last maintenance write of an operation. __dsb() and __isb() are the
// barrier macros pulled in from hardware/sync.h; the do/while(0) wrapper makes the pair behave as
// a single statement at the call site.
#define __post_maintenance_barrier() do {__dsb(); __isb();} while (0)

// All functions in this file are marked non-flash, even though they themselves may be executed
// safely from flash, because they are likely to be called during a flash programming operation
// (which makes flash execution momentarily unsafe)

// Parameter check shared by the range-based operations: asserts that the range
// [start_offset, start_offset + size_bytes) lies within the XIP offset space.
//
// The API takes offsets rather than addresses, for consistency with the flash API, so the valid
// range begins at 0 and extends to XIP_CACHE_ADDRESS_SPACE_SIZE.
__always_inline static void check_xip_offset_range(uintptr_t start_offset, uintptr_t size_bytes) {
    const uintptr_t range_end = start_offset + size_bytes;
    // Avoid unused-variable diagnostics in case the checks below compile to nothing
    (void)start_offset;
    (void)size_bytes;
    (void)range_end;
    valid_params_if(HARDWARE_XIP_CACHE, start_offset <= XIP_CACHE_ADDRESS_SPACE_SIZE);
    valid_params_if(HARDWARE_XIP_CACHE, range_end <= XIP_CACHE_ADDRESS_SPACE_SIZE);
    // An end that lies below the start means the unsigned addition wrapped around
    valid_params_if(HARDWARE_XIP_CACHE, range_end >= start_offset);
}

#if !PICO_RP2040
// Generic code for RP2350-style caches: apply a maintenance operation to a range of offsets
static void __no_inline_not_in_flash_func(xip_cache_maintain)(uintptr_t start_offset, uintptr_t size_bytes, cache_op_t op) {
check_xip_offset_range(start_offset, size_bytes);
valid_params_if(HARDWARE_XIP_CACHE, (start_offset & (XIP_CACHE_LINE_SIZE - 1u)) == 0);
valid_params_if(HARDWARE_XIP_CACHE, (size_bytes & (XIP_CACHE_LINE_SIZE - 1u)) == 0);
valid_params_if(HARDWARE_XIP_CACHE, (uint)op <= (uint)XIP_CACHE_OP_MAX);

uintptr_t end = start_offset + size_bytes;
for (uintptr_t offset = start_offset; offset < end; offset += XIP_CACHE_LINE_SIZE) {
*(io_wo_8 *) (XIP_MAINTENANCE_BASE + offset + (uintptr_t)op) = 0;
}
__post_maintenance_barrier();
}
#endif

// Invalidate the whole cache. The public contract is documented in hardware/xip_cache.h.
void __no_inline_not_in_flash_func(xip_cache_invalidate_all)(void) {
#if !PICO_RP2040
    // Issue by-set/way invalidates over one XIP_CACHE_SIZE-long window, taken from the top of the
    // XIP offset space
    const uintptr_t window_start = XIP_CACHE_ADDRESS_SPACE_SIZE - XIP_CACHE_SIZE;
    xip_cache_maintain(window_start, XIP_CACHE_SIZE, XIP_CACHE_INVALIDATE_BY_SET_WAY);
#else
    // RP2040: request a flush through the XIP control registers
    xip_ctrl_hw->flush = 1;
    // Reading the register back waits for the flush to complete
    (void)xip_ctrl_hw->flush;
    __post_maintenance_barrier();
#endif
}

// Invalidate the given range of XIP offsets. The public contract is documented in
// hardware/xip_cache.h.
void __no_inline_not_in_flash_func(xip_cache_invalidate_range)(uintptr_t start_offset, uintptr_t size_bytes) {
#if !PICO_RP2040
    xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_INVALIDATE_BY_ADDRESS);
#else
    // Accesses are at intervals of one half cache line (so 4 bytes) because RP2040's cache has two
    // valid flags per cache line, and both need to be cleared.
    check_xip_offset_range(start_offset, size_bytes);
    valid_params_if(HARDWARE_XIP_CACHE, (start_offset & 3u) == 0);
    valid_params_if(HARDWARE_XIP_CACHE, (size_bytes & 3u) == 0);

    // On RP2040 a sector (half-line) is invalidated by writing to its normal cached+allocating
    // address
    const uintptr_t limit = start_offset + size_bytes;
    uintptr_t sector_offset = start_offset;
    while (sector_offset < limit) {
        *(io_wo_32 *)(sector_offset + XIP_BASE) = 0;
        sector_offset += 4u;
    }
    __post_maintenance_barrier();
#endif
}

#if !XIP_CACHE_IS_READ_ONLY
// Clean the whole cache. The public contract is documented in hardware/xip_cache.h.
void __no_inline_not_in_flash_func(xip_cache_clean_all)(void) {
    // RP2350-E11 workaround: the window of offsets used here lies outside the downstream QMI
    // address range. Because of the erroneous tag update this amounts to a clean+invalidate
    // (except that it is a no-op on pinned lines), so the next access to a cleaned address will
    // miss.
    const uintptr_t window_start = XIP_END - XIP_BASE - XIP_CACHE_SIZE;
    xip_cache_maintain(window_start, XIP_CACHE_SIZE, XIP_CACHE_CLEAN_BY_SET_WAY);
}
#endif

#if !XIP_CACHE_IS_READ_ONLY
// Clean the given range of XIP offsets by applying the by-address clean operation to each cache
// line in the range. The public contract is documented in hardware/xip_cache.h.
void __no_inline_not_in_flash_func(xip_cache_clean_range)(uintptr_t start_offset, uintptr_t size_bytes) {
// Range and cache-line alignment checks are performed inside xip_cache_maintain()
xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_CLEAN_BY_ADDRESS);
}
#endif

#if !PICO_RP2040
// Pin the given range of XIP offsets by applying the pin-at-address operation to each cache line
// in the range. The public contract is documented in hardware/xip_cache.h.
void __no_inline_not_in_flash_func(xip_cache_pin_range)(uintptr_t start_offset, uintptr_t size_bytes) {
// Parameter check: the pinned range may not be larger than the cache itself
valid_params_if(HARDWARE_XIP_CACHE, size_bytes <= XIP_CACHE_SIZE);
// Range and cache-line alignment checks are performed inside xip_cache_maintain()
xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_PIN_AT_ADDRESS);
}
#endif

1 change: 1 addition & 0 deletions test/kitchen_sink/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ set(KITCHEN_SINK_LIBS
hardware_uart
hardware_vreg
hardware_watchdog
hardware_xip_cache
hardware_xosc
pico_aon_timer
pico_bit_ops
Expand Down
Loading