|
| 1 | +// ---------------------------------------------------------------------------- |
| 2 | +// Second stage boot code |
| 3 | +// Copyright (c) 2019-2021 Raspberry Pi (Trading) Ltd. |
| 4 | +// SPDX-License-Identifier: BSD-3-Clause |
| 5 | +// |
| 6 | +// Device: Adesto AT25SF128A |
| 7 | +// Based on W25Q080 code: main difference is the QE bit is being set |
| 8 | +// via command 0x31 |
| 9 | +// |
| 10 | +// Description: Configures AT25SF128A to run in Quad I/O continuous read XIP mode |
| 11 | +// |
| 12 | +// Details: * Check status register 2 to determine if QSPI mode is enabled, |
| 13 | +// and perform an SR2 programming cycle if necessary. |
| 14 | +// * Use SSI to perform a dummy 0xEB read command, with the mode |
| 15 | +// continuation bits set, so that the flash will not require |
| 16 | +// 0xEB instruction prefix on subsequent reads. |
| 17 | +// * Configure SSI to write address, mode bits, but no instruction. |
| 18 | +// SSI + flash are now jointly in a state where continuous reads |
| 19 | +// can take place. |
| 20 | +// * Jump to exit pointer passed in via lr. Bootrom passes null, |
| 21 | +// in which case this code uses a default 256 byte flash offset |
| 22 | +// |
| 23 | +// Building: * This code must be position-independent, and use stack only |
| 24 | +// * The code will be padded to a size of 256 bytes, including a |
| 25 | +// 4-byte checksum. Therefore code size cannot exceed 252 bytes. |
| 26 | +// ---------------------------------------------------------------------------- |
| 27 | + |
| 28 | +#include "pico/asm_helper.S" |
| 29 | +#include "hardware/regs/addressmap.h" |
| 30 | +#include "hardware/regs/ssi.h" |
| 31 | +#include "hardware/regs/pads_qspi.h" |
| 32 | + |
| 33 | +// ---------------------------------------------------------------------------- |
| 34 | +// Config section |
| 35 | +// ---------------------------------------------------------------------------- |
| 36 | +// It should be possible to support most flash devices by modifying this section |
| 37 | + |
| 38 | +// The serial flash interface will run at clk_sys/PICO_FLASH_SPI_CLKDIV. |
| 39 | +// This must be a positive, even integer. |
| 40 | +// The bootrom is very conservative with SPI frequency, but here we should be |
| 41 | +// as aggressive as possible. |
| 42 | + |
| 43 | +#ifndef PICO_FLASH_SPI_CLKDIV |
| 44 | +#define PICO_FLASH_SPI_CLKDIV 4 |
| 45 | +#endif |
| 46 | +#if PICO_FLASH_SPI_CLKDIV & 1 |
| 47 | +#error PICO_FLASH_SPI_CLKDIV must be even |
| 48 | +#endif |
| 49 | + |
| 50 | +// Define interface width: single/dual/quad IO |
| 51 | +#define FRAME_FORMAT SSI_CTRLR0_SPI_FRF_VALUE_QUAD |
| 52 | + |
| 53 | +// "Read data fast quad IO" instruction (same opcode as in the W25Q080 code): |
| 54 | +#define CMD_READ 0xeb |
| 55 | + |
| 56 | +// "Mode bits" are 8 special bits sent immediately after the address bits in a |
| 57 | +// "Read Data Fast Quad I/O" command sequence; with the right value, the next |
| 58 | +// read does not require the 0xeb instruction prefix. The W25Q080 comment this |
| 59 | +// was copied from says MSBs == 0xa, but 0x20 (M[5:4] == 0b10) is sent here. |
| 60 | +#define MODE_CONTINUOUS_READ 0x20 |
| 61 | + |
| 62 | +// The number of address + mode bits, divided by 4 (always 4, not function of |
| 63 | +// interface width). |
| 64 | +#define ADDR_L 8 |
| 65 | + |
| 66 | +// How many clocks of Hi-Z following the mode bits. For W25Q080, 4 dummy cycles |
| 67 | +// are required. NOTE(review): same value assumed for AT25SF128A -- confirm. |
| 68 | +#define WAIT_CYCLES 4 |
| 69 | + |
| 70 | +// If defined, we will read status register 2, compare it to SREG_DATA, and |
| 71 | +// overwrite it with our value if it doesn't match. |
| 72 | +// Unlike the W25Q080 code, which does a two-byte write to SR1 (01h cmd), we do |
| 73 | +// a one-byte write to SR2 (31h cmd). The whole SR2 byte is replaced by |
| 74 | +// SREG_DATA, so every SR2 bit other than the one in SREG_DATA is written as 0. |
| 75 | +// A better solution is to use a volatile SR write if your device supports it. |
| 76 | +#define PROGRAM_STATUS_REG |
| 77 | + |
| 78 | +#define CMD_WRITE_ENABLE 0x06 |
| 79 | +#define CMD_READ_STATUS 0x05 |
| 80 | +#define CMD_READ_STATUS2 0x35 |
| 81 | +#define CMD_WRITE_STATUS 0x01 // Not referenced in this file |
| 82 | +#define CMD_WRITE_STATUS2 0x31 |
| 83 | +#define SREG_DATA 0x02 // Enable quad-SPI mode |
| 84 | + |
| 85 | +// ---------------------------------------------------------------------------- |
| 86 | +// Start of 2nd Stage Boot Code |
| 87 | +// ---------------------------------------------------------------------------- |
| 88 | + |
| 89 | +.syntax unified |
| 90 | +.cpu cortex-m0plus |
| 91 | +.thumb |
| 92 | + |
| 93 | +.section .text |
| 94 | + |
| 95 | +// The exit point is passed in lr. If entered from bootrom, this will be the |
| 96 | +// flash address immediately following this second stage (0x10000100). |
| 97 | +// Otherwise it will be a return address -- second stage being called as a |
| 98 | +// function by user code, after copying out of XIP region. r3 holds the PADS_QSPI |
| 99 | +// base during pad setup, then the SSI base. r0...2 used as temporaries. Other GPRs not used. |
| 100 | +.global _stage2_boot |
| 101 | +.type _stage2_boot,%function |
| 102 | +.thumb_func |
| 103 | +_stage2_boot: |
| 104 | + push {lr} // Save the exit pointer: the bl calls below overwrite lr |
| 105 | + |
| 106 | + // Set pad configuration: |
| 107 | + // - SCLK 8mA drive, no slew limiting |
| 108 | + // - SDx disable input Schmitt to reduce delay |
| 109 | + |
| 110 | + ldr r3, =PADS_QSPI_BASE |
| 111 | + movs r0, #(2 << PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB | PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS) |
| 112 | + str r0, [r3, #PADS_QSPI_GPIO_QSPI_SCLK_OFFSET] |
| 113 | + ldr r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET] // Current SD0 setting is the template for all SDx pads |
| 114 | + movs r1, #PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS |
| 115 | + bics r0, r1 |
| 116 | + str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET] |
| 117 | + str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD1_OFFSET] |
| 118 | + str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD2_OFFSET] |
| 119 | + str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD3_OFFSET] |
| 120 | + |
| 121 | + ldr r3, =XIP_SSI_BASE // From here on r3 is the SSI base |
| 122 | + |
| 123 | + // Disable SSI to allow further config |
| 124 | + movs r1, #0 |
| 125 | + str r1, [r3, #SSI_SSIENR_OFFSET] |
| 126 | + |
| 127 | + // Set baud rate |
| 128 | + movs r1, #PICO_FLASH_SPI_CLKDIV |
| 129 | + str r1, [r3, #SSI_BAUDR_OFFSET] |
| 130 | + |
| 131 | + // Set 1-cycle sample delay. If PICO_FLASH_SPI_CLKDIV == 2 then this means, |
| 132 | + // if the flash launches data on SCLK posedge, we capture it at the time that |
| 133 | + // the next SCLK posedge is launched. This is shortly before that posedge |
| 134 | + // arrives at the flash, so data hold time should be ok. For |
| 135 | + // PICO_FLASH_SPI_CLKDIV > 2 this pretty much has no effect. |
| 136 | + |
| 137 | + movs r1, #1 |
| 138 | + movs r2, #SSI_RX_SAMPLE_DLY_OFFSET // == 0xf0 so need 8 bits of offset significance |
| 139 | + str r1, [r3, r2] |
| 140 | + |
| 141 | + |
| 142 | +// On QSPI parts we usually need an SR-write command to enable QSPI mode |
| 143 | +// (i.e. turn WPn and HOLDn into IO2/IO3). Here it is a one-byte SR2 write (31h). |
| 144 | +#ifdef PROGRAM_STATUS_REG |
| 145 | +program_sregs: |
| 146 | +#define CTRL0_SPI_TXRX \ |
| 147 | + (7 << SSI_CTRLR0_DFS_32_LSB) | /* 8 bits per data frame */ \ |
| 148 | + (SSI_CTRLR0_TMOD_VALUE_TX_AND_RX << SSI_CTRLR0_TMOD_LSB) |
| 149 | + |
| 150 | + ldr r1, =(CTRL0_SPI_TXRX) |
| 151 | + str r1, [r3, #SSI_CTRLR0_OFFSET] |
| 152 | + |
| 153 | + // Enable SSI and select slave 0 |
| 154 | + movs r1, #1 |
| 155 | + str r1, [r3, #SSI_SSIENR_OFFSET] |
| 156 | + |
| 157 | + // Check whether SR2 needs updating: anything other than exactly SREG_DATA is rewritten |
| 158 | + movs r0, #CMD_READ_STATUS2 |
| 159 | + bl read_flash_sreg |
| 160 | + movs r2, #SREG_DATA |
| 161 | + cmp r0, r2 |
| 162 | + beq skip_sreg_programming |
| 163 | + |
| 164 | + // Send write enable command |
| 165 | + movs r1, #CMD_WRITE_ENABLE |
| 166 | + str r1, [r3, #SSI_DR0_OFFSET] |
| 167 | + |
| 168 | + // Poll for completion and discard RX (one frame sent, one frame to drain) |
| 169 | + bl wait_ssi_ready |
| 170 | + ldr r1, [r3, #SSI_DR0_OFFSET] |
| 171 | + |
| 172 | + // Send SR2 write command followed by its single data byte (r2 was loaded with SREG_DATA above) |
| 173 | + movs r1, #CMD_WRITE_STATUS2 |
| 174 | + str r1, [r3, #SSI_DR0_OFFSET] |
| 175 | + str r2, [r3, #SSI_DR0_OFFSET] |
| 176 | + |
| 177 | + // Poll for completion and discard RX: two frames were sent, so exactly two are drained |
| 178 | + bl wait_ssi_ready |
| 179 | + ldr r1, [r3, #SSI_DR0_OFFSET] |
| 180 | + ldr r1, [r3, #SSI_DR0_OFFSET] |
| 181 | + |
| 182 | + // Poll status register 1 until bit 0 reads back clear (write complete) |
| 183 | +1: |
| 184 | + movs r0, #CMD_READ_STATUS |
| 185 | + bl read_flash_sreg |
| 186 | + movs r1, #1 |
| 187 | + tst r0, r1 |
| 188 | + bne 1b |
| 189 | + |
| 190 | +skip_sreg_programming: |
| 191 | + |
| 192 | + // Disable SSI again so that it can be reconfigured |
| 193 | + movs r1, #0 |
| 194 | + str r1, [r3, #SSI_SSIENR_OFFSET] |
| 195 | +#endif |
| 196 | + |
| 197 | +// Currently the flash expects an 8 bit serial command prefix on every |
| 198 | +// transfer, which is a waste of cycles. Perform a dummy Fast Read Quad I/O |
| 199 | +// command, with mode bits set such that the flash will not expect a serial |
| 200 | +// command prefix on *subsequent* transfers. We don't care about the results |
| 201 | +// of the read, the important part is the mode bits. |
| 202 | + |
| 203 | +dummy_read: |
| 204 | +#define CTRLR0_ENTER_XIP \ |
| 205 | + (FRAME_FORMAT /* Quad I/O mode */ \ |
| 206 | + << SSI_CTRLR0_SPI_FRF_LSB) | \ |
| 207 | + (31 << SSI_CTRLR0_DFS_32_LSB) | /* 32 data bits */ \ |
| 208 | + (SSI_CTRLR0_TMOD_VALUE_EEPROM_READ /* Send INST/ADDR, Receive Data */ \ |
| 209 | + << SSI_CTRLR0_TMOD_LSB) |
| 210 | + |
| 211 | + ldr r1, =(CTRLR0_ENTER_XIP) |
| 212 | + str r1, [r3, #SSI_CTRLR0_OFFSET] |
| 213 | + |
| 214 | + movs r1, #0x0 // NDF=0 (single 32b read) |
| 215 | + str r1, [r3, #SSI_CTRLR1_OFFSET] |
| 216 | + |
| 217 | +#define SPI_CTRLR0_ENTER_XIP \ |
| 218 | + (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | /* Address + mode bits */ \ |
| 219 | + (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \ |
| 220 | + (SSI_SPI_CTRLR0_INST_L_VALUE_8B \ |
| 221 | + << SSI_SPI_CTRLR0_INST_L_LSB) | /* 8-bit instruction */ \ |
| 222 | + (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A /* Send Command in serial mode then address in Quad I/O mode */ \ |
| 223 | + << SSI_SPI_CTRLR0_TRANS_TYPE_LSB) |
| 224 | + |
| 225 | + ldr r1, =(SPI_CTRLR0_ENTER_XIP) |
| 226 | + ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET) // Absolute address of the SPI_CTRLR0 register |
| 227 | + str r1, [r0] |
| 228 | + |
| 229 | + movs r1, #1 // Re-enable SSI |
| 230 | + str r1, [r3, #SSI_SSIENR_OFFSET] |
| 231 | + |
| 232 | + movs r1, #CMD_READ |
| 233 | + str r1, [r3, #SSI_DR0_OFFSET] // Push SPI command into TX FIFO |
| 234 | + movs r1, #MODE_CONTINUOUS_READ // 32-bit: 24 address bits (we don't care, so 0) and 8 mode bits = MODE_CONTINUOUS_READ |
| 235 | + str r1, [r3, #SSI_DR0_OFFSET] // Push Address into TX FIFO - this will trigger the transaction |
| 236 | + |
| 237 | + // Poll for completion; the read data is never fetched from the RX FIFO |
| 238 | + bl wait_ssi_ready |
| 239 | + |
| 240 | +// The flash is in a state where we can blast addresses in parallel, and get |
| 241 | +// parallel data back. Now configure the SSI to translate XIP bus accesses |
| 242 | +// into QSPI transfers of this form. |
| 243 | + |
| 244 | + movs r1, #0 |
| 245 | + str r1, [r3, #SSI_SSIENR_OFFSET] // Disable SSI (and clear FIFO) to allow further config |
| 246 | + |
| 247 | +// Note that the INST_L field is used to select what XIP data gets pushed into |
| 248 | +// the TX FIFO: |
| 249 | +// INST_L_0_BITS {ADDR[23:0],XIP_CMD[7:0]} Load "mode bits" into XIP_CMD (the INST_L_VALUE_NONE case used below) |
| 250 | +// Anything else {XIP_CMD[7:0],ADDR[23:0]} Load SPI command into XIP_CMD |
| 251 | +configure_ssi: |
| 252 | +#define SPI_CTRLR0_XIP \ |
| 253 | + (MODE_CONTINUOUS_READ /* Mode bits to keep flash in continuous read mode */ \ |
| 254 | + << SSI_SPI_CTRLR0_XIP_CMD_LSB) | \ |
| 255 | + (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | /* Total number of address + mode bits */ \ |
| 256 | + (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \ |
| 257 | + (SSI_SPI_CTRLR0_INST_L_VALUE_NONE /* Do not send a command, instead send XIP_CMD as mode bits after address */ \ |
| 258 | + << SSI_SPI_CTRLR0_INST_L_LSB) | \ |
| 259 | + (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A /* Send Address in Quad I/O mode (and Command but that is zero bits long) */ \ |
| 260 | + << SSI_SPI_CTRLR0_TRANS_TYPE_LSB) |
| 261 | + |
| 262 | + ldr r1, =(SPI_CTRLR0_XIP) |
| 263 | + ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET) // Absolute address of the SPI_CTRLR0 register |
| 264 | + str r1, [r0] |
| 265 | + |
| 266 | + movs r1, #1 |
| 267 | + str r1, [r3, #SSI_SSIENR_OFFSET] // Re-enable SSI |
| 268 | + |
| 269 | +// Bus accesses to the XIP window will now be transparently serviced by the |
| 270 | +// external flash on cache miss. We are ready to run code from flash. |
| 271 | + |
| 272 | +// Pull in standard exit routine (included inline, so execution falls through into it) |
| 273 | +#include "boot2_helpers/exit_from_boot2.S" |
| 274 | + |
| 275 | +// Common functions; presumably these define the wait_ssi_ready / read_flash_sreg targets called via bl above |
| 276 | +#include "boot2_helpers/wait_ssi_ready.S" |
| 277 | +#ifdef PROGRAM_STATUS_REG |
| 278 | +#include "boot2_helpers/read_flash_sreg.S" |
| 279 | +#endif |
| 280 | +// Literal pool: .ltorg emits the constants used by the "ldr rX, =..." loads above |
| 281 | +.global literals |
| 282 | +literals: |
| 283 | +.ltorg |
| 284 | + |
| 285 | +.end |
0 commit comments