Skip to content

Commit 46f34f6

Browse files
committed
Fixes for Xilinx ZynqMP ZCU102 SD card boot with Linux
Five targeted changes required to cleanly boot a PetaLinux 2025.2 fitImage from SD card on the Xilinx ZCU102 (Zynq UltraScale+ MPSoC):

1. hal/zynq: implement hal_dts_fixup() to patch /chosen/bootargs in the DTB at runtime. Previously a TODO stub; now mirrors the Versal implementation. Uses LINUX_BOOTARGS / LINUX_BOOTARGS_ROOT to override the PetaLinux-baked root= value, allowing wolfBoot's A/B partition layout (where rootfs is on mmcblk0p4, not p2).

2. hal/zynq: add hal_get_timer_us() using the ARMv8 generic timer (CNTPCT_EL0 / CNTFRQ_EL0). Required so sdhci.c udelay() works. Uses a __uint128_t intermediate to avoid overflow of (count * 1e6) at long uptimes.

3. src/sdhci: fix CMD0 cold-boot timeout on Arasan SDHCI v3.0. Add 1ms udelay after sdhci_platform_init(), 1ms after sdhci_set_clock(400KHz), 1ms after successful power-on, and a 10-retry loop with 10ms udelay between retries around the initial CMD0. Gate the CMD0 loop on sdhci_set_power() success so power-set failures are not silently masked. Without these delays, CMD0 races the SD card power-up when DEBUG_SDHCI printf delays are not present.

4. src/boot_aarch64: clean D-cache to PoC and disable MMU + I/D-cache at EL2 before jumping to Linux. ARM64 Linux boot protocol requires MMU off and image cleaned to PoC; otherwise arm64_panic_block_init() panics with 'Non-EFI boot detected with MMU and caches enabled'. Adds el2_cleanup_and_jump_to_linux() asm helper (dc cisw loop + ic iallu + SCTLR_EL2.{M,C,I} clear + br) called from do_boot() when current_el() == 2. Gated on defined(MMU) && defined(LINUX_BOOTARGS_ROOT) so bare-metal / RTOS EL2 payloads fall through to the legacy direct br x4 path.

5. src/boot_aarch64: include the platform HAL header (hal/zynq.h, hal/nxp_ls1028a.h) alongside the existing hal/versal.h include, so EL2_HYPERVISOR is visible to do_boot(). Previously BOOT_EL1=1 was silently inert on zynq/ls1028a because the symbol was only defined in a header not pulled into boot_aarch64.c.
Also:

- config/examples/zynqmp_sdcard.config: default BOOT_EL1 off (EL2 handoff for Linux), DEBUG off, and root=/dev/mmcblk0p4 with a comment on mmcblk0 vs mmcblk1 enumeration depending on which SDHCI controllers are enabled in the XSA / device tree.

- hal/versal.c: align default LINUX_BOOTARGS_ROOT (mmcblk0p2 -> mmcblk0p4) with the new wolfBoot A/B partition convention.
1 parent 58c2e04 commit 46f34f6

File tree

6 files changed

+263
-13
lines changed

6 files changed

+263
-13
lines changed

config/examples/zynqmp_sdcard.config

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ HASH?=SHA3
1818
IMAGE_HEADER_SIZE?=1024
1919

2020
# Debug options
21-
DEBUG?=1
21+
DEBUG?=0
2222
DEBUG_SYMBOLS=1
2323
DEBUG_UART=1
2424
CFLAGS_EXTRA+=-DDEBUG_ZYNQ=1
@@ -39,8 +39,12 @@ NO_XIP=1
3939
# ELF loading support
4040
ELF?=1
4141

42-
# Boot Exception Level: transition from EL2 -> EL1 before jumping to app
43-
BOOT_EL1?=1
42+
# Boot Exception Level: leave wolfBoot at EL2 for handoff to Linux (matches
43+
# the standard PetaLinux U-Boot flow and preserves KVM/hypervisor use of
44+
# EL2). The EL2 Linux-cleanup path in do_boot() will clean dcache/disable
45+
# MMU before jumping to the kernel. To drop to EL1 via ERET instead, set
46+
# BOOT_EL1?=1 (requires EL2_HYPERVISOR=1, which is the hal/zynq.h default).
47+
#BOOT_EL1?=1
4448

4549
# General options
4650
VTOR?=1
@@ -78,8 +82,13 @@ CFLAGS_EXTRA+=-DBOOT_PART_B=2
7882
# Disk read chunk size (512KB)
7983
CFLAGS_EXTRA+=-DDISK_BLOCK_SIZE=0x80000
8084

81-
# Linux rootfs is on partition 4 (SD1 = mmcblk1)
82-
CFLAGS_EXTRA+=-DLINUX_BOOTARGS_ROOT=\"/dev/mmcblk1p4\"
85+
# Linux rootfs is on partition 4. Device naming depends on whether both
86+
# ZynqMP SDHCI controllers are enabled in the XSA / device tree:
87+
# * both sdhci0 + sdhci1 enabled -> SD1 = /dev/mmcblk1
88+
# * only sdhci1 enabled (ZCU102 default -> only external SD populated)
89+
# -> SD1 = /dev/mmcblk0
90+
# Check `ls /sys/class/mmc_host/` on your running target to confirm.
91+
CFLAGS_EXTRA+=-DLINUX_BOOTARGS_ROOT=\"/dev/mmcblk0p4\"
8392

8493
# ============================================================================
8594
# Boot Memory Layout

hal/versal.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
/* Linux kernel command line arguments */
6767
#ifndef LINUX_BOOTARGS
6868
#ifndef LINUX_BOOTARGS_ROOT
69-
#define LINUX_BOOTARGS_ROOT "/dev/mmcblk0p2"
69+
#define LINUX_BOOTARGS_ROOT "/dev/mmcblk0p4"
7070
#endif
7171

7272
#define LINUX_BOOTARGS \

hal/zynq.c

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,18 @@
5757
/* QSPI bare-metal */
5858
#endif
5959

60+
/* DTB fixup for kernel command line. Override LINUX_BOOTARGS or
61+
* LINUX_BOOTARGS_ROOT in your config to customize. */
62+
#ifdef __WOLFBOOT
63+
#ifndef LINUX_BOOTARGS
64+
#ifndef LINUX_BOOTARGS_ROOT
65+
#define LINUX_BOOTARGS_ROOT "/dev/mmcblk0p4"
66+
#endif
67+
#define LINUX_BOOTARGS \
68+
"earlycon console=ttyPS0,115200 root=" LINUX_BOOTARGS_ROOT " rootwait"
69+
#endif
70+
#endif /* __WOLFBOOT */
71+
6072
/* QSPI Slave Device Information */
6173
typedef struct QspiDev {
6274
uint32_t mode; /* GQSPI_GEN_FIFO_MODE_SPI, GQSPI_GEN_FIFO_MODE_DSPI or GQSPI_GEN_FIFO_MODE_QSPI */
@@ -1796,6 +1808,19 @@ void RAMFUNCTION ext_flash_unlock(void)
17961808
}
17971809

17981810
#ifdef MMU
1811+
/* Get current time in microseconds using ARMv8 generic timer */
1812+
uint64_t hal_get_timer_us(void)
1813+
{
1814+
uint64_t count, freq;
1815+
__asm__ volatile("mrs %0, CNTPCT_EL0" : "=r"(count));
1816+
__asm__ volatile("mrs %0, CNTFRQ_EL0" : "=r"(freq));
1817+
if (freq == 0)
1818+
return 0;
1819+
/* Use __uint128_t to avoid overflow of (count * 1e6) at long uptimes
1820+
* (would overflow uint64_t after ~51h at 100MHz). */
1821+
return (uint64_t)(((__uint128_t)count * 1000000ULL) / freq);
1822+
}
1823+
17991824
void* hal_get_dts_address(void)
18001825
{
18011826
#ifdef WOLFBOOT_DTS_BOOT_ADDRESS
@@ -1809,8 +1834,46 @@ void* hal_get_dts_address(void)
18091834

18101835
int hal_dts_fixup(void* dts_addr)
18111836
{
1812-
/* place FDT fixup specific to ZynqMP here */
1813-
//fdt_set_boot_cpuid_phys(buf, fdt_boot_cpuid_phys(fdt));
1837+
int off, ret;
1838+
struct fdt_header *fdt = (struct fdt_header *)dts_addr;
1839+
1840+
/* Verify FDT header */
1841+
ret = fdt_check_header(dts_addr);
1842+
if (ret != 0) {
1843+
wolfBoot_printf("FDT: Invalid header! %d\n", ret);
1844+
return ret;
1845+
}
1846+
1847+
wolfBoot_printf("FDT: Version %d, Size %d\n",
1848+
fdt_version(fdt), fdt_totalsize(fdt));
1849+
1850+
/* NOTE: We intentionally do not call fdt_set_totalsize() to "grow" the
1851+
* DTB here - that only edits the header field, not the backing buffer.
1852+
* Relies on the load-address layout leaving physical headroom after
1853+
* the DTB (WOLFBOOT_LOAD_DTS_ADDRESS with the kernel loaded much
1854+
* higher). If a future config shrinks that gap, mutation calls below
1855+
* will return -FDT_ERR_NOSPACE and we bail. */
1856+
1857+
/* Find /chosen node */
1858+
off = fdt_find_node_offset(fdt, -1, "chosen");
1859+
if (off < 0) {
1860+
/* Create /chosen node if it doesn't exist */
1861+
off = fdt_add_subnode(fdt, 0, "chosen");
1862+
}
1863+
if (off < 0) {
1864+
wolfBoot_printf("FDT: Failed to find/create chosen node (%d)\n", off);
1865+
return off;
1866+
}
1867+
1868+
/* Set bootargs property - overrides PetaLinux default root= with
1869+
* the wolfBoot partition layout. */
1870+
wolfBoot_printf("FDT: Setting bootargs: %s\n", LINUX_BOOTARGS);
1871+
ret = fdt_fixup_str(fdt, off, "chosen", "bootargs", LINUX_BOOTARGS);
1872+
if (ret < 0) {
1873+
wolfBoot_printf("FDT: Failed to set bootargs (%d)\n", ret);
1874+
return ret;
1875+
}
1876+
18141877
return 0;
18151878
}
18161879
#endif

src/boot_aarch64.c

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,16 @@
2626
#include "printf.h"
2727
#include "wolfboot/wolfboot.h"
2828

29-
/* Include platform-specific header for EL configuration defines */
30-
#ifdef TARGET_versal
29+
/* Include platform-specific header for EL configuration defines
30+
* (EL2_HYPERVISOR, etc.). Must be visible here so the BOOT_EL1 /
31+
* EL2_HYPERVISOR guards around the EL2->EL1 ERET transition below
32+
* compile in for the active target. */
33+
#if defined(TARGET_versal)
3134
#include "hal/versal.h"
35+
#elif defined(TARGET_zynq)
36+
#include "hal/zynq.h"
37+
#elif defined(TARGET_ls1028a)
38+
#include "hal/nxp_ls1028a.h"
3239
#endif
3340

3441
/* Linker exported variables */
@@ -43,6 +50,15 @@ extern unsigned int _end_data;
4350
extern void main(void);
4451
extern void gicv2_init_secure(void);
4552

53+
/* Asm helper in boot_aarch64_start.S: cleans the entire D-cache to PoC,
54+
* disables MMU + I-cache + D-cache via SCTLR_EL2, and branches to a Linux
55+
* kernel entry point with the DTB pointer in x0. Required because wolfBoot's
56+
* EL2 startup enables MMU/caches for its own use, and the ARM64 Linux boot
57+
 * protocol requires them OFF at entry. Defined in boot_aarch64_start.S after
58+
 * the BOOT_EL1 && EL2_HYPERVISOR #endif, so it is not gated on EL2_HYPERVISOR; only the call in do_boot() is conditional. */
59+
extern void el2_cleanup_and_jump_to_linux(uintptr_t entry, uintptr_t dtb)
60+
__attribute__((noreturn));
61+
4662
/* SKIP_GIC_INIT - Skip GIC initialization before booting app
4763
* This is needed for:
4864
* - Versal: Uses GICv3, not GICv2. BL31 handles GIC setup.
@@ -163,7 +179,25 @@ void RAMFUNCTION do_boot(const uint32_t *app_offset)
163179
el2_to_el1_boot((uintptr_t)app_offset, dts);
164180
}
165181
#else
166-
/* Stay at current EL (EL2 or EL3) and jump directly to application */
182+
/* Stay at current EL (EL2 or EL3) and jump directly to application.
183+
*
184+
* When booting a Linux kernel at EL2 we MUST clean dcache to PoC and
185+
* disable MMU/I+D-cache before entry, or Linux's
186+
* arm64_panic_block_init() panics with "Non-EFI boot detected with MMU
187+
* and caches enabled". This Linux-specific path is gated on
188+
* LINUX_BOOTARGS_ROOT being defined (supplied via CFLAGS by configs
189+
* that boot Linux, e.g. zynqmp_sdcard.config). Bare-metal / RTOS EL2
190+
* payloads fall through to the legacy direct br x4 below. */
191+
#if defined(MMU) && defined(LINUX_BOOTARGS_ROOT)
192+
if (current_el() == 2) {
193+
uintptr_t dts = (uintptr_t)dts_offset;
194+
wolfBoot_printf("do_boot: cleaning caches, disabling MMU, jumping to Linux\n");
195+
el2_cleanup_and_jump_to_linux((uintptr_t)app_offset, dts);
196+
/* unreachable */
197+
}
198+
#endif
199+
200+
/* Non-Linux EL2 and EL3 path: legacy direct br x4 */
167201

168202
/* Set application address via x4 */
169203
asm volatile("mov x4, %0" : : "r"(app_offset));

src/boot_aarch64_start.S

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1334,4 +1334,125 @@ el2_to_el1_boot:
13341334
b .
13351335
#endif /* BOOT_EL1 && EL2_HYPERVISOR */
13361336

1337+
1338+
/*
1339+
* Clean entire D-cache to the Point of Coherency (PoC), invalidate the
1340+
* I-cache to the Point of Unification (PoU), disable MMU + I/D-cache at
1341+
* EL2, then branch to a Linux kernel entry point with the DTB pointer
1342+
* in x0.
1343+
*
1344+
* Terminology (ARM ARM B2.8):
1345+
* PoC - Point of Coherency: the point at which all observers (CPUs,
1346+
* DMA masters, etc.) see the same memory. Cleaning to PoC
1347+
* guarantees the image bytes we memcpy'd are visible to the
1348+
* kernel's first uncached instruction fetches.
1349+
* PoU - Point of Unification: the point at which instruction and data
1350+
* caches converge. Invalidating I-cache to PoU ensures stale
1351+
* fetches are discarded before we hand off.
1352+
*
1353+
* The ARM64 Linux boot protocol (Documentation/arch/arm64/booting.rst)
1354+
* REQUIRES the kernel to be entered with MMU off, D-cache off, and the
1355+
* loaded image cleaned to PoC. Linux's arm64_panic_block_init() panics
1356+
* with "Non-EFI boot detected with MMU and caches enabled" if it detects
1357+
* MMU/caches were on at entry.
1358+
*
1359+
* wolfBoot's startup (line ~347 above) enables MMU+I+D cache at EL2 for
1360+
* its own use. This helper undoes that just before handing off to Linux.
1361+
*
1362+
* Parameters:
1363+
* x0 = kernel entry point
1364+
* x1 = device tree blob address
1365+
*
1366+
* Clobbers x0-x11, x29, x30; never returns.
1367+
*/
1368+
.global el2_cleanup_and_jump_to_linux
1369+
el2_cleanup_and_jump_to_linux:
1370+
/* Save entry/dtb out of the clobber range used by the dcache loop */
1371+
mov x29, x0 /* x29 = entry */
1372+
mov x30, x1 /* x30 = dtb */
1373+
1374+
/* ---- 1. Clean & invalidate entire data cache to PoC by set/way ----
1375+
* Standard ARMv8 routine, adapted from arm-trusted-firmware /
1376+
* U-Boot / Linux. Iterates every (level, set, way) triple and
1377+
* issues `dc cisw` on it. Terminates at the Level of Coherency
1378+
* (LoC) read from CLIDR_EL1. */
1379+
mrs x0, clidr_el1
1380+
and x3, x0, #0x07000000 /* x3 = LoC (level of coherency) */
1381+
lsr x3, x3, #23 /* x3 = LoC * 2 */
1382+
cbz x3, dcache_done
1383+
mov x10, #0 /* x10 = current cache level << 1 */
1384+
1385+
dcache_level_loop:
1386+
add x2, x10, x10, lsr #1 /* x2 = level * 3 */
1387+
lsr x1, x0, x2 /* x1 = ctype field for this level */
1388+
and x1, x1, #7
1389+
cmp x1, #2
1390+
b.lt dcache_skip_level /* No data cache at this level */
1391+
msr csselr_el1, x10 /* Select cache level (instruction = 0) */
1392+
isb
1393+
mrs x1, ccsidr_el1
1394+
and x2, x1, #7 /* x2 = log2(line length) - 4 */
1395+
add x2, x2, #4 /* x2 = log2(line length) */
1396+
mov x4, #0x3ff
1397+
and x4, x4, x1, lsr #3 /* x4 = max way number */
1398+
clz w5, w4 /* x5 = bit position of way size */
1399+
mov x7, #0x7fff
1400+
and x7, x7, x1, lsr #13 /* x7 = max set number */
1401+
1402+
dcache_set_loop:
1403+
mov x9, x4 /* x9 = current way */
1404+
dcache_way_loop:
1405+
lsl x6, x9, x5
1406+
orr x11, x10, x6 /* level | way */
1407+
lsl x6, x7, x2
1408+
orr x11, x11, x6 /* level | way | set */
1409+
dc cisw, x11 /* clean & invalidate by set/way */
1410+
subs x9, x9, #1
1411+
b.ge dcache_way_loop
1412+
subs x7, x7, #1
1413+
b.ge dcache_set_loop
1414+
1415+
dcache_skip_level:
1416+
add x10, x10, #2
1417+
cmp x3, x10
1418+
b.gt dcache_level_loop
1419+
1420+
dcache_done:
1421+
mov x10, #0
1422+
msr csselr_el1, x10
1423+
dsb sy
1424+
isb
1425+
1426+
/* ---- 2. Invalidate entire I-cache to PoU ----
1427+
* `ic iallu` invalidates all instruction cache to the Point of
1428+
* Unification for the local PE. */
1429+
ic iallu
1430+
dsb ish
1431+
isb
1432+
1433+
/* ---- 3. Disable MMU + I-cache + D-cache at EL2 ----
1434+
* SCTLR_EL2.M (bit 0) = MMU enable
1435+
* SCTLR_EL2.C (bit 2) = D-cache enable
1436+
* SCTLR_EL2.I (bit 12) = I-cache enable
1437+
*/
1438+
mrs x0, SCTLR_EL2
1439+
bic x0, x0, #(1 << 0) /* M */
1440+
bic x0, x0, #(1 << 2) /* C */
1441+
bic x0, x0, #(1 << 12) /* I */
1442+
msr SCTLR_EL2, x0
1443+
isb
1444+
1445+
/* ---- 4. Set up Linux ARM64 boot protocol registers and jump ----
1446+
* x0 = DTB address
1447+
* x1 = 0 (reserved)
1448+
* x2 = 0 (reserved)
1449+
* x3 = 0 (reserved)
1450+
* PC = entry
1451+
*/
1452+
mov x0, x30 /* x0 = DTB */
1453+
mov x1, xzr
1454+
mov x2, xzr
1455+
mov x3, xzr
1456+
br x29 /* jump to kernel entry; never returns */
1457+
13371458
.end

src/sdhci.c

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,7 @@ static uint32_t sdhci_get_response_bits(int from, int count)
581581
/* voltage: 0=off or SDHCI_SRS10_BVS_[X_X]V */
582582
static int sdcard_power_init_seq(uint32_t voltage)
583583
{
584+
int retries;
584585
/* Set power to specified voltage */
585586
int status = sdhci_set_power(voltage);
586587
#ifdef DEBUG_SDHCI
@@ -590,10 +591,24 @@ static int sdcard_power_init_seq(uint32_t voltage)
590591
SDHCI_REG(SDHCI_SRS09), SDHCI_REG(SDHCI_SRS10),
591592
SDHCI_REG(SDHCI_SRS11), SDHCI_REG(SDHCI_SRS12));
592593
#endif
593-
if (status == 0) {
594-
/* send CMD0 (go idle) to reset card */
594+
if (status != 0)
595+
return status;
596+
/* SD spec requires >= 1ms after power stabilizes before CMD0. */
597+
udelay(1000);
598+
/* Some cards and the ZynqMP Arasan controller need more settling
599+
* time after the slot-type change + soft reset in sdhci_platform_init().
600+
* Use a retry loop: if CMD0 fails, wait and retry (self-calibrating). */
601+
for (retries = 0; retries < 10; retries++) {
595602
status = sdhci_cmd(MMC_CMD0_GO_IDLE, 0, SDHCI_RESP_NONE);
603+
if (status == 0)
604+
break;
605+
udelay(10000); /* 10ms between retries */
606+
}
607+
#ifdef DEBUG_SDHCI
608+
if (retries > 0 && status == 0) {
609+
wolfBoot_printf("SD: CMD0 succeeded after %d retries\n", retries);
596610
}
611+
#endif
597612
if (status == 0) {
598613
/* send the operating conditions command */
599614
status = sdhci_cmd(SD_CMD8_SEND_IF_COND, SD_IF_COND_27V_33V,
@@ -1387,6 +1402,11 @@ int sdhci_init(void)
13871402
/* Call platform-specific initialization (clocks, resets, pin mux) */
13881403
sdhci_platform_init();
13891404

1405+
/* Allow controller to settle after platform init (slot type change,
1406+
* soft reset, clock configuration). Without this, the controller may
1407+
* not be ready to accept register writes on some platforms. */
1408+
udelay(1000); /* 1ms */
1409+
13901410
/* Reset the host controller */
13911411
sdhci_reg_or(SDHCI_HRS00, SDHCI_HRS00_SWR);
13921412
/* Bit will clear when reset is done */
@@ -1482,6 +1502,9 @@ int sdhci_init(void)
14821502
/* Setup 400khz starting clock */
14831503
sdhci_set_clock(SDHCI_CLK_400KHZ);
14841504

1505+
/* Allow clock to stabilize before issuing first command */
1506+
udelay(1000); /* 1ms */
1507+
14851508
#ifdef DISK_EMMC
14861509
/* Run full eMMC card initialization */
14871510
status = emmc_card_full_init();

0 commit comments

Comments
 (0)