From 195d331f1288ad9d1bca90b75c558e14efbe07bd Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:16 +0800 Subject: [PATCH 01/17] riscv: kexec_file: Fix crashk_low_res not exclude bug As done in commit 944a45abfabc ("arm64: kdump: Reimplement crashkernel=X") and commit 4831be702b95 ("arm64/kexec: Fix missing extra range for crashkres_low.") for arm64, while implementing crashkernel=X,[high,low], riscv should have excluded the "crashk_low_res" reserved ranges from the crash kernel memory to prevent them from being exported through /proc/vmcore, and the exclusion would need an extra crash_mem range. Just simply tested on qemu with crashkernel=4G with kexec in [1] mentioned in [2]. And the second kernel can be started normally. # dmesg | grep crash [ 0.000000] crashkernel low memory reserved: 0xf8000000 - 0x100000000 (128 MB) [ 0.000000] crashkernel reserved: 0x000000017fe00000 - 0x000000027fe00000 (4096 MB) Cc: Guo Ren Cc: Baoquan He [1]: https://github.com/chenjh005/kexec-tools/tree/build-test-riscv-v2 [2]: https://lore.kernel.org/all/20230726175000.2536220-1-chenjiahao16@huawei.com/ Fixes: 5882e5acf18d ("riscv: kdump: Implement crashkernel=X,[high,low]") Reviewed-by: Guo Ren Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/riscv/kernel/machine_kexec_file.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index 59d4bbc848a896..fa2946aa9b8f40 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -62,7 +62,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) unsigned int nr_ranges; int ret; - nr_ranges = 1; /* For exclusion of crashkernel region */ + nr_ranges = 2; /* For exclusion of crashkernel region */ walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); cmem = kmalloc_flex(*cmem, ranges, nr_ranges); @@ -77,8 +77,16 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) /* Exclude crashkernel region */ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (!ret) - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + if (ret) + goto out; + + if (crashk_low_res.end) { + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + goto out; + } + + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); out: kfree(cmem); From fa61e18cc049abc70043f412ee8e6fc70b41b8b5 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:17 +0800 Subject: [PATCH 02/17] powerpc/crash: Fix possible memory leak in update_crash_elfcorehdr() In get_crash_memory_ranges(), if crash_exclude_mem_range() failed after realloc_mem_ranges() has successfully allocated the cmem memory, it just returns an error but leaves cmem pointing to the allocated memory, nor is it freed in the caller update_crash_elfcorehdr(), which cause a memory leak, goto out to free the cmem. Cc: Sourabh Jain Cc: Hari Bathini Cc: Michael Ellerman Fixes: 849599b702ef ("powerpc/crash: add crash memory hotplug support") Reviewed-by: Sourabh Jain Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/powerpc/kexec/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index e6539f213b3d14..a520f851c3a6bb 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -502,7 +502,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * ret = get_crash_memory_ranges(&cmem); if (ret) { pr_err("Failed to get crash mem range\n"); - return; + goto out; } /* From e6256e46c35b7e6a76a8273ec94f120d64e9c1c2 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Mon, 25 May 2026 16:49:18 +0800 Subject: [PATCH 03/17] powerpc/crash: sort crash memory ranges before preparing elfcorehdr During a memory hot-remove event, the elfcorehdr is rebuilt to exclude the removed memory. While updating the crash memory ranges for this operation, the crash memory ranges array can become unsorted. This happens because remove_mem_range() may split a memory range into two parts and append the higher-address part as a separate range at the end of the array. So far, no issues have been observed due to the unsorted crash memory ranges. However, this could lead to problems once crash memory range removal is handled by generic code, as introduced in the upcoming patches in this series. Currently, powerpc uses a platform-specific function, remove_mem_range(), to exclude hot-removed memory from the crash memory ranges. This function performs the same task as the generic crash_exclude_mem_range() in crash_core.c. The generic helper also ensures that the crash memory ranges remain sorted. So remove the redundant powerpc-specific implementation and instead call crash_exclude_mem_range_guarded() (which internally calls crash_exclude_mem_range()) to exclude the hot-removed memory ranges. Cc: Andrew Morton Cc: Baoquan he Cc: Jinjie Ruan Cc: Hari Bathini Cc: Madhavan Srinivasan Cc: Mahesh Salgaonkar Cc: Michael Ellerman Cc: Ritesh Harjani (IBM) Cc: Shivang Upadhyay Cc: linux-kernel@vger.kernel.org Acked-by: Baoquan He Reviewed-by: Ritesh Harjani (IBM) Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Sourabh Jain Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/powerpc/include/asm/kexec_ranges.h | 4 +- arch/powerpc/kexec/crash.c | 5 +- arch/powerpc/kexec/ranges.c | 87 +------------------------ 3 files changed, 7 insertions(+), 89 deletions(-) diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h index 14055896cbcbcd..ad95e3792d10cc 100644 --- a/arch/powerpc/include/asm/kexec_ranges.h +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -7,7 +7,9 @@ void sort_memory_ranges(struct crash_mem *mrngs, bool merge); struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); -int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); +int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, + unsigned long long mstart, + unsigned long long mend); int get_exclude_memory_ranges(struct crash_mem **mem_ranges); int get_reserved_memory_ranges(struct crash_mem **mem_ranges); int get_crash_memory_ranges(struct crash_mem **mem_ranges); diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index a520f851c3a6bb..d634db67becc6e 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -493,7 +493,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * struct crash_mem *cmem = NULL; struct kexec_segment *ksegment; void *ptr, *mem, *elfbuf = NULL; - unsigned long elfsz, memsz, base_addr, size; + unsigned long elfsz, memsz, base_addr, size, end; ksegment = &image->segment[image->elfcorehdr_index]; mem = (void *) ksegment->mem; @@ -512,7 +512,8 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) { base_addr = PFN_PHYS(mn->start_pfn); size = mn->nr_pages * PAGE_SIZE; - ret = remove_mem_range(&cmem, base_addr, size); + end = base_addr + size - 1; + ret = crash_exclude_mem_range_guarded(&cmem, base_addr, end); if (ret) { pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n"); goto out; diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 867135560e5c8b..6c58bcc3e13040 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -553,7 +553,7 @@ int get_usable_memory_ranges(struct crash_mem **mem_ranges) #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_CRASH_DUMP -static int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, +int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, unsigned long long mstart, unsigned long long mend) { @@ -641,89 +641,4 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges) pr_err("Failed to setup crash memory ranges\n"); return ret; } - -/** - * remove_mem_range - Removes the given memory range from the range list. - * @mem_ranges: Range list to remove the memory range to. - * @base: Base address of the range to remove. - * @size: Size of the memory range to remove. - * - * (Re)allocates memory, if needed. - * - * Returns 0 on success, negative errno on error. - */ -int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) -{ - u64 end; - int ret = 0; - unsigned int i; - u64 mstart, mend; - struct crash_mem *mem_rngs = *mem_ranges; - - if (!size) - return 0; - - /* - * Memory range are stored as start and end address, use - * the same format to do remove operation. - */ - end = base + size - 1; - - for (i = 0; i < mem_rngs->nr_ranges; i++) { - mstart = mem_rngs->ranges[i].start; - mend = mem_rngs->ranges[i].end; - - /* - * Memory range to remove is not part of this range entry - * in the memory range list - */ - if (!(base >= mstart && end <= mend)) - continue; - - /* - * Memory range to remove is equivalent to this entry in the - * memory range list. Remove the range entry from the list. - */ - if (base == mstart && end == mend) { - for (; i < mem_rngs->nr_ranges - 1; i++) { - mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start; - mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end; - } - mem_rngs->nr_ranges--; - goto out; - } - /* - * Start address of the memory range to remove and the - * current memory range entry in the list is same. Just - * move the start address of the current memory range - * entry in the list to end + 1. - */ - else if (base == mstart) { - mem_rngs->ranges[i].start = end + 1; - goto out; - } - /* - * End address of the memory range to remove and the - * current memory range entry in the list is same. - * Just move the end address of the current memory - * range entry in the list to base - 1. - */ - else if (end == mend) { - mem_rngs->ranges[i].end = base - 1; - goto out; - } - /* - * Memory range to remove is not at the edge of current - * memory range entry. Split the current memory entry into - * two half. - */ - else { - size = mem_rngs->ranges[i].end - end + 1; - mem_rngs->ranges[i].end = base - 1; - ret = add_mem_range(mem_ranges, end + 1, size); - } - } -out: - return ret; -} #endif /* CONFIG_CRASH_DUMP */ From 0dbcbef7c2f7b1d07d0c24e9840ba76c3ec4e8af Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:19 +0800 Subject: [PATCH 04/17] arm64: kexec: Fix image->elf_headers memory leak during retry loop Sashiko AI code review pointed out a potential memory leak of image->elf_headers when load_other_segments() fails on error paths. In the arm64 kexec_file file-load path, kexec_image.c runs a retry loop calling kexec_add_buffer() to find a suitable location for the kernel segment. On each iteration, load_other_segments() is invoked to allocate and populate alternative segments such as initrd, DTB, and ELF headers. However, if a placement or allocation failure occurs later in load_other_segments() (e.g., when adding initrd or dtb), the execution jumps to the out_err label. While this path restores image->nr_segments via orig_segments, it returns an error back to the caller without freeing the previously allocated image->elf_headers vmalloc buffer. As a result, the retry loop in image_load() unconditionally allocates new ELF headers on the next iteration and overwrites image->elf_headers, permanently leaking the memory blocks allocated in previous iterations. To fix this, decouple the ELF header allocation from the target-seeking retry loop. Since the contents and size of ELF headers only depend on the host memory layout and do not change with the kernel's physical placement, move prepare_elf_headers() completely outside and prior to the while retry loop in image_load(). Concurrently, remove the prepare_elf_headers() call from inside load_other_segments() and have it directly reuse the single, pre-allocated image->elf_headers. Also, ensure that image->nr_segments is explicitly rolled back to kernel_segment_number on retry failures to safely discard stale segment tracking state. This optimization eliminates redundant memory allocation/deallocation overhead during kexec placement retries and eradicates the Use-After-Free and memory leak risk. Fixes: 108aa503657e ("arm64: kexec_file: try more regions if loading segments fails") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/arm64/include/asm/kexec.h | 3 ++- arch/arm64/kernel/kexec_image.c | 22 ++++++++++++++++++++-- arch/arm64/kernel/machine_kexec_file.c | 16 +++------------- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 892e5bebda957b..cc2f36b1b0d478 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -127,7 +127,8 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image); extern int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, char *initrd, unsigned long initrd_len, - char *cmdline); + char *cmdline, void *headers, unsigned long headers_size); +extern int prepare_elf_headers(void **addr, unsigned long *sz); #endif #endif /* __ASSEMBLER__ */ diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index b70f4df15a1ae5..79efeaeb71e93a 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -44,6 +44,11 @@ static void *image_load(struct kimage *image, struct kexec_buf kbuf = {}; unsigned long text_offset, kernel_segment_number; struct kexec_segment *kernel_segment; +#ifdef CONFIG_CRASH_DUMP + /* load elf core header */ + unsigned long headers_sz; + void *headers; +#endif int ret; /* @@ -89,6 +94,18 @@ static void *image_load(struct kimage *image, kernel_segment_number = image->nr_segments; +#ifdef CONFIG_CRASH_DUMP + if (image->type == KEXEC_TYPE_CRASH) { + ret = prepare_elf_headers(&headers, &headers_sz); + if (ret) { + pr_err("Preparing elf core header failed\n"); + return ERR_PTR(ret); + } + image->elf_headers = headers; + image->elf_headers_sz = headers_sz; + } +#endif + /* * The location of the kernel segment may make it impossible to satisfy * the other segment requirements, so we try repeatedly to find a @@ -99,7 +116,8 @@ static void *image_load(struct kimage *image, kernel_segment = &image->segment[kernel_segment_number]; ret = load_other_segments(image, kernel_segment->mem, kernel_segment->memsz, initrd, - initrd_len, cmdline); + initrd_len, cmdline, + headers, headers_sz); if (!ret) break; @@ -107,7 +125,7 @@ static void *image_load(struct kimage *image, * We couldn't find space for the other segments; erase the * kernel segment and try the next available hole. */ - image->nr_segments -= 1; + image->nr_segments = kernel_segment_number; kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; } diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index e31fabed378a59..daf81a873bbd37 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -40,7 +40,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) } #ifdef CONFIG_CRASH_DUMP -static int prepare_elf_headers(void **addr, unsigned long *sz) +int prepare_elf_headers(void **addr, unsigned long *sz) { struct crash_mem *cmem; unsigned int nr_ranges; @@ -92,7 +92,8 @@ int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, char *initrd, unsigned long initrd_len, - char *cmdline) + char *cmdline, void *headers, + unsigned long headers_sz) { struct kexec_buf kbuf = {}; void *dtb = NULL; @@ -105,16 +106,7 @@ int load_other_segments(struct kimage *image, kbuf.buf_min = kernel_load_addr + kernel_size; #ifdef CONFIG_CRASH_DUMP - /* load elf core header */ - void *headers; - unsigned long headers_sz; if (image->type == KEXEC_TYPE_CRASH) { - ret = prepare_elf_headers(&headers, &headers_sz); - if (ret) { - pr_err("Preparing elf core header failed\n"); - goto out_err; - } - kbuf.buffer = headers; kbuf.bufsz = headers_sz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; @@ -128,9 +120,7 @@ int load_other_segments(struct kimage *image, vfree(headers); goto out_err; } - image->elf_headers = headers; image->elf_load_addr = kbuf.mem; - image->elf_headers_sz = headers_sz; kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", image->elf_load_addr, kbuf.bufsz, kbuf.memsz); From b3b2f5af09e3caca0a947bf4f780037a28590337 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:20 +0800 Subject: [PATCH 05/17] x86/kexec: Fix potential buffer overflow in prepare_elf_headers() Sashiko AI code review pointed out a there is a TOCTOU (Time-of-Check to Time-of-Use) race condition in prepare_elf_headers() between the initial pass that counts System RAM ranges and the second pass that populates them. If a memory hotplug event occurs between these two steps, the number of memory regions may increase, causing an out-of-bounds write to the cmem->ranges[] array. Directly introducing get_online_mems() inside prepare_elf_headers() would trigger an immediate recursive read-after-write deadlock when invoked by the runtime hotplug notification path (which already holds the hotplug write lock). To eliminate the TOCTOU window safely without deadlock risks, move the get_online_mems() read lock to the top-level architecture image loaders. Since these top-level loaders are strictly executed on the initial system call path and are never re-entered by the runtime hotplug notifier, this approach physically isolates the locking contexts. The system memory ranges are forced to be statically frozen during the entire layout generation, eradicating the buffer overflow vulnerability. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andrew Morton Cc: Baoquan He Cc: Mike Rapoport Cc: stable@vger.kernel.org Fixes: 8d5f894a3108 ("x86: kexec_file: lift CRASH_MAX_RANGES limit on crash_mem buffer") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/x86/kernel/crash.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index cd796818d94d9e..f319308b06ee82 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -226,6 +227,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_mem *cmem = arg; + if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; + cmem->ranges[cmem->nr_ranges].start = res->start; cmem->ranges[cmem->nr_ranges].end = res->end; cmem->nr_ranges++; @@ -419,10 +423,14 @@ int crash_load_segments(struct kimage *image) .buf_max = ULONG_MAX, .top_down = false }; /* Prepare elf headers and add a segment */ + get_online_mems(); ret = prepare_elf_headers(&kbuf.buffer, &kbuf.bufsz, &pnum); - if (ret) + if (ret) { + put_online_mems(); return ret; + } + put_online_mems(); image->elf_headers = kbuf.buffer; image->elf_headers_sz = kbuf.bufsz; kbuf.memsz = kbuf.bufsz; From 440ad065ef08092589d27f8da043bd90d8c206b0 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:21 +0800 Subject: [PATCH 06/17] arm64: kexec_file: Fix potential buffer overflow in prepare_elf_headers() Sashiko AI code review pointed out there is a TOCTOU (Time-of-Check to Time-of-Use) race condition in prepare_elf_headers() between the initial pass that counts System RAM ranges and the second pass that populates them. If a memory hotplug event occurs between these two steps, the number of memory regions may increase, causing an out-of-bounds write to the cmem->ranges[] array. Directly introducing get_online_mems() inside prepare_elf_headers() would trigger an immediate recursive read-after-write deadlock when invoked by the runtime hotplug notification path (which already holds the hotplug write lock). To eliminate the TOCTOU window safely without deadlock risks, move the get_online_mems() read lock to the top-level architecture image loaders. Since these top-level loaders are strictly executed on the initial system call path and are never re-entered by the runtime hotplug notifier, this approach physically isolates the locking contexts. The system memory ranges are forced to be statically frozen during the entire layout generation, eradicating the buffer overflow vulnerability. Cc: Catalin Marinas Cc: Will Deacon Cc: Andrew Morton Cc: Baoquan He Cc: Breno Leitao Cc: stable@vger.kernel.org Fixes: 3751e728cef2 ("arm64: kexec_file: add crash dump support") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/arm64/kernel/kexec_image.c | 4 ++++ arch/arm64/kernel/machine_kexec_file.c | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 79efeaeb71e93a..884e446f08e34f 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -96,11 +97,14 @@ static void *image_load(struct kimage *image, #ifdef CONFIG_CRASH_DUMP if (image->type == KEXEC_TYPE_CRASH) { + get_online_mems(); ret = prepare_elf_headers(&headers, &headers_sz); if (ret) { pr_err("Preparing elf core header failed\n"); + put_online_mems(); return ERR_PTR(ret); } + put_online_mems(); image->elf_headers = headers; image->elf_headers_sz = headers_sz; } diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index daf81a873bbd37..c0ace89ded923a 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -59,6 +59,11 @@ int prepare_elf_headers(void **addr, unsigned long *sz) cmem->max_nr_ranges = nr_ranges; cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { + if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) { + ret = -EAGAIN; + goto out; + } + cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; From 3d9c844a7388e902e4cc07e82909e130931e754f Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:22 +0800 Subject: [PATCH 07/17] riscv: kexec_file: Fix potential buffer overflow in prepare_elf_headers() Sashiko AI code review pointed out there is a TOCTOU (Time-of-Check to Time-of-Use) race condition in prepare_elf_headers() between the initial pass that counts System RAM ranges and the second pass that populates them. If a memory hotplug event occurs between these two steps, the number of memory regions may increase, causing an out-of-bounds write to the cmem->ranges[] array. Directly introducing get_online_mems() inside prepare_elf_headers() would trigger an immediate recursive read-after-write deadlock when invoked by the runtime hotplug notification path (which already holds the hotplug write lock). To eliminate the TOCTOU window safely without deadlock risks, move the get_online_mems() read lock to the top-level architecture image loaders. Since these top-level loaders are strictly executed on the initial system call path and are never re-entered by the runtime hotplug notifier, this approach physically isolates the locking contexts. The system memory ranges are forced to be statically frozen during the entire layout generation, eradicating the buffer overflow vulnerability. Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Alexandre Ghiti Cc: songshuaishuai@tinylab.org Cc: bjorn@rivosinc.com Cc: leitao@debian.org Fixes: 8acea455fafa ("RISC-V: Support for kexec_file on panic") Reviewed-by: Guo Ren Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/riscv/kernel/machine_kexec_file.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index fa2946aa9b8f40..7d0cbc988d4cd6 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include @@ -49,6 +50,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_mem *cmem = arg; + if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; + cmem->ranges[cmem->nr_ranges].start = res->start; cmem->ranges[cmem->nr_ranges].end = res->end; cmem->nr_ranges++; @@ -282,12 +286,15 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start, if (image->type == KEXEC_TYPE_CRASH) { void *headers; unsigned long headers_sz; + get_online_mems(); ret = prepare_elf_headers(&headers, &headers_sz); if (ret) { + put_online_mems(); pr_err("Preparing elf core header failed\n"); goto out; } + put_online_mems(); kbuf.buffer = headers; kbuf.bufsz = headers_sz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; From 17e0daa124c659c1bbf3f45f9e9689cbfe7f546c Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:23 +0800 Subject: [PATCH 08/17] LoongArch: kexec: Fix potential buffer overflow in prepare_elf_headers() Sashiko AI code review pointed out there is a TOCTOU (Time-of-Check to Time-of-Use) race condition in prepare_elf_headers() between the initial pass that counts System RAM ranges and the second pass that populates them. If a memory hotplug event occurs between these two steps, the number of memory regions may increase, causing an out-of-bounds write to the cmem->ranges[] array. Directly introducing get_online_mems() inside prepare_elf_headers() would trigger an immediate recursive read-after-write deadlock when invoked by the runtime hotplug notification path (which already holds the hotplug write lock). To eliminate the TOCTOU window safely without deadlock risks, move the get_online_mems() read lock to the top-level architecture image loaders. Since these top-level loaders are strictly executed on the initial system call path and are never re-entered by the runtime hotplug notifier, this approach physically isolates the locking contexts. The system memory ranges are forced to be statically frozen during the entire layout generation, eradicating the buffer overflow vulnerability. Cc: Youling Tang Cc: Huacai Chen Cc: WANG Xuerui Cc: stable@vger.kernel.org Fixes: 1bcca8620a91 ("LoongArch: Add crash dump support for kexec_file") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/loongarch/kernel/machine_kexec_file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c index 5584b798ba4645..2a1a6124f04380 100644 --- a/arch/loongarch/kernel/machine_kexec_file.c +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -75,6 +76,11 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) cmem->max_nr_ranges = nr_ranges; cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { + if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) { + ret = -EAGAIN; + goto out; + } + cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; @@ -163,12 +169,15 @@ int load_other_segments(struct kimage *image, void *headers; unsigned long headers_sz; + get_online_mems(); ret = prepare_elf_headers(&headers, &headers_sz); if (ret < 0) { + put_online_mems(); pr_err("Preparing elf core header failed\n"); goto out_err; } + put_online_mems(); kbuf.buffer = headers; kbuf.bufsz = headers_sz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; From 1eaa0fd2b4736b3fa54201a9f25fdfbfcead1b90 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:24 +0800 Subject: [PATCH 09/17] crash: Add crash_prepare_headers() to exclude crash kernel memory The crash memory alloc, and the exclude of crashk_res, crashk_low_res and crashk_cma memory are almost identical across different architectures, handling them in the crash core would eliminate a lot of duplication, so add crash_prepare_headers() helper to handle them in the common code. To achieve the above goal, three architecture-specific functions are introduced: - arch_get_system_nr_ranges(). Pre-counts the max number of memory ranges. - arch_crash_populate_cmem(). Collects the memory ranges and fills them into cmem. - arch_crash_exclude_ranges(). Architecture's additional crash memory ranges exclusion, defaulting to empty. Reviewed-by: Sourabh Jain Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- include/linux/crash_core.h | 7 +++ kernel/crash_core.c | 95 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 99 insertions(+), 3 deletions(-) diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index c1dee3f971a918..d69c4c7c1f5eb6 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -59,6 +59,10 @@ extern int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mend); extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz); +extern int crash_prepare_headers(int need_kernel_map, void **addr, + unsigned long *sz, unsigned long *nr_mem_ranges); +extern int crash_prepare_headers_locked(int need_kernel_map, void **addr, + unsigned long *sz, unsigned long *nr_mem_ranges); struct kimage; struct kexec_segment; @@ -76,6 +80,9 @@ int kexec_should_crash(struct task_struct *p); int kexec_crash_loaded(void); void crash_save_cpu(struct pt_regs *regs, int cpu); extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); +extern unsigned int arch_get_system_nr_ranges(void); +extern int arch_crash_populate_cmem(struct crash_mem *cmem); +extern int arch_crash_exclude_ranges(struct crash_mem *cmem); #else /* !CONFIG_CRASH_DUMP*/ struct pt_regs; diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 4f21fc3b108b83..e84f3b63e79dce 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -168,9 +169,6 @@ static inline resource_size_t crash_resource_size(const struct resource *res) return !res->end ? 0 : resource_size(res); } - - - int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz) { @@ -272,6 +270,97 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, return 0; } +static struct crash_mem *alloc_cmem(unsigned int nr_ranges) +{ + struct crash_mem *cmem; + + cmem = kvzalloc_flex(*cmem, ranges, nr_ranges); + if (!cmem) + return NULL; + + cmem->max_nr_ranges = nr_ranges; + return cmem; +} + +unsigned int __weak arch_get_system_nr_ranges(void) { return 0; } +int __weak arch_crash_populate_cmem(struct crash_mem *cmem) { return -1; } +int __weak arch_crash_exclude_ranges(struct crash_mem *cmem) { return 0; } + +static int crash_exclude_core_ranges(struct crash_mem *cmem) +{ + int ret, i; + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (ret) + return ret; + + if (crashk_low_res.end) { + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + return ret; + } + + for (i = 0; i < crashk_cma_cnt; ++i) { + ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); + if (ret) + return ret; + } + + return 0; +} + +int crash_prepare_headers(int need_kernel_map, void **addr, unsigned long *sz, + unsigned long *nr_mem_ranges) +{ + unsigned int max_nr_ranges; + struct crash_mem *cmem; + int ret; + + max_nr_ranges = arch_get_system_nr_ranges(); + if (!max_nr_ranges) + return -ENOMEM; + + cmem = alloc_cmem(max_nr_ranges); + if (!cmem) + return -ENOMEM; + + ret = arch_crash_populate_cmem(cmem); + if (ret) + goto out; + + ret = crash_exclude_core_ranges(cmem); + if (ret) + goto out; + + ret = arch_crash_exclude_ranges(cmem); + if (ret) + goto out; + + /* Return the computed number of memory ranges, for hotplug usage */ + if (nr_mem_ranges) + *nr_mem_ranges = cmem->nr_ranges; + + ret = crash_prepare_elf64_headers(cmem, need_kernel_map, addr, sz); + +out: + kvfree(cmem); + return ret; +} + +int crash_prepare_headers_locked(int need_kernel_map, void **addr, unsigned long *sz, + unsigned long *nr_mem_ranges) +{ + int ret; + + get_online_mems(); + ret = crash_prepare_headers(need_kernel_map, addr, sz, nr_mem_ranges); + put_online_mems(); + + return ret; +} + /** * crash_exclude_mem_range - exclude a mem range for existing ranges * @mem: mem->range contains an array of ranges sorted in ascending order From ae99b965484ae923cb0acab2465270f1a48e2eb9 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:25 +0800 Subject: [PATCH 10/17] arm64: kexec_file: Use crash_prepare_headers() helper to simplify code Use the newly introduced crash_prepare_headers() function to replace the existing prepare_elf_headers(), allocate cmem and exclude crash kernel memory in the crash core, which reduce code duplication. Only the following two architecture functions need to be implemented: - arch_get_system_nr_ranges(). Use for_each_mem_range() to traverse and pre-count the max number of memory ranges. - arch_crash_populate_cmem(). Use for_each_mem_range to traverse and collect the memory ranges and fills them into cmem. Acked-by: Catalin Marinas Reviewed-by: Sourabh Jain Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/arm64/kernel/kexec_image.c | 6 +--- arch/arm64/kernel/machine_kexec_file.c | 44 ++++++++------------------ 2 files changed, 14 insertions(+), 36 deletions(-) diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 884e446f08e34f..770a4c7bf5737e 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -97,14 +96,11 @@ static void *image_load(struct kimage *image, #ifdef CONFIG_CRASH_DUMP if (image->type == KEXEC_TYPE_CRASH) { - get_online_mems(); - ret = prepare_elf_headers(&headers, &headers_sz); + ret = crash_prepare_headers_locked(true, &headers, &headers_sz, NULL); if (ret) { pr_err("Preparing elf core header failed\n"); - put_online_mems(); return ERR_PTR(ret); } - put_online_mems(); image->elf_headers = headers; image->elf_headers_sz = headers_sz; } diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index c0ace89ded923a..66fbfbaec1c67d 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -40,51 +40,33 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) } #ifdef CONFIG_CRASH_DUMP -int prepare_elf_headers(void **addr, unsigned long *sz) +unsigned int arch_get_system_nr_ranges(void) { - struct crash_mem *cmem; - unsigned int nr_ranges; - int ret; - u64 i; + unsigned int nr_ranges = 2; /* for exclusion of crashkernel region */ phys_addr_t start, end; + u64 i; - nr_ranges = 2; /* for exclusion of crashkernel region */ for_each_mem_range(i, &start, &end) nr_ranges++; - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; + return nr_ranges; +} + +int arch_crash_populate_cmem(struct crash_mem *cmem) +{ + phys_addr_t start, end; + u64 i; - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { - if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) { - ret = -EAGAIN; - goto out; - } + if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - goto out; - - if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret) - goto out; - } - - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return 0; } #endif From 9d948a73d3a7bc3018dfcbab2f9432da84f2028d Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:26 +0800 Subject: [PATCH 11/17] x86/kexec: Use crash_prepare_headers() helper to simplify code Use the newly introduced crash_prepare_headers() function to replace the existing prepare_elf_headers(), allocate cmem and exclude crash kernel memory in the crash core, which reduce code duplication. Only the following three architecture functions need to be implemented: - arch_get_system_nr_ranges(). Call get_nr_ram_ranges_callback() to pre-count the max number of memory ranges. - arch_crash_populate_cmem(). Use prepare_elf64_ram_headers_callback() to collect the memory ranges and fills them into cmem. - arch_crash_exclude_ranges(). Exclude the low 1M for x86. By the way, remove the unused "nr_mem_ranges" in arch_crash_handle_hotplug_event(). Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Andrew Morton Cc: Vivek Goyal Reviewed-by: Sourabh Jain Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/x86/kernel/crash.c | 96 ++++++----------------------------------- 1 file changed, 12 insertions(+), 84 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index f319308b06ee82..26f140dff3481f 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include @@ -154,16 +153,8 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg) return 0; } -/* Gather all the required information to prepare elf headers for ram regions */ -static struct crash_mem *fill_up_crash_elf_data(void) +unsigned int arch_get_system_nr_ranges(void) { - unsigned int nr_ranges = 0; - struct crash_mem *cmem; - - walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); - if (!nr_ranges) - return NULL; - /* * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges * may cause range splits. So add extra slots here. @@ -178,49 +169,16 @@ static struct crash_mem *fill_up_crash_elf_data(void) * But in order to lest the low 1M could be changed in the future, * (e.g. [start, 1M]), add a extra slot. */ - nr_ranges += 3 + crashk_cma_cnt; - cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); - if (!cmem) - return NULL; - - cmem->max_nr_ranges = nr_ranges; + unsigned int nr_ranges = 3 + crashk_cma_cnt; - return cmem; + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + return nr_ranges; } -/* - * Look for any unwanted ranges between mstart, mend and remove them. This - * might lead to split and split ranges are put in cmem->ranges[] array - */ -static int elf_header_exclude_ranges(struct crash_mem *cmem) +int arch_crash_exclude_ranges(struct crash_mem *cmem) { - int ret = 0; - int i; - /* Exclude the low 1M because it is always reserved */ - ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1); - if (ret) - return ret; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - return ret; - - if (crashk_low_res.end) - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, - crashk_low_res.end); - if (ret) - return ret; - - for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); - if (ret) - return ret; - } - - return 0; + return crash_exclude_mem_range(cmem, 0, SZ_1M - 1); } static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) @@ -237,35 +195,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) return 0; } -/* Prepare elf headers. Return addr and size */ -static int prepare_elf_headers(void **addr, unsigned long *sz, - unsigned long *nr_mem_ranges) +int arch_crash_populate_cmem(struct crash_mem *cmem) { - struct crash_mem *cmem; - int ret; - - cmem = fill_up_crash_elf_data(); - if (!cmem) - return -ENOMEM; - - ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); - if (ret) - goto out; - - /* Exclude unwanted mem ranges */ - ret = elf_header_exclude_ranges(cmem); - if (ret) - goto out; - - /* Return the computed number of memory ranges, for hotplug usage */ - *nr_mem_ranges = cmem->nr_ranges; - - /* By default prepare 64bit headers */ - ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz); - -out: - vfree(cmem); - return ret; + return walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); } #endif @@ -423,14 +355,11 @@ int crash_load_segments(struct kimage *image) .buf_max = ULONG_MAX, .top_down = false }; /* Prepare elf headers and add a segment */ - get_online_mems(); - ret = prepare_elf_headers(&kbuf.buffer, &kbuf.bufsz, &pnum); - if (ret) { - put_online_mems(); + ret = crash_prepare_headers_locked(IS_ENABLED(CONFIG_X86_64), &kbuf.buffer, + &kbuf.bufsz, &pnum); + if (ret) return ret; - } - put_online_mems(); image->elf_headers = kbuf.buffer; image->elf_headers_sz = kbuf.bufsz; kbuf.memsz = kbuf.bufsz; @@ -520,7 +449,6 @@ unsigned int arch_crash_get_elfcorehdr_size(void) void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { void *elfbuf = NULL, *old_elfcorehdr; - unsigned long nr_mem_ranges; unsigned long mem, memsz; unsigned long elfsz = 0; @@ -538,7 +466,7 @@ void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) * Create the new elfcorehdr reflecting the changes to CPU and/or * memory resources. */ - if (prepare_elf_headers(&elfbuf, &elfsz, &nr_mem_ranges)) { + if (crash_prepare_headers(IS_ENABLED(CONFIG_X86_64), &elfbuf, &elfsz, NULL)) { pr_err("unable to create new elfcorehdr"); goto out; } From a6749dc86969abb157ed4b7c08333a157800687c Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:27 +0800 Subject: [PATCH 12/17] riscv: kexec_file: Use crash_prepare_headers() helper to simplify code Use the newly introduced crash_prepare_headers() function to replace the existing prepare_elf_headers(), allocate cmem and exclude crash kernel memory in the crash core, which reduce code duplication. Only the following two architecture functions need to be implemented: - arch_get_system_nr_ranges(). Call get_nr_ram_ranges_callback() to pre-counts the max number of memory ranges. - arch_crash_populate_cmem(). Use prepare_elf64_ram_headers_callback() to collects the memory ranges and fills them into cmem. Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Alexandre Ghiti Cc: Guo Ren Reviewed-by: Sourabh Jain Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/riscv/kernel/machine_kexec_file.c | 51 ++++++-------------------- 1 file changed, 12 insertions(+), 39 deletions(-) diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index 7d0cbc988d4cd6..38c8261ba8fcfd 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -8,7 +8,6 @@ */ #include #include -#include #include #include #include @@ -46,6 +45,15 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg) return 0; } +unsigned int arch_get_system_nr_ranges(void) +{ + unsigned int nr_ranges = 2; /* For exclusion of crashkernel region */ + + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + + return nr_ranges; +} + static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_mem *cmem = arg; @@ -60,41 +68,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) return 0; } -static int prepare_elf_headers(void **addr, unsigned long *sz) +int arch_crash_populate_cmem(struct crash_mem *cmem) { - struct crash_mem *cmem; - unsigned int nr_ranges; - int ret; - - nr_ranges = 2; /* For exclusion of crashkernel region */ - walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); - - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; - - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; - ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); - if (ret) - goto out; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - goto out; - - if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret) - goto out; - } - - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); } static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, @@ -286,15 +262,12 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start, if (image->type == KEXEC_TYPE_CRASH) { void *headers; unsigned long headers_sz; - get_online_mems(); - ret = prepare_elf_headers(&headers, &headers_sz); + ret = crash_prepare_headers_locked(true, &headers, &headers_sz, NULL); if (ret) { - put_online_mems(); pr_err("Preparing elf core header failed\n"); goto out; } - put_online_mems(); kbuf.buffer = headers; kbuf.bufsz = headers_sz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; From d02dee6ffceb8de47612c211132c789316718849 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:28 +0800 Subject: [PATCH 13/17] LoongArch: kexec: Use crash_prepare_headers() helper to simplify code Use the newly introduced crash_prepare_headers() function to replace the existing prepare_elf_headers(), allocate cmem and exclude crash kernel memory in the crash core, which reduce code duplication. Only the following two architecture functions need to be implemented: - arch_get_system_nr_ranges(). Use for_each_mem_range to traverse and pre-count the max number of memory ranges. - arch_crash_populate_cmem(). Use for_each_mem_range to traverse and collect the memory ranges and fills them into cmem. Cc: Huacai Chen Cc: WANG Xuerui Cc: Youling Tang Cc: Baoquan He Reviewed-by: Sourabh Jain Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/loongarch/kernel/machine_kexec_file.c | 50 ++++++---------------- 1 file changed, 14 insertions(+), 36 deletions(-) diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c index 2a1a6124f04380..a845fbe8883d18 100644 --- a/arch/loongarch/kernel/machine_kexec_file.c +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -57,51 +56,33 @@ static void cmdline_add_initrd(struct kimage *image, unsigned long *cmdline_tmpl } #ifdef CONFIG_CRASH_DUMP - -static int prepare_elf_headers(void **addr, unsigned long *sz) +unsigned int arch_get_system_nr_ranges(void) { - int ret, nr_ranges; - uint64_t i; + int nr_ranges = 2; /* for exclusion of crashkernel region */ phys_addr_t start, end; - struct crash_mem *cmem; + uint64_t i; - nr_ranges = 2; /* for exclusion of crashkernel region */ for_each_mem_range(i, &start, &end) nr_ranges++; - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; + return nr_ranges; +} + +int arch_crash_populate_cmem(struct crash_mem *cmem) +{ + phys_addr_t start, end; + uint64_t i; - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { - if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) { - ret = -EAGAIN; - goto out; - } + if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret < 0) - goto out; - - if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret < 0) - goto out; - } - - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return 0; } /* @@ -169,15 +150,12 @@ int load_other_segments(struct kimage *image, void *headers; unsigned long headers_sz; - get_online_mems(); - ret = prepare_elf_headers(&headers, &headers_sz); + ret = crash_prepare_headers_locked(true, &headers, &headers_sz, NULL); if (ret < 0) { - put_online_mems(); pr_err("Preparing elf core header failed\n"); goto out_err; } - put_online_mems(); kbuf.buffer = headers; kbuf.bufsz = headers_sz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; From ea75385de52eab9ad606f9ea068a0c355d5f57e9 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:29 +0800 Subject: [PATCH 14/17] crash: Use crash_exclude_core_ranges() on powerpc The crash memory exclude of crashk_res and crashk_cma memory on powerpc are almost identical to the generic crash_exclude_core_ranges(). By introducing the architecture-specific arch_crash_exclude_mem_range() function with a default implementation of crash_exclude_mem_range(), and using crash_exclude_mem_range_guarded as powerpc's separate implementation, the generic crash_exclude_core_ranges() helper function can be reused. Cc: Andrew Morton Cc: Hari Bathini Cc: Madhavan Srinivasan Cc: Mahesh Salgaonkar Cc: Michael Ellerman Cc: Ritesh Harjani (IBM) Cc: Shivang Upadhyay Acked-by: Baoquan He Reviewed-by: Sourabh Jain Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/powerpc/include/asm/kexec_ranges.h | 3 --- arch/powerpc/kexec/crash.c | 2 +- arch/powerpc/kexec/ranges.c | 16 ++++------------ include/linux/crash_core.h | 4 ++++ kernel/crash_core.c | 19 +++++++++++++------ 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h index ad95e3792d10cc..8489e844b44759 100644 --- a/arch/powerpc/include/asm/kexec_ranges.h +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -7,9 +7,6 @@ void sort_memory_ranges(struct crash_mem *mrngs, bool merge); struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); -int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, - unsigned long long mstart, - unsigned long long mend); int get_exclude_memory_ranges(struct crash_mem **mem_ranges); int get_reserved_memory_ranges(struct crash_mem **mem_ranges); int get_crash_memory_ranges(struct crash_mem **mem_ranges); diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index d634db67becc6e..775895f3103793 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -513,7 +513,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * base_addr = PFN_PHYS(mn->start_pfn); size = mn->nr_pages * PAGE_SIZE; end = base_addr + size - 1; - ret = crash_exclude_mem_range_guarded(&cmem, base_addr, end); + ret = arch_crash_exclude_mem_range(&cmem, base_addr, end); if (ret) { pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n"); goto out; diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 6c58bcc3e13040..e5fea23b191b6d 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -553,9 +553,9 @@ int get_usable_memory_ranges(struct crash_mem **mem_ranges) #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_CRASH_DUMP -int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, - unsigned long long mstart, - unsigned long long mend) +int arch_crash_exclude_mem_range(struct crash_mem **mem_ranges, + unsigned long long mstart, + unsigned long long mend) { struct crash_mem *tmem = *mem_ranges; @@ -604,18 +604,10 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges) sort_memory_ranges(*mem_ranges, true); } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_res.start, crashk_res.end); + ret = crash_exclude_core_ranges(mem_ranges); if (ret) goto out; - for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); - if (ret) - goto out; - } - /* * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL * regions are exported to save their context at the time of diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index d69c4c7c1f5eb6..0f4ea7ffd06609 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -63,6 +63,7 @@ extern int crash_prepare_headers(int need_kernel_map, void **addr, unsigned long *sz, unsigned long *nr_mem_ranges); extern int crash_prepare_headers_locked(int need_kernel_map, void **addr, unsigned long *sz, unsigned long *nr_mem_ranges); +extern int crash_exclude_core_ranges(struct crash_mem **cmem); struct kimage; struct kexec_segment; @@ -83,6 +84,9 @@ extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); extern unsigned int arch_get_system_nr_ranges(void); extern int arch_crash_populate_cmem(struct crash_mem *cmem); extern int arch_crash_exclude_ranges(struct crash_mem *cmem); +extern int arch_crash_exclude_mem_range(struct crash_mem **mem, + unsigned long long mstart, + unsigned long long mend); #else /* !CONFIG_CRASH_DUMP*/ struct pt_regs; diff --git a/kernel/crash_core.c b/kernel/crash_core.c index e84f3b63e79dce..33e945da677607 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -286,24 +286,31 @@ unsigned int __weak arch_get_system_nr_ranges(void) { return 0; } int __weak arch_crash_populate_cmem(struct crash_mem *cmem) { return -1; } int __weak arch_crash_exclude_ranges(struct crash_mem *cmem) { return 0; } -static int crash_exclude_core_ranges(struct crash_mem *cmem) +int __weak arch_crash_exclude_mem_range(struct crash_mem **mem, + unsigned long long mstart, + unsigned long long mend) +{ + return crash_exclude_mem_range(*mem, mstart, mend); +} + +int crash_exclude_core_ranges(struct crash_mem **cmem) { int ret, i; /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + ret = arch_crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); if (ret) return ret; if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + ret = arch_crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); if (ret) return ret; } for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); + ret = arch_crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); if (ret) return ret; } @@ -330,7 +337,7 @@ int crash_prepare_headers(int need_kernel_map, void **addr, unsigned long *sz, if (ret) goto out; - ret = crash_exclude_core_ranges(cmem); + ret = crash_exclude_core_ranges(&cmem); if (ret) goto out; From d3c88d107c730f3e77132f741a288b1420f639f4 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:30 +0800 Subject: [PATCH 15/17] arm64: kexec: Add support for crashkernel CMA reservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 35c18f2933c5 ("Add a new optional ",cma" suffix to the crashkernel= command line option") and commit ab475510e042 ("kdump: implement reserve_crashkernel_cma") added CMA support for kdump crashkernel reservation. Crash kernel memory reservation wastes production resources if too large, risks kdump failure if too small, and faces allocation difficulties on fragmented systems due to contiguous block constraints. The new CMA-based crashkernel reservation scheme splits the "large fixed reservation" into a "small fixed region + large CMA dynamic region": the CMA memory is available to userspace during normal operation to avoid waste, and is reclaimed for kdump upon crash—saving memory while improving reliability. So extend crashkernel CMA reservation support to arm64. The following changes are made to enable CMA reservation: - Parse and obtain the CMA reservation size along with other crashkernel parameters. - Call reserve_crashkernel_cma() to allocate the CMA region for kdump. - Include the CMA-reserved ranges for kdump kernel to use. - Exclude the CMA-reserved ranges from the crash kernel memory to prevent them from being exported through /proc/vmcore, which is already done in the crash core. Update kernel-parameters.txt to document CMA support for crashkernel on arm64 architecture. Tested-by: Breno Leitao Acked-by: Catalin Marinas Acked-by: Rob Herring (Arm) Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Acked-by: Ard Biesheuvel Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- Documentation/admin-guide/kernel-parameters.txt | 2 +- arch/arm64/kernel/machine_kexec_file.c | 2 +- arch/arm64/mm/init.c | 5 +++-- drivers/of/fdt.c | 9 +++++---- drivers/of/kexec.c | 9 +++++++++ include/linux/crash_reserve.h | 4 +++- 6 files changed, 22 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4d0f545fb3ec5a..52742fab49a9a3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1119,7 +1119,7 @@ Kernel parameters It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. crashkernel=size[KMG],cma - [KNL, X86, ppc] Reserve additional crash kernel memory from + [KNL, X86, ARM64, PPC] Reserve additional crash kernel memory from CMA. This reservation is usable by the first system's userspace memory and kernel movable allocations (memory balloon, zswap). Pages allocated from this memory range diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 66fbfbaec1c67d..d2985ce6230629 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -42,7 +42,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) #ifdef CONFIG_CRASH_DUMP unsigned int arch_get_system_nr_ranges(void) { - unsigned int nr_ranges = 2; /* for exclusion of crashkernel region */ + unsigned int nr_ranges = 2 + crashk_cma_cnt; /* for exclusion of crashkernel region */ phys_addr_t start, end; u64 i; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 97987f850a33c3..227f58522dad29 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -96,8 +96,8 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; static void __init arch_reserve_crashkernel(void) { + unsigned long long crash_base, crash_size, cma_size = 0; unsigned long long low_size = 0; - unsigned long long crash_base, crash_size; bool high = false; int ret; @@ -106,11 +106,12 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); } static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 82f7327c59ea90..0470acbd1fcf0f 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -880,11 +880,12 @@ static unsigned long chosen_node_offset = -FDT_ERR_NOTFOUND; /* * The main usage of linux,usable-memory-range is for crash dump kernel. * Originally, the number of usable-memory regions is one. Now there may - * be two regions, low region and high region. - * To make compatibility with existing user-space and older kdump, the low - * region is always the last range of linux,usable-memory-range if exist. + * be 2 + CRASHK_CMA_RANGES_MAX regions, low region, high region and cma + * regions. To make compatibility with existing user-space and older kdump, + * the high and low region are always the first two ranges of + * linux,usable-memory-range if exist. */ -#define MAX_USABLE_RANGES 2 +#define MAX_USABLE_RANGES (2 + CRASHK_CMA_RANGES_MAX) /** * early_init_dt_check_for_usable_mem_range - Decode usable memory range diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index b6837e299e7fe5..029903b986cbd3 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -458,6 +458,15 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, if (ret) goto out; } + + for (int i = 0; i < crashk_cma_cnt; i++) { + ret = fdt_appendprop_addrrange(fdt, 0, chosen_node, + "linux,usable-memory-range", + crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end - crashk_cma_ranges[i].start + 1); + if (ret) + goto out; + } #endif } diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index f0dc03d94ca2cd..30864d90d7f509 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -14,9 +14,11 @@ extern struct resource crashk_res; extern struct resource crashk_low_res; extern struct range crashk_cma_ranges[]; + +#define CRASHK_CMA_RANGES_MAX 4 #if defined(CONFIG_CMA) && defined(CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION) #define CRASHKERNEL_CMA -#define CRASHKERNEL_CMA_RANGES_MAX 4 +#define CRASHKERNEL_CMA_RANGES_MAX (CRASHK_CMA_RANGES_MAX) extern int crashk_cma_cnt; #else #define crashk_cma_cnt 0 From 49285ed155537e953a28fd691c5e0f5985271842 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:31 +0800 Subject: [PATCH 16/17] riscv: kexec: Add support for crashkernel CMA reservation Commit 35c18f2933c5 ("Add a new optional ",cma" suffix to the crashkernel= command line option") and commit ab475510e042 ("kdump: implement reserve_crashkernel_cma") added CMA support for kdump crashkernel reservation. This allows the kernel to dynamically allocate contiguous memory for crash dumping when needed, rather than permanently reserving a fixed region at boot time. So extend crashkernel CMA reservation support to riscv. The following changes are made to enable CMA reservation: - Parse and obtain the CMA reservation size along with other crashkernel parameters. - Call reserve_crashkernel_cma() to allocate the CMA region for kdump. - Include the CMA-reserved ranges for kdump kernel to use, which was already done in of_kexec_alloc_and_setup_fdt(). - Exclude the CMA-reserved ranges from the crash kernel memory to prevent them from being exported through /proc/vmcore, which was already done in the crash core. Update kernel-parameters.txt to document CMA support for crashkernel on riscv architecture. Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Alexandre Ghiti Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Acked-by: Paul Walmsley # arch/riscv Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++-------- arch/riscv/kernel/machine_kexec_file.c | 2 +- arch/riscv/mm/init.c | 5 +++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 52742fab49a9a3..3ff3ddd516cf45 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1119,14 +1119,14 @@ Kernel parameters It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. crashkernel=size[KMG],cma - [KNL, X86, ARM64, PPC] Reserve additional crash kernel memory from - CMA. This reservation is usable by the first system's - userspace memory and kernel movable allocations (memory - balloon, zswap). Pages allocated from this memory range - will not be included in the vmcore so this should not - be used if dumping of userspace memory is intended and - it has to be expected that some movable kernel pages - may be missing from the dump. + [KNL, X86, ARM64, RISCV, PPC] Reserve additional crash + kernel memory from CMA. This reservation is usable by + the first system's userspace memory and kernel movable + allocations (memory balloon, zswap). Pages allocated + from this memory range will not be included in the vmcore + so this should not be used if dumping of userspace memory + is intended and it has to be expected that some movable + kernel pages may be missing from the dump. A standard crashkernel reservation, as described above, is still needed to hold the crash kernel and initrd. diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index 38c8261ba8fcfd..dfaa1f6b953114 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -47,7 +47,7 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg) unsigned int arch_get_system_nr_ranges(void) { - unsigned int nr_ranges = 2; /* For exclusion of crashkernel region */ + unsigned int nr_ranges = 2 + crashk_cma_cnt; /* For exclusion of crashkernel region */ walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fa8d2f6f554b57..9dd0ffe85d6aaf 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1320,7 +1320,7 @@ static inline void setup_vm_final(void) */ static void __init arch_reserve_crashkernel(void) { - unsigned long long low_size = 0; + unsigned long long low_size = 0, cma_size = 0; unsigned long long crash_base, crash_size; bool high = false; int ret; @@ -1330,11 +1330,12 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); } void __init paging_init(void) From ec1ddd471ffd555d4f140b0ae522908287489a7f Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 25 May 2026 16:49:32 +0800 Subject: [PATCH 17/17] arm64/crash: Add crash hotplug support Due to CPU/Memory hotplug or online/offline events, the elfcorehdr (which describes the CPUs and memory of the crashed kernel) of kdump image becomes outdated. Consequently, attempting dump collection with an outdated elfcorehdr can lead to inaccurate dump collection. The current solution to address the above issue involves monitoring the CPU/Memory add/remove events in userspace using udev rules and whenever there are changes in CPU and memory resources, the entire kdump image is loaded again. The kdump image includes kernel, initrd, elfcorehdr, FDT, purgatory. Given that only elfcorehdr gets outdated due to CPU/Memory add/remove events, reloading the entire kdump image is inefficient. More importantly, kdump remains inactive for a substantial amount of time until the kdump reload completes. To address the aforementioned issue, commit 247262756121 ("crash: add generic infrastructure for crash hotplug support") added a generic infrastructure that allows architectures to selectively update the kdump image component during CPU or memory add/remove events within the kernel itself. In the event of a CPU or memory add/remove events, the generic crash hotplug event handler, crash_handle_hotplug_event(), is triggered. It then acquires the necessary locks to update the kdump image and invokes the architecture-specific crash hotplug handler, arch_crash_handle_hotplug_event(), to update the required kdump image components. [1] has supported virtual CPU hotplug in virtual machines for ARM64, allowing vCPUs to be added or removed at runtime to meet Kubernetes demands. On ARM64, only memory add/remove events are handled. Here's why: 1. Physical CPU hotplug: Not supported on ARM64 hardware. 2. ACPI vCPU hotplug (KVM virtual machine): - vCPU hotplug is implemented as a static firmware policy where all possible vCPUs are pre-described in the MADT table at boot. - The vCPU status will be automatically updated after vCPU hotplug. - No FDT or elfcorehdr update needed. 3. Device tree booted Virtual Machine vCPU hotplug: - The elfcorehdr is built using for_each_possible_cpu(), so it already includes all possible CPUs and doesn't need updates. For memory add/remove events, the elfcorehdr is updated to reflect the current memory layout. This patch adds the ARCH_SUPPORTS_CRASH_HOTPLUG config option and implements: - arch_crash_hotplug_support(): Check if hotplug update is supported - arch_crash_get_elfcorehdr_size(): Return elfcorehdr buffer size - arch_crash_handle_hotplug_event(): Handle memory hotplug events This follows the same approach as x86 commit ea53ad9cf73b ("x86/crash: add x86 crash hotplug support") and powerpc commit b741092d5976 ("powerpc/crash: add crash CPU hotplug support") and commit 849599b702ef ("powerpc/crash: add crash memory hotplug support"). The test is based on the following QEMU version: https://github.com/salil-mehta/qemu.git virt-cpuhp-armv8/rfc-v2 Replace your '-smp' argument with something like: | -smp cpus=1,maxcpus=3,cores=3,threads=1,sockets=1 then feed the following to the Qemu montior to hotplug vCPU; | (qemu) device_add driver=host-arm-cpu,core-id=1,id=cpu1 | (qemu) device_del cpu1 feed the following to the Qemu montior to hotplug memory; | (qemu) object_add memory-backend-ram,id=mem1,size=256M | (qemu) device_add pc-dimm,id=dimm1,memdev=mem1 | (qemu) device_del dimm1 The qemu startup configuration is as follows: qemu-system-aarch64 \ -M virt,gic-version=3,acpi=on,highmem=on \ -enable-kvm \ -cpu host \ -kernel Image \ -smp cpus=1,maxcpus=3,cores=3,threads=1,sockets=1 \ -bios /usr/share/edk2/aarch64/QEMU_EFI.fd \ -m 2G,slots=64,maxmem=16G \ -nographic \ -no-reboot \ -device virtio-rng-pci \ -append "root=/dev/vda rw console=ttyAMA0 kgdboc=ttyAMA0,115200 \ earlycon acpi=on crashkernel=512M" \ -drive if=none,file=images/rootfs.ext4,format=raw,id=hd0 \ -device virtio-blk-device,drive=hd0 \ There are two system calls, `kexec_file_load` and `kexec_load`, used to load the kdump image. Only kexec_file_load syscall way is tested now. This patch is based on following rework: https://lore.kernel.org/all/20260328074013.3589544-1-ruanjinjie@huawei.com/ Cc: Catalin Marinas Cc: Will Deacon Cc: Baoquan He Cc: "Mike Rapoport (Microsoft)" Cc: Andrew Morton Cc: Breno Leitao Cc: Kees Cook [1]: https://lore.kernel.org/all/20240529133446.28446-1-Jonathan.Cameron@huawei.com/ Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/arm64/Kconfig | 3 + arch/arm64/include/asm/kexec.h | 11 +++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/crash.c | 125 +++++++++++++++++++++++++ arch/arm64/kernel/machine_kexec_file.c | 23 ++++- 5 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/kernel/crash.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fe60738e5943ba..9091c67e1cc289 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1609,6 +1609,9 @@ config ARCH_DEFAULT_CRASH_DUMP config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION def_bool CRASH_RESERVE +config ARCH_SUPPORTS_CRASH_HOTPLUG + def_bool y + config TRANS_TABLE def_bool y depends on HIBERNATION || KEXEC_CORE diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index cc2f36b1b0d478..f338d162dec1a3 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -131,6 +131,17 @@ extern int load_other_segments(struct kimage *image, extern int prepare_elf_headers(void **addr, unsigned long *sz); #endif +#ifdef CONFIG_CRASH_HOTPLUG +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg); +#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event + +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags); +#define arch_crash_hotplug_support arch_crash_hotplug_support + +unsigned int arch_crash_get_elfcorehdr_size(void); +#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size +#endif + #endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 74b76bb7045231..629e962813abca 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_CRASH_HOTPLUG) += crash.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o diff --git a/arch/arm64/kernel/crash.c b/arch/arm64/kernel/crash.c new file mode 100644 index 00000000000000..2114375820da9f --- /dev/null +++ b/arch/arm64/kernel/crash.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Architecture specific functions for kexec based crash dumps. + */ + +#define pr_fmt(fmt) "crash hp: " fmt + +#include +#include +#include +#include + +#include + +#ifdef CONFIG_CRASH_HOTPLUG + +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags) +{ +#ifdef CONFIG_KEXEC_FILE + if (image->file_mode) + return 1; +#endif + /* + * For kexec_load syscall, crash hotplug support requires + * KEXEC_CRASH_HOTPLUG_SUPPORT flag to be passed by userspace. + */ + return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT; +} + +unsigned int arch_crash_get_elfcorehdr_size(void) +{ + unsigned int phdr_cnt; + + /* A program header for possible CPUs, vmcoreinfo and kernel_map */ + phdr_cnt = 2 + num_possible_cpus(); + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) + phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES; + + return sizeof(Elf64_Ehdr) + phdr_cnt * sizeof(Elf64_Phdr); +} + +/** + * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old + * elfcorehdr in the kexec segment array. + * @image: the active struct kimage + */ +static void update_crash_elfcorehdr(struct kimage *image) +{ + void *elfbuf = NULL, *old_elfcorehdr; + unsigned long mem, memsz; + unsigned long elfsz = 0; + + /* + * Create the new elfcorehdr reflecting the changes to CPU and/or + * memory resources. + */ + if (crash_prepare_headers(true, &elfbuf, &elfsz, NULL)) { + pr_err("unable to create new elfcorehdr"); + goto out; + } + + /* + * Obtain address and size of the elfcorehdr segment, and + * check it against the new elfcorehdr buffer. + */ + mem = image->segment[image->elfcorehdr_index].mem; + memsz = image->segment[image->elfcorehdr_index].memsz; + if (elfsz > memsz) { + pr_err("update elfcorehdr elfsz %lu > memsz %lu", + elfsz, memsz); + goto out; + } + + /* + * Copy new elfcorehdr over the old elfcorehdr at destination. + */ + old_elfcorehdr = (void *)__va(mem); + if (!old_elfcorehdr) { + pr_err("mapping elfcorehdr segment failed\n"); + goto out; + } + + /* + * Temporarily invalidate the crash image while the + * elfcorehdr is updated. + */ + xchg(&kexec_crash_image, NULL); + memcpy_flushcache(old_elfcorehdr, elfbuf, elfsz); + xchg(&kexec_crash_image, image); + pr_debug("updated elfcorehdr\n"); + +out: + vfree(elfbuf); +} + +/** + * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes + * @image: a pointer to kexec_crash_image + * @arg: struct memory_notify handler for memory hotplug case and + * NULL for CPU hotplug case. + * + * Update the kdump image based on the type of hotplug event: + * - CPU add and remove: No action is needed. + * - Memory add/remove: Update the elfcorehdr to reflect the current memory layout. + * + * Prepare the new elfcorehdr and replace the existing elfcorehdr. + */ +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) +{ + switch (image->hp_action) { + case KEXEC_CRASH_HP_ADD_CPU: + fallthrough; + case KEXEC_CRASH_HP_REMOVE_CPU: + if (image->file_mode || image->elfcorehdr_updated) + return; + fallthrough; + case KEXEC_CRASH_HP_ADD_MEMORY: + case KEXEC_CRASH_HP_REMOVE_MEMORY: + update_crash_elfcorehdr(image); + return; + default: + pr_warn_once("Unknown hotplug action\n"); + } +} +#endif /* CONFIG_CRASH_HOTPLUG */ diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index d2985ce6230629..781febd0f6db10 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -82,10 +82,11 @@ int load_other_segments(struct kimage *image, char *cmdline, void *headers, unsigned long headers_sz) { - struct kexec_buf kbuf = {}; - void *dtb = NULL; unsigned long initrd_load_addr = 0, dtb_len, orig_segments = image->nr_segments; + struct kexec_buf kbuf = {}; + unsigned long pnum = 0; + void *dtb = NULL; int ret = 0; kbuf.image = image; @@ -98,6 +99,23 @@ int load_other_segments(struct kimage *image, kbuf.bufsz = headers_sz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = headers_sz; + +#ifdef CONFIG_CRASH_HOTPLUG + /* + * The elfcorehdr segment size accounts for VMCOREINFO, kernel_map + * maximum CPUs and maximum memory ranges. + */ + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) + pnum = 2 + num_possible_cpus() + CONFIG_CRASH_MAX_MEMORY_RANGES; + else + pnum += 2 + num_possible_cpus(); + + if (pnum < (unsigned long)PN_XNUM) + kbuf.memsz = pnum * sizeof(Elf64_Phdr) + sizeof(Elf64_Ehdr); + else + pr_err("number of Phdrs %lu exceeds max\n", pnum); +#endif + kbuf.buf_align = SZ_64K; /* largest supported page size */ kbuf.buf_max = ULONG_MAX; kbuf.top_down = true; @@ -108,6 +126,7 @@ int load_other_segments(struct kimage *image, goto out_err; } image->elf_load_addr = kbuf.mem; + image->elf_headers_sz = kbuf.memsz; kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", image->elf_load_addr, kbuf.bufsz, kbuf.memsz);