Skip to content

Commit

Permalink
proc/vmcore: convert oldmem_pfn_is_ram callback to more generic vmcore callbacks
Browse files Browse the repository at this point in the history

Let's support multiple registered callbacks, making sure that
registering vmcore callbacks cannot fail.  Make the callback return a
bool instead of an int, handling how to deal with errors internally.
Drop unused HAVE_OLDMEM_PFN_IS_RAM.

We soon want to make use of this infrastructure from other drivers:
virtio-mem, registering one callback for each virtio-mem device, to
prevent reading unplugged virtio-mem memory.

Handle it via a generic vmcore_cb structure, prepared for future
extensions: for example, once we support virtio-mem on s390x where the
vmcore is completely constructed in the second kernel, we want to detect
and add plugged virtio-mem memory ranges to the vmcore in order for them
to get dumped properly.

Handle corner cases that are unexpected and shouldn't happen in sane
setups: registering a callback after the vmcore has already been opened
(warn only) and unregistering a callback after the vmcore has already been
opened (warn and essentially read only zeroes from that point on).

Link: https://lkml.kernel.org/r/20211005121430.30136-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
davidhildenbrand authored and torvalds committed Nov 9, 2021
1 parent 2c9feea commit cc5f270
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 38 deletions.
13 changes: 12 additions & 1 deletion arch/x86/kernel/aperture_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,23 @@ static int gart_mem_pfn_is_ram(unsigned long pfn)
(pfn >= aperture_pfn_start + aperture_page_count));
}

#ifdef CONFIG_PROC_VMCORE
/*
* vmcore pfn_is_ram callback for the GART aperture.
*
* Forwards the PFN to gart_mem_pfn_is_ram() and collapses its int result
* into a bool (any non-zero value becomes "true"). The cb argument is not
* used.
*/
static bool gart_oldmem_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
{
return !!gart_mem_pfn_is_ram(pfn);
}

/*
* vmcore callbacks for the GART aperture; handed to register_vmcore_cb()
* from exclude_from_core().
*/
static struct vmcore_cb gart_vmcore_cb = {
.pfn_is_ram = gart_oldmem_pfn_is_ram,
};
#endif

static void __init exclude_from_core(u64 aper_base, u32 aper_order)
{
aperture_pfn_start = aper_base >> PAGE_SHIFT;
aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
#ifdef CONFIG_PROC_VMCORE
WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
register_vmcore_cb(&gart_vmcore_cb);
#endif
#ifdef CONFIG_PROC_KCORE
WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
Expand Down
11 changes: 7 additions & 4 deletions arch/x86/xen/mmu_hvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
* The kdump kernel has to check whether a pfn of the crashed kernel
* was a ballooned page. vmcore is using this function to decide
* whether to access a pfn of the crashed kernel.
* Returns 0 if the pfn is not backed by a RAM page, the caller may
* Returns "false" if the pfn is not backed by a RAM page, the caller may
* handle the pfn special in this case.
*/
static int xen_oldmem_pfn_is_ram(unsigned long pfn)
static bool xen_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
{
struct xen_hvm_get_mem_type a = {
.domid = DOMID_SELF,
Expand All @@ -24,10 +24,13 @@ static int xen_oldmem_pfn_is_ram(unsigned long pfn)

if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) {
pr_warn_once("Unexpected HVMOP_get_mem_type failure\n");
return -ENXIO;
return true;
}
return a.mem_type != HVMMEM_mmio_dm;
}
/*
* vmcore callbacks for Xen HVM: lets the vmcore code consult
* xen_vmcore_pfn_is_ram() before accessing a PFN of the crashed kernel.
* Handed to register_vmcore_cb() from xen_hvm_init_mmu_ops().
*/
static struct vmcore_cb xen_vmcore_cb = {
.pfn_is_ram = xen_vmcore_pfn_is_ram,
};
#endif

static void xen_hvm_exit_mmap(struct mm_struct *mm)
Expand Down Expand Up @@ -61,6 +64,6 @@ void __init xen_hvm_init_mmu_ops(void)
if (is_pagetable_dying_supported())
pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap;
#ifdef CONFIG_PROC_VMCORE
WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
register_vmcore_cb(&xen_vmcore_cb);
#endif
}
99 changes: 69 additions & 30 deletions fs/proc/vmcore.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,46 +62,75 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0);
/* Device Dump Size */
static size_t vmcoredd_orig_sz;

/*
* Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
* The called function has to take care of module refcounting.
*/
static int (*oldmem_pfn_is_ram)(unsigned long pfn);

int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn))
/*
* Serializes the vmcore callback state below.
*
* vmcore_cb_list and vmcore_cb_unstable are modified only with the
* semaphore held for writing (callback registration/unregistration) and
* are consulted with it held for reading (oldmem read and mmap paths).
* vmcore_opened is set with the semaphore held for reading and checked
* with it held for writing.
*/
static DECLARE_RWSEM(vmcore_cb_rwsem);
/* List of registered vmcore callbacks. */
static LIST_HEAD(vmcore_cb_list);
/*
* Whether we had a surprise unregistration of a callback. Once set,
* pfn_is_ram() reports every PFN as non-RAM.
*/
static bool vmcore_cb_unstable;
/* Whether the vmcore has been opened once. */
static bool vmcore_opened;

void register_vmcore_cb(struct vmcore_cb *cb)
{
if (oldmem_pfn_is_ram)
return -EBUSY;
oldmem_pfn_is_ram = fn;
return 0;
down_write(&vmcore_cb_rwsem);
INIT_LIST_HEAD(&cb->next);
list_add_tail(&cb->next, &vmcore_cb_list);
/*
* Registering a vmcore callback after the vmcore was opened is
* very unusual (e.g., manual driver loading).
*/
if (vmcore_opened)
pr_warn_once("Unexpected vmcore callback registration\n");
up_write(&vmcore_cb_rwsem);
}
EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram);
EXPORT_SYMBOL_GPL(register_vmcore_cb);

void unregister_oldmem_pfn_is_ram(void)
void unregister_vmcore_cb(struct vmcore_cb *cb)
{
oldmem_pfn_is_ram = NULL;
wmb();
down_write(&vmcore_cb_rwsem);
list_del(&cb->next);
/*
* Unregistering a vmcore callback after the vmcore was opened is
* very unusual (e.g., forced driver removal), but we cannot stop
* unregistering.
*/
if (vmcore_opened) {
pr_warn_once("Unexpected vmcore callback unregistration\n");
vmcore_cb_unstable = true;
}
up_write(&vmcore_cb_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram);
EXPORT_SYMBOL_GPL(unregister_vmcore_cb);

static bool pfn_is_ram(unsigned long pfn)
{
int (*fn)(unsigned long pfn);
/* pfn is ram unless fn() checks pagetype */
struct vmcore_cb *cb;
bool ret = true;

/*
* Ask hypervisor if the pfn is really ram.
* A ballooned page contains no data and reading from such a page
* will cause high load in the hypervisor.
*/
fn = oldmem_pfn_is_ram;
if (fn)
ret = !!fn(pfn);
lockdep_assert_held_read(&vmcore_cb_rwsem);
if (unlikely(vmcore_cb_unstable))
return false;

list_for_each_entry(cb, &vmcore_cb_list, next) {
if (unlikely(!cb->pfn_is_ram))
continue;
ret = cb->pfn_is_ram(cb, pfn);
if (!ret)
break;
}

return ret;
}

/*
* proc open handler for the vmcore: records that the vmcore has been
* opened at least once, so that a later callback registration or
* unregistration can be flagged as unexpected.
*
* The flag is only ever set to true here, and the register/unregister
* paths check it with vmcore_cb_rwsem held for writing; presumably that
* is why holding the semaphore for reading is considered sufficient for
* this store.
*
* inode and file are unused. Always returns 0.
*/
static int open_vmcore(struct inode *inode, struct file *file)
{
down_read(&vmcore_cb_rwsem);
vmcore_opened = true;
up_read(&vmcore_cb_rwsem);

return 0;
}

/* Reads a page from the oldmem device from given offset. */
ssize_t read_from_oldmem(char *buf, size_t count,
u64 *ppos, int userbuf,
Expand All @@ -117,6 +146,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
offset = (unsigned long)(*ppos % PAGE_SIZE);
pfn = (unsigned long)(*ppos / PAGE_SIZE);

down_read(&vmcore_cb_rwsem);
do {
if (count > (PAGE_SIZE - offset))
nr_bytes = PAGE_SIZE - offset;
Expand All @@ -136,8 +166,10 @@ ssize_t read_from_oldmem(char *buf, size_t count,
tmp = copy_oldmem_page(pfn, buf, nr_bytes,
offset, userbuf);

if (tmp < 0)
if (tmp < 0) {
up_read(&vmcore_cb_rwsem);
return tmp;
}
}
*ppos += nr_bytes;
count -= nr_bytes;
Expand All @@ -147,6 +179,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
offset = 0;
} while (count);

up_read(&vmcore_cb_rwsem);
return read;
}

Expand Down Expand Up @@ -537,14 +570,19 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
unsigned long from, unsigned long pfn,
unsigned long size, pgprot_t prot)
{
int ret;

/*
* Check if oldmem_pfn_is_ram was registered to avoid
* looping over all pages without a reason.
*/
if (oldmem_pfn_is_ram)
return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
down_read(&vmcore_cb_rwsem);
if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable)
ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
else
return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
up_read(&vmcore_cb_rwsem);
return ret;
}

static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
Expand Down Expand Up @@ -668,6 +706,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
#endif

static const struct proc_ops vmcore_proc_ops = {
.proc_open = open_vmcore,
.proc_read = read_vmcore,
.proc_lseek = default_llseek,
.proc_mmap = mmap_vmcore,
Expand Down
26 changes: 23 additions & 3 deletions include/linux/crash_dump.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,29 @@ static inline void vmcore_unusable(void)
elfcorehdr_addr = ELFCORE_ADDR_ERR;
}

#define HAVE_OLDMEM_PFN_IS_RAM 1
extern int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn));
extern void unregister_oldmem_pfn_is_ram(void);
/**
* struct vmcore_cb - driver callbacks for /proc/vmcore handling
* @pfn_is_ram: check whether a PFN really is RAM and should be accessed when
* reading the vmcore. Will return "true" if it is RAM or if the
* callback cannot tell. If any callback returns "false", it's not
* RAM and the page must not be accessed; zeroes should be
* indicated in the vmcore instead. For example, a ballooned page
* contains no data and reading from such a page will cause high
* load in the hypervisor. May be left NULL, in which case this
* entry is skipped when checking a PFN. The registered structure
* itself is passed back as the first argument.
* @next: List head to manage registered callbacks internally; initialized by
* register_vmcore_cb().
*
* vmcore callbacks allow drivers managing physical memory ranges to
* coordinate with vmcore handling code, for example, to prevent accessing
* physical memory ranges that should not be accessed when reading the vmcore,
* although included in the vmcore header as memory ranges to dump.
*/
struct vmcore_cb {
bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn);
struct list_head next;
};
extern void register_vmcore_cb(struct vmcore_cb *cb);
extern void unregister_vmcore_cb(struct vmcore_cb *cb);

#else /* !CONFIG_CRASH_DUMP */
static inline bool is_kdump_kernel(void) { return 0; }
Expand Down

0 comments on commit cc5f270

Please sign in to comment.