[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250213085538.17060-3-jgross@suse.com>
Date: Thu, 13 Feb 2025 09:55:38 +0100
From: Juergen Gross <jgross@...e.com>
To: linux-kernel@...r.kernel.org,
x86@...nel.org
Cc: Juergen Gross <jgross@...e.com>,
Boris Ostrovsky <boris.ostrovsky@...cle.com>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>,
Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>,
"H. Peter Anvin" <hpa@...or.com>,
xen-devel@...ts.xenproject.org,
Alan Robinson <Alan.Robinson@...itsu.com>
Subject: [PATCH v2 2/2] x86/xen: allow larger contiguous memory regions in PV guests
Today a PV guest (including dom0) can create 2MB contiguous memory
regions for DMA buffers at max. This has led to problems at least
with the megaraid_sas driver, which wants to allocate a 2.3MB DMA
buffer.
The limiting factor is the frame array used to do the hypercall for
making the memory contiguous, which has 512 entries and is just a
static array in mmu_pv.c.
In order to not waste memory for non-PV guests, put the initial
frame array into .init.data section and dynamically allocate an array
from the .init_after_bootmem hook of PV guests.
In case a contiguous memory area larger than the initially supported
2MB is requested, allocate a larger buffer for the frame list. Note
that such an allocation is tried only after memory management has been
initialized properly, which is tested via a flag being set in the
.init_after_bootmem hook.
Fixes: 9f40ec84a797 ("xen/swiotlb: add alignment check for dma buffers")
Signed-off-by: Juergen Gross <jgross@...e.com>
Tested-by: Alan Robinson <Alan.Robinson@...itsu.com>
---
Note that the "Fixes:" tag is not really correct, as that patch didn't
introduce the problem, but rather made it visible. OTOH it is the best
indicator we have to identify kernel versions this patch should be
backported to.
---
arch/x86/xen/mmu_pv.c | 71 +++++++++++++++++++++++++++++++++++++------
1 file changed, 62 insertions(+), 9 deletions(-)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 2c70cd35e72c..d078de2c952b 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -111,6 +111,51 @@ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
*/
static DEFINE_SPINLOCK(xen_reservation_lock);
+/* Protected by xen_reservation_lock. */
+#define MIN_CONTIG_ORDER 9 /* 2MB */
+static unsigned int discontig_frames_order = MIN_CONTIG_ORDER;
+static unsigned long discontig_frames_early[1UL << MIN_CONTIG_ORDER] __initdata;
+static unsigned long *discontig_frames __refdata = discontig_frames_early;
+static bool discontig_frames_dyn;
+
+static int alloc_discontig_frames(unsigned int order)
+{
+ unsigned long *new_array, *old_array;
+ unsigned int old_order;
+ unsigned long flags;
+
+ BUG_ON(order < MIN_CONTIG_ORDER);
+ BUILD_BUG_ON(sizeof(discontig_frames_early) != PAGE_SIZE);
+
+ new_array = (unsigned long *)__get_free_pages(GFP_KERNEL,
+ order - MIN_CONTIG_ORDER);
+ if (!new_array)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&xen_reservation_lock, flags);
+
+ old_order = discontig_frames_order;
+
+ if (order > discontig_frames_order || !discontig_frames_dyn) {
+ if (!discontig_frames_dyn)
+ old_array = NULL;
+ else
+ old_array = discontig_frames;
+
+ discontig_frames = new_array;
+ discontig_frames_order = order;
+ discontig_frames_dyn = true;
+ } else {
+ old_array = new_array;
+ }
+
+ spin_unlock_irqrestore(&xen_reservation_lock, flags);
+
+ free_pages((unsigned long)old_array, old_order - MIN_CONTIG_ORDER);
+
+ return 0;
+}
+
/*
* Note about cr3 (pagetable base) values:
*
@@ -814,6 +859,9 @@ static void __init xen_after_bootmem(void)
SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
+
+ if (alloc_discontig_frames(MIN_CONTIG_ORDER))
+ BUG();
}
static void xen_unpin_page(struct mm_struct *mm, struct page *page,
@@ -2203,10 +2251,6 @@ void __init xen_init_mmu_ops(void)
memset(dummy_mapping, 0xff, PAGE_SIZE);
}
-/* Protected by xen_reservation_lock. */
-#define MAX_CONTIG_ORDER 9 /* 2MB */
-static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
-
#define VOID_PTE (mfn_pte(0, __pgprot(0)))
static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
unsigned long *in_frames,
@@ -2323,18 +2367,25 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
unsigned int address_bits,
dma_addr_t *dma_handle)
{
- unsigned long *in_frames = discontig_frames, out_frame;
+ unsigned long *in_frames, out_frame;
unsigned long flags;
int success;
unsigned long vstart = (unsigned long)phys_to_virt(pstart);
- if (unlikely(order > MAX_CONTIG_ORDER))
- return -ENOMEM;
+ if (unlikely(order > discontig_frames_order)) {
+ if (!discontig_frames_dyn)
+ return -ENOMEM;
+
+ if (alloc_discontig_frames(order))
+ return -ENOMEM;
+ }
memset((void *) vstart, 0, PAGE_SIZE << order);
spin_lock_irqsave(&xen_reservation_lock, flags);
+ in_frames = discontig_frames;
+
/* 1. Zap current PTEs, remembering MFNs. */
xen_zap_pfn_range(vstart, order, in_frames, NULL);
@@ -2358,12 +2409,12 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
- unsigned long *out_frames = discontig_frames, in_frame;
+ unsigned long *out_frames, in_frame;
unsigned long flags;
int success;
unsigned long vstart;
- if (unlikely(order > MAX_CONTIG_ORDER))
+ if (unlikely(order > discontig_frames_order))
return;
vstart = (unsigned long)phys_to_virt(pstart);
@@ -2371,6 +2422,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
spin_lock_irqsave(&xen_reservation_lock, flags);
+ out_frames = discontig_frames;
+
/* 1. Find start MFN of contiguous extent. */
in_frame = virt_to_mfn((void *)vstart);
--
2.43.0
Powered by blists - more mailing lists