[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250924121707.145350-1-ggala@linux.ibm.com>
Date: Wed, 24 Sep 2025 14:17:07 +0200
From: Gautam Gala <ggala@...ux.ibm.com>
To: Alexander Gordeev <agordeev@...ux.ibm.com>,
Gerald Schaefer <gerald.schaefer@...ux.ibm.com>,
Heiko Carstens <hca@...ux.ibm.com>, Vasily Gorbik <gor@...ux.ibm.com>,
Christian Borntraeger <borntraeger@...ux.ibm.com>,
Sven Schnelle <svens@...ux.ibm.com>,
Janosch Frank <frankja@...ux.ibm.com>,
Claudio Imbrenda <imbrenda@...ux.ibm.com>,
David Hildenbrand <david@...hat.com>
Cc: linux-s390@...r.kernel.org, linux-kernel@...r.kernel.org,
kvm@...r.kernel.org
Subject: [PATCH] KVM: s390: Fix to clear PTE when discarding a swapped page
KVM run fails when guests with 'cmm' cpu feature and host are
under memory pressure and use swap heavily. This is because
npages becomes ENOMEM (out of memory) in hva_to_pfn_slow()
which in turn propagates as EFAULT to qemu. Clearing the page
table entry when discarding an address that maps to a swap
entry resolves the issue.
Suggested-by: Claudio Imbrenda <imbrenda@...ux.ibm.com>
Signed-off-by: Gautam Gala <ggala@...ux.ibm.com>
---
arch/s390/include/asm/pgtable.h | 22 ++++++++++++++++++++++
arch/s390/mm/gmap_helpers.c | 12 +++++++++++-
arch/s390/mm/pgtable.c | 23 +----------------------
3 files changed, 34 insertions(+), 23 deletions(-)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index c1a7a92f0575..b7100c6a4054 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -2055,4 +2055,26 @@ static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt)
return res;
}
+static inline pgste_t pgste_get_lock(pte_t *ptep)
+{
+ unsigned long value = 0;
+#ifdef CONFIG_PGSTE
+ unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
+
+ do {
+ value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
+ } while (value & PGSTE_PCL_BIT);
+ value |= PGSTE_PCL_BIT;
+#endif
+ return __pgste(value);
+}
+
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+ barrier();
+ WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
+#endif
+}
+
#endif /* _S390_PAGE_H */
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index b63f427e7289..d4c3c36855e2 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -15,6 +15,7 @@
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>
+#include <asm/pgtable.h>
/**
* ptep_zap_swap_entry() - discard a swap entry.
@@ -47,6 +48,7 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
struct vm_area_struct *vma;
spinlock_t *ptl;
+ pgste_t pgste;
pte_t *ptep;
mmap_assert_locked(mm);
@@ -60,8 +62,16 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
ptep = get_locked_pte(mm, vmaddr, &ptl);
if (unlikely(!ptep))
return;
- if (pte_swap(*ptep))
+ if (pte_swap(*ptep)) {
+ preempt_disable();
+ pgste = pgste_get_lock(ptep);
+
ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+ pte_clear(mm, vmaddr, ptep);
+
+ pgste_set_unlock(ptep, pgste);
+ preempt_enable();
+ }
pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 60688be4e876..879f39366e6c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,6 +24,7 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>
+#include <asm/pgtable.h>
#include <asm/machine.h>
pgprot_t pgprot_writecombine(pgprot_t prot)
@@ -115,28 +116,6 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
return old;
}
-static inline pgste_t pgste_get_lock(pte_t *ptep)
-{
- unsigned long value = 0;
-#ifdef CONFIG_PGSTE
- unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
-
- do {
- value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
- } while (value & PGSTE_PCL_BIT);
- value |= PGSTE_PCL_BIT;
-#endif
- return __pgste(value);
-}
-
-static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
- barrier();
- WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
-#endif
-}
-
static inline pgste_t pgste_get(pte_t *ptep)
{
unsigned long pgste = 0;
--
2.51.0
Powered by blists - more mailing lists