lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1370865828-2053-2-git-send-email-dingel@linux.vnet.ibm.com>
Date:	Mon, 10 Jun 2013 14:03:45 +0200
From:	Dominik Dingel <dingel@...ux.vnet.ibm.com>
To:	Gleb Natapov <gleb@...hat.com>, Paolo Bonzini <pbonzini@...hat.com>
Cc:	Christian Borntraeger <borntraeger@...ibm.com>,
	Heiko Carstens <heiko.carstens@...ibm.com>,
	Martin Schwidefsky <schwidefsky@...ibm.com>,
	kvm@...r.kernel.org, linux-s390@...r.kernel.org,
	linux-mm@...ck.org, linux-kernel@...r.kernel.org,
	Dominik Dingel <dingel@...ux.vnet.ibm.com>
Subject: [PATCH 1/4] PF: Add FAULT_FLAG_RETRY_NOWAIT for guest fault

In case of a fault retry, exit sie64a() with the gmap_fault indication set.
This makes it possible to handle async page faults without the need for mm notifiers.

Based on a patch from Martin Schwidefsky.

Todo:
 - Add access to distinguish fault types to prevent double fault

Signed-off-by: Dominik Dingel <dingel@...ux.vnet.ibm.com>
---
 arch/s390/include/asm/processor.h |  7 +++++++
 arch/s390/kvm/kvm-s390.c          | 15 +++++++++++++++
 arch/s390/mm/fault.c              | 29 +++++++++++++++++++++++++----
 arch/s390/mm/pgtable.c            |  1 +
 4 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 6b49987..938d92c 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -77,6 +77,13 @@ struct thread_struct {
         unsigned long ksp;              /* kernel stack pointer             */
 	mm_segment_t mm_segment;
 	unsigned long gmap_addr;	/* address of last gmap fault. */
+#define PFAULT_EN	1
+#define PFAULT_PEND	2
+	unsigned long gmap_pfault;	/*
+					 * indicator if pfault is enabled for a
+					 * guest and if a guest pfault is
+					 * pending
+					 */
 	struct per_regs per_user;	/* User specified PER registers */
 	struct per_event per_event;	/* Cause of the last PER trap */
 	unsigned long per_flags;	/* Flags to control debug behavior */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index a44c0dc..c2ae2c4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -706,6 +706,17 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static void kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
+{
+	hva_t fault_addr;
+	/* TODO let current->thread.gmap_pfault indicate read or write fault */
+	struct mm_struct *mm = current->mm;
+	down_read(&mm->mmap_sem);
+	fault_addr = __gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+	get_user_pages(current, mm, fault_addr, 1, 1, 0, NULL, NULL);
+	up_read(&mm->mmap_sem);
+}
+
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int rc;
@@ -739,6 +750,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	if (rc < 0) {
 		if (kvm_is_ucontrol(vcpu->kvm)) {
 			rc = SIE_INTERCEPT_UCONTROL;
+		} else if (test_bit(PFAULT_PEND,
+				    &current->thread.gmap_pfault)) {
+			kvm_arch_fault_in_sync(vcpu);
+			rc = 0;
 		} else {
 			VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
 			trace_kvm_s390_sie_fault(vcpu);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index c5cfb6f..61b1644 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -50,6 +50,7 @@
 #define VM_FAULT_BADMAP		0x020000
 #define VM_FAULT_BADACCESS	0x040000
 #define VM_FAULT_SIGNAL		0x080000
+#define VM_FAULT_PFAULT		0x100000
 
 static unsigned long store_indication __read_mostly;
 
@@ -226,6 +227,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
 			return;
 		}
 	case VM_FAULT_BADCONTEXT:
+	case VM_FAULT_PFAULT:
 		do_no_context(regs);
 		break;
 	case VM_FAULT_SIGNAL:
@@ -263,6 +265,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
  */
 static inline int do_exception(struct pt_regs *regs, int access)
 {
+#ifdef CONFIG_PGSTE
+	struct gmap *gmap;
+#endif
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
@@ -301,9 +306,10 @@ static inline int do_exception(struct pt_regs *regs, int access)
 	down_read(&mm->mmap_sem);
 
 #ifdef CONFIG_PGSTE
-	if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
-		address = __gmap_fault(address,
-				     (struct gmap *) S390_lowcore.gmap);
+	gmap = (struct gmap *)
+		((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
+	if (gmap) {
+		address = __gmap_fault(address, gmap);
 		if (address == -EFAULT) {
 			fault = VM_FAULT_BADMAP;
 			goto out_up;
@@ -312,6 +318,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
 			fault = VM_FAULT_OOM;
 			goto out_up;
 		}
+		if (test_bit(PFAULT_EN, &current->thread.gmap_pfault))
+			flags |= FAULT_FLAG_RETRY_NOWAIT;
 	}
 #endif
 
@@ -368,9 +376,22 @@ retry:
 				      regs, address);
 		}
 		if (fault & VM_FAULT_RETRY) {
+#ifdef CONFIG_PGSTE
+			if (gmap &&
+			    test_bit(PFAULT_EN, &current->thread.gmap_pfault)) {
+				/* FAULT_FLAG_RETRY_NOWAIT has been set,
+				 * mmap_sem has not been released */
+				/* TODO use access to distinguish fault type */
+				set_bit(PFAULT_PEND,
+					&current->thread.gmap_pfault);
+				fault = VM_FAULT_PFAULT;
+				goto out_up;
+			}
+#endif
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
-			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags &= ~(FAULT_FLAG_ALLOW_RETRY |
+				   FAULT_FLAG_RETRY_NOWAIT);
 			flags |= FAULT_FLAG_TRIED;
 			down_read(&mm->mmap_sem);
 			goto retry;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5fb7f19..14d067d 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -540,6 +540,7 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
 	int rc;
 
 	current->thread.gmap_addr = address;
+	clear_bit(PFAULT_PEND, &current->thread.gmap_pfault);
 	segment_ptr = gmap_table_walk(address, gmap);
 	if (IS_ERR(segment_ptr))
 		return -EFAULT;
-- 
1.8.1.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ