lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YqzyjZnflCMPo8b/@google.com>
Date:   Fri, 17 Jun 2022 21:30:53 +0000
From:   Sean Christopherson <seanjc@...gle.com>
To:     Chao Peng <chao.p.peng@...ux.intel.com>
Cc:     kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-mm@...ck.org, linux-fsdevel@...r.kernel.org,
        linux-api@...r.kernel.org, linux-doc@...r.kernel.org,
        qemu-devel@...gnu.org, Paolo Bonzini <pbonzini@...hat.com>,
        Jonathan Corbet <corbet@....net>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        x86@...nel.org, "H . Peter Anvin" <hpa@...or.com>,
        Hugh Dickins <hughd@...gle.com>,
        Jeff Layton <jlayton@...nel.org>,
        "J . Bruce Fields" <bfields@...ldses.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Mike Rapoport <rppt@...nel.org>,
        Steven Price <steven.price@....com>,
        "Maciej S . Szmigiero" <mail@...iej.szmigiero.name>,
        Vlastimil Babka <vbabka@...e.cz>,
        Vishal Annapurve <vannapurve@...gle.com>,
        Yu Zhang <yu.c.zhang@...ux.intel.com>,
        "Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>,
        luto@...nel.org, jun.nakajima@...el.com, dave.hansen@...el.com,
        ak@...ux.intel.com, david@...hat.com, aarcange@...hat.com,
        ddutile@...hat.com, dhildenb@...hat.com,
        Quentin Perret <qperret@...gle.com>,
        Michael Roth <michael.roth@....com>, mhocko@...e.com
Subject: Re: [PATCH v6 6/8] KVM: Handle page fault for private memory

On Thu, May 19, 2022, Chao Peng wrote:
> @@ -4028,8 +4081,11 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
>  	if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
>  		return true;
>  
> -	return fault->slot &&
> -	       mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
> +	if (fault->is_private)
> +		return mmu_notifier_retry(vcpu->kvm, mmu_seq);

Hmm, this is somewhat undesirable, because faulting in private pfns will be blocked
by unrelated mmu_notifier updates.  The issue is mitigated to some degree by bumping
the sequence count if and only if overlap with a memslot is detected, e.g. mapping
changes that affect only userspace won't block the guest.

It probably won't be an issue, but at the same time it's easy to solve, and I don't
like piggybacking mmu_notifier_seq as private mappings shouldn't be subject to the
mmu_notifier.

That would also fix a theoretical bug in this patch where mmu_notifier_retry()
wouldn't be defined if CONFIG_MEMFILE_NOTIFIER=y && CONFIG_MMU_NOTIFIER=n.

---
 arch/x86/kvm/mmu/mmu.c   | 11 ++++++-----
 include/linux/kvm_host.h | 16 +++++++++++-----
 virt/kvm/kvm_main.c      |  2 +-
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0b455c16ec64..a4cbd29433e7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4100,10 +4100,10 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
 		return true;

 	if (fault->is_private)
-		return mmu_notifier_retry(vcpu->kvm, mmu_seq);
-	else
-		return fault->slot &&
-			mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
+		return memfile_notifier_retry(vcpu->kvm, mmu_seq);
+
+	return fault->slot &&
+	       mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
 }

 static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -4127,7 +4127,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	if (r)
 		return r;

-	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	mmu_seq = fault->is_private ? vcpu->kvm->memfile_notifier_seq :
+				      vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();

 	r = kvm_faultin_pfn(vcpu, fault);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 92afa5bddbc5..31f704c83099 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -773,16 +773,15 @@ struct kvm {
 	struct hlist_head irq_ack_notifier_list;
 #endif

-#if (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)) ||\
-	defined(CONFIG_MEMFILE_NOTIFIER)
+#if (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
 	unsigned long mmu_notifier_seq;
-#endif
-
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	struct mmu_notifier mmu_notifier;
 	long mmu_notifier_count;
 	unsigned long mmu_notifier_range_start;
 	unsigned long mmu_notifier_range_end;
+#endif
+#ifdef CONFIG_MEMFILE_NOTIFIER
+	unsigned long memfile_notifier_seq;
 #endif
 	struct list_head devices;
 	u64 manual_dirty_log_protect;
@@ -1964,6 +1963,13 @@ static inline int mmu_notifier_retry_hva(struct kvm *kvm,
 }
 #endif

+#ifdef CONFIG_MEMFILE_NOTIFIER
+static inline bool memfile_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
+{
+	return kvm->memfile_notifier_seq != mmu_seq;
+}
+#endif
+
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING

 #define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2b416d3bd60e..e6d34c964d51 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -898,7 +898,7 @@ static void kvm_private_mem_notifier_handler(struct memfile_notifier *notifier,
 	KVM_MMU_LOCK(kvm);
 	if (kvm_unmap_gfn_range(kvm, &gfn_range))
 		kvm_flush_remote_tlbs(kvm);
-	kvm->mmu_notifier_seq++;
+	kvm->memfile_notifier_seq++;
 	KVM_MMU_UNLOCK(kvm);
 	srcu_read_unlock(&kvm->srcu, idx);
 }

base-commit: 333ef501c7f6c6d4ef2b7678905cad0f8ef3e271
--

> +	else
> +		return fault->slot &&
> +			mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
>  }
>  
>  static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> @@ -4088,7 +4144,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>  		read_unlock(&vcpu->kvm->mmu_lock);
>  	else
>  		write_unlock(&vcpu->kvm->mmu_lock);
> -	kvm_release_pfn_clean(fault->pfn);
> +
> +	if (fault->is_private)
> +		kvm_private_mem_put_pfn(fault->slot, fault->pfn);

Why does the shmem path lock the page, and then unlock it here?

Same question for why this path marks it dirty?  The guest has the page mapped
so the dirty flag is immediately stale.

In other words, why does KVM need to do something different for private pfns?

> +	else
> +		kvm_release_pfn_clean(fault->pfn);
> +
>  	return r;
>  }
>  

...

> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 7f8f1c8dbed2..1d857919a947 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -878,7 +878,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>  
>  out_unlock:
>  	write_unlock(&vcpu->kvm->mmu_lock);
> -	kvm_release_pfn_clean(fault->pfn);
> +	if (fault->is_private)

Indirect MMUs can't support private faults, i.e. this is unnecessary.

> +		kvm_private_mem_put_pfn(fault->slot, fault->pfn);
> +	else
> +		kvm_release_pfn_clean(fault->pfn);
>  	return r;
>  }
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 3fd168972ecd..b0a7910505ed 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2241,4 +2241,26 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
>  /* Max number of entries allowed for each kvm dirty ring */
>  #define  KVM_DIRTY_RING_MAX_ENTRIES  65536
>  
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +static inline int kvm_private_mem_get_pfn(struct kvm_memory_slot *slot,
> +					  gfn_t gfn, kvm_pfn_t *pfn, int *order)
> +{
> +	int ret;
> +	pfn_t pfnt;
> +	pgoff_t index = gfn - slot->base_gfn +
> +			(slot->private_offset >> PAGE_SHIFT);
> +
> +	ret = slot->notifier.bs->get_lock_pfn(slot->private_file, index, &pfnt,
> +						order);
> +	*pfn = pfn_t_to_pfn(pfnt);
> +	return ret;
> +}
> +
> +static inline void kvm_private_mem_put_pfn(struct kvm_memory_slot *slot,
> +					   kvm_pfn_t pfn)
> +{
> +	slot->notifier.bs->put_unlock_pfn(pfn_to_pfn_t(pfn));
> +}
> +#endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
> +
>  #endif
> -- 
> 2.25.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ