Message-ID: <f1c4d09b81877bdcc16073afd70a48265ac5230f.camel@intel.com>
Date: Tue, 13 May 2025 21:58:41 +0000
From: "Edgecombe, Rick P" <rick.p.edgecombe@...el.com>
To: "pbonzini@...hat.com" <pbonzini@...hat.com>, "seanjc@...gle.com"
<seanjc@...gle.com>, "Zhao, Yan Y" <yan.y.zhao@...el.com>
CC: "Shutemov, Kirill" <kirill.shutemov@...el.com>, "quic_eberman@...cinc.com"
<quic_eberman@...cinc.com>, "Li, Xiaoyao" <xiaoyao.li@...el.com>,
"kvm@...r.kernel.org" <kvm@...r.kernel.org>, "Hansen, Dave"
<dave.hansen@...el.com>, "david@...hat.com" <david@...hat.com>,
"thomas.lendacky@....com" <thomas.lendacky@....com>, "tabba@...gle.com"
<tabba@...gle.com>, "Li, Zhiquan1" <zhiquan1.li@...el.com>, "Du, Fan"
<fan.du@...el.com>, "linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>, "michael.roth@....com"
<michael.roth@....com>, "Weiny, Ira" <ira.weiny@...el.com>, "vbabka@...e.cz"
<vbabka@...e.cz>, "binbin.wu@...ux.intel.com" <binbin.wu@...ux.intel.com>,
"ackerleytng@...gle.com" <ackerleytng@...gle.com>, "Yamahata, Isaku"
<isaku.yamahata@...el.com>, "Peng, Chao P" <chao.p.peng@...el.com>,
"Annapurve, Vishal" <vannapurve@...gle.com>, "jroedel@...e.de"
<jroedel@...e.de>, "Miao, Jun" <jun.miao@...el.com>, "pgonda@...gle.com"
<pgonda@...gle.com>, "x86@...nel.org" <x86@...nel.org>
Subject: Re: [RFC PATCH 21/21] KVM: x86: Ignore splitting huge pages in fault
path for TDX
On Thu, 2025-04-24 at 11:09 +0800, Yan Zhao wrote:
> int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level,
> - void *private_spt)
> + void *private_spt, bool mmu_lock_shared)
> {
> struct page *page = virt_to_page(private_spt);
> int ret;
> @@ -1842,6 +1842,29 @@ int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level,
> if (KVM_BUG_ON(to_kvm_tdx(kvm)->state != TD_STATE_RUNNABLE || level != PG_LEVEL_2M, kvm))
> return -EINVAL;
>
> + /*
> + * Split request with mmu_lock held for reading can only occur when one
> + * vCPU accepts at 2MB level while another vCPU accepts at 4KB level.
> + * Ignore this 4KB mapping request by setting violation_request_level to
> + * 2MB and returning -EBUSY for retry. Then the next fault at 2MB level
> + * would be a spurious fault. The vCPU accepting at 2MB will accept the
> + * whole 2MB range.
> + */
> + if (mmu_lock_shared) {
> + struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
> + struct vcpu_tdx *tdx = to_tdx(vcpu);
> +
> + if (KVM_BUG_ON(!vcpu, kvm))
> + return -EOPNOTSUPP;
> +
> + /* Request to map as 2MB leaf for the whole 2MB range */
> + tdx->violation_gfn_start = gfn_round_for_level(gfn, level);
> + tdx->violation_gfn_end = tdx->violation_gfn_start + KVM_PAGES_PER_HPAGE(level);
> + tdx->violation_request_level = level;
> +
> + return -EBUSY;
This is too hacky in how much it infers from mmu_lock_shared. Since guests
shouldn't be doing this, what about just doing kvm_vm_dead() with a small
pr_warn()? Maybe even do it in set_external_spte_present() and declare that
the rule for external page tables. That would shrink this patch significantly,
with no expected user impact.
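
Roughly what I have in mind, as an untested sketch (the warning text and
the -EIO return are placeholders I haven't thought hard about, not
something the patch already does):

	if (mmu_lock_shared) {
		/*
		 * A split request under a shared mmu_lock means the guest is
		 * accepting the same 2MB region at both 2MB and 4KB levels.
		 * Treat it as a guest bug: warn and kill the VM rather than
		 * plumbing a retry level back through the fault path.
		 */
		pr_warn_ratelimited("TDX: unexpected private SEPT split under shared mmu_lock, gfn 0x%llx\n",
				    gfn);
		kvm_vm_dead(kvm);
		return -EIO;
	}

That would replace the whole violation_gfn_start/end plumbing, since a well
behaved guest should never hit it.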
> + }
> +
> ret = tdx_sept_zap_private_spte(kvm, gfn, level, page);
> if (ret <= 0)
> return ret;
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index 0619e9390e5d..fcba76887508 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -159,7 +159,7 @@ int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
> int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
> enum pg_level level, kvm_pfn_t pfn);
> int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level,
> - void *private_spt);
> + void *private_spt, bool mmu_lock_shared);
>
> void tdx_flush_tlb_current(struct kvm_vcpu *vcpu);
> void tdx_flush_tlb_all(struct kvm_vcpu *vcpu);
> @@ -228,7 +228,8 @@ static inline int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
>
> static inline int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn,
> enum pg_level level,
> - void *private_spt)
> + void *private_spt,
> + bool mmu_lock_shared)
> {
> return -EOPNOTSUPP;
> }