[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <28bd9d11-282f-bb22-66f5-d3d9165d4adf@gmail.com>
Date: Fri, 2 Jun 2023 14:06:53 +0800
From: Robert Hoo <robert.hoo.linux@...il.com>
To: Sean Christopherson <seanjc@...gle.com>,
Paolo Bonzini <pbonzini@...hat.com>
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
Li RongQing <lirongqing@...du.com>,
Yong He <zhuangel570@...il.com>,
Kai Huang <kai.huang@...el.com>
Subject: Re: [PATCH] KVM: x86/mmu: Add "never" option to allow sticky
disabling of nx_huge_pages
On 6/2/2023 8:58 AM, Sean Christopherson wrote:
> Add a "never" option to the nx_huge_pages module param to allow userspace
> to do a one-way hard disabling of the mitigation, and don't create the
> per-VM recovery threads when the mitigation is hard disabled. Letting
> userspace pinky swear that userspace doesn't want to enable NX mitigation
> (without reloading KVM) allows certain use cases to avoid the latency
> problems associated with spawning a kthread for each VM.
>
> E.g. in FaaS use cases, the guest kernel is trusted and the host may
> create 100+ VMs per logical CPU, which can result in 100ms+ latencies when
> a burst of VMs is created.
>
> Reported-by: Li RongQing <lirongqing@...du.com>
> Closes: https://lore.kernel.org/all/1679555884-32544-1-git-send-email-lirongqing@baidu.com
> Cc: Yong He <zhuangel570@...il.com>
> Cc: Robert Hoo <robert.hoo.linux@...il.com>
> Cc: Kai Huang <kai.huang@...el.com>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> arch/x86/kvm/mmu/mmu.c | 41 ++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 36 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index c8961f45e3b1..2ed38916b904 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -58,6 +58,8 @@
>
> extern bool itlb_multihit_kvm_mitigation;
>
> +static bool nx_hugepage_mitigation_hard_disabled;
> +
> int __read_mostly nx_huge_pages = -1;
> static uint __read_mostly nx_huge_pages_recovery_period_ms;
> #ifdef CONFIG_PREEMPT_RT
> @@ -67,12 +69,13 @@ static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
> static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
> #endif
>
> +static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp);
> static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
> static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp);
>
> static const struct kernel_param_ops nx_huge_pages_ops = {
> .set = set_nx_huge_pages,
> - .get = param_get_bool,
> + .get = get_nx_huge_pages,
> };
>
> static const struct kernel_param_ops nx_huge_pages_recovery_param_ops = {
> @@ -6844,6 +6847,14 @@ static void mmu_destroy_caches(void)
> kmem_cache_destroy(mmu_page_header_cache);
> }
>
> +static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
> +{
> + if (nx_hugepage_mitigation_hard_disabled)
> + return sprintf(buffer, "never\n");
> +
> + return param_get_bool(buffer, kp);
> +}
> +
> static bool get_nx_auto_mode(void)
> {
> /* Return true when CPU has the bug, and mitigations are ON */
> @@ -6860,15 +6871,29 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
> bool old_val = nx_huge_pages;
> bool new_val;
>
> + if (nx_hugepage_mitigation_hard_disabled)
> + return -EPERM;
> +
> /* In "auto" mode deploy workaround only if CPU has the bug. */
> - if (sysfs_streq(val, "off"))
> + if (sysfs_streq(val, "off")) {
> new_val = 0;
> - else if (sysfs_streq(val, "force"))
> + } else if (sysfs_streq(val, "force")) {
> new_val = 1;
> - else if (sysfs_streq(val, "auto"))
> + } else if (sysfs_streq(val, "auto")) {
> new_val = get_nx_auto_mode();
> - else if (kstrtobool(val, &new_val) < 0)
> + } else if (sysfs_streq(val, "never")) {
> + new_val = 0;
> +
> + mutex_lock(&kvm_lock);
> + if (!list_empty(&vm_list)) {
> + mutex_unlock(&kvm_lock);
> + return -EBUSY;
> + }
> + nx_hugepage_mitigation_hard_disabled = true;
> + mutex_unlock(&kvm_lock);
> + } else if (kstrtobool(val, &new_val) < 0) {
> return -EINVAL;
> + }
>
IIUC, with the sequence: (initially) "auto_off"/"off" --> create some VMs -->
switch to "never", the already-created VMs still have their recovery kthreads,
which can never be used and linger until they are destroyed along with the VM.
Is this the designed/accepted pattern?
> __set_nx_huge_pages(new_val);
>
> @@ -7006,6 +7031,9 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
> uint old_period, new_period;
> int err;
>
> + if (nx_hugepage_mitigation_hard_disabled)
> + return -EPERM;
> +
> was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);
>
> err = param_set_uint(val, kp);
> @@ -7161,6 +7189,9 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
> {
> int err;
>
> + if (nx_hugepage_mitigation_hard_disabled)
> + return 0;
> +
> err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
> "kvm-nx-lpage-recovery",
> &kvm->arch.nx_huge_page_recovery_thread);
>
> base-commit: 39428f6ea9eace95011681628717062ff7f5eb5f
Powered by blists - more mailing lists