linux-kernel - Re: [PATCH v9 07/18] x86/virt/tdx: Do TDX module per-cpu initialization

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Y+yi4B+P9K2FXNqt@hirez.programming.kicks-ass.net>
Date:   Wed, 15 Feb 2023 10:16:16 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     "Huang, Kai" <kai.huang@...el.com>
Cc:     "kvm@...r.kernel.org" <kvm@...r.kernel.org>,
        "Hansen, Dave" <dave.hansen@...el.com>,
        "david@...hat.com" <david@...hat.com>,
        "bagasdotme@...il.com" <bagasdotme@...il.com>,
        "ak@...ux.intel.com" <ak@...ux.intel.com>,
        "Wysocki, Rafael J" <rafael.j.wysocki@...el.com>,
        "kirill.shutemov@...ux.intel.com" <kirill.shutemov@...ux.intel.com>,
        "Chatre, Reinette" <reinette.chatre@...el.com>,
        "Christopherson,, Sean" <seanjc@...gle.com>,
        "pbonzini@...hat.com" <pbonzini@...hat.com>,
        "tglx@...utronix.de" <tglx@...utronix.de>,
        "Yamahata, Isaku" <isaku.yamahata@...el.com>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "linux-mm@...ck.org" <linux-mm@...ck.org>,
        "Luck, Tony" <tony.luck@...el.com>,
        "Shahar, Sagi" <sagis@...gle.com>,
        "imammedo@...hat.com" <imammedo@...hat.com>,
        "Gao, Chao" <chao.gao@...el.com>,
        "Brown, Len" <len.brown@...el.com>,
        "sathyanarayanan.kuppuswamy@...ux.intel.com" 
        <sathyanarayanan.kuppuswamy@...ux.intel.com>,
        "Huang, Ying" <ying.huang@...el.com>,
        "Williams, Dan J" <dan.j.williams@...el.com>,
        Tejun Heo <tj@...nel.org>
Subject: Re: [PATCH v9 07/18] x86/virt/tdx: Do TDX module per-cpu
 initialization

On Tue, Feb 14, 2023 at 10:53:26PM +0000, Huang, Kai wrote:

> Sure.  I just tried to do.  There are two minor things:
> 
> 1) should I just use smp_cond_func_t directly as the cond function?

Yeah, might as well I suppose...

> 2) schedule_on_each_cpu() takes cpus_read_lock() internally.  However in my
> case, tdx_enable() already takes that so I need a _locked_ version.
> 
> How does below look like? (Not tested)
> 
> +/**
> + * schedule_on_each_cpu_cond_locked - execute a function synchronously
> + *                                   on each online CPU for which the
> + *                                   condition function returns positive
> + * @func:      the function to call
> + * @cond_func:  the condition function to call
> + * @cond_data: the data passed to the condition function
> + *
> + * schedule_on_each_cpu_cond_locked() executes @func on each online CPU
> + * when @cond_func returns positive for that cpu, using the system
> + * workqueue and blocks until all CPUs have completed.
> + *
> + * schedule_on_each_cpu_cond_locked() doesn't hold read lock of CPU
> + * hotplug lock but depend on the caller to do.
> + *
> + * schedule_on_each_cpu_cond_locked() is very slow.
> + *
> + * Return:
> + * 0 on success, -errno on failure.
> + */
> +int schedule_on_each_cpu_cond_locked(work_func_t func,
> +                                    smp_cond_func_t cond_func,
> +                                    void *cond_data)
> +{
> +       int cpu;
> +       struct work_struct __percpu *works;
> +
> +       works = alloc_percpu(struct work_struct);
> +       if (!works)
> +               return -ENOMEM;
> +
> +       for_each_online_cpu(cpu) {
> +               struct work_struct *work = per_cpu_ptr(works, cpu);
> +
> +               if (cond_func && !cond_func(cpu, cond_data))
> +                       continue;
> +
> +               INIT_WORK(work, func);
> +               schedule_work_on(cpu, work);
> +       }
> +
> +       for_each_online_cpu(cpu)

I think you need to skip some flushes too. Given we skip setting
work->func, this will go WARN, see __flush_work().

> +               flush_work(per_cpu_ptr(works, cpu));
> +
> +       free_percpu(works);
> +       return 0;
> +}
> +
> +/**
> + * schedule_on_each_cpu_cond - execute a function synchronously on each
> + *                            online CPU for which the condition
> + *                            function returns positive
> + * @func:      the function to call
> + * @cond_func:  the condition function to call
> + * @cond_data: the data passed to the condition function
> + *
> + * schedule_on_each_cpu_cond() executes @func on each online CPU
> + * when @cond_func returns positive for that cpu, using the system
> + * workqueue and blocks until all CPUs have completed.
> + *
> + * schedule_on_each_cpu_cond() is very slow.
> + *
> + * Return:
> + * 0 on success, -errno on failure.
> + */
> +int schedule_on_each_cpu_cond(work_func_t func,
> +                             smp_cond_func_t cond_func,
> +                             void *cond_data)
> +{
> +       int ret;
> +
> +       cpus_read_lock();
> +
> +       ret = schedule_on_each_cpu_cond_locked(func, cond_func, cond_data);
> +
> +       cpus_read_unlock();
> +
> +       return ret;
> +}

Also, re-implement schedule_on_each_cpu() using the above to save a
bunch of duplication:

int schedule_on_each_cpu(work_func_t func)
{
	return schedule_on_each_cpu_cond(func, NULL, NULL);
}


That said, I find it jarring that the schedule_on*() family doesn't have
a void* argument to the function, like the smp_*() family has. So how
about something like the below (equally untested). It preserves the
current semantics, but allows a work function to cast to schedule_work
and access ->info if it so desires.


diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a0143dd24430..5e97111322b2 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -103,6 +103,11 @@ struct work_struct {
 #endif
 };
 
+struct schedule_work {
+	struct work_struct work;
+	void *info;
+};
+
 #define WORK_DATA_INIT()	ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL)
 #define WORK_DATA_STATIC_INIT()	\
 	ATOMIC_LONG_INIT((unsigned long)(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC))
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 07895deca271..c73bb8860bbc 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -51,6 +51,7 @@
 #include <linux/sched/isolation.h>
 #include <linux/nmi.h>
 #include <linux/kvm_para.h>
+#include <linux/smp.h>
 
 #include "workqueue_internal.h"
 
@@ -3302,43 +3303,64 @@ bool cancel_delayed_work_sync(struct delayed_work *dwork)
 }
 EXPORT_SYMBOL(cancel_delayed_work_sync);
 
-/**
- * schedule_on_each_cpu - execute a function synchronously on each online CPU
- * @func: the function to call
- *
- * schedule_on_each_cpu() executes @func on each online CPU using the
- * system workqueue and blocks until all CPUs have completed.
- * schedule_on_each_cpu() is very slow.
- *
- * Return:
- * 0 on success, -errno on failure.
- */
-int schedule_on_each_cpu(work_func_t func)
+int schedule_on_each_cpu_cond_locked(work_func_t func, smp_cond_func_t cond_func, void *info)
 {
+	struct schedule_work __percpu *works;
 	int cpu;
-	struct work_struct __percpu *works;
 
-	works = alloc_percpu(struct work_struct);
+	works = alloc_percpu(struct schedule_work);
 	if (!works)
 		return -ENOMEM;
 
-	cpus_read_lock();
-
 	for_each_online_cpu(cpu) {
-		struct work_struct *work = per_cpu_ptr(works, cpu);
+		struct schedule_work *work = per_cpu_ptr(works, cpu);
 
-		INIT_WORK(work, func);
-		schedule_work_on(cpu, work);
+		if (cond_func && !cond_func(cpu, info))
+			continue;
+
+		INIT_WORK(&work->work, func);
+		work->info = info;
+		schedule_work_on(cpu, &work->work);
 	}
 
-	for_each_online_cpu(cpu)
-		flush_work(per_cpu_ptr(works, cpu));
+	for_each_online_cpu(cpu) {
+		struct schedule_work *work = per_cpu_ptr(works, cpu);
+
+		if (work->work.func)
+			flush_work(&work->work);
+	}
 
-	cpus_read_unlock();
 	free_percpu(works);
 	return 0;
 }
 
+int schedule_on_each_cpu_cond(work_func_t func, smp_cond_func_t cond_func, void *info)
+{
+	int ret;
+
+	cpus_read_lock();
+	ret = schedule_on_each_cpu_cond_locked(func, cond, info);
+	cpus_read_unlock();
+
+	return ret;
+}
+
+/**
+ * schedule_on_each_cpu - execute a function synchronously on each online CPU
+ * @func: the function to call
+ *
+ * schedule_on_each_cpu() executes @func on each online CPU using the
+ * system workqueue and blocks until all CPUs have completed.
+ * schedule_on_each_cpu() is very slow.
+ *
+ * Return:
+ * 0 on success, -errno on failure.
+ */
+int schedule_on_each_cpu(work_func_t func)
+{
+	return schedule_on_each_cpu_cond(func, NULL, NULL);
+}
+
 /**
  * execute_in_process_context - reliably execute the routine with user context
  * @fn:		the function to execute