[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <op.2cardibgwjvjmi@hhuan26-mobl.amr.corp.intel.com>
Date: Wed, 04 Oct 2023 10:45:08 -0500
From: "Haitao Huang" <haitao.huang@...ux.intel.com>
To: "Jarkko Sakkinen" <jarkko@...nel.org>, dave.hansen@...ux.intel.com,
tj@...nel.org, linux-kernel@...r.kernel.org,
linux-sgx@...r.kernel.org, x86@...nel.org, cgroups@...r.kernel.org,
tglx@...utronix.de, mingo@...hat.com, bp@...en8.de, hpa@...or.com,
sohil.mehta@...el.com
Cc: zhiquan1.li@...el.com, kristen@...ux.intel.com, seanjc@...gle.com,
zhanb@...rosoft.com, anakrish@...rosoft.com,
mikko.ylinen@...ux.intel.com, yangjie@...rosoft.com
Subject: Re: [PATCH v5 01/18] cgroup/misc: Add per resource callbacks for CSS
events
Hi Jarkko
On Mon, 02 Oct 2023 17:55:14 -0500, Jarkko Sakkinen <jarkko@...nel.org>
wrote:
...
>> > >> I noticed this one later:
>> > >>
>> > >> It would be better to create a separate ops struct and declare the
>> instance
>> > >> as const at minimum.
>> > >>
>> > >> Then there is no need for dynamic assignment of ops and all that is
>> in
>> > >> rodata. This improves both security and also allows static
>> analysis
>> > >> a bit better.
>> > >>
>> > >> Now you have to dynamically trace the struct instance, e.g. in
>> case of
>> > >> a bug. If this were done, it would already be in the vmlinux.
>> > > I.e. then in the driver you can have a static const struct declaration
>> > > with *all* pointers pre-assigned.
>> > >
>> > > Not sure if cgroups follows this or not but it is *objectively*
>> > > better. Previous work is not always best possible work...
>> > >
>> >
>> > IIUC, like the vm_ops field in vma structs. Although the function pointers in
>> > vm_ops are assigned statically, you still need to dynamically assign
>> > vm_ops for each instance of vma.
>> >
>> > So the code will look like this:
>> >
>> > if (parent_cg->res[i].misc_ops && parent_cg->res[i].misc_ops->alloc)
>> > {
>> > ...
>> > }
>> >
>> > I don't see this pattern used in cgroups, and I have no strong opinion
>> > either way.
>> >
>> > TJ, do you have a preference on this?
>>
>> I do have a strong opinion on this. On the client side we want as many
>> things declared statically as we can because it gives more tools for
>> static analysis.
>>
>> I don't want to see dynamic assignments in the SGX driver, when they
>> are not actually needed, no matter how things are done in cgroups.
>
> I.e. I don't really even care what crazy things cgroups subsystem
> might do or not do. It's not my problem.
>
> All I care about is that we *do not* have any use for assigning those
> pointers at run-time. So do whatever you want with cgroups side
> as long as this is not the case.
>
So I will update it to something like the following. Let me know if that's
the correct understanding.
@tj, I'd appreciate your input on whether this is acceptable from the
cgroups side.
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -31,22 +31,26 @@ struct misc_cg;
#include <linux/cgroup.h>
+/* per resource callback ops */
+struct misc_operations_struct {
+ int (*alloc)(struct misc_cg *cg);
+ void (*free)(struct misc_cg *cg);
+ void (*max_write)(struct misc_cg *cg);
+};
/**
* struct misc_res: Per cgroup per misc type resource
* @max: Maximum limit on the resource.
* @usage: Current usage of the resource.
* @events: Number of times, the resource limit exceeded.
+ * @priv: resource specific data.
+ * @misc_ops: resource specific operations.
*/
struct misc_res {
u64 max;
atomic64_t usage;
atomic64_t events;
void *priv;
-
- /* per resource callback ops */
- int (*alloc)(struct misc_cg *cg);
- void (*free)(struct misc_cg *cg);
- void (*max_write)(struct misc_cg *cg);
+ const struct misc_operations_struct *misc_ops;
};
...
diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c
index 4633b8629e63..500415087643 100644
--- a/kernel/cgroup/misc.c
+++ b/kernel/cgroup/misc.c
@@ -277,8 +277,8 @@ static ssize_t misc_cg_max_write(struct
kernfs_open_file *of, char *buf,
if (READ_ONCE(misc_res_capacity[type])) {
WRITE_ONCE(cg->res[type].max, max);
- if (cg->res[type].max_write)
- cg->res[type].max_write(cg);
+ if (cg->res[type].misc_ops &&
cg->res[type].misc_ops->max_write)
+ cg->res[type].misc_ops->max_write(cg);
[skip other similar changes in misc.c]
And on SGX side, it'll be updated like this:
--- a/arch/x86/kernel/cpu/sgx/epc_cgroup.c
+++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c
@@ -376,6 +376,14 @@ static void sgx_epc_cgroup_max_write(struct misc_cg
*cg)
queue_work(sgx_epc_cg_wq, &rc.epc_cg->reclaim_work);
}
+static int sgx_epc_cgroup_alloc(struct misc_cg *cg);
+
+const struct misc_operations_struct sgx_epc_cgroup_ops = {
+ .alloc = sgx_epc_cgroup_alloc,
+ .free = sgx_epc_cgroup_free,
+ .max_write = sgx_epc_cgroup_max_write,
+};
+
static int sgx_epc_cgroup_alloc(struct misc_cg *cg)
{
struct sgx_epc_cgroup *epc_cg;
@@ -386,12 +394,7 @@ static int sgx_epc_cgroup_alloc(struct misc_cg *cg)
sgx_lru_init(&epc_cg->lru);
INIT_WORK(&epc_cg->reclaim_work, sgx_epc_cgroup_reclaim_work_func);
- cg->res[MISC_CG_RES_SGX_EPC].alloc = sgx_epc_cgroup_alloc;
- cg->res[MISC_CG_RES_SGX_EPC].free = sgx_epc_cgroup_free;
- cg->res[MISC_CG_RES_SGX_EPC].max_write = sgx_epc_cgroup_max_write;
- cg->res[MISC_CG_RES_SGX_EPC].priv = epc_cg;
- epc_cg->cg = cg;
-
+ cg->res[MISC_CG_RES_SGX_EPC].misc_ops = &sgx_epc_cgroup_ops;
return 0;
}
Thanks again to all of you for the feedback.
Haitao
Powered by blists - more mailing lists