[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <fedcc89c-6b61-343a-7b0b-f1924b3cf5fb@bytedance.com>
Date: Thu, 20 Oct 2022 17:14:40 +0800
From: Chuyi Zhou <zhouchuyi@...edance.com>
To: Benjamin Segall <bsegall@...gle.com>
Cc: linux-kernel@...r.kernel.org
Subject: Re: [External] Re: [RESEND] sched/fair: Add min_ratio for cfs
bandwidth_control
在 2022/10/20 05:01, Benjamin Segall 写道:
> Chuyi Zhou <zhouchuyi@...edance.com> writes:
>
>> Tasks may be throttled when holding locks for a long time by current
>> cfs bandwidth control mechanism once users set a too small quota/period
>> ratio, which can result whole system get stuck[1].
>>
>> In order to prevent the above situation from happening, this patch adds
>> sysctl_sched_cfs_bandwidth_min_ratio in /proc/sys/kernel, which indicates
>> the minimum percentage of quota/period users can set. The default value is
>> zero and users can set quota and period without triggering this
>> constraint.
>
>
> There's so many other sorts of bad inputs that can get you stuck here
> that I'm not sure it's ever safe against lockups to provide direct write
> access to an untrusted user. I'm not totally opposed but it seems like
> an incomplete fix to a broken (non-default) configuration.
>
>
Thanks for your advice.
Chuyi Zhou
>>
>> Link[1]:https://lore.kernel.org/lkml/5987be34-b527-4ff5-a17d-5f6f0dc94d6d@huawei.com/T/
>> Signed-off-by: Chuyi Zhou <zhouchuyi@...edance.com>
>> Suggested-by: Abel Wu <wuyun.abel@...edance.com>
>> ---
>> include/linux/sched/sysctl.h | 4 ++++
>> kernel/sched/core.c | 23 +++++++++++++++++++++++
>> kernel/sysctl.c | 10 ++++++++++
>> 3 files changed, 37 insertions(+)
>>
>> diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
>> index 303ee7dd0c7e..dedb18648f0e 100644
>> --- a/include/linux/sched/sysctl.h
>> +++ b/include/linux/sched/sysctl.h
>> @@ -21,6 +21,10 @@ enum sched_tunable_scaling {
>> SCHED_TUNABLESCALING_END,
>> };
>>
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> +extern unsigned int sysctl_sched_cfs_bandwidth_min_ratio;
>> +#endif
>> +
>> #define NUMA_BALANCING_DISABLED 0x0
>> #define NUMA_BALANCING_NORMAL 0x1
>> #define NUMA_BALANCING_MEMORY_TIERING 0x2
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 5800b0623ff3..8f6cfd889e37 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -10504,6 +10504,12 @@ static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css,
>> }
>>
>> #ifdef CONFIG_CFS_BANDWIDTH
>> +/*
>> + * The minimum of quota/period ratio users can set, default is zero and users can set
>> + * quota and period without triggering this constraint.
>> + */
>> +unsigned int sysctl_sched_cfs_bandwidth_min_ratio;
>> +
>> static DEFINE_MUTEX(cfs_constraints_mutex);
>>
>> const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
>> @@ -10513,6 +10519,20 @@ static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
>>
>> static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
>>
>> +static int check_cfs_bandwidth_min_ratio(u64 period, u64 quota)
>> +{
>> + u64 ratio;
>> +
>> + if (!sysctl_sched_cfs_bandwidth_min_ratio)
>> + return 0;
>> +
>> + ratio = div64_u64(quota * 100, period);
>> + if (ratio < sysctl_sched_cfs_bandwidth_min_ratio)
>> + return -1;
>> +
>> + return 0;
>> +}
>> +
>> static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
>> u64 burst)
>> {
>> @@ -10548,6 +10568,9 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
>> burst + quota > max_cfs_runtime))
>> return -EINVAL;
>>
>> + if (quota != RUNTIME_INF && check_cfs_bandwidth_min_ratio(period, quota))
>> + return -EINVAL;
>> +
>> /*
>> * Prevent race between setting of cfs_rq->runtime_enabled and
>> * unthrottle_offline_cfs_rqs().
>> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
>> index 188c305aeb8b..7d9743e8e514 100644
>> --- a/kernel/sysctl.c
>> +++ b/kernel/sysctl.c
>> @@ -1652,6 +1652,16 @@ static struct ctl_table kern_table[] = {
>> .extra1 = SYSCTL_ZERO,
>> },
>> #endif /* CONFIG_NUMA_BALANCING */
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> + {
>> + .procname = "sched_cfs_bandwidth_min_ratio",
>> + .data = &sysctl_sched_cfs_bandwidth_min_ratio,
>> + .maxlen = sizeof(unsigned int),
>> + .mode = 0644,
>> + .proc_handler = proc_dointvec_minmax,
>> + .extra1 = SYSCTL_ZERO,
>> + },
>> +#endif /* CONFIG_CFS_BANDWIDTH */
>> {
>> .procname = "panic",
>> .data = &panic_timeout,
Powered by blists - more mailing lists