From 8fe3036b04a3a529dc500a9c880e23bcfd1daa42 Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy"
Date: Wed, 4 Feb 2026 10:08:07 +0000
Subject: [PATCH] sched/fair: Debug multiplication overflow

Read the trace buffer when dmesg says:

    EEVDF: Overflow (mul)!
    EEVDF: Overflow CPU/<N>

per_cpu/cpu<N>/trace should then have all the information.

Not-signed-off-by: K Prateek Nayak
---
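Note for reviewers: check_mul_overflow()/check_add_overflow() below boil
down to the compiler's __builtin_{mul,add}_overflow() builtins. As a
minimal, standalone userspace sketch of the pattern the paranoid path
relies on (the key and weight values here are made up purely for
illustration and are not taken from the scheduler):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* a large entity_key()-style delta and a scaled-up weight */
		int64_t key = INT64_C(1) << 42;
		int64_t weight = INT64_C(1) << 22;
		int64_t prod;

		/* returns true when the wide result does not fit in prod */
		if (__builtin_mul_overflow(key, weight, &prod))
			fprintf(stderr, "mul overflow\n");
		else
			printf("key * weight = %lld\n", (long long)prod);

		return 0;
	}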
 kernel/sched/fair.c | 83 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 81 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6f1a86f7969a..15d521f795ff 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -670,6 +670,53 @@ __sum_w_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	cfs_rq->sum_weight += weight;
 }
 
+static DEFINE_PER_CPU(char[100], dump_padding);
+u64 avg_vruntime(struct cfs_rq *cfs_rq);
+
+static void dump_h_overflow_entity(struct sched_entity *se, int depth, bool curr)
+{
+	char *padding = *this_cpu_ptr(&dump_padding);
+
+	padding[depth] = '\0';
+	trace_printk("%sse: weight(%lu) vruntime(%llu) slice(%llu) deadline(%llu) curr?(%d) task?(%d)\n",
+		     padding, se->load.weight, se->vruntime, se->slice, se->deadline, curr, entity_is_task(se));
+}
+
+static void dump_h_overflow_cfs_rq(struct cfs_rq *cfs_rq, int depth, bool rec)
+{
+	struct rb_node *left = rb_first_cached(&cfs_rq->tasks_timeline);
+	char *padding = *this_cpu_ptr(&dump_padding);
+
+	padding[depth] = '\0';
+	trace_printk("%scfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+		     padding, depth, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
+
+	if (cfs_rq->curr)
+		dump_h_overflow_entity(cfs_rq->curr, depth, true);
+
+	while (left) {
+		dump_h_overflow_entity(__node_2_se(left), depth, false);
+		left = rb_next(left);
+	}
+
+	if (rec) {
+		padding[depth] = ' ';
+		if (cfs_rq->curr && !entity_is_task(cfs_rq->curr))
+			dump_h_overflow_cfs_rq(group_cfs_rq(cfs_rq->curr), depth + 1, true);
+
+		left = rb_first_cached(&cfs_rq->tasks_timeline);
+		while (left) {
+			struct sched_entity *se = __node_2_se(left);
+
+			if (!entity_is_task(se))
+				dump_h_overflow_cfs_rq(group_cfs_rq(se), depth + 1, true);
+			left = rb_next(left);
+		}
+	}
+
+	padding[depth] = '\0';
+}
+
 static void
 sum_w_vruntime_add_paranoid(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
@@ -680,17 +727,32 @@ sum_w_vruntime_add_paranoid(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	weight = avg_vruntime_weight(cfs_rq, se->load.weight);
 	key = entity_key(cfs_rq, se);
 
-	if (check_mul_overflow(key, weight, &key))
+	if (check_mul_overflow(key, weight, &key)) {
+		pr_warn("EEVDF: Overflow (mul)!\n");
+		pr_warn("EEVDF: Overflow CPU/%d\n", smp_processor_id());
 		goto overflow;
+	}
 
-	if (check_add_overflow(cfs_rq->sum_w_vruntime, key, &tmp))
+	if (check_add_overflow(cfs_rq->sum_w_vruntime, key, &tmp)) {
+		pr_warn("EEVDF: Overflow (add)!\n");
+		pr_warn("EEVDF: Overflow CPU/%d\n", smp_processor_id());
 		goto overflow;
+	}
 
 	cfs_rq->sum_w_vruntime = tmp;
 	cfs_rq->sum_weight += weight;
 	return;
 
 overflow:
+	trace_printk("Overflowed cfs_rq:\n");
+	dump_h_overflow_cfs_rq(cfs_rq, 0, false);
+
+	trace_printk("Overflowed se:\n");
+	dump_h_overflow_entity(se, 0, se == cfs_rq->curr);
+
+	trace_printk("Overflowed hierarchy from root:\n");
+	dump_h_overflow_cfs_rq(&rq_of(cfs_rq)->cfs, 0, true);
+
 	/*
 	 * There's gotta be a limit -- if we're still failing at this point
 	 * there's really nothing much to be done about things.
@@ -921,6 +983,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->min_slice = se->slice;
 	rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
 				__entity_less, &min_vruntime_cb);
+	trace_printk("Enqueue cfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+		     se->depth - 1, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
 }
 
 static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -929,6 +993,8 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 				  &min_vruntime_cb);
 	sum_w_vruntime_sub(cfs_rq, se);
 	update_zero_vruntime(cfs_rq);
+	trace_printk("Dequeue cfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+		     se->depth - 1, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
 }
 
 struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -3963,6 +4029,10 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	bool rel_vprot = false;
 	u64 avruntime = 0;
 
+	trace_printk("Reweight before se: weight(%lu) vruntime(%llu) vlag(%lld) deadline(%llu) curr?(%d)\n",
+		     se->load.weight, se->vruntime, se->vlag, se->deadline, se == cfs_rq->curr);
+	trace_printk("Before cfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+		     se->depth - 1, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
 	if (se->on_rq) {
 		/* commit outstanding execution time */
 		update_curr(cfs_rq);
@@ -3998,6 +4068,10 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		__enqueue_entity(cfs_rq, se);
 		cfs_rq->nr_queued++;
 	}
+	trace_printk("Reweight after se: weight(%lu) vruntime(%llu) vlag(%lld) deadline(%llu) curr?(%d)\n",
+		     se->load.weight, se->vruntime, se->vlag, se->deadline, se == cfs_rq->curr);
+	trace_printk("After cfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+		     se->depth - 1, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
 }
 
 static void reweight_task_fair(struct rq *rq, struct task_struct *p,
@@ -5367,6 +5441,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * EEVDF: vd_i = ve_i + r_i/w_i
 	 */
 	se->deadline = se->vruntime + vslice;
+
+	trace_printk("Placed se: weight(%lu) vruntime(%llu) vlag(%lld) deadline(%llu) curr?(%d)\n",
+		     se->load.weight, se->vruntime, se->vlag, se->deadline, se == cfs_rq->curr);
+	trace_printk("Placed on cfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+		     se->depth - 1, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
 }
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
-- 
2.34.1
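P.S. The dumps land in the ftrace ring buffer, so the per-CPU file named
in the commit message can simply be read once the warning fires. A small
sketch of doing that programmatically (it assumes tracefs is mounted at
/sys/kernel/tracing; older setups expose the same files under
/sys/kernel/debug/tracing):

	#include <stdio.h>

	/* Dump the per-CPU ftrace buffer for the CPU printed in
	 * "EEVDF: Overflow CPU/<N>". */
	int main(int argc, char **argv)
	{
		char path[128], buf[4096];
		size_t n;
		FILE *f;

		if (argc != 2) {
			fprintf(stderr, "usage: %s <cpu>\n", argv[0]);
			return 1;
		}

		snprintf(path, sizeof(path),
			 "/sys/kernel/tracing/per_cpu/cpu%s/trace", argv[1]);

		f = fopen(path, "r");
		if (!f) {
			perror(path);
			return 1;
		}

		while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
			fwrite(buf, 1, n, stdout);

		fclose(f);
		return 0;
	}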