[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20221221170436.449941687@redhat.com>
Date: Wed, 21 Dec 2022 13:58:07 -0300
From: Marcelo Tosatti <mtosatti@...hat.com>
To: atomlin@...mlin.com, frederic@...nel.org
Cc: cl@...ux.com, tglx@...utronix.de, mingo@...nel.org,
peterz@...radead.org, pauld@...hat.com, neelx@...hat.com,
oleksandr@...alenko.name, linux-kernel@...r.kernel.org,
linux-mm@...ck.org, Marcelo Tosatti <mtosatti@...hat.com>
Subject: [PATCH v11 6/6] mm/vmstat: avoid queueing work item if cpu stats are clean
It is not necessary to queue a work item to run refresh_vm_stats()
on a remote CPU if that CPU has no dirty stats and no per-CPU
allocations for remote nodes.
This fixes a sosreport hang (sosreport uses vmstat_refresh) in the
presence of a spinning SCHED_FIFO process.
Signed-off-by: Marcelo Tosatti <mtosatti@...hat.com>
Index: linux-2.6/mm/vmstat.c
===================================================================
--- linux-2.6.orig/mm/vmstat.c
+++ linux-2.6/mm/vmstat.c
@@ -1917,6 +1917,31 @@ static const struct seq_operations vmsta
#ifdef CONFIG_SMP
#ifdef CONFIG_PROC_FS
+static bool need_drain_remote_zones(int cpu)
+{
+#ifdef CONFIG_NUMA
+ struct zone *zone;
+
+ for_each_populated_zone(zone) {
+ struct per_cpu_pages *pcp;
+
+ pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+ if (!pcp->count)
+ continue;
+
+ if (!pcp->expire)
+ continue;
+
+ if (zone_to_nid(zone) == cpu_to_node(cpu))
+ continue;
+
+ return true;
+ }
+#endif
+
+ return false;
+}
+
static void refresh_vm_stats(struct work_struct *work)
{
refresh_cpu_vm_stats(true);
@@ -1926,8 +1951,12 @@ int vmstat_refresh(struct ctl_table *tab
void *buffer, size_t *lenp, loff_t *ppos)
{
long val;
- int err;
- int i;
+ int i, cpu;
+ struct work_struct __percpu *works;
+
+ works = alloc_percpu(struct work_struct);
+ if (!works)
+ return -ENOMEM;
/*
* The regular update, every sysctl_stat_interval, may come later
@@ -1941,9 +1970,21 @@ int vmstat_refresh(struct ctl_table *tab
* transiently negative values, report an error here if any of
* the stats is negative, so we know to go looking for imbalance.
*/
- err = schedule_on_each_cpu(refresh_vm_stats);
- if (err)
- return err;
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ struct work_struct *work = per_cpu_ptr(works, cpu);
+ struct vmstat_dirty *vms = per_cpu_ptr(&vmstat_dirty_pcpu, cpu);
+
+ INIT_WORK(work, refresh_vm_stats);
+
+ if (vms->dirty || need_drain_remote_zones(cpu))
+ schedule_work_on(cpu, work);
+ }
+ for_each_online_cpu(cpu)
+ flush_work(per_cpu_ptr(works, cpu));
+ cpus_read_unlock();
+ free_percpu(works);
+
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
/*
* Skip checking stats known to go negative occasionally.
Powered by blists - more mailing lists