lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 07 Oct 2021 16:23:49 -0300
From:   Marcelo Tosatti <mtosatti@...hat.com>
To:     linux-kernel@...r.kernel.org
Cc:     Nitesh Lal <nilal@...hat.com>,
        Nicolas Saenz Julienne <nsaenzju@...hat.com>,
        Frederic Weisbecker <frederic@...nel.org>,
        Christoph Lameter <cl@...ux.com>,
        Juri Lelli <juri.lelli@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Alex Belits <abelits@...its.com>, Peter Xu <peterx@...hat.com>,
        Marcelo Tosatti <mtosatti@...hat.com>
Subject: [patch v4 3/8] task isolation: sync vmstats on return to userspace

The logic to disable vmstat worker thread, when entering
nohz full, does not cover all scenarios. For example, it is possible
for the following to happen:

1) enter nohz_full, which calls refresh_cpu_vm_stats, syncing the stats.
2) app runs mlock, which increases counters for mlock'ed pages.
3) start -RT loop

Since refresh_cpu_vm_stats from nohz_full logic can happen _before_
the mlock, vmstat shepherd can restart vmstat worker thread on
the CPU in question.

To fix this, use the task isolation prctl interface to quiesce 
deferred actions when returning to userspace.

Signed-off-by: Marcelo Tosatti <mtosatti@...hat.com>

---
 include/linux/task_isolation.h |   12 ++++++++++++
 include/linux/vmstat.h         |    8 ++++++++
 kernel/entry/common.c          |    2 ++
 kernel/task_isolation.c        |   26 ++++++++++++++++++++++++++
 mm/vmstat.c                    |   21 +++++++++++++++++++++
 5 files changed, 69 insertions(+)

Index: linux-2.6/include/linux/task_isolation.h
===================================================================
--- linux-2.6.orig/include/linux/task_isolation.h
+++ linux-2.6/include/linux/task_isolation.h
@@ -40,8 +40,20 @@ int prctl_task_isolation_activate_set(un
 
 int __copy_task_isolation(struct task_struct *tsk);
 
+void __isolation_exit_to_user_mode_prepare(void);
+
+static inline void isolation_exit_to_user_mode_prepare(void)
+{
+	if (current->isol_info)
+		__isolation_exit_to_user_mode_prepare();
+}
+
 #else
 
+static void isolation_exit_to_user_mode_prepare(void)
+{
+}
+
 static inline void tsk_isol_free(struct task_struct *tsk)
 {
 }
Index: linux-2.6/include/linux/vmstat.h
===================================================================
--- linux-2.6.orig/include/linux/vmstat.h
+++ linux-2.6/include/linux/vmstat.h
@@ -21,6 +21,14 @@ int sysctl_vm_numa_stat_handler(struct c
 		void *buffer, size_t *length, loff_t *ppos);
 #endif
 
+#ifdef CONFIG_SMP
+void sync_vmstat(void);
+#else
+static inline void sync_vmstat(void)
+{
+}
+#endif
+
 struct reclaim_stat {
 	unsigned nr_dirty;
 	unsigned nr_unqueued_dirty;
Index: linux-2.6/kernel/entry/common.c
===================================================================
--- linux-2.6.orig/kernel/entry/common.c
+++ linux-2.6/kernel/entry/common.c
@@ -6,6 +6,7 @@
 #include <linux/livepatch.h>
 #include <linux/audit.h>
 #include <linux/tick.h>
+#include <linux/task_isolation.h>
 
 #include "common.h"
 
@@ -285,6 +286,7 @@ static void syscall_exit_to_user_mode_pr
 static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
 {
 	syscall_exit_to_user_mode_prepare(regs);
+	isolation_exit_to_user_mode_prepare();
 	local_irq_disable_exit_to_user();
 	exit_to_user_mode_prepare(regs);
 }
Index: linux-2.6/kernel/task_isolation.c
===================================================================
--- linux-2.6.orig/kernel/task_isolation.c
+++ linux-2.6/kernel/task_isolation.c
@@ -18,6 +18,8 @@
 #include <linux/sysfs.h>
 #include <linux/init.h>
 #include <linux/sched/task.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
 
 void __tsk_isol_free(struct task_struct *tsk)
 {
@@ -355,3 +357,23 @@ int prctl_task_isolation_activate_get(un
 	return ret;
 }
 
+void __isolation_exit_to_user_mode_prepare(void)
+{
+	struct isol_info *i;
+
+	i = current->isol_info;
+	if (!i)
+		return;
+
+	if (i->active_mask != ISOL_F_QUIESCE)
+		return;
+
+	if (i->quiesce_mask & ISOL_F_QUIESCE_VMSTATS) {
+		sync_vmstat();
+		if (i->oneshot_mask & ISOL_F_QUIESCE_VMSTATS) {
+			i->oneshot_mask &= ~ISOL_F_QUIESCE_VMSTATS;
+			i->quiesce_mask &= ~ISOL_F_QUIESCE_VMSTATS;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(__isolation_exit_to_user_mode_prepare);
Index: linux-2.6/mm/vmstat.c
===================================================================
--- linux-2.6.orig/mm/vmstat.c
+++ linux-2.6/mm/vmstat.c
@@ -2003,6 +2003,27 @@ static void vmstat_shepherd(struct work_
 		round_jiffies_relative(sysctl_stat_interval));
 }
 
+void sync_vmstat(void)
+{
+	int cpu;
+
+	cpu = get_cpu();
+
+	refresh_cpu_vm_stats(false);
+	put_cpu();
+
+	/*
+	 * If task is migrated to another CPU between put_cpu
+	 * and cancel_delayed_work_sync, the code below might
+	 * cancel vmstat_update work for a different cpu
+	 * (than the one from which the vmstats were flushed).
+	 *
+	 * However, vmstat shepherd will re-enable it later,
+	 * so its harmless.
+	 */
+	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+}
+
 static void __init start_shepherd_timer(void)
 {
 	int cpu;


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ