linux-kernel - Re: [PATCH, RESEND] procfs: silence lockdep warning about read vs. exec seq

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite for Android: free password hash cracker in your pocket

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <20140803164452.GA14626@redhat.com>
Date:	Sun, 3 Aug 2014 18:44:52 +0200
From:	Oleg Nesterov <oleg@...hat.com>
To:	"Kirill A. Shutemov" <kirill@...temov.name>
Cc:	Alexander Viro <viro@...iv.linux.org.uk>,
	linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
	David Howells <dhowells@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Sasha Levin <levinsasha928@...il.com>,
	Cyrill Gorcunov <gorcunov@...nvz.org>,
	"David S. Miller" <davem@...emloft.net>,
	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Subject: Re: [PATCH, RESEND] procfs: silence lockdep warning about read vs.
	exec seq_file

Sorry for delay,

On 08/02, Kirill A. Shutemov wrote:
>
> +/*
> + * proc_pid_personality() and proc_pid_stack() take cred_guard_mutex via
> + * lock_trace()

And at first glance they lock_trace() can die. But lets temporary ignore,
m_start() is trickier.

> +static struct lock_class_key pid_maps_seq_file_lock;
> +
>  void task_mem(struct seq_file *m, struct mm_struct *mm)
>  {
>  	unsigned long data, text, lib, swap;
> @@ -242,6 +254,7 @@ static int do_maps_open(struct inode *inode, struct file *file,
>  		ret = seq_open(file, ops);
>  		if (!ret) {
>  			struct seq_file *m = file->private_data;
> +			lockdep_set_class(&m->lock, &pid_maps_seq_file_lock);

Perhaps lockdep_set_subclass() would be better... But this doesn't matter.

The question is, why m_start() calls mm_access(). This is not even
strictly correct if the task execs between m_stop() + m_start().

Can't we do something like below? The patch is obviously horrible and
incomplete, just to explain what I meant. Basically this is what
proc_mem_operations does.

Oleg.


diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3ab6d14..c16b70e 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -267,6 +267,7 @@ extern int proc_remount(struct super_block *, int *, char *);
 struct proc_maps_private {
 	struct pid *pid;
 	struct task_struct *task;
+	struct mm_struct *mm;
 #ifdef CONFIG_MMU
 	struct vm_area_struct *tail_vma;
 #endif
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index cfa63ee..9b88248 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -165,9 +165,9 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 	if (!priv->task)
 		return ERR_PTR(-ESRCH);
 
-	mm = mm_access(priv->task, PTRACE_MODE_READ);
-	if (!mm || IS_ERR(mm))
-		return mm;
+	mm = priv->mm;
+	if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+		return NULL;
 	down_read(&mm->mmap_sem);
 
 	tail_vma = get_gate_vma(priv->task->mm);
@@ -231,6 +231,27 @@ static void m_stop(struct seq_file *m, void *v)
 		put_task_struct(priv->task);
 }
 
+// TODO: change __mem_open() to use this helper
+static struct mm_struct *xxx(struct inode *inode, struct file *file, unsigned int mode)
+{
+	struct task_struct *task = get_proc_task(inode);
+	struct mm_struct *mm = ERR_PTR(-ESRCH);
+
+	if (task) {
+		mm = mm_access(task, mode);
+		put_task_struct(task);
+
+		if (!IS_ERR_OR_NULL(mm)) {
+			/* ensure this mm_struct can't be freed */
+			atomic_inc(&mm->mm_count);
+			/* but do not pin its memory */
+			mmput(mm);
+		}
+	}
+
+	return mm;
+}
+
 static int do_maps_open(struct inode *inode, struct file *file,
 			const struct seq_operations *ops)
 {
@@ -239,17 +260,38 @@ static int do_maps_open(struct inode *inode, struct file *file,
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (priv) {
 		priv->pid = proc_pid(inode);
+		priv->mm = xxx(inode, file, PTRACE_MODE_READ);
+
+		// XXX cleanup me
+		if (IS_ERR(priv->mm)) {
+			ret = -EACCES;
+			goto free;
+		}
+
 		ret = seq_open(file, ops);
 		if (!ret) {
 			struct seq_file *m = file->private_data;
 			m->private = priv;
 		} else {
+			// XXX cleanup me
+			if (priv->mm)
+				mmdrop(priv->mm);
+ free:
 			kfree(priv);
 		}
 	}
 	return ret;
 }
 
+static int xxx_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	struct proc_maps_private *priv = seq->private;
+	if (priv->mm)
+		mmdrop(priv->mm);
+	return 0;
+}
+
 static void
 show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 {
@@ -399,14 +441,14 @@ const struct file_operations proc_pid_maps_operations = {
 	.open		= pid_maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= xxx_release,
 };
 
 const struct file_operations proc_tid_maps_operations = {
 	.open		= tid_maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= xxx_release,
 };
 
 /*
@@ -682,14 +724,14 @@ const struct file_operations proc_pid_smaps_operations = {
 	.open		= pid_smaps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= xxx_release,
 };
 
 const struct file_operations proc_tid_smaps_operations = {
 	.open		= tid_smaps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= xxx_release,
 };
 
 /*

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/