lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1320296035-8744-2-git-send-email-hooanon05@yahoo.co.jp>
Date:	Thu,  3 Nov 2011 13:53:54 +0900
From:	"J. R. Okajima" <hooanon05@...oo.co.jp>
To:	linux-kernel@...r.kernel.org
Cc:	hooanon05@...oo.co.jp, viro@...iv.linux.org.uk, hch@...radead.org,
	jwboyer@...il.com, wli@...omorphy.com
Subject: [RFC 1/2] introduce f_op->{pre,post}_mmap()

The locking order between mm->mmap_sem and inode->i_mutex (or another
FS-internal lock) is a problem. The right order is i_mutex first and then
mmap_sem. But this is hard for a FS which has a complicated ->mmap(),
since ->mmap() is called with mmap_sem already held: acquiring i_mutex (or
another FS-internal lock) there would cause an AB-BA deadlock.
In order to allow a FS to implement a complicated ->mmap(), introduce
f_op->{pre,post}_mmap(). ->pre_mmap() is called just before acquiring
mmap_sem for ->mmap(), and ->post_mmap() is called just after releasing
mmap_sem.

Signed-off-by: J. R. Okajima <hooanon05@...oo.co.jp>
---
 Documentation/filesystems/Locking |    8 ++++++++
 Documentation/filesystems/vfs.txt |    7 +++++++
 include/linux/fs.h                |    2 ++
 include/linux/mm.h                |    4 ++++
 mm/mmap.c                         |   27 ++++++++++++++++++++++++---
 5 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 57d827d..1815e20 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -408,7 +408,9 @@ prototypes:
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
+	int (*pre_mmap) (struct file *, unsigned long, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
+	void (*post_mmap) (struct file *, unsigned long, unsigned long);
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *);
 	int (*release) (struct inode *, struct file *);
@@ -466,6 +468,12 @@ components. And there are other reasons why the current interface is a mess...
 ->read on directories probably must go away - we should just enforce -EISDIR
 in sys_read() and friends.
 
+->mmap is called with mm->mmap_sem held for write. If your FS needs i_mutex
+  for mmap(2), never acquire it in ->mmap. Instead acquire it in ->pre_mmap()
+  and release it in ->post_mmap(), since both are called without
+  mm->mmap_sem held. When ->pre_mmap() returns non-zero, neither ->mmap()
+  nor ->post_mmap() is called.
+
 --------------------------- dquot_operations -------------------------------
 prototypes:
 	int (*write_dquot) (struct dquot *);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 88b9f55..e2a579e 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -751,7 +751,9 @@ struct file_operations {
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
+	int (*pre_mmap) (struct file *, unsigned long, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
+	void (*post_mmap) (struct file *, unsigned long, unsigned long);
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *);
 	int (*release) (struct inode *, struct file *);
@@ -794,8 +796,13 @@ otherwise noted.
   compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
  	 are used on 64 bit kernels.
 
+  pre_mmap: called by mmap(2) just before mm->mmap_sem is acquired
+
   mmap: called by the mmap(2) system call
 
+  post_mmap: called by mmap(2) just after mm->mmap_sem is released
+	For pre_mmap, mmap, post_mmap, read Documentation/filesystems/Locking too.
+
   open: called by the VFS when an inode should be opened. When the VFS
 	opens a file, it creates a new "struct file". It then calls the
 	open method for the newly allocated file structure. You might
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b5b9792..bce9d44 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1554,7 +1554,9 @@ struct file_operations {
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
+	int (*pre_mmap) (struct file *, unsigned long, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
+	void (*post_mmap) (struct file *, unsigned long, unsigned long);
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *, fl_owner_t id);
 	int (*release) (struct inode *, struct file *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9670f71..e22230c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1428,6 +1428,10 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff);
 
+extern int pre_mmap(struct file *file, unsigned long prot, unsigned long flag);
+extern void post_mmap(struct file *file, unsigned long prot,
+		      unsigned long flag);
+
 static inline unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
 	unsigned long flag, unsigned long offset)
diff --git a/mm/mmap.c b/mm/mmap.c
index 0290c8e..0dd2acb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1089,6 +1089,25 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 }
 EXPORT_SYMBOL(do_mmap_pgoff);
 
+int pre_mmap(struct file *file, unsigned long prot, unsigned long flag)
+{
+	int err;
+
+	err = 0;
+	if (file && file->f_op && file->f_op->pre_mmap)
+		err = file->f_op->pre_mmap(file, prot, flag);
+	if (!err)
+		down_write(&current->mm->mmap_sem);
+	return err;
+}
+
+void post_mmap(struct file *file, unsigned long prot, unsigned long flag)
+{
+	up_write(&current->mm->mmap_sem);
+	if (file && file->f_op && file->f_op->post_mmap)
+		file->f_op->post_mmap(file, prot, flag);
+}
+
 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
 		unsigned long, prot, unsigned long, flags,
 		unsigned long, fd, unsigned long, pgoff)
@@ -1120,9 +1139,11 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
 
 	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
+	retval = pre_mmap(file, prot, flags);
+	if (!retval) {
+		retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+		post_mmap(file, prot, flags);
+	}
 
 	if (file)
 		fput(file);
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ