lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <acd773b8-89fa-6000-ec43-18815a1ab8c2@i-love.sakura.ne.jp>
Date:   Tue, 27 Aug 2019 00:02:07 +0900
From:   Tetsuo Handa <penguin-kernel@...ove.sakura.ne.jp>
To:     Ingo Molnar <mingo@...nel.org>
Cc:     Linus Torvalds <torvalds@...ux-foundation.org>,
        Arnd Bergmann <arnd@...db.de>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Linux List Kernel Mailing <linux-kernel@...r.kernel.org>
Subject: Rewriting read_kmem()/write_kmem() ?

On 2019/08/26 20:19, Ingo Molnar wrote:
> Basically making IO primitives interruptible is the norm and it's a 
> quality of implementation issue: it's only a historic accident that 
> /dev/mem read()s aren't.
> 
> So let's try and make it interruptible as the #3 patch I sent did - of 
> course if anything breaks we'll have to undo it. But if we can get away 
> with then by all means let's do so - even shorter reads can generate 
> nasty long processing latencies.
> 
> Ok?

This is how read_kmem()/write_kmem() could be rewritten (before doing
s/fatal_signal_pending/signal_pending/ change). Only compile tested.
Do we want to try this change using several patches?

 drivers/char/mem.c | 202 +++++++++++++++++++----------------------------------
 1 file changed, 73 insertions(+), 129 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 9eb564c..12bca2a 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -430,114 +430,32 @@ static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
 	return mmap_mem(file, vma);
 }
 
-/*
- * This function reads the *virtual* memory as seen by the kernel.
- */
-static ssize_t read_kmem(struct file *file, char __user *buf,
-			 size_t count, loff_t *ppos)
+static ssize_t do_copy_kmem(unsigned long p, char __user *buf, size_t count,
+			    loff_t *ppos, const bool is_write)
 {
-	unsigned long p = *ppos;
-	ssize_t low_count, read, sz;
-	char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
-	int err = 0;
-
-	read = 0;
-	if (p < (unsigned long) high_memory) {
-		low_count = count;
-		if (count > (unsigned long)high_memory - p)
-			low_count = (unsigned long)high_memory - p;
+	ssize_t copied = 0;
+	ssize_t sz;
 
 #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
-		/* we don't have page 0 mapped on sparc and m68k.. */
-		if (p < PAGE_SIZE && low_count > 0) {
-			sz = size_inside_page(p, low_count);
-			if (clear_user(buf, sz))
-				return -EFAULT;
-			buf += sz;
-			p += sz;
-			read += sz;
-			low_count -= sz;
-			count -= sz;
-		}
-#endif
-		while (low_count > 0) {
-			sz = size_inside_page(p, low_count);
-
-			/*
-			 * On ia64 if a page has been mapped somewhere as
-			 * uncached, then it must also be accessed uncached
-			 * by the kernel or data corruption may occur
-			 */
-			kbuf = xlate_dev_kmem_ptr((void *)p);
-			if (!virt_addr_valid(kbuf))
-				return -ENXIO;
-
-			if (copy_to_user(buf, kbuf, sz))
-				return -EFAULT;
-			buf += sz;
-			p += sz;
-			read += sz;
-			low_count -= sz;
-			count -= sz;
-			if (should_stop_iteration()) {
-				count = 0;
-				break;
-			}
-		}
-	}
-
-	if (count > 0) {
-		kbuf = (char *)__get_free_page(GFP_KERNEL);
-		if (!kbuf)
-			return -ENOMEM;
-		while (count > 0) {
-			sz = size_inside_page(p, count);
-			if (!is_vmalloc_or_module_addr((void *)p)) {
-				err = -ENXIO;
-				break;
-			}
-			sz = vread(kbuf, (char *)p, sz);
-			if (!sz)
-				break;
-			if (copy_to_user(buf, kbuf, sz)) {
-				err = -EFAULT;
-				break;
-			}
-			count -= sz;
-			buf += sz;
-			read += sz;
-			p += sz;
-			if (should_stop_iteration())
-				break;
-		}
-		free_page((unsigned long)kbuf);
-	}
-	*ppos = p;
-	return read ? read : err;
-}
-
-
-static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
-				size_t count, loff_t *ppos)
-{
-	ssize_t written, sz;
-	unsigned long copied;
-
-	written = 0;
-#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
 	/* we don't have page 0 mapped on sparc and m68k.. */
 	if (p < PAGE_SIZE) {
 		sz = size_inside_page(p, count);
-		/* Hmm. Do something? */
+		if (is_write) {
+			/* Hmm. Do something? */
+		} else {
+			if (clear_user(buf, sz))
+				return -EFAULT;
+		}
 		buf += sz;
 		p += sz;
 		count -= sz;
-		written += sz;
+		copied += sz;
 	}
 #endif
 
 	while (count > 0) {
 		void *ptr;
+		unsigned long n;
 
 		sz = size_inside_page(p, count);
 
@@ -550,78 +468,104 @@ static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
 		if (!virt_addr_valid(ptr))
 			return -ENXIO;
 
-		copied = copy_from_user(ptr, buf, sz);
-		if (copied) {
-			written += sz - copied;
-			if (written)
+		if (is_write)
+			n = copy_from_user(ptr, buf, sz);
+		else
+			n = copy_to_user(buf, buf, sz);
+		if (n) {
+			copied += sz - n;
+			if (copied)
 				break;
 			return -EFAULT;
 		}
 		buf += sz;
 		p += sz;
 		count -= sz;
-		written += sz;
+		copied += sz;
 		if (should_stop_iteration())
 			break;
 	}
 
-	*ppos += written;
-	return written;
+	*ppos += copied;
+	return copied;
 }
 
-/*
- * This function writes to the *virtual* memory as seen by the kernel.
- */
-static ssize_t write_kmem(struct file *file, const char __user *buf,
-			  size_t count, loff_t *ppos)
+static ssize_t copy_kmem(char __user *buf, size_t count, loff_t *ppos,
+			 const bool is_write)
 {
 	unsigned long p = *ppos;
-	ssize_t wrote = 0;
-	ssize_t virtr = 0;
-	char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
+	ssize_t copied = 0;
 	int err = 0;
 
 	if (p < (unsigned long) high_memory) {
-		unsigned long to_write = min_t(unsigned long, count,
-					       (unsigned long)high_memory - p);
-		wrote = do_write_kmem(p, buf, to_write, ppos);
-		if (wrote != to_write)
-			return wrote;
-		p += wrote;
-		buf += wrote;
-		count -= wrote;
+		unsigned long to_copy = min_t(unsigned long, count,
+					      (unsigned long)high_memory - p);
+
+		copied = do_copy_kmem(p, buf, to_copy, ppos, is_write);
+		if (copied != to_copy)
+			return copied;
+		p += copied;
+		buf += copied;
+		count -= copied;
 	}
 
 	if (count > 0) {
-		kbuf = (char *)__get_free_page(GFP_KERNEL);
+		/* k-addr because vread()/vwrite() takes vmlist_lock rwlock */
+		char *kbuf = (char *)__get_free_page(GFP_KERNEL);
+
 		if (!kbuf)
-			return wrote ? wrote : -ENOMEM;
+			return copied ? copied : -ENOMEM;
 		while (count > 0) {
-			unsigned long sz = size_inside_page(p, count);
-			unsigned long n;
+			ssize_t sz = size_inside_page(p, count);
 
 			if (!is_vmalloc_or_module_addr((void *)p)) {
 				err = -ENXIO;
 				break;
 			}
-			n = copy_from_user(kbuf, buf, sz);
-			if (n) {
-				err = -EFAULT;
-				break;
+			if (is_write) {
+				if (copy_from_user(kbuf, buf, sz)) {
+					err = -EFAULT;
+					break;
+				}
+				vwrite(kbuf, (char *)p, sz);
+			} else {
+				sz = vread(kbuf, (char *)p, sz);
+				if (!sz)
+					break;
+				if (copy_to_user(buf, kbuf, sz)) {
+					err = -EFAULT;
+					break;
+				}
 			}
-			vwrite(kbuf, (char *)p, sz);
 			count -= sz;
 			buf += sz;
-			virtr += sz;
+			copied += sz;
 			p += sz;
 			if (should_stop_iteration())
 				break;
 		}
 		free_page((unsigned long)kbuf);
 	}
-
 	*ppos = p;
-	return virtr + wrote ? : err;
+	return copied ? copied : err;
+}
+
+/*
+ * This function reads the *virtual* memory as seen by the kernel.
+ */
+static ssize_t read_kmem(struct file *file, char __user *buf,
+			 size_t count, loff_t *ppos)
+{
+	return copy_kmem(buf, count, ppos, false);
+}
+
+/*
+ * This function writes to the *virtual* memory as seen by the kernel.
+ */
+static ssize_t write_kmem(struct file *file, const char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	return copy_kmem((char __user *) buf, count, ppos, true);
 }
 
 static ssize_t read_port(struct file *file, char __user *buf,
-- 
1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ