linux-kernel - Re: [PATCH v5 06/22] Treat XIP like O

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [day] [month] [year] [list]

Message-ID: <alpine.OSX.2.00.1402121640310.60058@scrumpy>
Date:	Wed, 12 Feb 2014 16:53:53 -0700 (MST)
From:	Ross Zwisler <ross.zwisler@...ux.intel.com>
To:	Matthew Wilcox <matthew.r.wilcox@...el.com>
cc:	linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
	linux-mm@...ck.org, linux-ext4@...r.kernel.org
Subject: Re: [PATCH v5 06/22] Treat XIP like O_DIRECT

On Wed, 15 Jan 2014, Matthew Wilcox wrote:
> Instead of separate read and write methods, use the generic AIO
> infrastructure.  In addition to giving us support for AIO, this adds
> the locking between read() and truncate() that was missing.
> 
> Signed-off-by: Matthew Wilcox <matthew.r.wilcox@...el.com>

...

> +static ssize_t xip_io(int rw, struct inode *inode, const struct iovec
> *iov,
> +			loff_t start, loff_t end, unsigned nr_segs,
> +			get_block_t get_block, struct buffer_head *bh)
> +{
> +	ssize_t retval = 0;
> +	unsigned seg = 0;
> +	unsigned len;
> +	unsigned copied = 0;
> +	loff_t offset = start;
> +	loff_t max = start;
> +	void *addr;
> +	bool hole = false;
> +
> +	while (offset < end) {
> +		void __user *buf = iov[seg].iov_base + copied;
> +
> +		if (max == offset) {
> +			sector_t block = offset >> inode->i_blkbits;
> +			long size;
> +			memset(bh, 0, sizeof(*bh));
> +			bh->b_size = ALIGN(end - offset, PAGE_SIZE);
> +			retval = get_block(inode, block, bh, rw == WRITE);
> +			if (retval)
> +				break;
> +			if (buffer_mapped(bh)) {
> +				retval = xip_get_addr(inode, bh, &addr);
> +				if (retval < 0)
> +					break;
> +				addr += offset - (block << inode->i_blkbits);
> +				hole = false;
> +				size = retval;
> +			} else {
> +				if (rw == WRITE) {
> +					retval = -EIO;
> +					break;
> +				}
> +				addr = NULL;
> +				hole = true;
> +				size = bh->b_size;
> +			}
> +			max = offset + size;
> +		}
> +
> +		len = min_t(unsigned, iov[seg].iov_len - copied, max - offset);
> +
> +		if (rw == WRITE)
> +			len -= __copy_from_user_nocache(addr, buf, len);
> +		else if (!hole)
> +			len -= __copy_to_user(buf, addr, len);
> +		else
> +			len -= __clear_user(buf, len);
> +
> +		if (!len)
> +			break;
> +
> +		offset += len;
> +		copied += len;
> +		if (copied == iov[seg].iov_len) {
> +			seg++;
> +			copied = 0;
> +		}
> +	}
> +
> +	return (offset == start) ? retval : offset - start;
> +}

xip_io() as it is currently written has an issue where reads can go beyond
inode->i_size.  A quick test to show this issue is:

	create a new file
	write to the file for 1/2 a block
	seek back to 0
	read for a full block

The read in this case will return 4096, the length of the full block that was
requested.  It should return 2048, reading just the data that was written.

The issue is that we do have a full block allocated in ext4, we do have it
available via XIP via xip_get_addr(), and the only extra check that we
currently have is a check against iov_len.  iov_len in this case is 4096, so
no one stops us from doing a full block read.

Here is a quick patch that fixes this issue:

diff --git a/fs/xip.c b/fs/xip.c
index e902593..1608f29 100644
--- a/fs/xip.c
+++ b/fs/xip.c
@@ -91,13 +91,16 @@ static ssize_t xip_io(int rw, struct inode *inode, const struct
 {
        ssize_t retval = 0;
        unsigned seg = 0;
-       unsigned len;
+       unsigned len, total_len;
        unsigned copied = 0;
        loff_t offset = start;
        loff_t max = start;
        void *addr;
        bool hole = false;
 
+       end = min(end, inode->i_size);
+       total_len = end - start;
+
        while (offset < end) {
                void __user *buf = iov[seg].iov_base + copied;
 
@@ -136,6 +139,7 @@ static ssize_t xip_io(int rw, struct inode *inode, const struct
                }
 
                len = min_t(unsigned, iov[seg].iov_len - copied, max - offset);
+               len = min(len, total_len);
 
                if (rw == WRITE)
                        len -= __copy_from_user_nocache(addr, buf, len);
@@ -149,6 +153,7 @@ static ssize_t xip_io(int rw, struct inode *inode, const struct
 
                offset += len;
                copied += len;
+               total_len -= len;
                if (copied == iov[seg].iov_len) {
                        seg++;
                        copied = 0;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/