lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <1407280017.2170.22.camel@dhcp-9-2-203-236.watson.ibm.com>
Date:	Tue, 05 Aug 2014 19:06:57 -0400
From:	Mimi Zohar <zohar@...ux.vnet.ibm.com>
To:	Michael Halcrow <mhalcrow@...gle.com>
Cc:	linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org,
	herbert@...dor.apana.org.au, pavel@....cz, hch@...radead.org,
	lczerner@...hat.com, tytso@....edu, tyhicks@...onical.com,
	serge.hallyn@...onical.com
Subject: Re: [PATCH 4/5] ext4: Adds EXT4 encryption read callback support

On Wed, 2014-07-23 at 14:23 -0700, Michael Halcrow wrote: 
> Adds EXT4 encryption read callback support.
> 
> Copies block_read_full_page() to ext4_read_full_page() and adds some
> callback stuff near the end. I couldn't think of an elegant way to
> modify block_read_full_page() to accept the callback context without
> unnecessarily repeatedly allocating it and immediately deallocating it
> for sparse pages.
> 
> Mimi, for IMA, maybe you'll want to do something like what's happening
> in ext4_completion_work().
> 
> Signed-off-by: Michael Halcrow <mhalcrow@...gle.com>
> ---
>  fs/ext4/crypto.c    |  30 +++++++--
>  fs/ext4/ext4.h      |   1 +
>  fs/ext4/file.c      |   9 ++-
>  fs/ext4/inode.c     | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++--
>  include/linux/bio.h |   3 +
>  5 files changed, 215 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
> index 3c9e9f4..435f33f 100644
> --- a/fs/ext4/crypto.c
> +++ b/fs/ext4/crypto.c
> @@ -58,6 +58,7 @@ atomic_t ext4_dbg_ctxs = ATOMIC_INIT(0);
>  void ext4_release_crypto_ctx(ext4_crypto_ctx_t *ctx)
>  {
>  	unsigned long flags;
> +	atomic_dec(&ctx->dbg_refcnt);
>  	if (ctx->bounce_page) {
>  		if (ctx->flags & EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) {
>  			__free_page(ctx->bounce_page);
> @@ -67,6 +68,7 @@ void ext4_release_crypto_ctx(ext4_crypto_ctx_t *ctx)
>  		}
>  		ctx->bounce_page = NULL;
>  	}
> +	ctx->control_page = NULL;
>  	if (ctx->flags & EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL) {
>  		if (ctx->tfm)
>  			crypto_free_ablkcipher(ctx->tfm);
> @@ -136,6 +138,7 @@ ext4_crypto_ctx_t *ext4_get_crypto_ctx(
>  	} else {
>  		ctx->flags &= ~EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
>  	}
> +	atomic_set(&ctx->dbg_refcnt, 0);
> 
>  	/* Allocate a new Crypto API context if we don't already have
>  	 * one. */
> @@ -417,13 +420,28 @@ int ext4_decrypt(ext4_crypto_ctx_t *ctx, struct page* page)
>  	sg_set_page(&src, page, PAGE_CACHE_SIZE, 0);
>  	ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
>  				     xts_tweak);
> -	res = crypto_ablkcipher_decrypt(req);
> -	if (res == -EINPROGRESS || res == -EBUSY) {
> -		BUG_ON(req->base.data != &ecr);
> -		wait_for_completion(&ecr.completion);
> -		res = ecr.res;
> -		reinit_completion(&ecr.completion);
> +/* =======
> + * TODO(mhalcrow): Removed real crypto so intermediate patch for read
> + * path is still fully functional. For now just doing something that
> + * might expose a race condition. */
> +	{
> +		char *page_virt;
> +		char tmp;
> +                int i;
> +		page_virt = kmap(page);
> +		for (i = 0; i < PAGE_CACHE_SIZE / 2; ++i) {
> +			tmp = page_virt[i];
> +			page_virt[i] = page_virt[PAGE_CACHE_SIZE - i - 1];
> +			page_virt[PAGE_CACHE_SIZE - i - 1] = tmp;
> +		}
> +		for (i = 0; i < PAGE_CACHE_SIZE / 2; ++i) {
> +			tmp = page_virt[i];
> +			page_virt[i] = page_virt[PAGE_CACHE_SIZE - i - 1];
> +			page_virt[PAGE_CACHE_SIZE - i - 1] = tmp;
> +		}
> +		kunmap(page);
>  	}
> +/* ======= */
>  	ablkcipher_request_free(req);
>  out:
>  	if (res)
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7508261..1118bb0 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2821,6 +2821,7 @@ typedef struct ext4_crypto_ctx {
>  	struct work_struct work;	/* Work queue for read complete path */
>  	struct list_head free_list;	/* Free list */
>  	int flags;			/* Flags */
> +	atomic_t dbg_refcnt;		/* TODO(mhalcrow): Remove for release */
>  } ext4_crypto_ctx_t;
>  extern struct workqueue_struct *mpage_read_workqueue;
>  int ext4_allocate_crypto(size_t num_crypto_pages, size_t num_crypto_ctxs);
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index aca7b24..9b8478c 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -202,6 +202,7 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
>  {
>  	file_accessed(file);
>  	vma->vm_ops = &ext4_file_vm_ops;
> +	ext4_get_crypto_key(file->f_mapping->host);
>  	return 0;
>  }
> 
> @@ -212,6 +213,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
>  	struct vfsmount *mnt = filp->f_path.mnt;
>  	struct path path;
>  	char buf[64], *cp;
> +	int ret;
> 
>  	if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
>  		     !(sb->s_flags & MS_RDONLY))) {
> @@ -250,11 +252,14 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
>  	 * writing and the journal is present
>  	 */
>  	if (filp->f_mode & FMODE_WRITE) {
> -		int ret = ext4_inode_attach_jinode(inode);
> +		ret = ext4_inode_attach_jinode(inode);
>  		if (ret < 0)
>  			return ret;
>  	}
> -	return dquot_file_open(inode, filp);
> +	ret = dquot_file_open(inode, filp);
> +	if (!ret)
> +		ext4_get_crypto_key(inode);
> +	return ret;
>  }
> 
>  /*
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 4d37a12..6bf57d3 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -800,6 +800,8 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
>  			       ext4_lblk_t block, int create, int *err)
>  {
>  	struct buffer_head *bh;
> +	struct ext4_inode_info *ei = EXT4_I(inode);
> +	ext4_crypto_ctx_t *ctx;
> 
>  	bh = ext4_getblk(handle, inode, block, create, err);
>  	if (!bh)
> @@ -808,8 +810,16 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
>  		return bh;
>  	ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
>  	wait_on_buffer(bh);
> -	if (buffer_uptodate(bh))
> +	if (buffer_uptodate(bh)) {
> +		if (ei->i_encrypt) {
> +			BUG_ON(!bh->b_page);
> +			BUG_ON(bh->b_size != PAGE_CACHE_SIZE);
> +			ctx = ext4_get_crypto_ctx(false, ei->i_crypto_key);
> +			WARN_ON_ONCE(ext4_decrypt(ctx, bh->b_page));
> +			ext4_release_crypto_ctx(ctx);
> +		}
>  		return bh;
> +	}
>  	put_bh(bh);
>  	*err = -EIO;
>  	return NULL;
> @@ -2833,20 +2843,151 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
>  	return generic_block_bmap(mapping, block, ext4_get_block);
>  }
> 
> +static void ext4_completion_work(struct work_struct *work)
> +{
> +	ext4_crypto_ctx_t *ctx = container_of(work, ext4_crypto_ctx_t, work);
> +	struct page *page = ctx->control_page;
> +	WARN_ON_ONCE(ext4_decrypt(ctx, page));
> +	ext4_release_crypto_ctx(ctx);
> +	SetPageUptodate(page);
> +	unlock_page(page);
> +}
> +

This completion work is on a per block/page basis, not file basis.  How
is this going to help?

Mimi

> +static int ext4_complete_cb(struct bio *bio, int res)
> +{
> +	ext4_crypto_ctx_t *ctx = bio->bi_cb_ctx;
> +	struct page *page = ctx->control_page;
> +	BUG_ON(atomic_read(&ctx->dbg_refcnt) != 1);
> +	if (res) {
> +		ext4_release_crypto_ctx(ctx);
> +		unlock_page(page);
> +		return res;
> +	}
> +	INIT_WORK(&ctx->work, ext4_completion_work);
> +	queue_work(mpage_read_workqueue, &ctx->work);
> +	return 0;
> +}
> +
> +static int ext4_read_full_page(struct page *page)
> +{
> +	struct inode *inode = page->mapping->host;
> +	sector_t iblock, lblock;
> +	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
> +	unsigned int blocksize, bbits;
> +	int nr, i;
> +	int fully_mapped = 1;
> +
> +	head = create_page_buffers(page, inode, 0);
> +	blocksize = head->b_size;
> +	bbits = ilog2(blocksize);
> +
> +	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
> +	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
> +	bh = head;
> +	nr = 0;
> +	i = 0;
> +
> +	do {
> +		if (buffer_uptodate(bh))
> +			continue;
> +
> +		if (!buffer_mapped(bh)) {
> +			int err = 0;
> +
> +			fully_mapped = 0;
> +			if (iblock < lblock) {
> +				WARN_ON(bh->b_size != blocksize);
> +				err = ext4_get_block(inode, iblock, bh, 0);
> +				if (err)
> +					SetPageError(page);
> +			}
> +			if (!buffer_mapped(bh)) {
> +				zero_user(page, i * blocksize, blocksize);
> +				if (!err)
> +					set_buffer_uptodate(bh);
> +				continue;
> +			}
> +			/*
> +			 * get_block() might have updated the buffer
> +			 * synchronously
> +			 */
> +			if (buffer_uptodate(bh))
> +				continue;
> +		}
> +		arr[nr++] = bh;
> +	} while (i++, iblock++, (bh = bh->b_this_page) != head);
> +
> +	if (fully_mapped)
> +		SetPageMappedToDisk(page);
> +
> +	if (!nr) {
> +		/*
> +		 * All buffers are uptodate - we can set the page uptodate
> +		 * as well. But not if get_block() returned an error.
> +		 */
> +		if (!PageError(page))
> +			SetPageUptodate(page);
> +		unlock_page(page);
> +		return 0;
> +	}
> +
> +	/* TODO(mhalcrow): For the development phase, encryption
> +	 * requires that the block size be equal to the page size. To
> +	 * make this the case for release (if we go that route), we'll
> +	 * need a super.c change to verify. */
> +	BUG_ON(nr != 1);
> +
> +	/* Stage two: lock the buffers */
> +	for (i = 0; i < nr; i++) {
> +		bh = arr[i];
> +		lock_buffer(bh);
> +		mark_buffer_async_read(bh);
> +	}
> +
> +	/*
> +	 * Stage 3: start the IO.  Check for uptodateness
> +	 * inside the buffer lock in case another process reading
> +	 * the underlying blockdev brought it uptodate (the sct fix).
> +	 */
> +	for (i = 0; i < nr; i++) {
> +		bh = arr[i];
> +		if (buffer_uptodate(bh))
> +			end_buffer_async_read(bh, 1);
> +		else {
> +			struct ext4_inode_info *ei = EXT4_I(inode);
> +			ext4_crypto_ctx_t *ctx = ext4_get_crypto_ctx(
> +				false, ei->i_crypto_key);
> +			BUG_ON(atomic_read(&ctx->dbg_refcnt) != 0);
> +			atomic_inc(&ctx->dbg_refcnt);
> +			BUG_ON(ctx->control_page);
> +			ctx->control_page = page;
> +			BUG_ON(atomic_read(&ctx->dbg_refcnt) != 1);
> +			if (submit_bh_cb(READ, bh, ext4_complete_cb, ctx))
> +				ext4_release_crypto_ctx(ctx);
> +		}
> +	}
> +	return 0;
> +}
> +
>  static int ext4_readpage(struct file *file, struct page *page)
>  {
>  	int ret = -EAGAIN;
>  	struct inode *inode = page->mapping->host;
> +	struct ext4_inode_info *ei = EXT4_I(inode);
> 
>  	trace_ext4_readpage(page);
> 
>  	if (ext4_has_inline_data(inode))
>  		ret = ext4_readpage_inline(inode, page);
> 
> -	if (ret == -EAGAIN)
> +	if (ei->i_encrypt) {
> +		BUG_ON(ret != -EAGAIN);
> +		ext4_read_full_page(page);
> +	} else if (ret == -EAGAIN) {
>  		return mpage_readpage(page, ext4_get_block);
> +	}
> 
> -	return ret;
> +	return 0;
>  }
> 
>  static int
> @@ -2854,12 +2995,35 @@ ext4_readpages(struct file *file, struct address_space *mapping,
>  		struct list_head *pages, unsigned nr_pages)
>  {
>  	struct inode *inode = mapping->host;
> +	struct ext4_inode_info *ei = EXT4_I(inode);
> +	struct page *page = NULL;
> +	unsigned page_idx;
> 
>  	/* If the file has inline data, no need to do readpages. */
>  	if (ext4_has_inline_data(inode))
>  		return 0;
> 
> -	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
> +	if (ei->i_encrypt) {
> +		for (page_idx = 0; page_idx < nr_pages; page_idx++) {
> +			page = list_entry(pages->prev, struct page, lru);
> +			prefetchw(&page->flags);
> +			list_del(&page->lru);
> +			if (!add_to_page_cache_lru(page, mapping, page->index,
> +						   GFP_KERNEL)) {
> +				if (!PageUptodate(page)) {
> +					ext4_read_full_page(page);
> +				} else {
> +					unlock_page(page);
> +				}
> +			}
> +			page_cache_release(page);
> +		}
> +		BUG_ON(!list_empty(pages));
> +		return 0;
> +	} else {
> +		return mpage_readpages(mapping, pages, nr_pages,
> +				       ext4_get_block);
> +	}
>  }
> 
>  static void ext4_invalidatepage(struct page *page, unsigned int offset,
> @@ -3118,9 +3282,13 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
>  {
>  	struct file *file = iocb->ki_filp;
>  	struct inode *inode = file->f_mapping->host;
> +	struct ext4_inode_info *ei = EXT4_I(inode);
>  	size_t count = iov_iter_count(iter);
>  	ssize_t ret;
> 
> +	if (ei->i_encrypt)
> +		return 0;
> +
>  	/*
>  	 * If we are doing data journalling we don't support O_DIRECT
>  	 */
> @@ -3243,8 +3411,10 @@ static int ext4_block_zero_page_range(handle_t *handle,
>  	unsigned blocksize, max, pos;
>  	ext4_lblk_t iblock;
>  	struct inode *inode = mapping->host;
> +	struct ext4_inode_info *ei = EXT4_I(inode);
>  	struct buffer_head *bh;
>  	struct page *page;
> +	ext4_crypto_ctx_t *ctx;
>  	int err = 0;
> 
>  	page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
> @@ -3300,6 +3470,12 @@ static int ext4_block_zero_page_range(handle_t *handle,
>  		/* Uhhuh. Read error. Complain and punt. */
>  		if (!buffer_uptodate(bh))
>  			goto unlock;
> +		if (ei->i_encrypt) {
> +			BUG_ON(blocksize != PAGE_CACHE_SIZE);
> +			ctx = ext4_get_crypto_ctx(false, ei->i_crypto_key);
> +			WARN_ON_ONCE(ext4_decrypt(ctx, page));
> +			ext4_release_crypto_ctx(ctx);
> +		}
>  	}
>  	if (ext4_should_journal_data(inode)) {
>  		BUFFER_TRACE(bh, "get write access");
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index d2633ee..6ec3bee 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -375,6 +375,9 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
> 
>  }
> 
> +/* TODO(mhalcrow): Only here for test; remove before release */
> +extern atomic_t global_bio_count;
> +
>  extern void bio_endio(struct bio *, int);
>  extern void bio_endio_nodec(struct bio *, int);
>  struct request_queue;


--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ