[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4F75F904.5020107@sandeen.net>
Date: Fri, 30 Mar 2012 13:18:44 -0500
From: Eric Sandeen <sandeen@...deen.net>
To: Jeff Moyer <jmoyer@...hat.com>
CC: linux-fsdevel@...r.kernel.org, hch@...radead.org,
linux-ext4@...r.kernel.org, jack@...e.cz, xfs@....sgi.com
Subject: Re: [PATCH 5/7] xfs: honor the O_SYNC flag for asynchronous direct
I/O requests
On 3/29/12 5:05 PM, Jeff Moyer wrote:
> Hi,
>
> If a file is opened with O_SYNC|O_DIRECT, the drive cache does not get
> flushed after the write completion for AIOs. This patch attempts to fix
> that problem by marking an I/O as requiring a cache flush in endio
> processing, and then issuing the cache flush after any unwritten extent
> conversion is done.
>
> Signed-off-by: Jeff Moyer <jmoyer@...hat.com>
> ---
> fs/xfs/xfs_aops.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/xfs/xfs_mount.h | 1 +
> fs/xfs/xfs_super.c | 8 ++++
> 3 files changed, 116 insertions(+), 1 deletions(-)
>
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 0dbb9e7..6ef8f7a 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -170,6 +170,58 @@ xfs_setfilesize(
> }
>
> /*
> + * In the case of synchronous, AIO, O_DIRECT writes, we need to flush
> + * the disk cache when the I/O is complete.
> + */
> +STATIC bool
> +xfs_ioend_needs_cache_flush(
> + struct xfs_ioend *ioend)
> +{
> + struct xfs_inode *ip = XFS_I(ioend->io_inode);
> + struct xfs_mount *mp = ip->i_mount;
> +
> + if (!ioend->io_isasync)
> + return false;
> +
> + if (!(mp->m_flags & XFS_MOUNT_BARRIER))
> + return false;
> +
> + return (IS_SYNC(ioend->io_inode) ||
> + (ioend->io_iocb->ki_filp->f_flags & O_DSYNC));
> +}
> +
> +STATIC void
> +xfs_end_io_flush(
> + struct bio *bio,
> + int error)
> +{
> + struct xfs_ioend *ioend = bio->bi_private;
> +
> + if (error && ioend->io_result > 0)
> + ioend->io_result = error;
> +
> + xfs_destroy_ioend(ioend);
> + bio_put(bio);
> +}
> +
> +/*
> + * Issue a WRITE_FLUSH to the specified device.
> + */
> +STATIC void
> +xfs_ioend_flush_cache(
> + struct xfs_ioend *ioend,
> + xfs_buftarg_t *targp)
> +{
> + struct bio *bio;
> +
> + bio = bio_alloc(GFP_KERNEL, 0);
> + bio->bi_end_io = xfs_end_io_flush;
> + bio->bi_bdev = targp->bt_bdev;
> + bio->bi_private = ioend;
> + submit_bio(WRITE_FLUSH, bio);
> +}
> +
> +/*
> * Schedule IO completion handling on the final put of an ioend.
> *
> * If there is no work to do we might as well call it a day and free the
> @@ -186,11 +238,61 @@ xfs_finish_ioend(
> queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
> else if (ioend->io_append_trans)
> queue_work(mp->m_data_workqueue, &ioend->io_work);
> + else if (xfs_ioend_needs_cache_flush(ioend))
> + queue_work(mp->m_flush_workqueue, &ioend->io_work);
> else
> xfs_destroy_ioend(ioend);
> }
> }
>
> +STATIC void
> +xfs_ioend_force_cache_flush(
> + xfs_ioend_t *ioend)
> +{
> + struct xfs_inode *ip = XFS_I(ioend->io_inode);
> + struct xfs_mount *mp = ip->i_mount;
> + xfs_lsn_t lsn = 0;
> + int err = 0;
> + int log_flushed = 0;
> +
> + /*
> + * Check to see if we need to sync metadata. If so,
> + * perform a log flush. If not, just flush the disk
> + * write cache for the data disk.
> + */
> + if (IS_SYNC(ioend->io_inode) ||
> + (ioend->io_iocb->ki_filp->f_flags & __O_SYNC)) {
> + /*
> + * TODO: xfs_blkdev_issue_flush and _xfs_log_force_lsn
> + * are synchronous, and so will block the I/O
> + * completion work queue.
> + */
> + /*
> + * If the log device is different from the data device,
> + * be sure to flush the cache on the data device
> + * first.
> + */
> + if (mp->m_logdev_targp != mp->m_ddev_targp)
> + xfs_blkdev_issue_flush(mp->m_ddev_targp);
> +
> + xfs_ilock(ip, XFS_ILOCK_SHARED);
> + if (xfs_ipincount(ip))
> + lsn = ip->i_itemp->ili_last_lsn;
> + xfs_iunlock(ip, XFS_ILOCK_SHARED);
> + if (lsn)
> + err = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC,
> + &log_flushed);
> + if (err && ioend->io_result > 0)
> + ioend->io_result = err;
Careful you don't get burned by _xfs_log_force_lsn returning positive
errors here...
-Eric
> + if (err || log_flushed)
> + xfs_destroy_ioend(ioend);
> + else
> + xfs_ioend_flush_cache(ioend, mp->m_logdev_targp);
> + } else
> + /* data sync only, flush the disk cache */
> + xfs_ioend_flush_cache(ioend, mp->m_ddev_targp);
> +}
> +
> /*
> * IO write completion.
> */
> @@ -243,7 +345,11 @@ xfs_end_io(
> }
>
> done:
> - xfs_destroy_ioend(ioend);
> + /* the honoring of O_SYNC has to be done last */
> + if (xfs_ioend_needs_cache_flush(ioend))
> + xfs_ioend_force_cache_flush(ioend);
> + else
> + xfs_destroy_ioend(ioend);
> }
>
> /*
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 9eba738..e406204 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -214,6 +214,7 @@ typedef struct xfs_mount {
>
> struct workqueue_struct *m_data_workqueue;
> struct workqueue_struct *m_unwritten_workqueue;
> + struct workqueue_struct *m_flush_workqueue;
> } xfs_mount_t;
>
> /*
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index dab9a5f..e32b309 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -773,8 +773,15 @@ xfs_init_mount_workqueues(
> if (!mp->m_unwritten_workqueue)
> goto out_destroy_data_iodone_queue;
>
> + mp->m_flush_workqueue = alloc_workqueue("xfs-flush/%s",
> + WQ_MEM_RECLAIM, 0, mp->m_fsname);
> + if (!mp->m_flush_workqueue)
> + goto out_destroy_unwritten_queue;
> +
> return 0;
>
> +out_destroy_unwritten_queue:
> + destroy_workqueue(mp->m_unwritten_workqueue);
> out_destroy_data_iodone_queue:
> destroy_workqueue(mp->m_data_workqueue);
> out:
> @@ -785,6 +792,7 @@ STATIC void
> xfs_destroy_mount_workqueues(
> struct xfs_mount *mp)
> {
> + destroy_workqueue(mp->m_flush_workqueue);
> destroy_workqueue(mp->m_data_workqueue);
> destroy_workqueue(mp->m_unwritten_workqueue);
> }
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists