Message-ID: <451C58B2.1070302@us.ibm.com>
Date: Thu, 28 Sep 2006 16:20:18 -0700
From: Badari Pulavarty <pbadari@...ibm.com>
To: ext4 <linux-ext4@...r.kernel.org>,
lkml <linux-kernel@...r.kernel.org>
Subject: [Fwd: Re: [patch 003/152] jbd: fix commit of ordered data buffers]
Forgot to CC ext4 & lkml earlier.
Thanks,
Badari
Message-ID: <451C4DDE.60307@...ibm.com>
Date: Thu, 28 Sep 2006 15:34:06 -0700
From: Badari Pulavarty <pbadari@...ibm.com>
User-Agent: Thunderbird 1.5.0.7 (Windows/20060909)
MIME-Version: 1.0
To: akpm@...l.org
CC: torvalds@...l.org, jack@...e.cz, stable@...nel.org
Subject: Re: [patch 003/152] jbd: fix commit of ordered data buffers
References: <200609260630.k8Q6UrvQ011999@...ll0.pdx.osdl.net>
In-Reply-To: <200609260630.k8Q6UrvQ011999@...ll0.pdx.osdl.net>
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit
akpm@...l.org wrote:
> From: Jan Kara <jack@...e.cz>
>
> ....
> [suitable for 2.6.18.x around the 2.6.19-rc2 timeframe]
>
> Signed-off-by: Jan Kara <jack@...e.cz>
> Cc: Badari Pulavarty <pbadari@...ibm.com>
> Cc: <stable@...nel.org>
> Signed-off-by: Andrew Morton <akpm@...l.org>
> ---
>
> fs/jbd/commit.c | 182 ++++++++++++++++++++++++++++------------------
> 1 file changed, 113 insertions(+), 69 deletions(-)
>
> diff -puN fs/jbd/commit.c~jbd-fix-commit-of-ordered-data-buffers fs/jbd/commit.c
> --- a/fs/jbd/commit.c~jbd-fix-commit-of-ordered-data-buffers
> +++ a/fs/jbd/commit.c
> @@ -160,6 +160,117 @@ static int journal_write_commit_record(j
>          return (ret == -EIO);
>  }
> 
> +static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
> +{
> +        int i;
> +
> +        for (i = 0; i < bufs; i++) {
> +                wbuf[i]->b_end_io = end_buffer_write_sync;
> +                /* We use-up our safety reference in submit_bh() */
> +                submit_bh(WRITE, wbuf[i]);
> +        }
> +}
> +
> +/*
> + * Submit all the data buffers to disk
> + */
> +static void journal_submit_data_buffers(journal_t *journal,
> +                                transaction_t *commit_transaction)
> +{
> +        struct journal_head *jh;
> +        struct buffer_head *bh;
> +        int locked;
> +        int bufs = 0;
> +        struct buffer_head **wbuf = journal->j_wbuf;
> +
> +        /*
> +         * Whenever we unlock the journal and sleep, things can get added
> +         * onto ->t_sync_datalist, so we have to keep looping back to
> +         * write_out_data until we *know* that the list is empty.
> +         *
> +         * Cleanup any flushed data buffers from the data list. Even in
> +         * abort mode, we want to flush this out as soon as possible.
> +         */
> +write_out_data:
> +        cond_resched();
> +        spin_lock(&journal->j_list_lock);
> +
> +        while (commit_transaction->t_sync_datalist) {
> +                jh = commit_transaction->t_sync_datalist;
>
Don't we need

        commit_transaction->t_sync_datalist = jh->b_tnext;

here to advance to the next "jh" on the next iteration?  (A small
user-space sketch of this concern follows this hunk.)

Thanks,
Badari
> +                bh = jh2bh(jh);
> +                locked = 0;
> +
> +                /* Get reference just to make sure buffer does not disappear
> +                 * when we are forced to drop various locks */
> +                get_bh(bh);
> +                /* If the buffer is dirty, we need to submit IO and hence
> +                 * we need the buffer lock. We try to lock the buffer without
> +                 * blocking. If we fail, we need to drop j_list_lock and do
> +                 * blocking lock_buffer().
> +                 */
> +                if (buffer_dirty(bh)) {
> +                        if (test_set_buffer_locked(bh)) {
> +                                BUFFER_TRACE(bh, "needs blocking lock");
> +                                spin_unlock(&journal->j_list_lock);
> +                                /* Write out all data to prevent deadlocks */
> +                                journal_do_submit_data(wbuf, bufs);
> +                                bufs = 0;
> +                                lock_buffer(bh);
> +                                spin_lock(&journal->j_list_lock);
> +                        }
> +                        locked = 1;
> +                }
> +                /* We have to get bh_state lock. Again out of order, sigh. */
> +                if (!inverted_lock(journal, bh)) {
> +                        jbd_lock_bh_state(bh);
> +                        spin_lock(&journal->j_list_lock);
> +                }
> +                /* Someone already cleaned up the buffer? */
> +                if (!buffer_jbd(bh)
> +                                || jh->b_transaction != commit_transaction
> +                                || jh->b_jlist != BJ_SyncData) {
> +                        jbd_unlock_bh_state(bh);
> +                        if (locked)
> +                                unlock_buffer(bh);
> +                        BUFFER_TRACE(bh, "already cleaned up");
> +                        put_bh(bh);
> +                        continue;
> +                }
> +                if (locked && test_clear_buffer_dirty(bh)) {
> +                        BUFFER_TRACE(bh, "needs writeout, adding to array");
> +                        wbuf[bufs++] = bh;
> +                        __journal_file_buffer(jh, commit_transaction,
> +                                                BJ_Locked);
> +                        jbd_unlock_bh_state(bh);
> +                        if (bufs == journal->j_wbufsize) {
> +                                spin_unlock(&journal->j_list_lock);
> +                                journal_do_submit_data(wbuf, bufs);
> +                                bufs = 0;
> +                                goto write_out_data;
> +                        }
> +                }
> +                else {
> +                        BUFFER_TRACE(bh, "writeout complete: unfile");
> +                        __journal_unfile_buffer(jh);
> +                        jbd_unlock_bh_state(bh);
> +                        if (locked)
> +                                unlock_buffer(bh);
> +                        journal_remove_journal_head(bh);
> +                        /* Once for our safety reference, once for
> +                         * journal_remove_journal_head() */
> +                        put_bh(bh);
> +                        put_bh(bh);
> +                }
> +
> +                if (lock_need_resched(&journal->j_list_lock)) {
> +                        spin_unlock(&journal->j_list_lock);
> +                        goto write_out_data;
> +                }
> +        }
> +        spin_unlock(&journal->j_list_lock);
> +        journal_do_submit_data(wbuf, bufs);
> +}
> +
>  /*
>   * journal_commit_transaction
>   *
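
To make the question above concrete, here is a small, self-contained
user-space sketch of the traversal pattern (illustrative only -- the
struct and the unfile() helper below are simplified stand-ins for
struct journal_head, b_tnext and __journal_unfile_buffer(), not the
real JBD code). The new loop always looks at the head of
t_sync_datalist, so it only terminates if every iteration either
unfiles the jh or refiles it off the sync-data list; a path that left
the jh filed there without advancing the head would keep seeing the
same buffer:

#include <stdio.h>

/* Simplified stand-in for a journal_head on a circular, singly-linked
 * transaction list threaded through b_tnext. */
struct jh {
        int blocknr;
        struct jh *b_tnext;
};

/* Stand-in for __journal_unfile_buffer(): unlink jh from the circular
 * list whose head is *list. */
static void unfile(struct jh **list, struct jh *jh)
{
        struct jh *p = *list;

        if (jh->b_tnext == jh) {        /* last element on the list */
                *list = NULL;
                return;
        }
        while (p->b_tnext != jh)
                p = p->b_tnext;
        p->b_tnext = jh->b_tnext;
        if (*list == jh)
                *list = jh->b_tnext;
}

int main(void)
{
        struct jh a = { 1, NULL }, b = { 2, NULL }, c = { 3, NULL };
        struct jh *t_sync_datalist = &a;

        a.b_tnext = &b;
        b.b_tnext = &c;
        c.b_tnext = &a;                 /* circular, like BJ_SyncData */

        /* Model of the new loop: always take the list head.  This only
         * terminates because every pass removes the head from the list;
         * comment out the unfile() call and it spins on block 1 forever,
         * which is the case the question above is about. */
        while (t_sync_datalist) {
                struct jh *jh = t_sync_datalist;

                printf("processing block %d\n", jh->blocknr);
                unfile(&t_sync_datalist, jh);
        }
        return 0;
}

Building this with "cc -Wall" and running it prints blocks 1, 2 and 3
and then exits; whether the real code always reaches one of the
unfile/refile paths is exactly what the question above is asking.
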
> @@ -313,80 +424,13 @@ void journal_commit_transaction(journal_
>           * Now start flushing things to disk, in the order they appear
>           * on the transaction lists. Data blocks go first.
>           */
> -
>          err = 0;
> -        /*
> -         * Whenever we unlock the journal and sleep, things can get added
> -         * onto ->t_sync_datalist, so we have to keep looping back to
> -         * write_out_data until we *know* that the list is empty.
> -         */
> -        bufs = 0;
> -        /*
> -         * Cleanup any flushed data buffers from the data list. Even in
> -         * abort mode, we want to flush this out as soon as possible.
> -         */
> -write_out_data:
> -        cond_resched();
> -        spin_lock(&journal->j_list_lock);
> -
> -        while (commit_transaction->t_sync_datalist) {
> -                struct buffer_head *bh;
> -
> -                jh = commit_transaction->t_sync_datalist;
> -                commit_transaction->t_sync_datalist = jh->b_tnext;
> -                bh = jh2bh(jh);
> -                if (buffer_locked(bh)) {
> -                        BUFFER_TRACE(bh, "locked");
> -                        if (!inverted_lock(journal, bh))
> -                                goto write_out_data;
> -                        __journal_temp_unlink_buffer(jh);
> -                        __journal_file_buffer(jh, commit_transaction,
> -                                                BJ_Locked);
> -                        jbd_unlock_bh_state(bh);
> -                        if (lock_need_resched(&journal->j_list_lock)) {
> -                                spin_unlock(&journal->j_list_lock);
> -                                goto write_out_data;
> -                        }
> -                } else {
> -                        if (buffer_dirty(bh)) {
> -                                BUFFER_TRACE(bh, "start journal writeout");
> -                                get_bh(bh);
> -                                wbuf[bufs++] = bh;
> -                                if (bufs == journal->j_wbufsize) {
> -                                        jbd_debug(2, "submit %d writes\n",
> -                                                        bufs);
> -                                        spin_unlock(&journal->j_list_lock);
> -                                        ll_rw_block(SWRITE, bufs, wbuf);
> -                                        journal_brelse_array(wbuf, bufs);
> -                                        bufs = 0;
> -                                        goto write_out_data;
> -                                }
> -                        } else {
> -                                BUFFER_TRACE(bh, "writeout complete: unfile");
> -                                if (!inverted_lock(journal, bh))
> -                                        goto write_out_data;
> -                                __journal_unfile_buffer(jh);
> -                                jbd_unlock_bh_state(bh);
> -                                journal_remove_journal_head(bh);
> -                                put_bh(bh);
> -                                if (lock_need_resched(&journal->j_list_lock)) {
> -                                        spin_unlock(&journal->j_list_lock);
> -                                        goto write_out_data;
> -                                }
> -                        }
> -                }
> -        }
> -
> -        if (bufs) {
> -                spin_unlock(&journal->j_list_lock);
> -                ll_rw_block(SWRITE, bufs, wbuf);
> -                journal_brelse_array(wbuf, bufs);
> -                spin_lock(&journal->j_list_lock);
> -        }
> +        journal_submit_data_buffers(journal, commit_transaction);
> 
>          /*
>           * Wait for all previously submitted IO to complete.
>           */
> +        spin_lock(&journal->j_list_lock);
>          while (commit_transaction->t_locked_list) {
>                  struct buffer_head *bh;
> 
> _
>