lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Thu, 15 Nov 2018 13:46:46 +0800
From:   Xiaoguang Wang <xiaoguang.wang@...ux.alibaba.com>
To:     linux-ext4@...r.kernel.org
Subject: Re: [PATCH v2] jbd2: add proc entry to control whethre doing buffer
 copy-out

hi,

Sorry, you can ignore this patch, this fixing method maybe not good.

Regards,
Xiaoguang Wang
> When jbd2 tries to get write access to one buffer, and if this buffer
> is under writeback with BH_Shadow flag, jbd2 will wait until this buffer
> has been written to disk, but sometimes the time taken to wait may be
> much long, especially disk capacity is almost full.
> 
> Here add a proc entry "force-copy", if its value is not zero, jbd2 will
> always do meta buffer copy-cout, then we can eliminate the unnecessary
> wating time here, and reduce long tail latency for buffered-write.
> 
> I construct such test case below:
> 
> $cat offline.fio
> ; fio-rand-RW.job for fiotest
> 
> [global]
> name=fio-rand-RW
> filename=fio-rand-RW
> rw=randrw
> rwmixread=60
> rwmixwrite=40
> bs=4K
> direct=0
> numjobs=4
> time_based=1
> runtime=900
> 
> [file1]
> size=60G
> ioengine=sync
> iodepth=16
> 
> $cat online.fio
> ; fio-seq-write.job for fiotest
> 
> [global]
> name=fio-seq-write
> filename=fio-seq-write
> rw=write
> bs=256K
> direct=0
> numjobs=1
> time_based=1
> runtime=60
> 
> [file1]
> rate=50m
> size=10G
> ioengine=sync
> iodepth=16
> 
> With this patch:
> $cat /proc/fs/jbd2/sda5-8/force_copy
> 0
> 
> online fio almost always get such long tail latency:
> 
> Jobs: 1 (f=1), 0B/s-0B/s: [W(1)][100.0%][w=50.0MiB/s][w=200 IOPS][eta
> 00m:00s]
> file1: (groupid=0, jobs=1): err= 0: pid=17855: Thu Nov 15 09:45:57 2018
>    write: IOPS=200, BW=50.0MiB/s (52.4MB/s)(3000MiB/60001msec)
>      clat (usec): min=135, max=4086.6k, avg=867.21, stdev=50338.22
>       lat (usec): min=139, max=4086.6k, avg=871.16, stdev=50338.22
>      clat percentiles (usec):
>       |  1.00th=[    141],  5.00th=[    143], 10.00th=[    145],
>       | 20.00th=[    147], 30.00th=[    147], 40.00th=[    149],
>       | 50.00th=[    149], 60.00th=[    151], 70.00th=[    153],
>       | 80.00th=[    155], 90.00th=[    159], 95.00th=[    163],
>       | 99.00th=[    255], 99.50th=[    273], 99.90th=[    429],
>       | 99.95th=[    441], 99.99th=[3640656]
> 
> $cat /proc/fs/jbd2/sda5-8/force_copy
> 1
> 
> online fio latency is much better.
> 
> Jobs: 1 (f=1), 0B/s-0B/s: [W(1)][100.0%][w=50.0MiB/s][w=200 IOPS][eta
> 00m:00s]
> file1: (groupid=0, jobs=1): err= 0: pid=8084: Thu Nov 15 09:31:15 2018
>    write: IOPS=200, BW=50.0MiB/s (52.4MB/s)(3000MiB/60001msec)
>      clat (usec): min=137, max=545, avg=151.35, stdev=16.22
>       lat (usec): min=140, max=548, avg=155.31, stdev=16.65
>      clat percentiles (usec):
>       |  1.00th=[  143],  5.00th=[  145], 10.00th=[  145], 20.00th=[
> 147],
>       | 30.00th=[  147], 40.00th=[  147], 50.00th=[  149], 60.00th=[
> 149],
>       | 70.00th=[  151], 80.00th=[  155], 90.00th=[  157], 95.00th=[
> 161],
>       | 99.00th=[  239], 99.50th=[  269], 99.90th=[  420], 99.95th=[
> 429],
>       | 99.99th=[  537]
> 
> As to the cost: because we'll always need to copy meta buffer, will
> consume minor cpu time and some memory(at most 32MB for 128MB journal
> size).
> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@...ux.alibaba.com>
> 
> v2:
>      add missing "force-copy" directory delete operation
> ---
>   fs/jbd2/journal.c    | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>   include/linux/jbd2.h |  5 +++++
>   2 files changed, 63 insertions(+)
> 
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index 8ef6b6daaa7a..7fdaf501610a 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -418,6 +418,9 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
>   	}
>   	kunmap_atomic(mapped_data);
>   
> +	/* force copy-out */
> +	if (need_copy_out == 0 && journal->j_force_copy)
> +		need_copy_out = 1;
>   	/*
>   	 * Do we need to do a data copy?
>   	 */
> @@ -1100,6 +1103,58 @@ static const struct file_operations jbd2_seq_info_fops = {
>   	.release        = jbd2_seq_info_release,
>   };
>   
> +static int jbd2_seq_force_copy_show(struct seq_file *m, void *v)
> +{
> +	journal_t *journal = m->private;
> +
> +	seq_printf(m, "%u\n", journal->j_force_copy);
> +	return 0;
> +}
> +
> +static int jbd2_seq_force_copy_open(struct inode *inode, struct file *filp)
> +{
> +	journal_t *journal = PDE_DATA(inode);
> +
> +	return single_open(filp, jbd2_seq_force_copy_show, journal);
> +}
> +
> +/* Worst case buffer size needed for holding an integer. */
> +#define PROC_NUMBUF 13
> +
> +static ssize_t jbd2_seq_force_copy_write(struct file *file,
> +			const char __user *buf, size_t count, loff_t *offset)
> +{
> +	struct inode *inode = file_inode(file);
> +	journal_t *journal = PDE_DATA(inode);
> +	char buffer[PROC_NUMBUF];
> +	unsigned int force_copy;
> +	int err;
> +
> +	memset(buffer, 0, sizeof(buffer));
> +	if (count > sizeof(buffer) - 1)
> +		count = sizeof(buffer) - 1;
> +	if (copy_from_user(buffer, buf, count)) {
> +		err = -EFAULT;
> +		goto out;
> +	}
> +
> +	err = kstrtouint(strstrip(buffer), 0, &force_copy);
> +	if (err)
> +		goto out;
> +	journal->j_force_copy = force_copy;
> +out:
> +	return err < 0 ? err : count;
> +}
> +
> +static const struct file_operations jbd2_seq_force_copy_fops = {
> +	.owner		= THIS_MODULE,
> +	.open           = jbd2_seq_force_copy_open,
> +	.read           = seq_read,
> +	.write		= jbd2_seq_force_copy_write,
> +	.llseek         = seq_lseek,
> +	.release        = single_release,
> +};
> +
>   static struct proc_dir_entry *proc_jbd2_stats;
>   
>   static void jbd2_stats_proc_init(journal_t *journal)
> @@ -1108,12 +1163,15 @@ static void jbd2_stats_proc_init(journal_t *journal)
>   	if (journal->j_proc_entry) {
>   		proc_create_data("info", S_IRUGO, journal->j_proc_entry,
>   				 &jbd2_seq_info_fops, journal);
> +		proc_create_data("force_copy", 0644, journal->j_proc_entry,
> +				 &jbd2_seq_force_copy_fops, journal);
>   	}
>   }
>   
>   static void jbd2_stats_proc_exit(journal_t *journal)
>   {
>   	remove_proc_entry("info", journal->j_proc_entry);
> +	remove_proc_entry("force_copy", journal->j_proc_entry);
>   	remove_proc_entry(journal->j_devname, proc_jbd2_stats);
>   }
>   
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index b708e5169d1d..7034888eac6b 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -1111,6 +1111,11 @@ struct journal_s
>   	 */
>   	struct transaction_stats_s j_stats;
>   
> +	/**
> +	 * @j_force_copy: if not zero, force to do buffer copy-out.
> +	 */
> +	unsigned int j_force_copy;
> +
>   	/**
>   	 * @j_failed_commit: Failed journal commit ID.
>   	 */
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ