lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <6.0.0.20.2.20090106134318.06709010@172.19.0.2>
Date:	Thu, 15 Jan 2009 09:32:17 +0900
From:	Hisashi Hifumi <hifumi.hisashi@....ntt.co.jp>
To:	akpm@...ux-foundation.org, linux-ext4@...r.kernel.org,
	linux-fsdevel@...r.kernel.org
Subject: [PATCH] ext2/3/4: change i_mutex usage on lseek

Hi.

I wrote some patch that changed a range of i_mutex on ext2/3/4's lseek.
Ext2/3/4 uses generic_file_llseek, this function is inside i_mutex.
I think there is room for optimization in some cases.
When SEEK_END is specified from caller, in this case we should handle
inode->i_size so i_mutex is needed. But in other cases such as SEEK_CUR or
SEEK_SET, i_mutex is not needed because just changing file->f_pos value without
touching i_size.

I did some test to measure i_mutex contention.
This test do:
	1. make an 128MB file.
	2. fork 100 processes. repeat 10000000 times lseeking randomly on each process to this file.
	3, gauge seconds between start and end of this test.

The result was:

	-2.6.29-rc1
	# time ./lseek_test
	315 sec

	real    5m15.407s
	user    1m19.128s
	sys     5m38.884s

	-2.6.29-rc1-patched
	# time ./lseek_test
	13 sec

	real    0m13.039s
	user    1m14.730s
	sys     2m9.633s 

Hardware environment:
CPU 2.4GHz(Quad Core) *4
Memory 64GB

This improvement is derived from just removal of lseek's i_mutex contention.
There is i_mutex contention not only around lseek, but also fsync or write.
So,  I think we also can mitigate i_mutex contention between fsync and lseek.

Thanks.

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@....ntt.co.jp>

diff -Nrup linux-2.6.29-rc1.org/fs/ext2/file.c linux-2.6.29-rc1/fs/ext2/file.c
--- linux-2.6.29-rc1.org/fs/ext2/file.c	2008-12-25 08:26:37.000000000 +0900
+++ linux-2.6.29-rc1/fs/ext2/file.c	2009-01-13 11:58:16.000000000 +0900
@@ -38,12 +38,24 @@ static int ext2_release_file (struct ino
 	return 0;
 }
 
+static loff_t ext2_file_llseek(struct file *file, loff_t offset, int origin)
+{
+	loff_t retval;
+
+	if (origin == SEEK_END)
+		retval = generic_file_llseek(file, offset, origin);
+	else
+		retval = generic_file_llseek_unlocked(file, offset, origin);
+
+	return retval;
+}
+
 /*
  * We have mostly NULL's here: the current defaults are ok for
  * the ext2 filesystem.
  */
 const struct file_operations ext2_file_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= ext2_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
@@ -62,7 +74,7 @@ const struct file_operations ext2_file_o
 
 #ifdef CONFIG_EXT2_FS_XIP
 const struct file_operations ext2_xip_file_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= ext2_file_llseek,
 	.read		= xip_file_read,
 	.write		= xip_file_write,
 	.unlocked_ioctl = ext2_ioctl,
diff -Nrup linux-2.6.29-rc1.org/fs/ext3/file.c linux-2.6.29-rc1/fs/ext3/file.c
--- linux-2.6.29-rc1.org/fs/ext3/file.c	2008-12-25 08:26:37.000000000 +0900
+++ linux-2.6.29-rc1/fs/ext3/file.c	2009-01-13 11:58:16.000000000 +0900
@@ -106,8 +106,20 @@ force_commit:
 	return ret;
 }
 
+static loff_t ext3_file_llseek(struct file *file, loff_t offset, int origin)
+{
+	loff_t retval;
+
+	if (origin == SEEK_END)
+		retval = generic_file_llseek(file, offset, origin);
+	else
+		retval = generic_file_llseek_unlocked(file, offset, origin);
+
+	return retval;
+}
+
 const struct file_operations ext3_file_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= ext3_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
diff -Nrup linux-2.6.29-rc1.org/fs/ext4/file.c linux-2.6.29-rc1/fs/ext4/file.c
--- linux-2.6.29-rc1.org/fs/ext4/file.c	2009-01-13 11:55:09.000000000 +0900
+++ linux-2.6.29-rc1/fs/ext4/file.c	2009-01-13 12:09:59.000000000 +0900
@@ -140,8 +140,20 @@ static int ext4_file_mmap(struct file *f
 	return 0;
 }
 
+static loff_t ext4_file_llseek(struct file *file, loff_t offset, int origin)
+{
+	loff_t retval;
+
+	if (origin == SEEK_END)
+		retval = generic_file_llseek(file, offset, origin);
+	else
+		retval = generic_file_llseek_unlocked(file, offset, origin);
+
+	return retval;
+}
+
 const struct file_operations ext4_file_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= ext4_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,




Following is the test program "lseek_test.c".

#include <stdio.h>
#include <sched.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#define NUM 100
#define LEN 4096
#define LOOP 32*1024

int num;

void catch_SIGCHLD(int signo)
{
	pid_t child_pid = 0;
	do {
		int child_ret;
		child_pid = waitpid(-1, &child_ret, WNOHANG);
		if (child_pid > 0)
			num++;
	} while (child_pid > 0);
}
main()
{
        int  i, pid;
	char buf[LEN];
	unsigned long offset, filesize;
	time_t t1, t2;
	struct sigaction act;
	memset(buf, 0, LEN);
	memset(&act, 0, sizeof(act));
	act.sa_handler = catch_SIGCHLD;
	act.sa_flags = SA_NOCLDSTOP|SA_RESTART;
	sigemptyset(&act.sa_mask);
	sigaction(SIGCHLD, &act, NULL);	

	filesize = LEN * LOOP;
	int fd = open("targetfile1",O_RDWR|O_CREAT);	

	/* create a 128MB file */
	for(i = 0; i < LOOP; i++)
		write(fd, buf, LEN);
	fsync(fd);
	close(fd);
	
	time(&t1);	
	for (i = 0; i < NUM; i++) {	
		pid = fork();
		if(pid == 0){
		/* child */
			int fd = open("targetfile1", O_RDWR);	
			int j;
			for (j = 0; j < 10000000; j++) {
				offset = (random() % filesize);
				lseek(fd, offset, SEEK_SET);
			}
			close(fd);
			exit(0);
		}
	}
	while(num < NUM)
		sleep(1);
	time(&t2);	
	printf("%d sec\n",t2-t1);
}


--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ