lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 11 Aug 2011 16:48:49 +0200
From:	Lukas Czerner <lczerner@...hat.com>
To:	linux-ext4@...r.kernel.org
Cc:	tytso@....edu, Lukas Czerner <lczerner@...hat.com>
Subject: [PATCH] e2fsprogs: Use punch hole as "discard" on regular files

If e2fsprogs tools (mke2fs, e2fsck) is run on regular file instead of
on block device, we can use punch hole instead of regular discard
command which would not work on regular file anyway. This gives us
several advantages. First of all when e2fsck is run with '-E discard'
parameter it will punch out all ununsed space from the image, hence
trimming down the file system image. And secondly, when creating an
file system on regular file (with '-E discard' which is default), we
can use punch hole to clear the file content, hence we can skip inode
table initialization, because reads from sparse area returns zeros. This
will result in faster file system creation (without the need to specify
lazy_itable_init) and smaller images.

This commit also fixes some tests that would wail due to mke2fs showing
discard progress, hence the output would differ.

Signed-off-by: Lukas Czerner <lczerner@...hat.com>
---
 lib/ext2fs/ext2_io.h            |    1 +
 lib/ext2fs/unix_io.c            |   46 ++++++++++++++++++++++++++++++++++----
 misc/mke2fs.c                   |   10 ++++++-
 tests/f_resize_inode/expect     |    1 +
 tests/m_dasd_bs/expect.1        |    1 +
 tests/m_extent_journal/expect.1 |    1 +
 tests/m_large_file/expect.1     |    1 +
 tests/m_meta_bg/expect.1        |    1 +
 tests/m_no_opt/expect.1         |    1 +
 tests/m_raid_opt/expect.1       |    1 +
 tests/m_std/expect.1            |    1 +
 tests/m_uninit/expect.1         |    1 +
 12 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/lib/ext2fs/ext2_io.h b/lib/ext2fs/ext2_io.h
index e71ada9..bcc2f87 100644
--- a/lib/ext2fs/ext2_io.h
+++ b/lib/ext2fs/ext2_io.h
@@ -30,6 +30,7 @@ typedef struct struct_io_stats *io_stats;
 
 #define CHANNEL_FLAGS_WRITETHROUGH	0x01
 #define CHANNEL_FLAGS_DISCARD_ZEROES	0x02
+#define CHANNEL_FLAGS_BLOCK_DEVICE	0x04
 
 #define io_channel_discard_zeroes_data(i) (i->flags & CHANNEL_FLAGS_DISCARD_ZEROES)
 
diff --git a/lib/ext2fs/unix_io.c b/lib/ext2fs/unix_io.c
index c1d0561..eb5df55 100644
--- a/lib/ext2fs/unix_io.c
+++ b/lib/ext2fs/unix_io.c
@@ -441,7 +441,11 @@ static errcode_t unix_open(const char *name, int flags, io_channel *channel)
 	struct unix_private_data *data = NULL;
 	errcode_t	retval;
 	int		open_flags, zeroes = 0;
+#ifdef HAVE_OPEN64
+	struct stat64	st;
+#else
 	struct stat	st;
+#endif
 #ifdef __linux__
 	struct 		utsname ut;
 #endif
@@ -482,11 +486,26 @@ static errcode_t unix_open(const char *name, int flags, io_channel *channel)
 #endif
 	data->flags = flags;
 
+	st.st_mode = 0;
 #ifdef HAVE_OPEN64
 	data->dev = open64(io->name, open_flags);
+	stat64(io->name, &st);
 #else
 	data->dev = open(io->name, open_flags);
+	stat(io->name, &st);
 #endif
+	/*
+	 * If the device is really a block device, then set the
+	 * appropriate flag, otherwise we can set DISCARD_ZEROES flag
+	 * because we are going to use punch hole instead of discard
+	 * and if it succeed, subsequent read from sparse area returns
+	 * zero.
+	 */
+	if (S_ISBLK(st.st_mode))
+		io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE;
+	else
+		io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
+
 	if (data->dev < 0) {
 		retval = errno;
 		goto cleanup;
@@ -552,7 +571,11 @@ static errcode_t unix_open(const char *name, int flags, io_channel *channel)
 	     (ut.release[2] == '4') && (ut.release[3] == '.') &&
 	     (ut.release[4] == '1') && (ut.release[5] >= '0') &&
 	     (ut.release[5] < '8')) &&
-	    (fstat(data->dev, &st) == 0) &&
+#ifdef HAVE_OPEN64
+	    (stat64(io->name, &st) == 0) &&
+#else
+	    (stat(io->name, &st) == 0) &&
+#endif
 	    (S_ISBLK(st.st_mode))) {
 		struct rlimit	rlim;
 
@@ -857,7 +880,9 @@ static errcode_t unix_set_option(io_channel channel, const char *option,
 }
 
 #if defined(__linux__) && !defined(BLKDISCARD)
-#define BLKDISCARD	_IO(0x12,119)
+#define BLKDISCARD		_IO(0x12,119)
+#define FALLOC_FL_KEEP_SIZE	0x01
+#define FALLOC_FL_PUNCH_HOLE	0x02
 #endif
 
 static errcode_t unix_discard(io_channel channel, unsigned long long block,
@@ -872,10 +897,21 @@ static errcode_t unix_discard(io_channel channel, unsigned long long block,
 	data = (struct unix_private_data *) channel->private_data;
 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
 
-	range[0] = (__uint64_t)(block) * channel->block_size;
-	range[1] = (__uint64_t)(count) * channel->block_size;
+	if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) {
+		range[0] = (__uint64_t)(block) * channel->block_size;
+		range[1] = (__uint64_t)(count) * channel->block_size;
 
-	ret = ioctl(data->dev, BLKDISCARD, &range);
+		ret = ioctl(data->dev, BLKDISCARD, &range);
+	} else {
+		/*
+		 * If we are not on block device, try to use punch hole
+		 * to reclaim free space.
+		 */
+		ret = fallocate(data->dev,
+				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				(off_t)(block) * channel->block_size,
+				(off_t)(count) * channel->block_size);
+		}
 	if (ret < 0)
 		return errno;
 	return 0;
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index e062bda..9685e2d 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -2073,12 +2073,18 @@ static int mke2fs_discard_device(ext2_filsys fs)
 	struct ext2fs_numeric_progress_struct progress;
 	blk64_t blocks = ext2fs_blocks_count(fs->super);
 	blk64_t count = DISCARD_STEP_MB;
-	blk64_t cur = 0;
+	blk64_t cur;
 	int retval = 0;
 
-	retval = io_channel_discard(fs->io, 0, 0);
+	/*
+	 * Let's try if discard really works on the device, so
+	 * we do not print numeric progress resulting in failure
+	 * afterwards.
+	 */
+	retval = io_channel_discard(fs->io, 0, fs->blocksize);
 	if (retval)
 		return retval;
+	cur = fs->blocksize;
 
 	count *= (1024 * 1024);
 	count /= fs->blocksize;
diff --git a/tests/f_resize_inode/expect b/tests/f_resize_inode/expect
index a396927..cfc699c 100644
--- a/tests/f_resize_inode/expect
+++ b/tests/f_resize_inode/expect
@@ -1,4 +1,5 @@
 mke2fs -F -O resize_inode -o Linux -b 1024 -g 1024 test.img 16384
+Discarding device blocks:  1024/16384...........           ...........done                            
 Filesystem label=
 OS type: Linux
 Block size=1024 (log=0)
diff --git a/tests/m_dasd_bs/expect.1 b/tests/m_dasd_bs/expect.1
index 31db54b..310fc95 100644
--- a/tests/m_dasd_bs/expect.1
+++ b/tests/m_dasd_bs/expect.1
@@ -1,3 +1,4 @@
+Discarding device blocks:  2048/32768...........           ...........done                            
 Filesystem label=
 OS type: Linux
 Block size=2048 (log=1)
diff --git a/tests/m_extent_journal/expect.1 b/tests/m_extent_journal/expect.1
index 88ea2d9..fb07588 100644
--- a/tests/m_extent_journal/expect.1
+++ b/tests/m_extent_journal/expect.1
@@ -1,3 +1,4 @@
+Discarding device blocks:  1024/65536...........           ...........done                            
 Filesystem label=
 OS type: Linux
 Block size=1024 (log=0)
diff --git a/tests/m_large_file/expect.1 b/tests/m_large_file/expect.1
index 2b40c84..3ebd03c 100644
--- a/tests/m_large_file/expect.1
+++ b/tests/m_large_file/expect.1
@@ -1,3 +1,4 @@
+Discarding device blocks:  4096/16384...........           ...........done                            
 Filesystem label=
 OS type: Linux
 Block size=4096 (log=2)
diff --git a/tests/m_meta_bg/expect.1 b/tests/m_meta_bg/expect.1
index f1c9cef..f947da9 100644
--- a/tests/m_meta_bg/expect.1
+++ b/tests/m_meta_bg/expect.1
@@ -1,3 +1,4 @@
+Discarding device blocks:   1024/131072.............             .............done                            
 Filesystem label=
 OS type: Linux
 Block size=1024 (log=0)
diff --git a/tests/m_no_opt/expect.1 b/tests/m_no_opt/expect.1
index 7f3e5aa..81764d3 100644
--- a/tests/m_no_opt/expect.1
+++ b/tests/m_no_opt/expect.1
@@ -1,3 +1,4 @@
+Discarding device blocks:  1024/65536...........           ...........done                            
 Filesystem label=
 OS type: Linux
 Block size=1024 (log=0)
diff --git a/tests/m_raid_opt/expect.1 b/tests/m_raid_opt/expect.1
index 0c6acc1..c8667e8 100644
--- a/tests/m_raid_opt/expect.1
+++ b/tests/m_raid_opt/expect.1
@@ -1,3 +1,4 @@
+Discarding device blocks:   1024/131072.............             .............done                            
 Filesystem label=
 OS type: Linux
 Block size=1024 (log=0)
diff --git a/tests/m_std/expect.1 b/tests/m_std/expect.1
index d0bf27c..a2b6c3f 100644
--- a/tests/m_std/expect.1
+++ b/tests/m_std/expect.1
@@ -1,3 +1,4 @@
+Discarding device blocks:  1024/65536...........           ...........done                            
 Filesystem label=
 OS type: Linux
 Block size=1024 (log=0)
diff --git a/tests/m_uninit/expect.1 b/tests/m_uninit/expect.1
index 173c072..72c652c 100644
--- a/tests/m_uninit/expect.1
+++ b/tests/m_uninit/expect.1
@@ -1,3 +1,4 @@
+Discarding device blocks:   1024/131072.............             .............done                            
 Filesystem label=
 OS type: Linux
 Block size=1024 (log=0)
-- 
1.7.4.4

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ