Message-Id: <20220802030342.46302-6-jefflexu@linux.alibaba.com>
Date:   Tue,  2 Aug 2022 11:03:38 +0800
From:   Jingbo Xu <jefflexu@...ux.alibaba.com>
To:     dhowells@...hat.com, linux-cachefs@...hat.com
Cc:     linux-kernel@...r.kernel.org, xiang@...nel.org
Subject: [PATCH RFC 5/9] cachefiles: mark content map on write to the backing file

Mark the content map on completion of the write to the backing file.

The expansion of the content map (when the backing file is truncated to
a larger size) and the allocation of the content map (when the backing
file is a newly created tmpfile) are delayed to the point when the
content map actually needs to be marked. It is safe to allocate memory
with GFP_KERNEL inside the iocb.ki_complete() callback, since for
direct I/O the callback is scheduled from a workqueue, i.e. it runs in
process context.
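
A minimal sketch of the pattern relied upon here (hypothetical names,
not part of this patch): for direct I/O the completion callback is
invoked from a workqueue, i.e. process context, so a sleeping
allocation is permitted:

	static void demo_write_complete(struct kiocb *iocb, long ret)
	{
		/* Runs from a workqueue for direct I/O, i.e. process
		 * context, so a GFP_KERNEL allocation may sleep safely
		 * here; it would be a bug in interrupt context. */
		void *map = kzalloc(PAGE_SIZE, GFP_KERNEL);

		kfree(map);
	}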

The content map is sized in granules of the block size of the backing
filesystem, so that holes can easily be punched in the backing content
map file when the content map gets truncated or invalidated. Currently
the content map is sized in PAGE_SIZE units, which is expected to be a
multiple of the block size of the backing filesystem. Each bit of the
content map indicates the existence of 4KB of data in the backing file,
and thus each (4KB sized) chunk of the content map covers 128MB of data
in the backing file.
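
As a quick sanity check of the arithmetic (a worked example assuming a
4KB granule, as above):

	/*
	 * bits per 4K map chunk:  4096 * BITS_PER_BYTE          = 32768
	 * data covered per chunk: 32768 * CACHEFILES_GRAN_SIZE  = 128MB
	 *
	 * e.g. a 300MB backing file needs
	 *   round_up(300MB, 128MB) / (BITS_PER_BYTE * 4096) = 12KB of map.
	 */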

When expanding the content map, a new content map buffer needs to be
allocated. A new offset inside the backing content map file also needs
to be allocated, with a hole punched over the old range starting at the
old offset. Currently the new offset is always allocated append-style,
i.e. the previous hole is not reused, as illustrated below.
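
For example (hypothetical layout): if this object's map occupies
[0, 4K) and another map has since been appended at [4K, 8K), growing
this map to 8K relocates it to the current EOF:

	/* before:  [0,4K) = map A, [4K,8K) = map B,   i_size = 8K
	 * expand A from 4K to 8K:
	 *   new_off = i_size = 8K;   i_size becomes 16K
	 *   punch hole over [0,4K), A's old range
	 * after:   [0,4K) = hole, [4K,8K) = map B, [8K,16K) = map A
	 */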

Signed-off-by: Jingbo Xu <jefflexu@...ux.alibaba.com>
---
 fs/cachefiles/content-map.c | 129 ++++++++++++++++++++++++++++++++++++
 fs/cachefiles/internal.h    |   2 +
 fs/cachefiles/io.c          |   3 +
 3 files changed, 134 insertions(+)

diff --git a/fs/cachefiles/content-map.c b/fs/cachefiles/content-map.c
index 3432efdecbcf..877ff79e181b 100644
--- a/fs/cachefiles/content-map.c
+++ b/fs/cachefiles/content-map.c
@@ -1,8 +1,24 @@
 #include <linux/fs.h>
 #include <linux/namei.h>
 #include <linux/uio.h>
+#include <linux/falloc.h>
 #include "internal.h"
 
+/*
+ * Return the size of the content map in bytes.
+ *
+ * There's one bit per granule (CACHEFILES_GRAN_SIZE, i.e. 4K). The map is
+ * sized in block-size chunks (e.g. 4K), so that holes can be punched in the
+ * map file when the content map is truncated or invalidated. Each 4K chunk
+ * thus spans (4096 * BITS_PER_BYTE * CACHEFILES_GRAN_SIZE, i.e. 128M) of
+ * file space.
+ */
+static size_t cachefiles_map_size(loff_t i_size)
+{
+	i_size = round_up(i_size, PAGE_SIZE * BITS_PER_BYTE * CACHEFILES_GRAN_SIZE);
+	return i_size / BITS_PER_BYTE / CACHEFILES_GRAN_SIZE;
+}
+
 /*
  * Zero the unused tail.
  *
@@ -91,3 +107,116 @@ void cachefiles_save_content_map(struct cachefiles_object *object)
 	if (ret != object->content_map_size)
 		object->content_info = CACHEFILES_CONTENT_NO_DATA;
 }
+
+static loff_t cachefiles_expand_map_off(struct file *file, loff_t old_off,
+					size_t old_size, size_t new_size)
+{
+	struct inode *inode = file_inode(file);
+	loff_t new_off;
+	bool punch = false;
+
+	inode_lock(inode);
+	new_off = i_size_read(inode);
+	/*
+	 * Expand the old content map range in place if possible; otherwise
+	 * discard the old range and allocate a new one at the current EOF.
+	 */
+	if (new_off == old_off + old_size) {
+		i_size_write(inode, old_off + new_size);
+		new_off = old_off;
+	} else {
+		i_size_write(inode, new_off + new_size);
+		punch = true;
+	}
+	inode_unlock(inode);
+
+	if (punch)
+		vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+			      old_off, old_size);
+
+	return new_off;
+}
+
+/*
+ * Expand the content map to a larger file size.
+ */
+static void cachefiles_expand_content_map(struct cachefiles_object *object)
+{
+	struct file *file = object->volume->content_map[(u8)object->cookie->key_hash];
+	size_t size, zap_size;
+	void *map, *zap;
+	loff_t off;
+
+	size = cachefiles_map_size(object->cookie->object_size);
+	if (size <= object->content_map_size)
+		return;
+
+	map = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
+	if (!map)
+		return;
+
+	write_lock_bh(&object->content_map_lock);
+	if (size > object->content_map_size) {
+		zap = object->content_map;
+		zap_size = object->content_map_size;
+		memcpy(map, zap, zap_size);
+		object->content_map = map;
+		object->content_map_size = size;
+
+		/* expand the content map file */
+		off = object->content_map_off;
+		if (off != CACHEFILES_CONTENT_MAP_OFF_INVAL)
+			object->content_map_off = cachefiles_expand_map_off(file,
+				off, zap_size, size);
+	} else {
+		zap = map;
+		zap_size = size;
+	}
+	write_unlock_bh(&object->content_map_lock);
+
+	free_pages((unsigned long)zap, get_order(zap_size));
+}
+
+void cachefiles_mark_content_map(struct cachefiles_object *object,
+				 loff_t start, loff_t len)
+{
+	pgoff_t granule;
+	loff_t end = start + len;
+
+	if (object->cookie->advice & FSCACHE_ADV_SINGLE_CHUNK) {
+		if (start == 0) {
+			object->content_info = CACHEFILES_CONTENT_SINGLE;
+			set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags);
+		}
+		return;
+	}
+
+	if (object->content_info == CACHEFILES_CONTENT_NO_DATA)
+		object->content_info = CACHEFILES_CONTENT_MAP;
+
+	/* TODO: set CACHEFILES_CONTENT_BACKFS_MAP accordingly */
+
+	if (object->content_info != CACHEFILES_CONTENT_MAP)
+		return;
+
+	read_lock_bh(&object->content_map_lock);
+	start = round_down(start, CACHEFILES_GRAN_SIZE);
+	do {
+		granule = start / CACHEFILES_GRAN_SIZE;
+		if (granule / BITS_PER_BYTE >= object->content_map_size) {
+			read_unlock_bh(&object->content_map_lock);
+			cachefiles_expand_content_map(object);
+			read_lock_bh(&object->content_map_lock);
+		}
+
+		if (WARN_ON(granule / BITS_PER_BYTE >= object->content_map_size))
+			break;
+
+		set_bit(granule, object->content_map);
+		start += CACHEFILES_GRAN_SIZE;
+	} while (start < end);
+
+	set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags);
+	read_unlock_bh(&object->content_map_lock);
+}
+
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 1335ea5f4a5e..c252746c8f9b 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -181,6 +181,8 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache,
  */
 extern int cachefiles_load_content_map(struct cachefiles_object *object);
 extern void cachefiles_save_content_map(struct cachefiles_object *object);
+extern void cachefiles_mark_content_map(struct cachefiles_object *object,
+					loff_t start, loff_t len);
 
 /*
  * daemon.c
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index b513d9bf81f1..27171fac649e 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -264,6 +264,9 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
 	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
 	__sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
 
+	if (ret == ki->len)
+		cachefiles_mark_content_map(ki->object, ki->start, ki->len);
+
 	if (ret < 0)
 		trace_cachefiles_io_error(object, inode, ret,
 					  cachefiles_trace_write_error);
-- 
2.27.0
