lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Tue,  6 Mar 2012 22:51:19 +0200
From:	Pekka Enberg <penberg@...nel.org>
To:	linux-kernel@...r.kernel.org
Cc:	leonid.moiseichuk@...ia.com, Pekka Enberg <penberg@...nel.org>,
	David Rientjes <rientjes@...gle.com>,
	Anton Vorontsov <anton.vorontsov@...aro.org>
Subject: [PATCH] vmevent: Use 'struct vmevent_attr' for vmevent_fd() ABI

This patch introduces 'struct vmevent_attr' and converts the vmevent_fd() ABI
to use it, which makes the ABI much more flexible.

Originally-by: Leonid Moiseichuk <leonid.moiseichuk@...ia.com>
Cc: David Rientjes <rientjes@...gle.com>
Cc: Anton Vorontsov <anton.vorontsov@...aro.org>
Signed-off-by: Pekka Enberg <penberg@...nel.org>
---
 include/linux/vmevent.h              |   66 +++++++++----
 mm/vmevent.c                         |  186 +++++++++++++++++++++++-----------
 tools/testing/vmevent/vmevent-test.c |   54 +++++++---
 3 files changed, 212 insertions(+), 94 deletions(-)

diff --git a/include/linux/vmevent.h b/include/linux/vmevent.h
index 4f577ee..64357e4 100644
--- a/include/linux/vmevent.h
+++ b/include/linux/vmevent.h
@@ -3,53 +3,83 @@
 
 #include <linux/types.h>
 
+/*
+ * Types of memory attributes that can be monitored through the vmevent API
+ */
 enum {
-	VMEVENT_TYPE_FREE_THRESHOLD	= 1ULL << 0,
-	VMEVENT_TYPE_SAMPLE		= 1ULL << 1,
+	VMEVENT_ATTR_NR_AVAIL_PAGES	= 1UL,
+	VMEVENT_ATTR_NR_FREE_PAGES	= 2UL,
+	VMEVENT_ATTR_NR_SWAP_PAGES	= 3UL,
+
+	VMEVENT_ATTR_MAX		/* non-ABI */
 };
 
+/*
+ * Attribute state bits for threshold
+ */
 enum {
-	VMEVENT_EATTR_NR_AVAIL_PAGES	= 1ULL << 0,
-	VMEVENT_EATTR_NR_FREE_PAGES	= 1ULL << 1,
-	VMEVENT_EATTR_NR_SWAP_PAGES	= 1ULL << 2,
+	/*
+	 * Sample value is less than user-specified value
+	 */
+	VMEVENT_ATTR_STATE_VALUE_LT	= (1UL << 0),
 };
 
-struct vmevent_config {
+struct vmevent_attr {
 	/*
-	 * Size of the struct for ABI extensibility.
+	 * Attribute value in pages (threshold on input, sample on output)
 	 */
-	__u32			size;
+	__u64			value;
 
 	/*
-	 * Notification type bitmask
+	 * Type of the monitored attribute (one of VMEVENT_ATTR_XXX)
 	 */
-	__u64			type;
+	__u32			type;
+
+	/*
+	 * Bitmask of attribute state bits (see VMEVENT_ATTR_STATE_XXX)
+	 */
+	__u32			state;
+};
 
+#define VMEVENT_CONFIG_MAX_ATTRS	32
+
+/*
+ * Configuration structure to get notifications and attribute values
+ */
+struct vmevent_config {
 	/*
-	 * Attributes that are delivered as part of events.
+	 * Size of the struct for ABI extensibility.
 	 */
-	__u64			event_attrs;
+	__u32			size;
 
 	/*
-	 * Threshold of free pages in the system.
+	 * Number of monitored attributes
 	 */
-	__u32			free_pages_threshold;
+	__u32			counter;
 
 	/*
 	 * Sample period in nanoseconds
 	 */
 	__u64			sample_period_ns;
+
+	/*
+	 * Attributes that are monitored and delivered as part of events
+	 */
+	struct vmevent_attr	attrs[VMEVENT_CONFIG_MAX_ATTRS];
 };
 
 struct vmevent_event {
 	/*
-	 * Size of the struct for ABI extensibility.
+	 * Number of attributes in this VM event
 	 */
-	__u32			size;
+	__u32			counter;
 
-	__u64			attrs;
+	__u32			padding;
 
-	__u64			attr_values[];
+	/*
+	 * Attributes for this VM event
+	 */
+	struct vmevent_attr	attrs[];
 };
 
 #endif /* _LINUX_VMEVENT_H */
diff --git a/mm/vmevent.c b/mm/vmevent.c
index 37d2c5f..ab6a043 100644
--- a/mm/vmevent.c
+++ b/mm/vmevent.c
@@ -24,10 +24,10 @@ struct vmevent_watch {
 	bool				pending;
 
 	/*
- 	 * Attributes
- 	 */
+	 * Attributes that are exported as part of delivered VM events.
+	 */
 	unsigned long			nr_attrs;
-	u64				attr_values[64];
+	struct vmevent_attr		*sample_attrs;
 
 	/* sampling */
 	struct hrtimer			timer;
@@ -36,54 +36,87 @@ struct vmevent_watch {
 	wait_queue_head_t		waitq;
 };
 
-static bool vmevent_match(struct vmevent_watch *watch,
-			   struct vmevent_watch_event *event)
+typedef u64 (*vmevent_attr_sample_fn)(struct vmevent_watch *watch);
+
+static u64 vmevent_attr_swap_pages(struct vmevent_watch *watch)
 {
-	if (watch->config.type & VMEVENT_TYPE_FREE_THRESHOLD) {
-		if (event->nr_free_pages > watch->config.free_pages_threshold)
-			return false;
-	}
+#ifdef CONFIG_SWAP
+	struct sysinfo si;
+
+	si_swapinfo(&si);
 
-	return true;
+	return si.totalswap;
+#else
+	return 0;
+#endif
 }
 
-static void vmevent_sample(struct vmevent_watch *watch)
+static u64 vmevent_attr_free_pages(struct vmevent_watch *watch)
+{
+	return global_page_state(NR_FREE_PAGES);
+}
+
+static u64 vmevent_attr_avail_pages(struct vmevent_watch *watch)
 {
-	struct vmevent_watch_event event;
 	struct sysinfo si;
-	int n = 0;
 
-	memset(&event, 0, sizeof(event));
+	si_meminfo(&si);
 
-	event.nr_free_pages	= global_page_state(NR_FREE_PAGES);
+	return si.totalram;
+}
 
-	si_meminfo(&si);
-	event.nr_avail_pages	= si.totalram;
+static vmevent_attr_sample_fn attr_samplers[] = {
+	[VMEVENT_ATTR_NR_AVAIL_PAGES]   = vmevent_attr_avail_pages,
+	[VMEVENT_ATTR_NR_FREE_PAGES]    = vmevent_attr_free_pages,
+	[VMEVENT_ATTR_NR_SWAP_PAGES]    = vmevent_attr_swap_pages,
+};
 
-#ifdef CONFIG_SWAP
-	if (watch->config.event_attrs & VMEVENT_EATTR_NR_SWAP_PAGES) {
-		si_swapinfo(&si);
-		event.nr_swap_pages	= si.totalswap;
+static u64 vmevent_sample_attr(struct vmevent_watch *watch, struct vmevent_attr *attr)
+{
+	vmevent_attr_sample_fn fn = attr_samplers[attr->type];
+
+	return fn(watch);
+}
+
+static bool vmevent_match(struct vmevent_watch *watch)
+{
+	struct vmevent_config *config = &watch->config;
+	int i;
+
+	for (i = 0; i < config->counter; i++) {
+		struct vmevent_attr *attr = &config->attrs[i];
+		u64 value;
+
+		if (!attr->state)
+			continue;
+
+		value = vmevent_sample_attr(watch, attr);
+
+		if (attr->state & VMEVENT_ATTR_STATE_VALUE_LT) {
+			if (value < attr->value)
+				return true;
+		}
 	}
-#endif
 
-	if (!vmevent_match(watch, &event))
+	return false;
+}
+
+static void vmevent_sample(struct vmevent_watch *watch)
+{
+	int i;
+
+	if (!vmevent_match(watch))
 		return;
 
 	mutex_lock(&watch->mutex);
 
 	watch->pending = true;
 
-	if (watch->config.event_attrs & VMEVENT_EATTR_NR_AVAIL_PAGES)
-		watch->attr_values[n++] = event.nr_avail_pages;
-
-	if (watch->config.event_attrs & VMEVENT_EATTR_NR_FREE_PAGES)
-		watch->attr_values[n++] = event.nr_free_pages;
-
-	if (watch->config.event_attrs & VMEVENT_EATTR_NR_SWAP_PAGES)
-		watch->attr_values[n++] = event.nr_swap_pages;
+	for (i = 0; i < watch->nr_attrs; i++) {
+		struct vmevent_attr *attr = &watch->sample_attrs[i];
 
-	watch->nr_attrs = n;
+		attr->value = vmevent_sample_attr(watch, attr);
+	}
 
 	mutex_unlock(&watch->mutex);
 }
@@ -132,43 +165,45 @@ static unsigned int vmevent_poll(struct file *file, poll_table *wait)
 static ssize_t vmevent_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct vmevent_watch *watch = file->private_data;
-	struct vmevent_event event;
+	struct vmevent_event *event;
 	ssize_t ret = 0;
-	u64 attr_size;
-
-	mutex_lock(&watch->mutex);
+	u32 size;
+	int i;
 
-	if (!watch->pending)
-		goto out_unlock;
+	size = sizeof(*event) + watch->nr_attrs * sizeof(struct vmevent_attr);
 
-	attr_size = watch->nr_attrs * sizeof(u64);
+	if (count < size)
+		return -EINVAL;
 
-	memset(&event, 0, sizeof(event));
-	event.size	= sizeof(struct vmevent_event) + attr_size;
-	event.attrs	= watch->config.event_attrs;
+	mutex_lock(&watch->mutex);
 
-	if (count < sizeof(event))
+	if (!watch->pending)
 		goto out_unlock;
 
-	if (copy_to_user(buf, &event, sizeof(event))) {
-		ret = -EFAULT;
+	event = kmalloc(size, GFP_KERNEL);
+	if (!event) {
+		ret = -ENOMEM;
 		goto out_unlock;
 	}
 
-	count -= sizeof(event);
+	for (i = 0; i < watch->nr_attrs; i++) {
+		memcpy(&event->attrs[i], &watch->sample_attrs[i], sizeof(struct vmevent_attr));
+	}
 
-	if (count > attr_size)
-		count = attr_size;
+	event->counter = watch->nr_attrs;
 
-	if (copy_to_user(buf + sizeof(event), watch->attr_values, count)) {
+	if (copy_to_user(buf, event, size)) {
 		ret = -EFAULT;
-		goto out_unlock;
+		goto out_free;
 	}
 
 	ret = count;
 
 	watch->pending = false;
 
+out_free:
+	kfree(event);
+
 out_unlock:
 	mutex_unlock(&watch->mutex);
 
@@ -207,6 +242,42 @@ static struct vmevent_watch *vmevent_watch_alloc(void)
 	return watch;
 }
 
+static int vmevent_setup_watch(struct vmevent_watch *watch)
+{
+	struct vmevent_config *config = &watch->config;
+	struct vmevent_attr *attrs = NULL;
+	unsigned long nr;
+	int i;
+
+	nr = 0;
+
+	for (i = 0; i < config->counter; i++) {
+		struct vmevent_attr *attr = &config->attrs[i];
+		size_t size;
+		void *new;
+
+		if (attr->type >= VMEVENT_ATTR_MAX)
+			continue;
+
+		size = sizeof(struct vmevent_attr) * (nr + 1);
+
+		new = krealloc(attrs, size, GFP_KERNEL);
+		if (!new) {
+			kfree(attrs);
+			return -ENOMEM;
+		}
+
+		attrs = new;
+
+		attrs[nr++].type = attr->type;
+	}
+
+	watch->sample_attrs	= attrs;
+	watch->nr_attrs		= nr;
+
+	return 0;
+}
+
 static int vmevent_copy_config(struct vmevent_config __user *uconfig,
 				struct vmevent_config *config)
 {
@@ -216,14 +287,6 @@ static int vmevent_copy_config(struct vmevent_config __user *uconfig,
 	if (ret)
 		return -EFAULT;
 
-	if (!config->type)
-		return -EINVAL;
-
-	if (config->type & VMEVENT_TYPE_SAMPLE) {
-		if (config->sample_period_ns < NSEC_PER_MSEC)
-			return -EINVAL;
-	}
-
 	return 0;
 }
 
@@ -243,6 +306,10 @@ SYSCALL_DEFINE1(vmevent_fd,
 	if (err)
 		goto err_free;
 
+	err = vmevent_setup_watch(watch);
+	if (err)
+		goto err_free;
+
 	fd = get_unused_fd_flags(O_RDONLY);
 	if (fd < 0) {
 		err = fd;
@@ -257,8 +324,7 @@ SYSCALL_DEFINE1(vmevent_fd,
 
 	fd_install(fd, file);
 
-	if (watch->config.type & VMEVENT_TYPE_SAMPLE)
-		vmevent_start_timer(watch);
+	vmevent_start_timer(watch);
 
 	return fd;
 
diff --git a/tools/testing/vmevent/vmevent-test.c b/tools/testing/vmevent/vmevent-test.c
index f268034..534f827 100644
--- a/tools/testing/vmevent/vmevent-test.c
+++ b/tools/testing/vmevent/vmevent-test.c
@@ -32,12 +32,24 @@ int main(int argc, char *argv[])
 	printf("Physical pages: %ld\n", phys_pages);
 
 	config = (struct vmevent_config) {
-		.type			= VMEVENT_TYPE_SAMPLE | VMEVENT_TYPE_FREE_THRESHOLD,
-		.event_attrs		= VMEVENT_EATTR_NR_AVAIL_PAGES
-					| VMEVENT_EATTR_NR_FREE_PAGES
-					| VMEVENT_EATTR_NR_SWAP_PAGES,
 		.sample_period_ns	= 1000000000L,
-		.free_pages_threshold	= phys_pages,
+		.counter		= 4,
+		.attrs			= {
+			[0]			= {
+				.type	= VMEVENT_ATTR_NR_FREE_PAGES,
+				.state	= VMEVENT_ATTR_STATE_VALUE_LT,
+				.value	= phys_pages,
+			},
+			[1]			= {
+				.type	= VMEVENT_ATTR_NR_AVAIL_PAGES,
+			},
+			[2]			= {
+				.type	= VMEVENT_ATTR_NR_SWAP_PAGES,
+			},
+			[3]			= {
+				.type	= 0xffff, /* invalid */
+			},
+		},
 	};
 
 	fd = sys_vmevent_fd(&config);
@@ -47,9 +59,10 @@ int main(int argc, char *argv[])
 	}
 
 	for (i = 0; i < 10; i++) {
-		char buffer[sizeof(struct vmevent_event) + 3 * sizeof(uint64_t)];
+		char buffer[sizeof(struct vmevent_event) + 4 * sizeof(struct vmevent_attr)];
 		struct vmevent_event *event;
 		int n = 0;
+		int idx;
 
 		pollfd.fd		= fd;
 		pollfd.events		= POLLIN;
@@ -68,16 +81,25 @@ int main(int argc, char *argv[])
 
 		event = (void *) buffer;
 
-		printf("VM event (%Lu bytes):\n", event->size);
-
-		if (event->attrs & VMEVENT_EATTR_NR_AVAIL_PAGES)
-			printf("  VMEVENT_EATTR_NR_AVAIL_PAGES: %Lu\n", event->attr_values[n++]);
-
-		if (event->attrs & VMEVENT_EATTR_NR_FREE_PAGES)
-			printf("  VMEVENT_EATTR_NR_FREE_PAGES : %Lu\n", event->attr_values[n++]);
-
-		if (event->attrs & VMEVENT_EATTR_NR_SWAP_PAGES)
-			printf("  VMEVENT_EATTR_NR_SWAP_PAGES : %Lu\n", event->attr_values[n++]);
+		printf("VM event (%u attributes):\n", event->counter);
+
+		for (idx = 0; idx < event->counter; idx++) {
+			struct vmevent_attr *attr = &event->attrs[idx];
+
+			switch (attr->type) {
+			case VMEVENT_ATTR_NR_AVAIL_PAGES:
+				printf("  VMEVENT_ATTR_NR_AVAIL_PAGES: %Lu\n", attr->value);
+				break;
+			case VMEVENT_ATTR_NR_FREE_PAGES:
+				printf("  VMEVENT_ATTR_NR_FREE_PAGES: %Lu\n", attr->value);
+				break;
+			case VMEVENT_ATTR_NR_SWAP_PAGES:
+				printf("  VMEVENT_ATTR_NR_SWAP_PAGES: %Lu\n", attr->value);
+				break;
+			default:
+				printf("  Unknown attribute: %Lu\n", attr->value);
+			}
+		}
 	}
 	if (close(fd) < 0) {
 		perror("close failed");
-- 
1.7.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ