Message-Id: <1440081408-12302-10-git-send-email-willemb@google.com>
Date:	Thu, 20 Aug 2015 10:36:48 -0400
From:	Willem de Bruijn <willemb@...gle.com>
To:	netdev@...r.kernel.org
Cc:	mst@...hat.com, jasowang@...hat.com,
	Willem de Bruijn <willemb@...gle.com>
Subject: [PATCH net-next RFC 09/10] sock: sendmsg zerocopy ulimit

From: Willem de Bruijn <willemb@...gle.com>

Bound the number of pages that a userspace process may pin.

Account pinned pages to the locked page count (`ulimit -l`) of the
caller and fail beyond the administrator-controlled threshold, as
infiniband does.
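
For context, the threshold enforced here is the caller's
RLIMIT_MEMLOCK. A minimal userspace sketch (not part of this patch)
that prints the limit the accounting is checked against:

    #include <stdio.h>
    #include <sys/resource.h>

    /* RLIMIT_MEMLOCK is the limit that `ulimit -l` reports
     * (there in kilobytes, here in bytes).
     */
    int main(void)
    {
            struct rlimit rl;

            if (getrlimit(RLIMIT_MEMLOCK, &rl))
                    return 1;
            printf("memlock soft limit: %llu bytes\n",
                   (unsigned long long)rl.rlim_cur);
            return 0;
    }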

Use an atomic variable to avoid having to take mmap_sem. Taking that
lock is expensive, and because it may sleep while ubuf_info are often
destroyed in atomic context, unaccounting on destruction would have
to be deferred to a worker.
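
For illustration only, a hypothetical sketch (invented names, not part
of this patch) of the worker-based teardown that the atomic counter
makes unnecessary; an atomic-context destructor would have to queue
this via schedule_work() because down_write() may sleep:

    #include <linux/mm_types.h>
    #include <linux/rwsem.h>
    #include <linux/sched.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    struct mmpin_work {
            struct work_struct work;
            struct mm_struct *mm;
            int num_pg;
    };

    /* Runs in process context, where taking mmap_sem is allowed. */
    static void mmpin_release_workfn(struct work_struct *work)
    {
            struct mmpin_work *mw;

            mw = container_of(work, struct mmpin_work, work);
            down_write(&mw->mm->mmap_sem);
            mw->mm->pinned_vm -= mw->num_pg;
            up_write(&mw->mm->mmap_sem);
            mmdrop(mw->mm);
            kfree(mw);
    }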

The mm_struct.pinned_vm_ field added here is a hack. A non-RFC
patchset would instead convert the existing unsigned long pinned_vm
and all its callers (infiniband) to atomic_long_t.
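
A rough sketch of that conversion (hypothetical helper names),
mirroring the cmpxchg loop in this patch but on an atomic_long_t:

    #include <linux/atomic.h>
    #include <linux/errno.h>
    #include <linux/mm_types.h>

    /* Assumes mm_struct.pinned_vm has been converted to atomic_long_t. */
    static int mm_pin_pages(struct mm_struct *mm, long num_pg, long max_pg)
    {
            long old_pg, new_pg;

            do {
                    old_pg = atomic_long_read(&mm->pinned_vm);
                    new_pg = old_pg + num_pg;
                    if (new_pg > max_pg)
                            return -ENOMEM;
            } while (atomic_long_cmpxchg(&mm->pinned_vm, old_pg,
                                         new_pg) != old_pg);

            return 0;
    }

    static void mm_unpin_pages(struct mm_struct *mm, long num_pg)
    {
            atomic_long_sub(num_pg, &mm->pinned_vm);
    }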

Signed-off-by: Willem de Bruijn <willemb@...gle.com>
---
 include/linux/mm_types.h |  1 +
 include/linux/skbuff.h   |  5 +++++
 net/core/skbuff.c        | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0038ac7..dc6e12a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -402,6 +402,7 @@ struct mm_struct {
 	unsigned long total_vm;		/* Total pages mapped */
 	unsigned long locked_vm;	/* Pages that have PG_mlocked set */
 	unsigned long pinned_vm;	/* Refcount permanently increased */
+	atomic_t pinned_vm_;
 	unsigned long shared_vm;	/* Shared pages (files) */
 	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE */
 	unsigned long stack_vm;		/* VM_GROWSUP/DOWN */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c1ea855..95a9f75 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -331,6 +331,11 @@ struct ubuf_info {
 		};
 	};
 	atomic_t refcnt;
+
+	struct mmpin {
+		struct mm_struct *mm;
+		int num_pg;
+	} mmp;
 };
 
 #define skb_uarg(SKB)	((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4ae60ee..3742968 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -840,6 +840,42 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
+static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+{
+	unsigned long max_pg, num_pg, new_pg, old_pg;
+	struct mm_struct *mm;
+
+	if (capable(CAP_IPC_LOCK) || !size)
+		return 0;
+
+	num_pg = (size >> PAGE_SHIFT) + 2;	/* worst case */
+	max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	mm = mmp->mm ? : current->mm;
+
+	do {
+		old_pg = atomic_read(&mm->pinned_vm_);
+		new_pg = old_pg + num_pg;
+		if (new_pg > max_pg)
+			return -ENOMEM;
+	} while (atomic_cmpxchg(&mm->pinned_vm_, old_pg, new_pg) != old_pg);
+
+	if (!mmp->mm) {
+		mmp->mm = mm;
+		atomic_inc(&mm->mm_count);
+	}
+
+	mmp->num_pg += num_pg;
+	return 0;
+}
+
+static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+{
+	if (mmp->mm) {
+		atomic_sub(mmp->num_pg, &mmp->mm->pinned_vm_);
+		mmdrop(mmp->mm);
+	}
+}
+
 /* must only be called from process context */
 struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
 {
@@ -852,6 +888,12 @@ struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
 
 	BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
 	uarg = (void *)skb->cb;
+	uarg->mmp.mm = NULL;
+
+	if (mm_account_pinned_pages(&uarg->mmp, size)) {
+		kfree_skb(skb);
+		return NULL;
+	}
 
 	uarg->callback = sock_zerocopy_callback;
 	uarg->id = ((u16)atomic_inc_return(&sk->sk_zckey)) - 1;
@@ -880,6 +922,8 @@ struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
 
 		next = atomic_read(&sk->sk_zckey);
 		if ((u16)(uarg->id + uarg->len) == next) {
+			if (mm_account_pinned_pages(&uarg->mmp, size))
+				return NULL;
 			uarg->len++;
 			atomic_set(&sk->sk_zckey, ++next);
 			return uarg;
@@ -946,6 +990,8 @@ EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
 void sock_zerocopy_put(struct ubuf_info *uarg)
 {
 	if (uarg && atomic_dec_and_test(&uarg->refcnt)) {
+		mm_unaccount_pinned_pages(&uarg->mmp);
+
 		/* if !len, there was only 1 call, and it was aborted */
 		if (uarg->callback && uarg->len)
 			uarg->callback(uarg, true);
-- 
2.5.0.276.gf5e568e
