lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20110607125900.28590.16071.sendpatchset@localhost6.localdomain6>
Date:	Tue, 07 Jun 2011 18:29:00 +0530
From:	Srikar Dronamraju <srikar@...ux.vnet.ibm.com>
To:	Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...e.hu>
Cc:	Steven Rostedt <rostedt@...dmis.org>,
	Srikar Dronamraju <srikar@...ux.vnet.ibm.com>,
	Linux-mm <linux-mm@...ck.org>,
	Arnaldo Carvalho de Melo <acme@...radead.org>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Ananth N Mavinakayanahalli <ananth@...ibm.com>,
	Hugh Dickins <hughd@...gle.com>,
	Christoph Hellwig <hch@...radead.org>,
	Jonathan Corbet <corbet@....net>,
	Thomas Gleixner <tglx@...utronix.de>,
	Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>,
	Oleg Nesterov <oleg@...hat.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Jim Keniston <jkenisto@...ux.vnet.ibm.com>,
	Roland McGrath <roland@...k.frob.com>,
	Andi Kleen <andi@...stfloor.org>,
	LKML <linux-kernel@...r.kernel.org>
Subject: [PATCH v4 3.0-rc2-tip 4/22]  4: Uprobes: register/unregister probes.


A probe is specified by a file:offset.  While registering, a breakpoint
is inserted for the first consumer, On subsequent probes, the consumer
gets appended to the existing consumers. While unregistering a
breakpoint is removed if the consumer happens to be the last consumer.
All other unregisterations, the consumer is deleted from the list of
consumers.

Probe specifications are maintained in a rb tree. A probe specification
is converted into a uprobe before store in a rb tree.  A uprobe can be
shared by many consumers.

Given a inode, we get a list of mm's that have mapped the inode.
However we want to limit the probes to certain processes/threads.  The
filtering should be at thread level. To limit the probes to a certain
processes/threads, we would want to walk through the list of threads
whose mm member refer to a given mm.

Here are the options that I thought of:
1. Use mm->owner and walk thro the thread_group of mm->owner, siblings
of mm->owner, siblings of parent of mm->owner.  This should be
good list to traverse. Not sure if this is an exhaustive
enough list that all tasks that have a mm set to this mm_struct are
walked through.

2. Install probes on all mm's that have mapped the probes and filter
only at probe hit time.

3. walk thro do_each_thread; while_each_thread; I think this will catch
all tasks that have a mm set to the given mm. However this might
be too heavy esp if mm corresponds to a library.

4. add a list_head element to the mm struct and update the list whenever
the task->mm thread gets updated. This could mean extending the current
mm->owner. However there is some maintainance overhead.

Currently we use the second approach, i.e probe all mm's that have mapped
the probes and filter only at probe hit.

Signed-off-by: Srikar Dronamraju <srikar@...ux.vnet.ibm.com>
---
 include/linux/mm_types.h |    5 +
 include/linux/uprobes.h  |   32 +++++
 kernel/uprobes.c         |  314 ++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 340 insertions(+), 11 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 027935c..7bfef2e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -316,6 +316,11 @@ struct mm_struct {
 #ifdef CONFIG_CPUMASK_OFFSTACK
 	struct cpumask cpumask_allocation;
 #endif
+#ifdef CONFIG_UPROBES
+	unsigned long uprobes_vaddr;
+	struct list_head uprobes_list; /* protected by uprobes_mutex */
+	atomic_t uprobes_count;
+#endif
 };
 
 static inline void mm_init_cpumask(struct mm_struct *mm)
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 9187df3..4087cc3 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -31,6 +31,7 @@
  * ARCH_SUPPORTS_UPROBES is not defined.
  */
 typedef u8 uprobe_opcode_t;
+struct uprobe_arch_info	{};		/* arch specific info*/
 #endif /* CONFIG_ARCH_SUPPORTS_UPROBES */
 
 /* Post-execution fixups.  Some architectures may define others. */
@@ -62,6 +63,19 @@ struct uprobe_consumer {
 	struct uprobe_consumer *next;
 };
 
+struct uprobe {
+	struct rb_node		rb_node;	/* node in the rb tree */
+	atomic_t		ref;
+	struct rw_semaphore	consumer_rwsem;
+	struct uprobe_arch_info	arch_info;	/* arch specific info if any */
+	struct uprobe_consumer	*consumers;
+	struct inode		*inode;		/* Also hold a ref to inode */
+	loff_t			offset;
+	u8			insn[MAX_UINSN_BYTES];	/* orig instruction */
+	u16			fixups;
+	int			copy;
+};
+
 /*
  * Most architectures can use the default versions of @read_opcode(),
  * @set_bkpt(), @set_orig_insn(), and @is_bkpt_insn();
@@ -90,4 +104,22 @@ struct uprobe_consumer {
  *	the probed instruction stream.  @tskinfo is as for @pre_xol().
  *	You must provide this function.
  */
+
+#ifdef CONFIG_UPROBES
+extern int register_uprobe(struct inode *inode, loff_t offset,
+				struct uprobe_consumer *consumer);
+extern void unregister_uprobe(struct inode *inode, loff_t offset,
+				struct uprobe_consumer *consumer);
+#else /* CONFIG_UPROBES is not defined */
+static inline int register_uprobe(struct inode *inode, loff_t offset,
+				struct uprobe_consumer *consumer)
+{
+	return -ENOSYS;
+}
+static inline void unregister_uprobe(struct inode *inode, loff_t offset,
+				struct uprobe_consumer *consumer)
+{
+}
+
+#endif /* CONFIG_UPROBES */
 #endif	/* _LINUX_UPROBES_H */
diff --git a/kernel/uprobes.c b/kernel/uprobes.c
index aace4d9..c6c2f5e 100644
--- a/kernel/uprobes.c
+++ b/kernel/uprobes.c
@@ -34,17 +34,6 @@
 #include <linux/mmu_notifier.h> /* needed for set_pte_at_notify */
 #include <linux/swap.h>	/* needed for try_to_free_swap */
 
-struct uprobe {
-	struct rb_node		rb_node;	/* node in the rb tree */
-	atomic_t		ref;		/* lifetime muck */
-	struct rw_semaphore	consumer_rwsem;
-	struct uprobe_consumer	*consumers;
-	struct inode		*inode;		/* we hold a ref */
-	loff_t			offset;
-	u8			insn[MAX_UINSN_BYTES];
-	u16			fixups;
-};
-
 static bool valid_vma(struct vm_area_struct *vma)
 {
 	if (!vma->vm_file)
@@ -517,3 +506,306 @@ static bool del_consumer(struct uprobe *uprobe,
 	up_write(&uprobe->consumer_rwsem);
 	return ret;
 }
+
+static struct task_struct *get_mm_owner(struct mm_struct *mm)
+{
+	struct task_struct *tsk;
+
+	rcu_read_lock();
+	tsk = rcu_dereference(mm->owner);
+	if (tsk)
+		get_task_struct(tsk);
+	rcu_read_unlock();
+	return tsk;
+}
+
+static int install_breakpoint(struct mm_struct *mm, struct uprobe *uprobe)
+{
+	int ret = 0;
+
+	/*TODO: install breakpoint */
+	if (!ret)
+		atomic_inc(&mm->uprobes_count);
+	return ret;
+}
+
+static int __remove_breakpoint(struct mm_struct *mm, struct uprobe *uprobe)
+{
+	int ret = 0;
+
+	/*TODO: remove breakpoint */
+	if (!ret)
+		atomic_dec(&mm->uprobes_count);
+
+	return ret;
+}
+
+static void remove_breakpoint(struct mm_struct *mm, struct uprobe *uprobe)
+{
+	down_read(&mm->mmap_sem);
+	__remove_breakpoint(mm, uprobe);
+	list_del(&mm->uprobes_list);
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+}
+
+/*
+ * There could be threads that have hit the breakpoint and are entering the
+ * notifier code and trying to acquire the uprobes_treelock. The thread
+ * calling delete_uprobe() that is removing the uprobe from the rb_tree can
+ * race with these threads and might acquire the uprobes_treelock compared
+ * to some of the breakpoint hit threads. In such a case, the breakpoint hit
+ * threads will not find the uprobe. Finding if a "trap" instruction was
+ * present at the interrupting address is racy. Hence provide some extra
+ * time (by way of synchronize_sched() for breakpoint hit threads to acquire
+ * the uprobes_treelock before the uprobe is removed from the rbtree.
+ */
+static void delete_uprobe(struct uprobe *uprobe)
+{
+	unsigned long flags;
+
+	synchronize_sched();
+	spin_lock_irqsave(&uprobes_treelock, flags);
+	rb_erase(&uprobe->rb_node, &uprobes_tree);
+	spin_unlock_irqrestore(&uprobes_treelock, flags);
+	iput(uprobe->inode);
+}
+
+static DEFINE_MUTEX(uprobes_mutex);
+
+/*
+ * register_uprobe - register a probe
+ * @inode: the file in which the probe has to be placed.
+ * @offset: offset from the start of the file.
+ * @consumer: information on howto handle the probe..
+ *
+ * Apart from the access refcount, register_uprobe() takes a creation
+ * refcount (thro alloc_uprobe) if and only if this @uprobe is getting
+ * inserted into the rbtree (i.e first consumer for a @inode:@offset
+ * tuple).  Creation refcount stops unregister_uprobe from freeing the
+ * @uprobe even before the register operation is complete. Creation
+ * refcount is released when the last @consumer for the @uprobe
+ * unregisters.
+ *
+ * Return errno if it cannot successully install probes
+ * else return 0 (success)
+ */
+int register_uprobe(struct inode *inode, loff_t offset,
+				struct uprobe_consumer *consumer)
+{
+	struct prio_tree_iter iter;
+	struct list_head try_list, success_list;
+	struct address_space *mapping;
+	struct mm_struct *mm, *tmpmm;
+	struct vm_area_struct *vma;
+	struct uprobe *uprobe;
+	int ret = -1;
+
+	if (!inode || !consumer || consumer->next)
+		return -EINVAL;
+
+	if (offset > inode->i_size)
+		return -EINVAL;
+
+	uprobe = alloc_uprobe(inode, offset);
+	if (!uprobe)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&try_list);
+	INIT_LIST_HEAD(&success_list);
+	mapping = inode->i_mapping;
+
+	mutex_lock(&uprobes_mutex);
+	if (uprobe->consumers) {
+		ret = 0;
+		goto consumers_add;
+	}
+
+	mutex_lock(&mapping->i_mmap_mutex);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, 0) {
+		loff_t vaddr;
+		struct task_struct *tsk;
+
+		if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
+			continue;
+
+		mm = vma->vm_mm;
+		if (!valid_vma(vma)) {
+			mmput(mm);
+			continue;
+		}
+
+		vaddr = vma->vm_start + offset;
+		vaddr -= vma->vm_pgoff << PAGE_SHIFT;
+		if (vaddr < vma->vm_start || vaddr > vma->vm_end) {
+			/* Not in this vma */
+			mmput(mm);
+			continue;
+		}
+		tsk = get_mm_owner(mm);
+		if (tsk && vaddr > TASK_SIZE_OF(tsk)) {
+			/*
+			 * We cannot have a virtual address that is
+			 * greater than TASK_SIZE_OF(tsk)
+			 */
+			put_task_struct(tsk);
+			mmput(mm);
+			continue;
+		}
+		put_task_struct(tsk);
+		mm->uprobes_vaddr = (unsigned long) vaddr;
+		list_add(&mm->uprobes_list, &try_list);
+	}
+	mutex_unlock(&mapping->i_mmap_mutex);
+
+	if (list_empty(&try_list)) {
+		ret = 0;
+		goto consumers_add;
+	}
+	list_for_each_entry_safe(mm, tmpmm, &try_list, uprobes_list) {
+		down_read(&mm->mmap_sem);
+		ret = install_breakpoint(mm, uprobe);
+
+		if (ret && (ret != -ESRCH || ret != -EEXIST)) {
+			up_read(&mm->mmap_sem);
+			break;
+		}
+		if (!ret)
+			list_move(&mm->uprobes_list, &success_list);
+		else {
+			/*
+			 * install_breakpoint failed as there are no active
+			 * threads for the mm; ignore the error.
+			 */
+			list_del(&mm->uprobes_list);
+			mmput(mm);
+		}
+		up_read(&mm->mmap_sem);
+	}
+
+	if (list_empty(&try_list)) {
+		/*
+		 * All install_breakpoints were successful;
+		 * cleanup successful entries.
+		 */
+		ret = 0;
+		list_for_each_entry_safe(mm, tmpmm, &success_list,
+						uprobes_list) {
+			list_del(&mm->uprobes_list);
+			mmput(mm);
+		}
+		goto consumers_add;
+	}
+
+	/*
+	 * Atleast one unsuccessful install_breakpoint;
+	 * remove successful probes and cleanup untried entries.
+	 */
+	list_for_each_entry_safe(mm, tmpmm, &success_list, uprobes_list)
+		remove_breakpoint(mm, uprobe);
+	list_for_each_entry_safe(mm, tmpmm, &try_list, uprobes_list) {
+		list_del(&mm->uprobes_list);
+		mmput(mm);
+	}
+	delete_uprobe(uprobe);
+	goto put_unlock;
+
+consumers_add:
+	add_consumer(uprobe, consumer);
+
+put_unlock:
+	mutex_unlock(&uprobes_mutex);
+	put_uprobe(uprobe); /* drop access ref */
+	return ret;
+}
+
+/*
+ * unregister_uprobe - unregister a already registered probe.
+ * @inode: the file in which the probe has to be removed.
+ * @offset: offset from the start of the file.
+ * @consumer: identify which probe if multiple probes are colocated.
+ */
+void unregister_uprobe(struct inode *inode, loff_t offset,
+				struct uprobe_consumer *consumer)
+{
+	struct prio_tree_iter iter;
+	struct list_head tmp_list;
+	struct address_space *mapping;
+	struct mm_struct *mm, *tmpmm;
+	struct vm_area_struct *vma;
+	struct uprobe *uprobe;
+
+	if (!inode || !consumer)
+		return;
+
+	uprobe = find_uprobe(inode, offset);
+	if (!uprobe) {
+		pr_debug("No uprobe found with inode:offset %p %lld\n",
+				inode, offset);
+		return;
+	}
+
+	if (!del_consumer(uprobe, consumer)) {
+		pr_debug("No uprobe found with consumer %p\n",
+				consumer);
+		return;
+	}
+
+	INIT_LIST_HEAD(&tmp_list);
+
+	mapping = inode->i_mapping;
+
+	mutex_lock(&uprobes_mutex);
+	if (uprobe->consumers)
+		goto put_unlock;
+
+	mutex_lock(&mapping->i_mmap_mutex);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, 0) {
+		struct task_struct *tsk;
+
+		if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
+			continue;
+
+		mm = vma->vm_mm;
+
+		if (!atomic_read(&mm->uprobes_count)) {
+			mmput(mm);
+			continue;
+		}
+
+		if (valid_vma(vma)) {
+			loff_t vaddr;
+
+			vaddr = vma->vm_start + offset;
+			vaddr -= vma->vm_pgoff << PAGE_SHIFT;
+			if (vaddr < vma->vm_start || vaddr > vma->vm_end) {
+				/* Not in this vma */
+				mmput(mm);
+				continue;
+			}
+			tsk = get_mm_owner(mm);
+			if (tsk && vaddr > TASK_SIZE_OF(tsk)) {
+				/*
+				 * We cannot have a virtual address that is
+				 * greater than TASK_SIZE_OF(tsk)
+				 */
+				put_task_struct(tsk);
+				mmput(mm);
+				continue;
+			}
+			put_task_struct(tsk);
+			mm->uprobes_vaddr = (unsigned long) vaddr;
+			list_add(&mm->uprobes_list, &tmp_list);
+		} else
+			mmput(mm);
+	}
+	mutex_unlock(&mapping->i_mmap_mutex);
+	list_for_each_entry_safe(mm, tmpmm, &tmp_list, uprobes_list)
+		remove_breakpoint(mm, uprobe);
+
+	delete_uprobe(uprobe);
+
+put_unlock:
+	mutex_unlock(&uprobes_mutex);
+	put_uprobe(uprobe); /* drop access ref */
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ