lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20201218141534.9918-1-bostroesser@gmail.com>
Date:   Fri, 18 Dec 2020 15:15:34 +0100
From:   Bodo Stroesser <bostroesser@...il.com>
To:     linux-scsi@...r.kernel.org, target-devel@...r.kernel.org,
        linux-kernel@...r.kernel.org,
        "Martin K. Petersen" <martin.petersen@...cle.com>
Cc:     Bodo Stroesser <bostroesser@...il.com>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Mike Christie <michael.christie@...cle.com>
Subject: [PATCH] scsi: target: tcmu: Fix wrong uio handling causing big memory leak

tcmu calls uio_unregister_device from tcmu_destroy_device.
After that uio will never call tcmu_release for this device.
If userspace still had the uio device open and / or mmap'ed
during uio_unregister_device, tcmu_release will not be called and
udev->kref will never go down to 0.

So tcmu in this situation will not free:
 - cmds or tmrs still in the queue or the ring
 - all pages allocated for mailbox and cmd_ring (vmalloc)
 - all pages allocated for data space
 - the udev itself

The vmalloc'ed area is 8 MB, amount of pages allocated for data
space depends on previous usage of the tcmu device. Theoretically
that can be up to 1GB.

This patch moves the call of uio_unregister_device to the
beginning of tcmu_dev_kref_release, which is called when
udev->kref drops down to zero. So we know, that uio device is
closed and unmap'ed.

In case tcmu_realease drops the last kref, we would end up doing
the uio_unregister_device from a function called by uio_release,
which causes the process to block forever.
So we now do the kref_put from new worker function
tcmu_release_work_fn which is scheduled by tcmu_release.

To make userspace still aware of the device being deleted,
tcmu_destroy_device instead of uio_unregister_device now does:
 - sets a bit in udev, so that tcmu_open and tcmu_mmap can check
   and fail with -EIO
 - resets udev->uio_info->irq to 0, so uio will fail read() and
   write() with -EIO
 - wakes up userspace possibly waiting in read(), so the read
   fails with -EIO

Avoid possible races in tcmu_open by replacing kref_get with
kref_get_unless_zero.

Signed-off-by: Bodo Stroesser <bostroesser@...il.com>
---
 drivers/target/target_core_user.c | 54 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 0458bfb143f8..080760985ebf 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -21,6 +21,7 @@
 #include <linux/configfs.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
+#include <linux/delay.h>
 #include <net/genetlink.h>
 #include <scsi/scsi_common.h>
 #include <scsi/scsi_proto.h>
@@ -109,6 +110,7 @@ struct tcmu_nl_cmd {
 struct tcmu_dev {
 	struct list_head node;
 	struct kref kref;
+	struct work_struct release_work;
 
 	struct se_device se_dev;
 
@@ -119,6 +121,7 @@ struct tcmu_dev {
 #define TCMU_DEV_BIT_BROKEN 1
 #define TCMU_DEV_BIT_BLOCKED 2
 #define TCMU_DEV_BIT_TMR_NOTIFY 3
+#define TCMU_DEV_BIT_GOING_DOWN 4
 	unsigned long flags;
 
 	struct uio_info uio_info;
@@ -1527,6 +1530,8 @@ static void tcmu_detach_hba(struct se_hba *hba)
 	hba->hba_ptr = NULL;
 }
 
+static void tcmu_release_work_fn(struct work_struct *work);
+
 static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 {
 	struct tcmu_dev *udev;
@@ -1542,6 +1547,8 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 		return NULL;
 	}
 
+	INIT_WORK(&udev->release_work, tcmu_release_work_fn);
+
 	udev->hba = hba;
 	udev->cmd_time_out = TCMU_TIME_OUT;
 	udev->qfull_time_out = -1;
@@ -1719,6 +1726,9 @@ static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
 {
 	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
 
+	if (test_bit(TCMU_DEV_BIT_GOING_DOWN, &udev->flags))
+		return -EIO;
+
 	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &tcmu_vm_ops;
 
@@ -1735,12 +1745,17 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
 {
 	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
 
+	if (test_bit(TCMU_DEV_BIT_GOING_DOWN, &udev->flags))
+		return -EIO;
+
 	/* O_EXCL not supported for char devs, so fake it? */
 	if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags))
 		return -EBUSY;
 
 	udev->inode = inode;
-	kref_get(&udev->kref);
+	if (!kref_get_unless_zero(&udev->kref))
+		/* Race between open and device going down */
+		return -EIO;
 
 	pr_debug("open\n");
 
@@ -1799,6 +1814,8 @@ static void tcmu_dev_kref_release(struct kref *kref)
 	bool all_expired = true;
 	int i;
 
+	uio_unregister_device(&udev->uio_info);
+
 	vfree(udev->mb_addr);
 	udev->mb_addr = NULL;
 
@@ -1827,6 +1844,15 @@ static void tcmu_dev_kref_release(struct kref *kref)
 	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
 }
 
+static void tcmu_release_work_fn(struct work_struct *work)
+{
+	struct tcmu_dev *udev = container_of(work, struct tcmu_dev,
+					     release_work);
+
+	/* release ref from open */
+	kref_put(&udev->kref, tcmu_dev_kref_release);
+}
+
 static int tcmu_release(struct uio_info *info, struct inode *inode)
 {
 	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
@@ -1834,8 +1860,17 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
 	clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
 
 	pr_debug("close\n");
-	/* release ref from open */
-	kref_put(&udev->kref, tcmu_dev_kref_release);
+
+	/*
+	 * We must not put kref directly from here, since dropping down kref to
+	 * zero would implicitly call tcmu_dev_kref_release, which calls
+	 * uio_unregister_device --> process hangs forever, since tcmu_release
+	 * is called from uio.
+	 * So we leave it to tcmu_release_work_fn to put the kref.
+	 */
+	while (!schedule_work(&udev->release_work))
+		usleep_range(1000, 5000);
+
 	return 0;
 }
 
@@ -2166,7 +2201,18 @@ static void tcmu_destroy_device(struct se_device *dev)
 
 	tcmu_send_dev_remove_event(udev);
 
-	uio_unregister_device(&udev->uio_info);
+	/*
+	 * We must not call uio_unregister_device here. If there is a userspace
+	 * process with open or mmap'ed uio device, uio would not call
+	 * tcmu_release on later unmap or close.
+	 */
+
+	/* reset uio_info->irq, so uio will reject read() and write() */
+	udev->uio_info.irq = 0;
+	/* Set bit, so we can reject later calls to tcmu_open and tcmu_mmap */
+	set_bit(TCMU_DEV_BIT_GOING_DOWN, &udev->flags);
+	/* wake up possible sleeper in uio_read(), it will return -EIO */
+	uio_event_notify(&udev->uio_info);
 
 	/* release ref from configure */
 	kref_put(&udev->kref, tcmu_dev_kref_release);
-- 
2.12.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ