lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20221102212429.623848-1-Felix.Kuehling@amd.com>
Date:   Wed, 2 Nov 2022 17:24:29 -0400
From:   Felix Kuehling <Felix.Kuehling@....com>
To:     <amd-gfx@...ts.freedesktop.org>
CC:     <linux-kernel@...r.kernel.org>, Jann Horn <jannh@...gle.com>,
        "Rajneesh Bhardwaj" <Rajneesh.Bhardwaj@....com>
Subject: [PATCH v2] drm/amdkfd: Fix error handling in criu_checkpoint

Checkpoint BOs last. That way we don't need to close dmabuf FDs if
something else fails later. This avoids problematic access to user mode
memory in the error handling code path.

criu_checkpoint_bos has its own error handling and cleanup that does not
depend on access to user memory.

criu_restore is updated to match the order in which objects are saved to
make sure restored BOs use the correct private data. Since this is a
change in the layout of the checkpoint private data, bump
KFD_CRIU_PRIV_VERSION.

Fixes: be072b06c739 ("drm/amdkfd: CRIU export BOs as prime dmabuf objects")
Reported-by: Jann Horn <jannh@...gle.com>
CC: Rajneesh Bhardwaj <Rajneesh.Bhardwaj@....com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@....com>

---

v2: Also changed the order on restore and bump KFD_CRIU_PRIV_VERSION
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 31 ++++++++----------------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  7 ++++--
 2 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 5feaba6a77de..666edcb40354 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1994,38 +1994,27 @@ static int criu_checkpoint(struct file *filep,
 	if (ret)
 		goto exit_unlock;
 
-	ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
-			    (uint8_t __user *)args->priv_data, &priv_offset);
-	if (ret)
-		goto exit_unlock;
-
 	if (num_objects) {
 		ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
 						 &priv_offset);
 		if (ret)
-			goto close_bo_fds;
+			goto exit_unlock;
 
 		ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
 						 &priv_offset);
 		if (ret)
-			goto close_bo_fds;
+			goto exit_unlock;
 
 		ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
 		if (ret)
-			goto close_bo_fds;
+			goto exit_unlock;
 	}
 
-close_bo_fds:
-	if (ret) {
-		/* If IOCTL returns err, user assumes all FDs opened in criu_dump_bos are closed */
-		uint32_t i;
-		struct kfd_criu_bo_bucket *bo_buckets = (struct kfd_criu_bo_bucket *) args->bos;
-
-		for (i = 0; i < num_bos; i++) {
-			if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-				close_fd(bo_buckets[i].dmabuf_fd);
-		}
-	}
+	/* This must be the last thing in this function that can fail.
+	 * Otherwise we leak dmabuf file descriptors.
+	 */
+	ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
+			   (uint8_t __user *)args->priv_data, &priv_offset);
 
 exit_unlock:
 	mutex_unlock(&p->mutex);
@@ -2477,11 +2466,11 @@ static int criu_restore(struct file *filep,
 	if (ret)
 		goto exit_unlock;
 
-	ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
+	ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
 	if (ret)
 		goto exit_unlock;
 
-	ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
+	ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
 	if (ret)
 		goto exit_unlock;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 552c3ac85a13..069977d37605 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1063,9 +1063,12 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
  * kfd_criu_queue_priv_data
  * kfd_criu_event_priv_data
  * kfd_criu_svm_range_priv_data
+ *
+ * Version history:
+ * 1: Initial upstream version
+ * 2: BOs are saved last to fix and simplify error handling
  */
-
-#define KFD_CRIU_PRIV_VERSION 1
+#define KFD_CRIU_PRIV_VERSION 2
 
 struct kfd_criu_process_priv_data {
 	uint32_t version;
-- 
2.32.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ