[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240209112536.2262967-1-quic_dibasing@quicinc.com>
Date: Fri, 9 Feb 2024 16:55:36 +0530
From: Dibakar Singh <quic_dibasing@...cinc.com>
To: <andersson@...nel.org>, <konrad.dybcio@...aro.org>,
        <krzysztof.kozlowski@...aro.org>, <luzmaximilian@...il.com>,
        <bartosz.golaszewski@...aro.org>, <quic_eberman@...cinc.com>,
        <quic_gurus@...cinc.com>
CC: <linux-arm-msm@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        <quic_guptap@...cinc.com>, <quic_pkondeti@...cinc.com>,
        <quic_pheragu@...cinc.com>, <quic_dibasing@...cinc.com>
Subject: [PATCH] firmware: qcom_scm: Introduce batching of hyp assign calls
Expose an API qcom_scm_assign_table to allow client drivers to batch
multiple memory regions in a single hyp assign call.
In the existing situation, if our goal is to process an sg_table and
transfer its ownership from the current VM to a different VM, we have a
couple of strategies. The first strategy involves processing the entire
sg_table at once and then transferring the ownership. However, this
method may have an adverse impact on the system because during an SMC
call, the NS interrupts are disabled, and this delay could be
significant when dealing with large sg_tables. To address this issue, we
can adopt a second strategy, which involves processing each sg_list in
the sg_table individually and reassigning memory ownership. Although
this method is slower and potentially impacts performance, it will not
keep the NS interrupts disabled for an extended period.
A more efficient strategy is to process the sg_table in batches. This
approach addresses both scenarios by involving memory processing in
batches, thus avoiding prolonged NS interrupt disablement for longer
duration when dealing with large sg_tables. Moreover, since we process
in batches, this method is faster compared to processing each item
individually. The observations on testing both the approaches for
performance is as follows:
Allocation Size/            256MB            512MB            1024MB
Algorithm Used           ===========      ===========      ============
Processing each sg_list   73708(us)        149289(us)       266964(us)
in sg_table one by one
Processing sg_table in    46925(us)         92691(us)       176893(us)
batches
This implementation serves as a wrapper around the helper function
__qcom_scm_assign_mem, which takes an sg_list and processes it in
batches. We’ve set the limit to a minimum of 32 sg_list in a batch or a
total batch size of 512 pages. The selection of these numbers is
heuristic, based on the test runs conducted. Opting for a smaller number
would compromise performance, while a larger number would result in
non-secure interrupts being disabled for an extended duration.
Co-developed-by: Prakruthi Deepak Heragu <quic_pheragu@...cinc.com>
Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@...cinc.com>
Signed-off-by: Dibakar Singh <quic_dibasing@...cinc.com>
---
 drivers/firmware/qcom/qcom_scm.c       | 211 +++++++++++++++++++++++++
 include/linux/firmware/qcom/qcom_scm.h |   7 +
 2 files changed, 218 insertions(+)
diff --git a/drivers/firmware/qcom/qcom_scm.c b/drivers/firmware/qcom/qcom_scm.c
index 520de9b5633a..038b96503d65 100644
--- a/drivers/firmware/qcom/qcom_scm.c
+++ b/drivers/firmware/qcom/qcom_scm.c
@@ -21,6 +21,8 @@
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 #include <linux/types.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
 
 #include "qcom_scm.h"
 
@@ -1048,6 +1050,215 @@ int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
 }
 EXPORT_SYMBOL_GPL(qcom_scm_assign_mem);
 
+/**
+ * qcom_scm_assign_mem_batch() - Make a secure call to reassign memory
+ *				   ownership of several memory regions
+ * @mem_regions:    A buffer describing the set of memory regions that need to
+ *		    be reassigned
+ * @nr_mem_regions: The number of memory regions that need to be reassigned
+ * @srcvms:	    A buffer populated with he vmid(s) for the current set of
+ *		    owners
+ * @src_sz:	    The size of the srcvms buffer (in bytes)
+ * @destvms:	    A buffer populated with the new owners and corresponding
+ *		    permission flags.
+ * @dest_sz:	    The size of the destvms buffer (in bytes)
+ *
+ * Return negative errno on failure, 0 on success.
+ */
+static int qcom_scm_assign_mem_batch(struct qcom_scm_mem_map_info *mem_regions,
+				     size_t nr_mem_regions, u32 *srcvms,
+				     size_t src_sz,
+				     struct qcom_scm_current_perm_info *destvms,
+				     size_t dest_sz)
+{
+	dma_addr_t mem_dma_addr;
+	size_t mem_regions_sz;
+	int ret = 0, i;
+
+	for (i = 0; i < nr_mem_regions; i++) {
+		mem_regions[i].mem_addr = cpu_to_le64(mem_regions[i].mem_addr);
+		mem_regions[i].mem_size = cpu_to_le64(mem_regions[i].mem_size);
+	}
+
+	mem_regions_sz = nr_mem_regions * sizeof(*mem_regions);
+	mem_dma_addr = dma_map_single(__scm->dev, mem_regions, mem_regions_sz,
+				      DMA_TO_DEVICE);
+	if (dma_mapping_error(__scm->dev, mem_dma_addr)) {
+		dev_err(__scm->dev, "mem_dma_addr mapping failed\n");
+		return -ENOMEM;
+	}
+
+	ret = __qcom_scm_assign_mem(__scm->dev, virt_to_phys(mem_regions),
+				    mem_regions_sz, virt_to_phys(srcvms), src_sz,
+				    virt_to_phys(destvms), dest_sz);
+
+	dma_unmap_single(__scm->dev, mem_dma_addr, mem_regions_sz, DMA_TO_DEVICE);
+	return ret;
+}
+
+/**
+ * qcom_scm_prepare_mem_batch() - Prepare batches of memory regions
+ * @sg_table:       A scatter list whose memory needs to be reassigned
+ * @srcvms:	    A buffer populated with he vmid(s) for the current set of
+ *		    owners
+ * @nr_src:	    The number of the src_vms buffer
+ * @destvms:	    A buffer populated with he vmid(s) for the new owners
+ * @destvms_perms:  A buffer populated with the permission flags of new owners
+ * @nr_dest:	    The number of the destvms
+ * @last_sgl:	    Denotes to the last scatter list element. Used in case of rollback
+ * @roll_back:	    Identifies whether we are executing rollback in case of failure
+ *
+ * Return negative errno on failure, 0 on success.
+ */
+static int qcom_scm_prepare_mem_batch(struct sg_table *table,
+				      u32 *srcvms, int nr_src,
+				      int *destvms, int *destvms_perms,
+				      int nr_dest,
+				      struct scatterlist *last_sgl, bool roll_back)
+{
+	struct qcom_scm_current_perm_info *destvms_cp;
+	struct qcom_scm_mem_map_info *mem_regions_buf;
+	struct scatterlist *curr_sgl = table->sgl;
+	dma_addr_t source_dma_addr, dest_dma_addr;
+	size_t batch_iterator;
+	size_t batch_start = 0;
+	size_t destvms_cp_sz;
+	size_t srcvms_cp_sz;
+	size_t batch_size;
+	u32 *srcvms_cp;
+	int ret = 0;
+	int i;
+
+	if (!table || !table->sgl || !srcvms || !nr_src ||
+	    !destvms || !destvms_perms || !nr_dest || !table->nents)
+		return -EINVAL;
+
+	srcvms_cp_sz = sizeof(*srcvms_cp) * nr_src;
+	srcvms_cp = kmemdup(srcvms, srcvms_cp_sz, GFP_KERNEL);
+	if (!srcvms_cp)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_src; i++)
+		srcvms_cp[i] = cpu_to_le32(srcvms_cp[i]);
+
+	source_dma_addr = dma_map_single(__scm->dev, srcvms_cp,
+					 srcvms_cp_sz, DMA_TO_DEVICE);
+
+	if (dma_mapping_error(__scm->dev, source_dma_addr)) {
+		ret = -ENOMEM;
+		goto out_free_source;
+	}
+
+	destvms_cp_sz = sizeof(*destvms_cp) * nr_dest;
+	destvms_cp = kzalloc(destvms_cp_sz, GFP_KERNEL);
+
+	if (!destvms_cp) {
+		ret = -ENOMEM;
+		goto out_unmap_source;
+	}
+
+	for (i = 0; i < nr_dest; i++) {
+		destvms_cp[i].vmid = cpu_to_le32(destvms[i]);
+		destvms_cp[i].perm = cpu_to_le32(destvms_perms[i]);
+		destvms_cp[i].ctx = 0;
+		destvms_cp[i].ctx_size = 0;
+	}
+
+	dest_dma_addr = dma_map_single(__scm->dev, destvms_cp,
+				       destvms_cp_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(__scm->dev, dest_dma_addr)) {
+		ret = -ENOMEM;
+		goto out_free_dest;
+	}
+
+	mem_regions_buf = kcalloc(QCOM_SCM_MAX_BATCH_SECTION, sizeof(*mem_regions_buf),
+				  GFP_KERNEL);
+	if (!mem_regions_buf)
+		return -ENOMEM;
+
+	while (batch_start < table->nents) {
+		batch_size = 0;
+		batch_iterator = 0;
+
+		do {
+			mem_regions_buf[batch_iterator].mem_addr = page_to_phys(sg_page(curr_sgl));
+			mem_regions_buf[batch_iterator].mem_size = curr_sgl->length;
+			batch_size += curr_sgl->length;
+			batch_iterator++;
+			if (roll_back && curr_sgl == last_sgl)
+				break;
+			curr_sgl = sg_next(curr_sgl);
+		} while (curr_sgl && batch_iterator < QCOM_SCM_MAX_BATCH_SECTION &&
+				curr_sgl->length + batch_size < QCOM_SCM_MAX_BATCH_SIZE);
+
+		batch_start += batch_iterator;
+
+		ret = qcom_scm_assign_mem_batch(mem_regions_buf, batch_iterator,
+						srcvms_cp, srcvms_cp_sz, destvms_cp, destvms_cp_sz);
+
+		if (ret) {
+			dev_info(__scm->dev, "Failed to assign memory protection, ret = %d\n", ret);
+			last_sgl = curr_sgl;
+			ret = -EADDRNOTAVAIL;
+			break;
+		}
+		if (roll_back && curr_sgl == last_sgl)
+			break;
+	}
+	kfree(mem_regions_buf);
+
+	dma_unmap_single(__scm->dev, dest_dma_addr,
+			 destvms_cp_sz, DMA_TO_DEVICE);
+
+out_free_dest:
+	kfree(destvms_cp);
+out_unmap_source:
+	dma_unmap_single(__scm->dev, source_dma_addr,
+			 srcvms_cp_sz, DMA_TO_DEVICE);
+out_free_source:
+	kfree(srcvms_cp);
+	return ret;
+}
+
+/**
+ * qcom_scm_assign_table() - Make a call to prepare batches of memory regions
+ *			     and reassign memory ownership of several memory regions at once
+ * @sg_table:       A scatter list whose memory needs to be reassigned
+ * @srcvms:	    A buffer populated with he vmid(s) for the current set of
+ *		    owners
+ * @nr_src:	    The number of the src_vms buffer
+ * @destvms:	    A buffer populated with he vmid(s) for the new owners
+ * @destvms_perms:  A buffer populated with the permission flags of new owners
+ * @nr_dest:	    The number of the destvms
+ *
+ * Return negative errno on failure, 0 on success.
+ */
+int qcom_scm_assign_table(struct sg_table *table,
+			  u32 *srcvms, int nr_src,
+			  int *destvms, int *destvms_perms,
+			  int nr_dest)
+{
+	struct scatterlist *last_sgl = NULL;
+	int rb_ret = 0;
+	u32 new_dests;
+	int new_perms;
+	int ret = 0;
+
+	ret = qcom_scm_prepare_mem_batch(table, srcvms, nr_src,
+					 destvms, destvms_perms, nr_dest, last_sgl, false);
+
+	if (!ret)
+		goto out;
+	new_dests = QCOM_SCM_VMID_HLOS;
+	new_perms = QCOM_SCM_PERM_EXEC | QCOM_SCM_PERM_WRITE | QCOM_SCM_PERM_READ;
+	rb_ret = qcom_scm_prepare_mem_batch(table, destvms, nr_dest, &new_dests,
+					    &new_perms, nr_src, last_sgl, true);
+	WARN_ON_ONCE(rb_ret);
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(qcom_scm_assign_table);
+
 /**
  * qcom_scm_ocmem_lock_available() - is OCMEM lock/unlock interface available
  */
diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h
index ccaf28846054..abd675c7ef49 100644
--- a/include/linux/firmware/qcom/qcom_scm.h
+++ b/include/linux/firmware/qcom/qcom_scm.h
@@ -8,6 +8,7 @@
 #include <linux/err.h>
 #include <linux/types.h>
 #include <linux/cpumask.h>
+#include <linux/scatterlist.h>
 
 #include <dt-bindings/firmware/qcom,scm.h>
 
@@ -15,6 +16,8 @@
 #define QCOM_SCM_CPU_PWR_DOWN_L2_ON	0x0
 #define QCOM_SCM_CPU_PWR_DOWN_L2_OFF	0x1
 #define QCOM_SCM_HDCP_MAX_REQ_CNT	5
+#define QCOM_SCM_MAX_BATCH_SECTION	32
+#define QCOM_SCM_MAX_BATCH_SIZE		SZ_2M
 
 struct qcom_scm_hdcp_req {
 	u32 addr;
@@ -93,6 +96,10 @@ int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size,
 int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, u64 *src,
 			const struct qcom_scm_vmperm *newvm,
 			unsigned int dest_cnt);
+int qcom_scm_assign_table(struct sg_table *table,
+			  u32 *srcvms, int nr_src,
+			  int *destvms, int *destvms_perms,
+			  int nr_dest);
 
 bool qcom_scm_ocmem_lock_available(void);
 int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, u32 size,
-- 
2.34.1
Powered by blists - more mailing lists
 
