linux-kernel - [183/197] IB/iser: Rewrite SG handling for RDMA logic

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20100422190923.821364162@kvm.kroah.org>
Date:	Thu, 22 Apr 2010 12:10:34 -0700
From:	Greg KH <gregkh@...e.de>
To:	linux-kernel@...r.kernel.org, stable@...nel.org
Cc:	stable-review@...nel.org, torvalds@...ux-foundation.org,
	akpm@...ux-foundation.org, alan@...rguk.ukuu.org.uk,
	Alexander Nezhinsky <alexandern@...taire.com>,
	Or Gerlitz <ogerlitz@...taire.com>,
	Roland Dreier <rolandd@...co.com>,
	maximilian attems <max@...o.at>
Subject: [183/197] IB/iser: Rewrite SG handling for RDMA logic

2.6.32-stable review patch.  If anyone has any objections, please let us know.

------------------

From: Or Gerlitz <ogerlitz@...taire.com>

commit c1ccaf2478f84c2665cf57f981db143aa582d646 upstream.

After dma-mapping an SG list provided by the SCSI midlayer, iser has
to make sure the mapped SG is "aligned for RDMA" in the sense that its
possible to produce one mapping in the HCA IOMMU which represents the
whole SG. Next, the mapped SG is formatted for registration with the HCA.

This patch re-writes the logic that does the above, to make it clearer
and simpler. It also fixes a bug in the being aligned for RDMA checks,
where a "start" check wasn't done but rather only "end" check.

[commit message in RH kernel tree: "Under heavy load, without the patch,
the HCA can be programmed to write (corrupt) into pages/location which
doesn't belong to the SG associated with the actual I/O and cause a
kernel oops."]

Signed-off-by: Alexander Nezhinsky <alexandern@...taire.com>
Signed-off-by: Or Gerlitz <ogerlitz@...taire.com>
Signed-off-by: Roland Dreier <rolandd@...co.com>
Cc: maximilian attems <max@...o.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@...e.de>

---
 drivers/infiniband/ulp/iser/iser_memory.c |  120 +++++++++++++-----------------
 1 file changed, 55 insertions(+), 65 deletions(-)

--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -209,6 +209,8 @@ void iser_finalize_rdma_unaligned_sg(str
 	mem_copy->copy_buf = NULL;
 }
 
+#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)
+
 /**
  * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
  * and returns the length of resulting physical address array (may be less than
@@ -221,62 +223,52 @@ void iser_finalize_rdma_unaligned_sg(str
  * where --few fragments of the same page-- are present in the SG as
  * consecutive elements. Also, it handles one entry SG.
  */
+
 static int iser_sg_to_page_vec(struct iser_data_buf *data,
 			       struct iser_page_vec *page_vec,
 			       struct ib_device *ibdev)
 {
-	struct scatterlist *sgl = (struct scatterlist *)data->buf;
-	struct scatterlist *sg;
-	u64 first_addr, last_addr, page;
-	int end_aligned;
-	unsigned int cur_page = 0;
+	struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
+	u64 start_addr, end_addr, page, chunk_start = 0;
 	unsigned long total_sz = 0;
-	int i;
+	unsigned int dma_len;
+	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
 
 	/* compute the offset of first element */
 	page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
 
+	new_chunk = 1;
+	cur_page  = 0;
 	for_each_sg(sgl, sg, data->dma_nents, i) {
-		unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
-
+		start_addr = ib_sg_dma_address(ibdev, sg);
+		if (new_chunk)
+			chunk_start = start_addr;
+		dma_len = ib_sg_dma_len(ibdev, sg);
+		end_addr = start_addr + dma_len;
 		total_sz += dma_len;
 
-		first_addr = ib_sg_dma_address(ibdev, sg);
-		last_addr  = first_addr + dma_len;
-
-		end_aligned   = !(last_addr  & ~MASK_4K);
-
-		/* continue to collect page fragments till aligned or SG ends */
-		while (!end_aligned && (i + 1 < data->dma_nents)) {
-			sg = sg_next(sg);
-			i++;
-			dma_len = ib_sg_dma_len(ibdev, sg);
-			total_sz += dma_len;
-			last_addr = ib_sg_dma_address(ibdev, sg) + dma_len;
-			end_aligned = !(last_addr  & ~MASK_4K);
+		/* collect page fragments until aligned or end of SG list */
+		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
+			new_chunk = 0;
+			continue;
 		}
+		new_chunk = 1;
 
-		/* handle the 1st page in the 1st DMA element */
-		if (cur_page == 0) {
-			page = first_addr & MASK_4K;
-			page_vec->pages[cur_page] = page;
-			cur_page++;
+		/* address of the first page in the contiguous chunk;
+		   masking relevant for the very first SG entry,
+		   which might be unaligned */
+		page = chunk_start & MASK_4K;
+		do {
+			page_vec->pages[cur_page++] = page;
 			page += SIZE_4K;
-		} else
-			page = first_addr;
-
-		for (; page < last_addr; page += SIZE_4K) {
-			page_vec->pages[cur_page] = page;
-			cur_page++;
-		}
-
+		} while (page < end_addr);
 	}
+
 	page_vec->data_size = total_sz;
 	iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
 	return cur_page;
 }
 
-#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)
 
 /**
  * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
@@ -284,42 +276,40 @@ static int iser_sg_to_page_vec(struct is
  * the number of entries which are aligned correctly. Supports the case where
  * consecutive SG elements are actually fragments of the same physcial page.
  */
-static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
-					      struct ib_device *ibdev)
+static int iser_data_buf_aligned_len(struct iser_data_buf *data,
+				      struct ib_device *ibdev)
 {
-	struct scatterlist *sgl, *sg;
-	u64 end_addr, next_addr;
-	int i, cnt;
-	unsigned int ret_len = 0;
+	struct scatterlist *sgl, *sg, *next_sg = NULL;
+	u64 start_addr, end_addr;
+	int i, ret_len, start_check = 0;
+
+	if (data->dma_nents == 1)
+		return 1;
 
 	sgl = (struct scatterlist *)data->buf;
+	start_addr  = ib_sg_dma_address(ibdev, sgl);
 
-	cnt = 0;
 	for_each_sg(sgl, sg, data->dma_nents, i) {
-		/* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
-		   "offset: %ld sz: %ld\n", i,
-		   (unsigned long)sg_phys(sg),
-		   (unsigned long)sg->offset,
-		   (unsigned long)sg->length); */
-		end_addr = ib_sg_dma_address(ibdev, sg) +
-			   ib_sg_dma_len(ibdev, sg);
-		/* iser_dbg("Checking sg iobuf end address "
-		       "0x%08lX\n", end_addr); */
-		if (i + 1 < data->dma_nents) {
-			next_addr = ib_sg_dma_address(ibdev, sg_next(sg));
-			/* are i, i+1 fragments of the same page? */
-			if (end_addr == next_addr) {
-				cnt++;
-				continue;
-			} else if (!IS_4K_ALIGNED(end_addr)) {
-				ret_len = cnt + 1;
-				break;
-			}
-		}
-		cnt++;
+		if (start_check && !IS_4K_ALIGNED(start_addr))
+			break;
+
+		next_sg = sg_next(sg);
+		if (!next_sg)
+			break;
+
+		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
+		start_addr  = ib_sg_dma_address(ibdev, next_sg);
+
+		if (end_addr == start_addr) {
+			start_check = 0;
+			continue;
+		} else
+			start_check = 1;
+
+		if (!IS_4K_ALIGNED(end_addr))
+			break;
 	}
-	if (i == data->dma_nents)
-		ret_len = cnt;	/* loop ended */
+	ret_len = (next_sg) ? i : i+1;
 	iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
 		 ret_len, data->dma_nents, data);
 	return ret_len;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/