lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <fadd212730bf5e3c7c5c0cf594698caf@coraid.com>
Date:	Wed, 20 Sep 2006 14:36:49 -0400
From:	"Ed L. Cashin" <ecashin@...aid.com>
To:	linux-kernel@...r.kernel.org
Cc:	ecashin@...aid.com, Greg K-H <greg@...ah.com>
Subject: [PATCH 2.6.18-rc4] aoe [04/14]: zero copy write 1 of 2

Avoid memory copy on writes.
(This patch depends on fixes in patch 9 to follow.)

Although skb->len should not be set when working with linear skbuffs,
the skb->tail pointer maintained by skb_put/skb_trim is not relevant
to what happens when the skb_fill_page_desc function is called.  This
issue was raised without comment in linux-kernel and netdev earlier
this month:

  http://thread.gmane.org/gmane.linux.kernel/446474/
  http://thread.gmane.org/gmane.linux.network/45444/

So until there is something analogous to skb_put that works for
zero-copy write skbuffs, we will do what the other callers of
skb_fill_page_desc are doing.

Signed-off-by: "Ed L. Cashin" <ecashin@...aid.com>
---

diff -upr 2.6.18-rc4-orig/drivers/block/aoe/aoe.h 2.6.18-rc4-aoe/drivers/block/aoe/aoe.h
--- 2.6.18-rc4-orig/drivers/block/aoe/aoe.h	2006-09-20 14:29:35.000000000 -0400
+++ 2.6.18-rc4-aoe/drivers/block/aoe/aoe.h	2006-09-20 14:29:35.000000000 -0400
@@ -107,11 +107,7 @@ struct frame {
 	ulong waited;
 	struct buf *buf;
 	char *bufaddr;
-	int writedatalen;
-	int ndata;
-
-	/* largest possible */
-	unsigned char data[sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr)];
+	struct sk_buff *skb;
 };
 
 struct aoedev {
@@ -157,6 +153,7 @@ void aoecmd_cfg(ushort aoemajor, unsigne
 void aoecmd_ata_rsp(struct sk_buff *);
 void aoecmd_cfg_rsp(struct sk_buff *);
 void aoecmd_sleepwork(void *vp);
+struct sk_buff *new_skb(ulong);
 
 int aoedev_init(void);
 void aoedev_exit(void);
diff -upr 2.6.18-rc4-orig/drivers/block/aoe/aoecmd.c 2.6.18-rc4-aoe/drivers/block/aoe/aoecmd.c
--- 2.6.18-rc4-orig/drivers/block/aoe/aoecmd.c	2006-09-20 14:29:35.000000000 -0400
+++ 2.6.18-rc4-aoe/drivers/block/aoe/aoecmd.c	2006-09-20 14:29:35.000000000 -0400
@@ -17,15 +17,14 @@
 #define MAXTIMER (HZ << 1)
 #define MAXWAIT (60 * 3)	/* After MAXWAIT seconds, give up and fail dev */
 
-static struct sk_buff *
-new_skb(struct net_device *if_dev, ulong len)
+struct sk_buff *
+new_skb(ulong len)
 {
 	struct sk_buff *skb;
 
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (skb) {
 		skb->nh.raw = skb->mac.raw = skb->data;
-		skb->dev = if_dev;
 		skb->protocol = __constant_htons(ETH_P_AOE);
 		skb->priority = 0;
 		skb_put(skb, len);
@@ -40,29 +39,6 @@ new_skb(struct net_device *if_dev, ulong
 	return skb;
 }
 
-static struct sk_buff *
-skb_prepare(struct aoedev *d, struct frame *f)
-{
-	struct sk_buff *skb;
-	char *p;
-
-	skb = new_skb(d->ifp, f->ndata + f->writedatalen);
-	if (!skb) {
-		printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
-		return NULL;
-	}
-
-	p = skb->mac.raw;
-	memcpy(p, f->data, f->ndata);
-
-	if (f->writedatalen) {
-		p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
-		memcpy(p, f->bufaddr, f->writedatalen);
-	}
-
-	return skb;
-}
-
 static struct frame *
 getframe(struct aoedev *d, int tag)
 {
@@ -129,10 +105,11 @@ aoecmd_ata_rw(struct aoedev *d, struct f
 		bcnt = MAXATADATA;
 
 	/* initialize the headers & frame */
-	h = (struct aoe_hdr *) f->data;
+	skb = f->skb;
+	h = (struct aoe_hdr *) skb->mac.raw;
 	ah = (struct aoe_atahdr *) (h+1);
-	f->ndata = sizeof *h + sizeof *ah;
-	memset(h, 0, f->ndata);
+	skb->len = sizeof *h + sizeof *ah;
+	memset(h, 0, skb->len);
 	f->tag = aoehdr_atainit(d, h);
 	f->waited = 0;
 	f->buf = buf;
@@ -155,11 +132,13 @@ aoecmd_ata_rw(struct aoedev *d, struct f
 	}
 
 	if (bio_data_dir(buf->bio) == WRITE) {
+		skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
+			offset_in_page(f->bufaddr), bcnt);
 		ah->aflags |= AOEAFL_WRITE;
-		f->writedatalen = bcnt;
 	} else {
+		skb_shinfo(skb)->nr_frags = 0;
+		skb->len = ETH_ZLEN;
 		writebit = 0;
-		f->writedatalen = 0;
 	}
 
 	ah->cmdstat = WIN_READ | writebit | extbit;
@@ -179,15 +158,14 @@ aoecmd_ata_rw(struct aoedev *d, struct f
 		buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
 	}
 
-	skb = skb_prepare(d, f);
-	if (skb) {
-		skb->next = NULL;
-		if (d->sendq_hd)
-			d->sendq_tl->next = skb;
-		else
-			d->sendq_hd = skb;
-		d->sendq_tl = skb;
-	}
+	skb->dev = d->ifp;
+	skb_get(skb);
+	skb->next = NULL;
+	if (d->sendq_hd)
+		d->sendq_tl->next = skb;
+	else
+		d->sendq_hd = skb;
+	d->sendq_tl = skb;
 }
 
 /* some callers cannot sleep, and they can call this function,
@@ -209,11 +187,12 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigne
 		if (!is_aoe_netif(ifp))
 			continue;
 
-		skb = new_skb(ifp, sizeof *h + sizeof *ch);
+		skb = new_skb(sizeof *h + sizeof *ch);
 		if (skb == NULL) {
 			printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
 			continue;
 		}
+		skb->dev = ifp;
 		if (sl_tail == NULL)
 			sl_tail = skb;
 		h = (struct aoe_hdr *) skb->mac.raw;
@@ -283,21 +262,21 @@ rexmit(struct aoedev *d, struct frame *f
 		d->aoemajor, d->aoeminor, f->tag, jiffies, n);
 	aoechr_error(buf);
 
-	h = (struct aoe_hdr *) f->data;
+	skb = f->skb;
+	h = (struct aoe_hdr *) skb->mac.raw;
 	f->tag = n;
 	h->tag = cpu_to_be32(n);
 	memcpy(h->dst, d->addr, sizeof h->dst);
 	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
 
-	skb = skb_prepare(d, f);
-	if (skb) {
-		skb->next = NULL;
-		if (d->sendq_hd)
-			d->sendq_tl->next = skb;
-		else
-			d->sendq_hd = skb;
-		d->sendq_tl = skb;
-	}
+	skb->dev = d->ifp;
+	skb_get(skb);
+	skb->next = NULL;
+	if (d->sendq_hd)
+		d->sendq_tl->next = skb;
+	else
+		d->sendq_hd = skb;
+	d->sendq_tl = skb;
 }
 
 static int
@@ -514,7 +493,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	calc_rttavg(d, tsince(f->tag));
 
 	ahin = (struct aoe_atahdr *) (hin+1);
-	ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
+	ahout = (struct aoe_atahdr *) (f->skb->mac.raw + sizeof(struct aoe_hdr));
 	buf = f->buf;
 
 	if (ahout->cmdstat == WIN_IDENTIFY)
@@ -620,20 +599,21 @@ aoecmd_ata_id(struct aoedev *d)
 	}
 
 	/* initialize the headers & frame */
-	h = (struct aoe_hdr *) f->data;
+	skb = f->skb;
+	h = (struct aoe_hdr *) skb->mac.raw;
 	ah = (struct aoe_atahdr *) (h+1);
-	f->ndata = sizeof *h + sizeof *ah;
-	memset(h, 0, f->ndata);
+	skb->len = sizeof *h + sizeof *ah;
+	memset(h, 0, skb->len);
 	f->tag = aoehdr_atainit(d, h);
 	f->waited = 0;
-	f->writedatalen = 0;
 
 	/* set up ata header */
 	ah->scnt = 1;
 	ah->cmdstat = WIN_IDENTIFY;
 	ah->lba3 = 0xa0;
 
-	skb = skb_prepare(d, f);
+	skb->dev = d->ifp;
+	skb_get(skb);
 
 	d->rttavg = MAXTIMER;
 	d->timer.function = rexmit_timer;
diff -upr 2.6.18-rc4-orig/drivers/block/aoe/aoedev.c 2.6.18-rc4-aoe/drivers/block/aoe/aoedev.c
--- 2.6.18-rc4-orig/drivers/block/aoe/aoedev.c	2006-09-20 14:29:35.000000000 -0400
+++ 2.6.18-rc4-aoe/drivers/block/aoe/aoedev.c	2006-09-20 14:29:35.000000000 -0400
@@ -63,22 +63,32 @@ aoedev_newdev(ulong nframes)
 	struct frame *f, *e;
 
 	d = kzalloc(sizeof *d, GFP_ATOMIC);
-	if (d == NULL)
-		return NULL;
 	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
-	if (f == NULL) {
-		kfree(d);
+ 	switch (!d || !f) {
+ 	case 0:
+ 		d->nframes = nframes;
+ 		d->frames = f;
+ 		e = f + nframes;
+ 		for (; f<e; f++) {
+ 			f->tag = FREETAG;
+ 			f->skb = new_skb(ETH_ZLEN);
+ 			if (!f->skb)
+ 				break;
+ 		}
+ 		if (f == e)
+ 			break;
+ 		while (f > d->frames) {
+ 			f--;
+ 			dev_kfree_skb(f->skb);
+ 		}
+ 	default:
+ 		if (f)
+ 			kfree(f);
+ 		if (d)
+ 			kfree(d);
 		return NULL;
 	}
-
 	INIT_WORK(&d->work, aoecmd_sleepwork, d);
-
-	d->nframes = nframes;
-	d->frames = f;
-	e = f + nframes;
-	for (; f<e; f++)
-		f->tag = FREETAG;
-
 	spin_lock_init(&d->lock);
 	init_timer(&d->timer);
 	d->timer.data = (ulong) d;
@@ -160,11 +170,19 @@ aoedev_by_sysminor_m(ulong sysminor, ulo
 static void
 aoedev_freedev(struct aoedev *d)
 {
+	struct frame *f, *e;
+
 	if (d->gd) {
 		aoedisk_rm_sysfs(d);
 		del_gendisk(d->gd);
 		put_disk(d->gd);
 	}
+	f = d->frames;
+	e = f + d->nframes;
+	for (; f<e; f++) {
+		skb_shinfo(f->skb)->nr_frags = 0;
+		dev_kfree_skb(f->skb);
+	}
 	kfree(d->frames);
 	if (d->bufpool)
 		mempool_destroy(d->bufpool);


-- 
  "Ed L. Cashin" <ecashin@...aid.com>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ