Message-ID: <4891B118.3060600@panasas.com>
Date:	Thu, 31 Jul 2008 15:33:28 +0300
From:	Boaz Harrosh <bharrosh@...asas.com>
To:	open-iscsi@...glegroups.com
CC:	netdev@...r.kernel.org, jgarzik@...ox.com, davem@...emloft.net,
	michaelc@...wisc.edu, swise@...ngridcomputing.com,
	rdreier@...co.com, daisyc@...ibm.com, wenxiong@...ibm.com,
	bhua@...ibm.com, divy@...lsio.com, dm@...lsio.com,
	leedom@...lsio.com, linux-scsi <linux-scsi@...r.kernel.org>
Subject: Re: [RFC][PATCH 1/1] cxgb3i: cxgb3 iSCSI initiator

Karen Xie wrote:
> Cxgb3i iSCSI driver
> 

Sorry for not following the cxgb3i thread, but I would like a little
description of this HW. What are its capabilities over a regular
NIC, and in what way is it special?

Also, I would like some documentation along these lines:

  This card does ....
  It is setup the same as iscsi_tcp ....

  Additional parameters not available with iscsi_tcp are:
    ...

  iscsi_tcp Parameters not available:
    ...

  Special System considerations ...

Also, is there a website that has more documentation/information?

(More comments in code)

> Signed-off-by: Karen Xie <kxie@...lsio.com>
> ---
> 
>  drivers/scsi/cxgb3i/Kconfig          |    6 
>  drivers/scsi/cxgb3i/Makefile         |    5 
>  drivers/scsi/cxgb3i/cxgb3i.h         |  155 +++
>  drivers/scsi/cxgb3i/cxgb3i_init.c    |  109 ++
>  drivers/scsi/cxgb3i/cxgb3i_iscsi.c   |  800 ++++++++++++++
>  drivers/scsi/cxgb3i/cxgb3i_offload.c | 2001 ++++++++++++++++++++++++++++++++++
>  drivers/scsi/cxgb3i/cxgb3i_offload.h |  242 ++++
>  drivers/scsi/cxgb3i/cxgb3i_ulp2.c    |  692 ++++++++++++
>  drivers/scsi/cxgb3i/cxgb3i_ulp2.h    |  106 ++
>  9 files changed, 4116 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/scsi/cxgb3i/Kconfig
>  create mode 100644 drivers/scsi/cxgb3i/Makefile
>  create mode 100644 drivers/scsi/cxgb3i/cxgb3i.h
>  create mode 100644 drivers/scsi/cxgb3i/cxgb3i_init.c
>  create mode 100644 drivers/scsi/cxgb3i/cxgb3i_iscsi.c
>  create mode 100644 drivers/scsi/cxgb3i/cxgb3i_offload.c
>  create mode 100644 drivers/scsi/cxgb3i/cxgb3i_offload.h
>  create mode 100644 drivers/scsi/cxgb3i/cxgb3i_ulp2.c
>  create mode 100644 drivers/scsi/cxgb3i/cxgb3i_ulp2.h
> 
> 
> diff --git a/drivers/scsi/cxgb3i/Kconfig b/drivers/scsi/cxgb3i/Kconfig
> new file mode 100644
> index 0000000..2762814
> --- /dev/null
> +++ b/drivers/scsi/cxgb3i/Kconfig
> @@ -0,0 +1,6 @@
> +config SCSI_CXGB3_ISCSI
> +	tristate "Chelsio S3xx iSCSI support"
> +	select CHELSIO_T3
> +	select SCSI_ISCSI_ATTRS
> +	---help---
> +	This driver supports iSCSI offload for the Chelsio S3 series devices.
> diff --git a/drivers/scsi/cxgb3i/Makefile b/drivers/scsi/cxgb3i/Makefile
> new file mode 100644
> index 0000000..8c8a894
> --- /dev/null
> +++ b/drivers/scsi/cxgb3i/Makefile
> @@ -0,0 +1,5 @@
> +EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3
> +
> +cxgb3i-y := cxgb3i_init.o cxgb3i_iscsi.o cxgb3i_ulp2.o cxgb3i_offload.o
> +
> +obj-$(CONFIG_SCSI_CXGB3_ISCSI) += cxgb3i.o
> diff --git a/drivers/scsi/cxgb3i/cxgb3i.h b/drivers/scsi/cxgb3i/cxgb3i.h
> new file mode 100644
> index 0000000..3c44c3c
> --- /dev/null
> +++ b/drivers/scsi/cxgb3i/cxgb3i.h
> @@ -0,0 +1,155 @@
> +/*
> + * cxgb3i.h: Chelsio S3xx iSCSI driver.
> + *
> + * Copyright (c) 2008 Chelsio Communications, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation.
> + *
> + * Written by: Karen Xie (kxie@...lsio.com)
> + */
> +
> +#ifndef __CXGB3I_H__
> +#define __CXGB3I_H__
> +
> +#include <linux/module.h>
> +#include <linux/moduleparam.h>
> +#include <linux/errno.h>
> +#include <linux/types.h>
> +#include <linux/list.h>
> +#include <linux/netdevice.h>
> +#include <linux/scatterlist.h>
> +
> +/* from cxgb3 LLD */
> +#include "common.h"
> +#include "t3_cpl.h"
> +#include "t3cdev.h"
> +#include "cxgb3_ctl_defs.h"
> +#include "cxgb3_offload.h"
> +#include "firmware_exports.h"
> +#include "cxgb3i_offload.h"
> +

#include "../iscsi_tcp.h"

> +#define CXGB3I_SCSI_QDEPTH_DFLT	128
> +#define ISCSI_PDU_HEADER_MAX	(56 + 256) /* bhs + digests + ahs */

Better derived from the actual header buffer than from magic numbers:

#define ISCSI_PDU_HEADER_MAX sizeof(struct iscsi_hdr_buff)

> +
> +struct cxgb3i_adapter;
> +struct cxgb3i_hba;
> +struct cxgb3i_endpoint;
> +
> +/**
> + * struct cxgb3i_tag_format - cxgb3i ulp tag for steering pdu payload
> + *
> + * @rsvd_bits:	# of bits used by h/w
> + * @rsvd_shift:	shift left
> + * @rsvd_mask:  bit mask
> + *
> + */
> +struct cxgb3i_tag_format {
> +	unsigned char idx_bits;
> +	unsigned char age_bits;
> +	unsigned char rsvd_bits;
> +	unsigned char rsvd_shift;
> +	u32 rsvd_mask;
> +};
> +
> +/**
> + * struct cxgb3i_ddp_info - cxgb3i direct data placement for pdu payload
> + *
> + * @llimit:	lower bound of the page pod memory
> + * @ulimit:	upper bound of the page pod memory
> + * @nppods:	# of page pod entries
> + * @idx_last:	page pod entry last used
> + * @map_lock:	lock to synchonize access to the page pod map
> + * @map:	page pod map
> + */
> +struct cxgb3i_ddp_info {
> +	unsigned int llimit;
> +	unsigned int ulimit;
> +	unsigned int nppods;
> +	unsigned int idx_last;
> +	spinlock_t map_lock;
> +	u8 *map;
> +};
> +
> +struct cxgb3i_hba {
> +	struct cxgb3i_adapter *snic;
> +	struct net_device *ndev;
> +	struct Scsi_Host *shost;
> +
> +	rwlock_t cconn_rwlock;
> +	struct list_head cconn_list;
> +};
> +
> +struct cxgb3i_adapter {
> +	struct list_head list_head;
> +	spinlock_t lock;
> +	struct t3cdev *tdev;
> +	struct pci_dev *pdev;
> +	unsigned char hba_cnt;
> +	struct cxgb3i_hba *hba[MAX_NPORTS];
> +
> +	unsigned int tx_max_size;
> +	unsigned int rx_max_size;
> +
> +	struct cxgb3i_tag_format tag_format;
> +	struct cxgb3i_ddp_info ddp;
> +};
> +
> +struct cxgb3i_conn {
> +	struct list_head list_head;
> +
> +	struct cxgb3i_endpoint *cep;
> +	struct iscsi_conn *conn;
> +	struct cxgb3i_hba *hba;
> +};
> +
> +struct cxgb3i_endpoint {
> +	struct s3_conn *c3cn;
> +	struct cxgb3i_hba *hba;
> +	struct cxgb3i_conn *cconn;
> +};
> +
> +int cxgb3i_iscsi_init(void);
> +void cxgb3i_iscsi_cleanup(void);
> +
> +struct cxgb3i_adapter *cxgb3i_adapter_find_by_tdev(struct t3cdev *);
> +struct cxgb3i_adapter *cxgb3i_adapter_add(struct t3cdev *);
> +void cxgb3i_adapter_remove(struct cxgb3i_adapter *);
> +int cxgb3i_adapter_ulp_init(struct cxgb3i_adapter *);
> +void cxgb3i_adapter_ulp_cleanup(struct cxgb3i_adapter *);
> +
> +struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *);
> +struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *,
> +				       struct net_device *);
> +void cxgb3i_hba_host_remove(struct cxgb3i_hba *);
> +
> +void cxgb3i_hba_conn_add(struct cxgb3i_conn *, struct cxgb3i_hba *);
> +void cxgb3i_hba_conn_remove(struct cxgb3i_conn *);
> +
> +int cxgb3i_ulp2_init(void);
> +void cxgb3i_ulp2_cleanup(void);
> +int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *, int, int);
> +
> +void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *, u32,
> +			    struct scatterlist *, unsigned int);
> +u32 cxgb3i_ddp_tag_reserve(struct cxgb3i_adapter *, unsigned int,
> +			   u32, unsigned int, struct scatterlist *,
> +			   unsigned int);
> +static inline void cxgb3i_parse_tag(struct cxgb3i_tag_format *format,
> +				    u32 tag, u32 *rsvd_bits, u32 *sw_bits)
> +{
> +	if (rsvd_bits)
> +		*rsvd_bits = (tag >> format->rsvd_shift) & format->rsvd_mask;
> +	if (sw_bits) {
> +		*sw_bits = (tag >> (format->rsvd_shift + format->rsvd_bits))
> +		    << format->rsvd_shift;
> +		*sw_bits |= tag & ((1 << format->rsvd_shift) - 1);
> +	}
> +}
> +
> +int cxgb3i_conn_ulp2_xmit(struct iscsi_conn *);
> +
> +void cxgb3i_display_byte_string(char *, unsigned char *, int, int);
> +
> +#endif
> diff --git a/drivers/scsi/cxgb3i/cxgb3i_init.c b/drivers/scsi/cxgb3i/cxgb3i_init.c
> new file mode 100644
> index 0000000..1c91bb0
> --- /dev/null
> +++ b/drivers/scsi/cxgb3i/cxgb3i_init.c
> @@ -0,0 +1,109 @@
> +/* cxgb3i_init.c: Chelsio S3xx iSCSI driver.
> + *
> + * Copyright (c) 2008 Chelsio Communications, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation.
> + *
> + * Written by: Karen Xie (kxie@...lsio.com)
> + */
> +
> +#include "cxgb3i.h"
> +
> +#define DRV_MODULE_NAME         "cxgb3i"
> +#define DRV_MODULE_VERSION      "1.0.0"
> +#define DRV_MODULE_RELDATE      "May 1, 2008"
> +
> +static char version[] =
> +    "Chelsio S3xx iSCSI Driver " DRV_MODULE_NAME
> +    " v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
> +
> +MODULE_AUTHOR("Karen Xie <kxie@...lsio.com>");
> +MODULE_DESCRIPTION("Chelsio S3xx iSCSI Driver");
> +MODULE_LICENSE("GPL");
> +MODULE_VERSION(DRV_MODULE_VERSION);
> +
> +static void open_s3_dev(struct t3cdev *);
> +static void close_s3_dev(struct t3cdev *);
> +cxgb3_cpl_handler_func cxgb3i_cpl_handlers[NUM_CPL_CMDS];
> +struct cxgb3_client t3c_client = {
> +	.name = "iscsi_cxgb3",
> +	.handlers = cxgb3i_cpl_handlers,
> +	.add = open_s3_dev,
> +	.remove = close_s3_dev,
> +};
> +
> +/**
> + * open_s3_dev - register with cxgb3 LLD
> + * @t3dev	cxgb3 adapter instance
> + */
> +static void open_s3_dev(struct t3cdev *t3dev)
> +{
> +	static int vers_printed;
> +
> +	if (!vers_printed) {
> +		printk(KERN_INFO "%s", version);
> +		vers_printed = 1;
> +	}
> +
> +	cxgb3i_log_debug("open cxgb3 %s.\n", t3dev->name);
> +
> +	cxgb3i_sdev_add(t3dev, &t3c_client);
> +	cxgb3i_adapter_add(t3dev);
> +}
> +
> +/**
> + * close_s3_dev - de-register with cxgb3 LLD
> + * @t3dev	cxgb3 adapter instance
> + */
> +static void close_s3_dev(struct t3cdev *t3dev)
> +{
> +	struct cxgb3i_adapter *snic = cxgb3i_adapter_find_by_tdev(t3dev);
> +	cxgb3i_log_debug("close cxgb3 %s.\n", t3dev->name);
> +	if (snic)
> +		cxgb3i_adapter_remove(snic);
> +	cxgb3i_sdev_remove(t3dev);
> +}
> +
> +/**
> + * cxgb3i_init_module - module init entry point
> + *
> + * initialize any driver wide global data structures and register itself
> + *	with the cxgb3 module
> + */
> +static int __init cxgb3i_init_module(void)
> +{
> +	int err;
> +
> +	err = cxgb3i_sdev_init(cxgb3i_cpl_handlers);
> +	if (err < 0)
> +		return err;
> +
> +	err = cxgb3i_iscsi_init();
> +	if (err < 0)
> +		return err;
> +
> +	err = cxgb3i_ulp2_init();
> +	if (err < 0)
> +		return err;
> +
> +	cxgb3_register_client(&t3c_client);
> +	return 0;
> +}
> +
> +/**
> + * cxgb3i_exit_module - module cleanup/exit entry point
> + *
> + * go through the driver hba list and for each hba, release any resource held.
> + *	and unregisters iscsi transport and the cxgb3 module
> + */
> +static void __exit cxgb3i_exit_module(void)
> +{
> +	cxgb3_unregister_client(&t3c_client);
> +	cxgb3i_ulp2_cleanup();
> +	cxgb3i_iscsi_cleanup();
> +}
> +
> +module_init(cxgb3i_init_module);
> +module_exit(cxgb3i_exit_module);
> diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
> new file mode 100644
> index 0000000..ed3d340
> --- /dev/null
> +++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
> @@ -0,0 +1,800 @@
> +/* cxgb3i_iscsi.c: Chelsio S3xx iSCSI driver.
> + *
> + * Copyright (c) 2008 Chelsio Communications, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation.
> + *
> + * Written by: Karen Xie (kxie@...lsio.com)
> + */
> +
> +#include <net/tcp.h>
> +#include <scsi/scsi_cmnd.h>
> +#include <scsi/scsi_device.h>
> +#include <scsi/scsi_eh.h>
> +#include <scsi/scsi_host.h>
> +#include <scsi/scsi.h>
> +#include <scsi/iscsi_proto.h>
> +#include <scsi/libiscsi.h>
> +#include <scsi/scsi_transport_iscsi.h>
> +#include <linux/crypto.h>
> +#include "../iscsi_tcp.h"
> +
> +#include "cxgb3i.h"
> +
> +static struct scsi_transport_template *cxgb3i_scsi_transport;
> +static struct scsi_host_template cxgb3i_host_template;
> +static struct iscsi_transport cxgb3i_iscsi_transport;
> +
> +static LIST_HEAD(cxgb3i_snic_list);
> +static DEFINE_RWLOCK(cxgb3i_snic_rwlock);
> +
> +/**
> + * cxgb3i_adapter_add - initialize a s3 adapter structure and any h/w settings
> + *	necessary
> + * @snic:	pointer to adapter instance
> + */
> +struct cxgb3i_adapter *cxgb3i_adapter_add(struct t3cdev *t3dev)
> +{
> +	struct cxgb3i_adapter *snic;
> +	struct adapter *adapter = tdev2adap(t3dev);
> +	int i;
> +
> +	snic = kzalloc(sizeof(*snic), GFP_KERNEL);
> +	if (!snic) {
> +		cxgb3i_log_debug("cxgb3 %s, OOM.\n", t3dev->name);
> +		return NULL;
> +	}
> +
> +	spin_lock_init(&snic->lock);
> +	snic->tdev = t3dev;
> +	snic->pdev = adapter->pdev;
> +
> +	if (cxgb3i_adapter_ulp_init(snic))
> +		goto free_snic;
> +
> +	for_each_port(adapter, i) {
> +		snic->hba[i] = cxgb3i_hba_host_add(snic, adapter->port[i]);
> +		if (!snic->hba[i])
> +			goto ulp_cleanup;
> +	}
> +	snic->hba_cnt = adapter->params.nports;
> +
> +	/* add to the list */
> +	write_lock(&cxgb3i_snic_rwlock);
> +	list_add_tail(&snic->list_head, &cxgb3i_snic_list);
> +	write_unlock(&cxgb3i_snic_rwlock);
> +
> +	return snic;
> +
> +ulp_cleanup:
> +	cxgb3i_adapter_ulp_cleanup(snic);
> +free_snic:
> +	kfree(snic);
> +	return NULL;
> +}
> +
> +/**
> + * cxgb3i_snic_cleanup - release all the resources held and cleanup any h/w
> + *	settings necessary
> + * @snic:	pointer to adapter instance
> + */
> +void cxgb3i_adapter_remove(struct cxgb3i_adapter *snic)
> +{
> +	int i;
> +
> +	/* remove from the list */
> +	write_lock(&cxgb3i_snic_rwlock);
> +	list_del(&snic->list_head);
> +	write_unlock(&cxgb3i_snic_rwlock);
> +
> +	for (i = 0; i < snic->hba_cnt; i++) {
> +		if (snic->hba[i]) {
> +			cxgb3i_hba_host_remove(snic->hba[i]);
> +			snic->hba[i] = NULL;
> +		}
> +	}
> +
> +	/* release ddp resources */
> +	cxgb3i_adapter_ulp_cleanup(snic);
> +	kfree(snic);
> +}
> +
> +struct cxgb3i_adapter *cxgb3i_adapter_find_by_tdev(struct t3cdev *t3dev)
> +{
> +	struct cxgb3i_adapter *snic;
> +
> +	read_lock(&cxgb3i_snic_rwlock);
> +	list_for_each_entry(snic, &cxgb3i_snic_list, list_head) {
> +		if (snic->tdev == t3dev) {
> +			read_unlock(&cxgb3i_snic_rwlock);
> +			return snic;
> +		}
> +	}
> +	read_unlock(&cxgb3i_snic_rwlock);
> +
> +	return NULL;
> +}
> +
> +struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev)
> +{
> +	struct cxgb3i_adapter *snic;
> +	int i;
> +
> +	read_lock(&cxgb3i_snic_rwlock);
> +	list_for_each_entry(snic, &cxgb3i_snic_list, list_head) {
> +		for (i = 0; i < snic->hba_cnt; i++) {
> +			if (snic->hba[i]->ndev == ndev) {
> +				read_unlock(&cxgb3i_snic_rwlock);
> +				return (snic->hba[i]);
> +			}
> +		}
> +	}
> +	read_unlock(&cxgb3i_snic_rwlock);
> +	return NULL;
> +}
> +
> +void cxgb3i_hba_conn_add(struct cxgb3i_conn *cconn, struct cxgb3i_hba *hba)
> +{
> +	cconn->hba = hba;
> +	write_lock(&hba->cconn_rwlock);
> +	list_add_tail(&cconn->list_head, &hba->cconn_list);
> +	write_unlock(&hba->cconn_rwlock);
> +}
> +
> +void cxgb3i_hba_conn_remove(struct cxgb3i_conn *cconn)
> +{
> +	struct cxgb3i_hba *hba = cconn->hba;
> +
> +	if (hba) {
> +		write_lock(&hba->cconn_rwlock);
> +		list_del(&cconn->list_head);
> +		write_unlock(&hba->cconn_rwlock);
> +	}
> +}
> +
> +struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *snic,
> +				       struct net_device *ndev)
> +{
> +	struct cxgb3i_hba *hba;
> +	struct Scsi_Host *shost;
> +	int err;
> +
> +	shost = iscsi_host_alloc(&cxgb3i_host_template,
> +				 sizeof(struct cxgb3i_hba),
> +				 CXGB3I_SCSI_QDEPTH_DFLT);
> +	if (!shost) {
> +		cxgb3i_log_info("iscsi_host_alloc failed.\n");
> +		return NULL;
> +	}
> +
> +	shost->transportt = cxgb3i_scsi_transport;
> +	shost->max_lun = 512;
> +	shost->max_id = 0;
> +	shost->max_channel = 0;
> +	shost->max_cmd_len = 16;

It looks like the code supports AHS; is the max_cmd_len of 16 a hardware
limitation? I could not find such a limitation in the submitted
code. Maybe use SCSI_MAX_VARLEN_CDB_SIZE.
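
An untested sketch, assuming the hardware really imposes no CDB-length
limit:

	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;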

> +
> +	hba = iscsi_host_priv(shost);
> +	INIT_LIST_HEAD(&hba->cconn_list);
> +	rwlock_init(&hba->cconn_rwlock);
> +	hba->snic = snic;
> +	hba->ndev = ndev;
> +	hba->shost = shost;
> +
> +	pci_dev_get(snic->pdev);
> +	err = iscsi_host_add(shost, &snic->pdev->dev);
> +	if (err) {
> +		cxgb3i_log_info("iscsi_host_add failed.\n");
> +		goto pci_dev_put;
> +	}
> +
> +	cxgb3i_log_debug("shost 0x%p, hba 0x%p, no %u.\n",
> +			 shost, hba, shost->host_no);
> +
> +	return hba;
> +
> +pci_dev_put:
> +	pci_dev_put(snic->pdev);
> +	scsi_host_put(shost);
> +	return NULL;
> +}
> +
> +void cxgb3i_hba_host_remove(struct cxgb3i_hba *hba)
> +{
> +	if (hba->shost) {
> +		cxgb3i_log_debug("shost 0x%p, hba 0x%p, no %u.\n",
> +				 hba->shost, hba, hba->shost->host_no);
> +		iscsi_host_remove(hba->shost);
> +		pci_dev_put(hba->snic->pdev);
> +		/* cleanup connections ? */
> +		iscsi_host_free(hba->shost);
> +	}
> +}
> +
> +/**
> + * cxgb3i_ep_connect - establish TCP connection to target portal
> + * @dst_addr:		target IP address
> + * @non_blocking:	blocking or non-blocking call
> + *
> + * Initiates a TCP/IP connection to the dst_addr
> + */
> +static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr,
> +						int non_blocking)
> +{
> +	struct iscsi_endpoint *ep;
> +	struct cxgb3i_endpoint *cep;
> +	struct cxgb3i_hba *hba;
> +	struct s3_conn *c3cn;
> +	int err;
> +
> +	c3cn = cxgb3i_c3cn_create();
> +	if (!c3cn) {
> +		cxgb3i_log_info("ep connect OOM.\n");
> +		return NULL;
> +	}
> +
> +	err = cxgb3i_c3cn_connect(c3cn, (struct sockaddr_in *)dst_addr);
> +	if (err < 0) {
> +		cxgb3i_log_info("ep connect failed.\n");
> +		goto release_conn;
> +	}
> +	hba = cxgb3i_hba_find_by_netdev(c3cn->dst_cache->dev);
> +	if (!hba) {
> +		cxgb3i_log_info("NOT going through cxgbi device.\n");
> +		goto release_conn;
> +	}
> +
> +	ep = iscsi_create_endpoint(sizeof(*cep));
> +	if (!ep) {
> +		cxgb3i_log_info("iscsi alloc ep, OOM.\n");
> +		goto release_conn;
> +	}
> +	cep = ep->dd_data;
> +	cep->c3cn = c3cn;
> +	cep->hba = hba;
> +
> +	cxgb3i_log_debug("iscsi_ep 0x%p, cxgb_ep 0x%p, hba 0x%p, c3cn 0x%p.\n",
> +			  ep, cep, hba, c3cn);
> +	return ep;
> +
> +release_conn:
> +	c3cn_release(c3cn);
> +	return NULL;
> +}
> +
> +/**
> + * cxgb3i_ep_poll - polls for TCP connection establishement
> + * @ep:		TCP connection (endpoint) handle
> + * @timeout_ms:	timeout value in milli secs
> + *
> + * polls for TCP connect request to complete
> + */
> +static int cxgb3i_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
> +{
> +	cxgb3i_log_debug("iscsi_ep 0x%p, timeout_ms %d.\n", ep, timeout_ms);
> +	return 1;
> +}
> +
> +/**
> + * cxgb3i_ep_disconnect - teardown TCP connection
> + * @ep:		TCP connection (endpoint) handle
> + *
> + * teardown TCP connection
> + */
> +static void cxgb3i_ep_disconnect(struct iscsi_endpoint *ep)
> +{
> +	struct cxgb3i_endpoint *cep = (struct cxgb3i_endpoint *)ep->dd_data;
> +	struct cxgb3i_conn *cconn = cep->cconn;
> +
> +	cxgb3i_log_debug("ep 0x%p, cep 0x%p.\n", ep, cep);
> +
> +	if (cconn && cconn->conn) {
> +		struct iscsi_conn *conn = cconn->conn;
> +		struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
> +		write_lock_bh(&cep->c3cn->callback_lock);
> +		cep->c3cn->user_data = NULL;
> +		set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
> +		cconn->cep = NULL;
> +		tcp_conn->sock = NULL;
> +		write_unlock_bh(&cep->c3cn->callback_lock);
> +	}
> +
> +	c3cn_release(cep->c3cn);
> +	iscsi_destroy_endpoint(ep);
> +}
> +
> +/**
> + * cxgb3i_session_create - create a new iscsi session
> + * @cmds_max:		max # of commands
> + * @qdepth:		scsi queue depth
> + * @initial_cmdsn:	initial iscsi CMDSN for this session
> + * @host_no:		pointer to return host no
> + *
> + * Creates a new iSCSI session
> + */
> +static struct iscsi_cls_session *cxgb3i_session_create(struct iscsi_endpoint
> +						       *ep, uint16_t cmds_max,
> +						       uint16_t qdepth,
> +						       uint32_t initial_cmdsn,
> +						       uint32_t *host_no)
> +{
> +	struct cxgb3i_endpoint *cep;
> +	struct cxgb3i_hba *hba;
> +	struct Scsi_Host *shost;
> +	struct iscsi_cls_session *cls_session;
> +	struct iscsi_session *session;
> +	int i;
> +
> +	if (!ep) {
> +		cxgb3i_log_error("%s, missing endpoint.\n", __func__);
> +		return NULL;
> +	}
> +
> +	cep = (struct cxgb3i_endpoint *)ep->dd_data;
> +	hba = cep->hba;
> +	shost = hba->shost;
> +	cxgb3i_log_debug("ep 0x%p, cep 0x%p, hba 0x%p.\n", ep, cep, hba);
> +	BUG_ON(hba != iscsi_host_priv(shost));
> +
> +	*host_no = shost->host_no;
> +
> +	cls_session = iscsi_session_setup(&cxgb3i_iscsi_transport, shost,
> +					  cmds_max,
> +					  sizeof(struct iscsi_tcp_task),
> +					  initial_cmdsn, ISCSI_MAX_TARGET);
> +	if (!cls_session)
> +		return NULL;
> +
> +	session = cls_session->dd_data;
> +
> +	for (i = 0; i < session->cmds_max; i++) {
> +		struct iscsi_task *task = session->cmds[i];
> +		struct iscsi_tcp_task *tcp_task = task->dd_data;
> +
> +		task->hdr = &tcp_task->hdr.cmd_hdr;
> +		task->hdr_max = sizeof(tcp_task->hdr) - ISCSI_DIGEST_SIZE;

This little bit of code signals libiscsi that AHS is supported, by
making room for it in hdr_max.

> +	}
> +
> +	if (iscsi_r2tpool_alloc(session))
> +		goto remove_session;
> +
> +	return cls_session;
> +
> +remove_session:
> +	iscsi_session_teardown(cls_session);
> +	return NULL;
> +}
> +
> +/**
> + * cxgb3i_session_destroy - destroys iscsi session
> + * @cls_session:	pointer to iscsi cls session
> + *
> + * Destroys an iSCSI session instance and releases its all resources held
> + */
> +static void cxgb3i_session_destroy(struct iscsi_cls_session *cls_session)
> +{
> +	cxgb3i_log_debug("sess 0x%p.\n", cls_session);
> +	iscsi_r2tpool_free(cls_session->dd_data);
> +	iscsi_session_teardown(cls_session);
> +}
> +
> +/**
> + * cxgb3i_conn_create - create iscsi connection instance
> + * @cls_session:	pointer to iscsi cls session
> + * @cid:		iscsi cid
> + *
> + * Creates a new iSCSI connection instance for a given session
> + */
> +static struct iscsi_cls_conn *cxgb3i_conn_create(struct iscsi_cls_session
> +						 *cls_session, uint32_t cid)
> +{
> +	struct iscsi_cls_conn *cls_conn;
> +	struct iscsi_conn *conn;
> +	struct iscsi_tcp_conn *tcp_conn;
> +	struct cxgb3i_conn *cconn;
> +
> +	cxgb3i_log_debug("sess 0x%p, cid %u.\n", cls_session, cid);
> +
> +	cls_conn = iscsi_conn_setup(cls_session,
> +				    sizeof(*tcp_conn) + sizeof(*cconn), cid);
> +	if (!cls_conn)
> +		return NULL;
> +	conn = cls_conn->dd_data;
> +
> +	conn->max_xmit_dlength = conn->max_recv_dlength = 16224 - 56 - 256;

Why not use ULP2_PDU_PAYLOAD_DFLT (which is defined but never used)?
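
That is, assuming ULP2_PDU_PAYLOAD_DFLT is meant to be 16224 - 56 - 256,
simply:

	conn->max_xmit_dlength = conn->max_recv_dlength = ULP2_PDU_PAYLOAD_DFLT;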

> +
> +	tcp_conn = conn->dd_data;
> +	tcp_conn->iscsi_conn = conn;
> +
> +	cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
> +	cconn->conn = conn;
> +
> +	return cls_conn;
> +}
> +
> +/**
> + * cxgb3i_conn_bind - binds iscsi sess, conn and endpoint together
> + * @cls_session:	pointer to iscsi cls session
> + * @cls_conn:		pointer to iscsi cls conn
> + * @transport_eph:	64-bit EP handle
> + * @is_leading:		leading connection on this session?
> + *
> + * Binds together an iSCSI session, an iSCSI connection and a
> + *	TCP connection. This routine returns error code if the TCP
> + *	connection does not belong on the device iSCSI sess/conn is bound
> + */
> +
> +static int cxgb3i_conn_bind(struct iscsi_cls_session *cls_session,
> +			    struct iscsi_cls_conn *cls_conn,
> +			    uint64_t transport_eph, int is_leading)
> +{
> +	struct iscsi_conn *conn = cls_conn->dd_data;
> +	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
> +	struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
> +	struct iscsi_endpoint *ep;
> +	struct cxgb3i_endpoint *cep;
> +	struct s3_conn *c3cn;
> +	int err;
> +
> +	ep = iscsi_lookup_endpoint(transport_eph);
> +	if (!ep)
> +		return -EINVAL;
> +
> +	cxgb3i_log_debug("ep 0x%p, cls sess 0x%p, cls conn 0x%p.\n",
> +			 ep, cls_session, cls_conn);
> +
> +	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
> +	if (err)
> +		return -EINVAL;
> +
> +	cep = (struct cxgb3i_endpoint *)ep->dd_data;
> +	c3cn = cep->c3cn;
> +
> +	read_lock(&c3cn->callback_lock);
> +	tcp_conn->sock = (struct socket *)c3cn;
> +	c3cn->user_data = conn;
> +	read_unlock(&c3cn->callback_lock);
> +
> +	cconn->hba = cep->hba;
> +	cconn->cep = cep;
> +	cep->cconn = cconn;
> +
> +	conn->max_recv_dlength = cconn->hba->snic->rx_max_size - ISCSI_PDU_HEADER_MAX;
> +	conn->max_xmit_dlength = cconn->hba->snic->tx_max_size - ISCSI_PDU_HEADER_MAX;
> +
> +	spin_lock_bh(&conn->session->lock);
> +	sprintf(conn->portal_address, NIPQUAD_FMT,
> +		NIPQUAD(c3cn->daddr.sin_addr.s_addr));
> +	conn->portal_port = ntohs(c3cn->daddr.sin_port);
> +	spin_unlock_bh(&conn->session->lock);
> +
> +	iscsi_tcp_hdr_recv_prep(tcp_conn);
> +
> +	return 0;
> +}
> +
> +/**
> + * cxgb3i_conn_flush - flush tx
> + * @conn:	pointer to iscsi conn
> + */
> +static int cxgb3i_conn_flush(struct iscsi_conn *conn)
> +{
> +	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
> +	struct iscsi_segment *segment = &tcp_conn->out.segment;
> +
> +	if (segment->total_copied < segment->total_size)
> +		return cxgb3i_conn_ulp2_xmit(conn);
> +	return 0;
> +}
> +
> +/**
> + * cxgb3i_conn_get_param - return iscsi connection parameter to caller
> + * @cls_conn:	pointer to iscsi cls conn
> + * @param:	parameter type identifier
> + * @buf:	buffer pointer
> + *
> + * returns iSCSI connection parameters
> + */
> +static int cxgb3i_conn_get_param(struct iscsi_cls_conn *cls_conn,
> +				 enum iscsi_param param, char *buf)
> +{
> +	struct iscsi_conn *conn = cls_conn->dd_data;
> +	int len;
> +
> +	cxgb3i_log_debug("cls_conn 0x%p, param %d.\n", cls_conn, param);
> +
> +	switch (param) {
> +	case ISCSI_PARAM_CONN_PORT:
> +		spin_lock_bh(&conn->session->lock);
> +		len = sprintf(buf, "%hu\n", conn->portal_port);
> +		spin_unlock_bh(&conn->session->lock);
> +		break;
> +	case ISCSI_PARAM_CONN_ADDRESS:
> +		spin_lock_bh(&conn->session->lock);
> +		len = sprintf(buf, "%s\n", conn->portal_address);
> +		spin_unlock_bh(&conn->session->lock);
> +		break;
> +	default:
> +		return iscsi_conn_get_param(cls_conn, param, buf);
> +	}
> +
> +	return len;
> +}
> +
> +static int cxgb3i_conn_set_param(struct iscsi_cls_conn *cls_conn,
> +				 enum iscsi_param param, char *buf, int buflen)
> +{
> +	struct iscsi_conn *conn = cls_conn->dd_data;
> +	struct iscsi_session *session = conn->session;
> +	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
> +	struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
> +	int value, err = 0;
> +
> +	switch (param) {
> +	case ISCSI_PARAM_HDRDGST_EN:
> +		err = iscsi_set_param(cls_conn, param, buf, buflen);
> +		if (!err && conn->hdrdgst_en)
> +			cxgb3i_conn_ulp_setup(cconn, conn->hdrdgst_en,
> +					      conn->datadgst_en);
> +		break;
> +	case ISCSI_PARAM_DATADGST_EN:
> +		err = iscsi_set_param(cls_conn, param, buf, buflen);
> +		if (!err && conn->datadgst_en)
> +			cxgb3i_conn_ulp_setup(cconn, conn->hdrdgst_en,
> +					      conn->datadgst_en);
> +		break;
> +	case ISCSI_PARAM_MAX_R2T:
> +		sscanf(buf, "%d", &value);
> +		if (value <= 0 || !is_power_of_2(value))
> +			return -EINVAL;
> +		if (session->max_r2t == value)
> +			break;
> +		iscsi_r2tpool_free(session);
> +		err = iscsi_set_param(cls_conn, param, buf, buflen);
> +		if (!err && iscsi_r2tpool_alloc(session))
> +			return -ENOMEM;
> +	case ISCSI_PARAM_MAX_RECV_DLENGTH:
> +		err = iscsi_set_param(cls_conn, param, buf, buflen);
> +		cxgb3i_log_debug("MAX_RECV %u.\n", conn->max_recv_dlength);
> +		break;
> +	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
> +		err = iscsi_set_param(cls_conn, param, buf, buflen);
> +		cxgb3i_log_debug("MAX_XMIT %u.\n", conn->max_xmit_dlength);
> +		break;
> +	default:
> +		return iscsi_set_param(cls_conn, param, buf, buflen);
> +	}
> +	return err;
> +}
> +
> +/**
> + * cxgb3i_host_get_param - returns host (adapter) related parameters
> + * @shost:	scsi host pointer
> + * @param:	parameter type identifier
> + * @buf:	buffer pointer
> + */
> +static int cxgb3i_host_get_param(struct Scsi_Host *shost,
> +				 enum iscsi_host_param param, char *buf)
> +{
> +	struct cxgb3i_hba *hba = iscsi_host_priv(shost);
> +	int i;
> +	int len = 0;
> +
> +	switch (param) {
> +	case ISCSI_HOST_PARAM_HWADDRESS:
> +		for (i = 0; i < 6; i++)
> +			len +=
> +			    sprintf(buf + len, "%02x.",
> +				    hba->ndev->dev_addr[i]);
> +		len--;
> +		buf[len] = '\0';
> +		break;
> +	case ISCSI_HOST_PARAM_NETDEV_NAME:
> +		len = sprintf(buf, "%s\n", hba->ndev->name);
> +		break;
> +	default:
> +		return iscsi_host_get_param(shost, param, buf);
> +	}
> +	return len;
> +}
> +
> +/**
> + * cxgb3i_conn_get_stats - returns iSCSI stats
> + * @cls_conn:	pointer to iscsi cls conn
> + * @stats:	pointer to iscsi statistic struct
> + */
> +static void cxgb3i_conn_get_stats(struct iscsi_cls_conn *cls_conn,
> +				  struct iscsi_stats *stats)
> +{
> +	struct iscsi_conn *conn = cls_conn->dd_data;
> +
> +	stats->txdata_octets = conn->txdata_octets;
> +	stats->rxdata_octets = conn->rxdata_octets;
> +	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
> +	stats->dataout_pdus = conn->dataout_pdus_cnt;
> +	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
> +	stats->datain_pdus = conn->datain_pdus_cnt;
> +	stats->r2t_pdus = conn->r2t_pdus_cnt;
> +	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
> +	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
> +	stats->digest_err = 0;
> +	stats->timeout_err = 0;
> +	stats->custom_length = 1;
> +	strcpy(stats->custom[0].desc, "eh_abort_cnt");
> +	stats->custom[0].value = conn->eh_abort_cnt;
> +}
> +
> +static inline u32 tag_base(struct cxgb3i_tag_format *format,
> +			   unsigned int idx, unsigned int age)
> +{
> +	u32 sw_bits = idx | (age << format->idx_bits);
> +	u32 tag = sw_bits >> format->rsvd_shift;
> +	tag <<= format->rsvd_bits + format->rsvd_shift;
> +	tag |= sw_bits & ((1 << format->rsvd_shift) - 1);
> +	return tag;
> +}
> +
> +static void cxgb3i_parse_itt(struct iscsi_conn *conn, itt_t itt,
> +			     int *idx, int *age)
> +{
> +	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
> +	struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
> +	struct cxgb3i_adapter *snic = cconn->hba->snic;
> +	u32 sw_bits;
> +
> +	cxgb3i_parse_tag(&snic->tag_format, itt, NULL, &sw_bits);
> +	if (idx)
> +		*idx = sw_bits & ISCSI_ITT_MASK;
> +	if (age)
> +		*age = (sw_bits >> snic->tag_format.idx_bits) & ISCSI_AGE_MASK;
> +}
> +
> +static int cxgb3i_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt)
> +{
> +	struct scsi_cmnd *sc = task->sc;
> +	struct iscsi_conn *conn = task->conn;
> +	struct iscsi_session *sess = conn->session;
> +	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
> +	struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
> +	struct cxgb3i_adapter *snic = cconn->hba->snic;
> +	u32 sw_tag = tag_base(&snic->tag_format, task->itt, sess->age);
> +	u32 tag = RESERVED_ITT;
> +
> +	if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE)) {
> +		struct s3_conn *c3cn =
> +			(struct s3_conn *)(tcp_conn->sock);
> +		tag =
> +		    cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag,
> +					   scsi_out(sc)->length,
> +					   scsi_out(sc)->table.sgl,
> +					   scsi_out(sc)->table.nents);

The DMA_FROM_DEVICE case should use scsi_in(sc), not scsi_out(sc).
You had no problems so far because for uni-directional commands
scsi_out() and scsi_in() are the same.
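
An untested sketch of the fix:

	if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE)) {
		struct s3_conn *c3cn = (struct s3_conn *)tcp_conn->sock;

		tag = cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag,
					     scsi_in(sc)->length,
					     scsi_in(sc)->table.sgl,
					     scsi_in(sc)->table.nents);
	}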

> +	}
> +	if (tag == RESERVED_ITT)
> +		tag = sw_tag | (snic->tag_format.rsvd_mask <<
> +				snic->tag_format.rsvd_shift);
> +	*hdr_itt = htonl(tag);
> +	return 0;
> +}
> +
> +static void cxgb3i_release_itt(struct iscsi_task *task, itt_t hdr_itt)
> +{
> +	struct scsi_cmnd *sc = task->sc;
> +	struct iscsi_conn *conn = task->conn;
> +	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
> +	struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
> +	struct cxgb3i_adapter *snic = cconn->hba->snic;
> +
> +	hdr_itt = ntohl(hdr_itt);
> +	if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE))
> +		cxgb3i_ddp_tag_release(snic, hdr_itt,
> +				       scsi_out(sc)->table.sgl,
> +				       scsi_out(sc)->table.nents);

Here, too, scsi_in() should be used.
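
Untested, but along the lines of:

	if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE))
		cxgb3i_ddp_tag_release(snic, hdr_itt,
				       scsi_in(sc)->table.sgl,
				       scsi_in(sc)->table.nents);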

> +}
> +
> +/**
> + * cxgb3i_host_template -- Scsi_Host_Template structure
> + *	used when registering with the scsi mid layer
> + */
> +static struct scsi_host_template cxgb3i_host_template = {
> +	.module = THIS_MODULE,
> +	.name = "Chelsio S3xx iSCSI Initiator",
> +	.proc_name = "cxgb3i",
> +	.queuecommand = iscsi_queuecommand,
> +	.change_queue_depth = iscsi_change_queue_depth,
> +	.can_queue = 128 * (ISCSI_DEF_XMIT_CMDS_MAX - 1),
> +	.sg_tablesize = SG_ALL,

iscsi_tcp supports sg chaining here. It looks like the submitted
code is sg-chaining-safe, so you can easily put:
	.sg_tablesize = ~0,

> +	.max_sectors = 0xFFFF,
> +	.cmd_per_lun = ISCSI_DEF_CMD_PER_LUN,
> +	.eh_abort_handler = iscsi_eh_abort,
> +	.eh_device_reset_handler = iscsi_eh_device_reset,
> +	.eh_target_reset_handler = iscsi_eh_target_reset,
> +	.use_clustering = DISABLE_CLUSTERING,
> +	.slave_alloc = iscsi_slave_alloc,
> +	.this_id = -1,
> +};
> +
> +static struct iscsi_transport cxgb3i_iscsi_transport = {
> +	.owner = THIS_MODULE,
> +	.name = "cxgb3i",
> +	.caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
> +	    | CAP_DATADGST | CAP_DIGEST_OFFLOAD,
> +	.param_mask = ISCSI_MAX_RECV_DLENGTH |
> +	    ISCSI_MAX_XMIT_DLENGTH |
> +	    ISCSI_HDRDGST_EN |
> +	    ISCSI_DATADGST_EN |
> +	    ISCSI_INITIAL_R2T_EN |
> +	    ISCSI_MAX_R2T |
> +	    ISCSI_IMM_DATA_EN |
> +	    ISCSI_FIRST_BURST |
> +	    ISCSI_MAX_BURST |
> +	    ISCSI_PDU_INORDER_EN |
> +	    ISCSI_DATASEQ_INORDER_EN |
> +	    ISCSI_ERL |
> +	    ISCSI_CONN_PORT |
> +	    ISCSI_CONN_ADDRESS |
> +	    ISCSI_EXP_STATSN |
> +	    ISCSI_PERSISTENT_PORT |
> +	    ISCSI_PERSISTENT_ADDRESS |
> +	    ISCSI_TARGET_NAME | ISCSI_TPGT |
> +	    ISCSI_USERNAME | ISCSI_PASSWORD |
> +	    ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
> +	    ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
> +	    ISCSI_LU_RESET_TMO |
> +	    ISCSI_PING_TMO | ISCSI_RECV_TMO |
> +	    ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
> +	.host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
> +	    ISCSI_HOST_INITIATOR_NAME | ISCSI_HOST_NETDEV_NAME,
> +	.get_host_param = cxgb3i_host_get_param,
> +	/* session management */
> +	.create_session = cxgb3i_session_create,
> +	.destroy_session = cxgb3i_session_destroy,
> +	.get_session_param = iscsi_session_get_param,
> +	/* connection management */
> +	.create_conn = cxgb3i_conn_create,
> +	.bind_conn = cxgb3i_conn_bind,
> +	.destroy_conn = iscsi_conn_teardown,
> +	.start_conn = iscsi_conn_start,
> +	.stop_conn = iscsi_conn_stop,
> +	.flush_conn = cxgb3i_conn_flush,
> +	.get_conn_param = cxgb3i_conn_get_param,
> +	.set_param = cxgb3i_conn_set_param,
> +	.get_stats = cxgb3i_conn_get_stats,
> +	/* pdu xmit req. from user space */
> +	.send_pdu = iscsi_conn_send_pdu,
> +	/* task */
> +	.init_task = iscsi_tcp_task_init,
> +	.xmit_task = iscsi_tcp_task_xmit,
> +	.cleanup_task = iscsi_tcp_cleanup_task,
> +	.parse_itt = cxgb3i_parse_itt,
> +	.reserve_itt = cxgb3i_reserve_itt,
> +	.release_itt = cxgb3i_release_itt,
> +	/* TCP connect/disconnect */
> +	.ep_connect = cxgb3i_ep_connect,
> +	.ep_poll = cxgb3i_ep_poll,
> +	.ep_disconnect = cxgb3i_ep_disconnect,
> +	/* Error recovery timeout call */
> +	.session_recovery_timedout = iscsi_session_recovery_timedout,
> +};
> +
> +int cxgb3i_iscsi_init(void)
> +{
> +	cxgb3i_scsi_transport =
> +	    iscsi_register_transport(&cxgb3i_iscsi_transport);
> +	if (!cxgb3i_scsi_transport) {
> +		cxgb3i_log_error("Could not register cxgb3i transport.\n");
> +		return -ENODEV;
> +	}
> +	cxgb3i_log_debug("cxgb3i transport 0x%p.\n", cxgb3i_scsi_transport);
> +	return 0;
> +}
> +
> +void cxgb3i_iscsi_cleanup(void)
> +{
> +	if (cxgb3i_scsi_transport) {
> +		cxgb3i_log_debug("cxgb3i transport 0x%p.\n",
> +				 cxgb3i_scsi_transport);
> +		iscsi_unregister_transport(&cxgb3i_iscsi_transport);
> +		cxgb3i_scsi_transport = NULL;
> +	}
> +}
> diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c
> new file mode 100644
> index 0000000..d4d8b85
> --- /dev/null
> +++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c
> @@ -0,0 +1,2001 @@
> +/*
> + * Copyright (C) 2003-2008 Chelsio Communications.  All rights reserved.
> + *
> + * Written by Dimitris Michailidis (dm@...lsio.com)
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the LICENSE file included in this
> + * release for licensing terms and conditions.
> + */
> +
> +#include <linux/if_vlan.h>
> +#include <linux/version.h>
> +
> +#include "cxgb3_defs.h"
> +#include "cxgb3_ctl_defs.h"
> +#include "firmware_exports.h"
> +#include "cxgb3i_offload.h"
> +#include "cxgb3i_ulp2.h"
> +
> +static int rcv_win = 256 * 1024;
> +module_param(rcv_win, int, 0644);
> +MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
> +
> +static int snd_win = 32 * 1024;
> +module_param(snd_win, int, 0644);
> +MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
> +
> +static int rx_credit_thres = 10 * 1024;
> +module_param(rx_credit_thres, int, 0644);
> +MODULE_PARM_DESC(snd_win, "RX credits return threshold in bytes (default=10KB)");
> +
> +static unsigned int max_connect = 8 * 1024;
> +module_param(max_connect, uint, 0644);
> +MODULE_PARM_DESC(max_connect, "Max. # of connections (default=8092)");
> +
> +static unsigned int sport_base = 20000;
> +module_param(sport_base, uint, 0644);
> +MODULE_PARM_DESC(sport_start, "starting port number (default=20000)");
> +
> +#ifdef __DEBUG_C3CN_CONN__
> +#define c3cn_conn_debug         cxgb3i_log_debug
> +#else
> +#define c3cn_conn_debug(fmt...)
> +#endif
> +
> +#ifdef __DEBUG_C3CN_TX__
> +#define c3cn_tx_debug         cxgb3i_log_debug
> +#else
> +#define c3cn_tx_debug(fmt...)
> +#endif
> +
> +/* minimal port allocation management scheme */
> +spinlock_t sport_map_lock;
> +unsigned int sport_map_next = 0;
> +unsigned char *sport_map = NULL;
> +
> +/*
> + * Find a free source port in our allocation map.  We use a very simple rotor
> + * scheme to look for the next free port.
> + *
> + * If a source port has been specified make sure that it doesn't collide with
> + * our normal source port allocation map.  If it's outside the range of our
> + * allocation scheme just let them use it.
> + */
> +static int c3cn_get_port(struct s3_conn *c3cn)
> +{
> +	unsigned int start;
> +
> +	if (!sport_map)
> +		goto error_out;
> +
> +	if (c3cn->saddr.sin_port != 0) {
> +		int sport = ntohs(c3cn->saddr.sin_port) - sport_base;
> +		int err = 0;
> +
> +		if (sport < 0 || sport >= max_connect)
> +			return 0;
> +		spin_lock(&sport_map_lock);
> +		err = __test_and_set_bit(sport, sport_map);
> +		spin_unlock(&sport_map_lock);
> +		return (err ? -EADDRINUSE : 0);
> +	}
> +
> +	spin_lock(&sport_map_lock);
> +	start = sport_map_next;
> +	do {
> +		unsigned int new = sport_map_next;
> +		if (++sport_map_next >= max_connect)
> +			sport_map_next = 0;
> +		if (!(__test_and_set_bit(new, sport_map))) {
> +			spin_unlock(&sport_map_lock);
> +			c3cn->saddr.sin_port = htons(sport_base + new);
> +			return 0;
> +		}
> +	} while (sport_map_next != start);
> +	spin_unlock(&sport_map_lock);
> +
> +error_out:
> +	return -EADDRNOTAVAIL;
> +}
> +
> +/*
> + * Deallocate a source port from the allocation map.  If the source port is
> + * outside our allocation range just return -- the caller is responsible for
> + * keeping track of their port usage outside of our allocation map.
> + */
> +static void c3cn_put_port(struct s3_conn *c3cn)
> +{
> +	int old = ntohs(c3cn->saddr.sin_port) - sport_base;
> +	c3cn->saddr.sin_port = 0;
> +
> +	if (old < 0 || old >= max_connect)
> +		return;
> +
> +	spin_lock(&sport_map_lock);
> +	__clear_bit(old, sport_map);
> +	spin_unlock(&sport_map_lock);
> +}
> +
> +static inline unsigned int c3cn_in_state(const struct s3_conn *c3cn,
> +                                         unsigned int states)
> +{
> +	return (states & c3cn->state);
> +}
> +
> +static void c3cn_set_state(struct s3_conn *c3cn, int state)
> +{
> +	c3cn_conn_debug("c3cn 0x%p state -> 0x%x.\n", c3cn, state);
> +	if (state == C3CN_STATE_CLOSE)
> +		c3cn_put_port(c3cn);
> +	c3cn->state = state;
> +}
> +
> +
> +void c3cn_reset_timer(struct s3_conn *c3cn, struct timer_list* timer,
> +		      unsigned long expires)
> +{
> +	if (!mod_timer(timer, expires))
> +		c3cn_hold(c3cn);
> +}
> +
> +typedef int (cxgb3_cpl_handler_decl) (struct t3cdev *,
> +				      struct sk_buff *, void *);
> +
> +static cxgb3_cpl_handler_decl do_act_establish;
> +static cxgb3_cpl_handler_decl do_act_open_rpl;
> +static cxgb3_cpl_handler_decl do_wr_ack;
> +static cxgb3_cpl_handler_decl do_peer_close;
> +static cxgb3_cpl_handler_decl do_abort_req;
> +static cxgb3_cpl_handler_decl do_abort_rpl;
> +static cxgb3_cpl_handler_decl do_close_con_rpl;
> +static cxgb3_cpl_handler_decl do_iscsi_hdr;
> +
> +/*
> + * Protocol functions for our connections.
> + */
> +static int c3cn_destroy(struct s3_conn *);
> +static void process_deferq(struct work_struct *);
> +
> +static LIST_HEAD(cxgb3_list);
> +static DEFINE_MUTEX(cxgb3_list_lock);
> +
> +/*
> + * For ULP connections HW may inserts digest bytes into the pdu. This array
> + * contains the compensating extra lengths for ULP packets.  It is indexed by
> + * a packet's ULP submode.
> + */
> +static const unsigned int cxgb3_ulp_extra_len[] = { 0, 4, 4, 8 };
> +
> +/*
> + * Return the length of any HW additions that will be made to a Tx packet.
> + * Such additions can happen for some types of ULP packets.
> + */
> +static inline unsigned int ulp_extra_len(const struct sk_buff *skb)
> +{
> +	return cxgb3_ulp_extra_len[skb_ulp_mode(skb) & 3];
> +}
> +
> +/*
> + * Size of WRs in bytes.  Note that we assume all devices we are handling have
> + * the same WR size.
> + */
> +static unsigned int wrlen __read_mostly;
> +
> +/*
> + * The number of WRs needed for an skb depends on the number of page fragments
> + * in the skb and whether it has any payload in its main body.  This maps the
> + * length of the gather list represented by an skb into the # of necessary WRs.
> + */
> +static unsigned int skb_wrs[MAX_SKB_FRAGS + 2] __read_mostly;
> +
> +static void s3_init_wr_tab(unsigned int wr_len)
> +{
> +	int i;
> +
> +	if (skb_wrs[1])		/* already initialized */
> +		return;
> +
> +	for (i = 1; i < ARRAY_SIZE(skb_wrs); i++) {
> +		int sgl_len = (3 * i) / 2 + (i & 1);
> +
> +		sgl_len += 3;
> +		skb_wrs[i] = (sgl_len <= wr_len
> +			      ? 1 : 1 + (sgl_len - 2) / (wr_len - 1));
> +	}
> +
> +	wrlen = wr_len * 8;
> +}
> +
> +/*
> + * Initialization/cleanup cxgb3 API operations.
> + */
> +/*
> + * large memory chunk allocation/release
> + */
> +void *cxgb3i_alloc_big_mem(unsigned int size)
> +{
> +	void *p = kmalloc(size, GFP_KERNEL);
> +	if (!p)
> +		p = vmalloc(size);
> +	if (p)
> +		memset(p, 0, size);
> +	return p;
> +}
> +
> +void cxgb3i_free_big_mem(void *addr)
> +{
> +	unsigned long p = (unsigned long)addr;
> +	if (p >= VMALLOC_START && p < VMALLOC_END)
> +		vfree(addr);
> +	else
> +		kfree(addr);
> +}
> +
> +void cxgb3i_sdev_cleanup(cxgb3_cpl_handler_func *cpl_handlers)
> +{
> +	memset(cpl_handlers, 0, NUM_CPL_CMDS*(sizeof(*cpl_handlers)));
> +	if (sport_map)
> +		cxgb3i_free_big_mem(sport_map);
> +}
> +
> +int cxgb3i_sdev_init(cxgb3_cpl_handler_func *cpl_handlers)
> +{
> +	cpl_handlers[CPL_ACT_ESTABLISH] = do_act_establish;
> +	cpl_handlers[CPL_ACT_OPEN_RPL] = do_act_open_rpl;
> +	cpl_handlers[CPL_PEER_CLOSE] = do_peer_close;
> +	cpl_handlers[CPL_ABORT_REQ_RSS] = do_abort_req;
> +	cpl_handlers[CPL_ABORT_RPL_RSS] = do_abort_rpl;
> +	cpl_handlers[CPL_CLOSE_CON_RPL] = do_close_con_rpl;
> +	cpl_handlers[CPL_TX_DMA_ACK] = do_wr_ack;
> +	cpl_handlers[CPL_ISCSI_HDR] = do_iscsi_hdr;
> +
> +	sport_map = cxgb3i_alloc_big_mem((max_connect + 7)/8);
> +	if (!sport_map)
> +		return -ENOMEM;
> +	return 0;
> +}
> +
> +void cxgb3i_sdev_add(struct t3cdev *cdev, struct cxgb3_client *client)
> +{
> +	struct cxgb3i_sdev_data *cdata;
> +	struct adap_ports *ports;
> +	struct ofld_page_info rx_page_info;
> +	unsigned int wr_len;
> +	int i;
> +
> +	cdata = kzalloc(sizeof *cdata, GFP_KERNEL);
> +	if (!cdata)
> +		return;
> +	ports = kzalloc(sizeof *ports, GFP_KERNEL);
> +	if (!ports)
> +		goto free_ports;
> +	cdata->ports = ports;
> +
> +	if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0 ||
> +	    cdev->ctl(cdev, GET_PORTS, cdata->ports) < 0 ||
> +	    cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info) < 0)
> +		goto free_ports;
> +
> +	s3_init_wr_tab(wr_len);
> +
> +	INIT_LIST_HEAD(&cdata->list);
> +	cdata->cdev = cdev;
> +	cdata->client = client;
> +	cdata->rx_page_size = rx_page_info.page_size;
> +	skb_queue_head_init(&cdata->deferq);
> +	INIT_WORK(&cdata->deferq_task, process_deferq);
> +
> +	for (i = 0; i < ports->nports; i++)
> +		NDEV2CDATA(ports->lldevs[i]) = cdata;
> +
> +	mutex_lock(&cxgb3_list_lock);
> +	list_add_tail(&cdata->list, &cxgb3_list);
> +	mutex_unlock(&cxgb3_list_lock);
> +
> +	return;
> +
> +free_ports:
> +	kfree(ports);
> +	kfree(cdata);
> +}
> +
> +void cxgb3i_sdev_remove(struct t3cdev *cdev)
> +{
> +	struct cxgb3i_sdev_data *cdata = CXGB3_SDEV_DATA(cdev);
> +	struct adap_ports *ports = cdata->ports;
> +	int i;
> +
> +	for (i = 0; i < ports->nports; i++)
> +		NDEV2CDATA(ports->lldevs[i]) = NULL;
> +
> +	mutex_lock(&cxgb3_list_lock);
> +	list_del(&cdata->list);
> +	mutex_unlock(&cxgb3_list_lock);
> +
> +	kfree(ports);
> +	kfree(cdata);
> +}
> +
> +/*
> + * Return TRUE if the specified net device is for a port on one of our
> + * registered adapters.
> + */
> +static int is_cxgb3_dev(struct net_device *dev)
> +{
> +	struct cxgb3i_sdev_data *cdata;
> +
> +	mutex_lock(&cxgb3_list_lock);
> +	list_for_each_entry(cdata, &cxgb3_list, list) {
> +		struct adap_ports *ports = cdata->ports;
> +		int i;
> +
> +		for (i = 0; i < ports->nports; i++)
> +			if (dev == ports->lldevs[i]) {
> +				mutex_unlock(&cxgb3_list_lock);
> +				return 1;
> +			}
> +	}
> +	mutex_unlock(&cxgb3_list_lock);
> +	return 0;
> +}
> +
> +/*
> + * Primary cxgb3 API operations.
> + * =============================
> + */
> +
> +static int s3_push_frames(struct s3_conn *, int);
> +static int s3_send_reset(struct s3_conn *, int, struct sk_buff *);
> +
> +struct s3_conn * cxgb3i_c3cn_create(void)
> +{
> +	struct s3_conn *c3cn;
> +
> +	c3cn = kzalloc(sizeof(*c3cn), GFP_KERNEL);
> +	if (c3cn == NULL)
> +		return NULL;
> +
> +	c3cn->flags = 0;
> +	spin_lock_init(&c3cn->lock);
> +	atomic_set(&c3cn->refcnt, 1);
> +	skb_queue_head_init(&c3cn->receive_queue);
> +	skb_queue_head_init(&c3cn->write_queue);
> +	setup_timer(&c3cn->retry_timer, NULL, (unsigned long)c3cn);
> +	rwlock_init(&c3cn->callback_lock);
> +
> +	return c3cn;
> +}
> +
> +static void mk_close_req(struct s3_conn *);
> +static inline void s3_purge_write_queue(struct s3_conn *);
> +
> +/*
> + * Release a connection's local port if the connection is bound.
> + */
> +static inline void release_port(struct s3_conn *c3cn)
> +{
> +	c3cn_conn_debug("c3cn 0x%p, port %u.\n", c3cn, c3cn->saddr.sin_port);
> +	if (c3cn->saddr.sin_port)
> +		c3cn_put_port(c3cn);
> +}
> +
> +static void c3cn_done(struct s3_conn *c3cn)
> +{
> +	c3cn_conn_debug("c3cn 0x%p.\n", c3cn);
> +
> +	c3cn_set_state(c3cn, C3CN_STATE_CLOSE);
> +	c3cn->shutdown = C3CN_SHUTDOWN_MASK;
> +
> +	cxgb3i_conn_closing(c3cn);
> +}
> +
> +void c3cn_close(struct s3_conn *c3cn)
> +{
> +	int data_lost, old_state;
> +
> +	c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n",
> +			 c3cn, c3cn->state, c3cn->flags);
> +
> +	dst_confirm(c3cn->dst_cache);
> +
> +	spin_lock_bh(&c3cn->lock);
> +	c3cn->shutdown |= C3CN_SHUTDOWN_MASK;
> +
> +	/*
> +	 * We need to flush the receive buffs.  We do this only on the
> +	 * descriptor close, not protocol-sourced closes, because the
> +	 * reader process may not have drained the data yet!  Make a note
> +	 * of whether any received data will be lost so we can decide whether
> +	 * to FIN or RST.
> +	 */
> +	data_lost = skb_queue_len(&c3cn->receive_queue);
> +	__skb_queue_purge(&c3cn->receive_queue);
> +
> +	if (c3cn->state == C3CN_STATE_CLOSE)	/* Nothing if we are already closed */
> +		;
> +	else if (data_lost || c3cn->state == C3CN_STATE_SYN_SENT) {
> +		/* Unread data was tossed, zap the connection. */
> +		s3_send_reset(c3cn, CPL_ABORT_SEND_RST, NULL);
> +		release_port(c3cn);
> +		goto unlock;
> +	} else if (c3cn->state == C3CN_STATE_ESTABLISHED) {
> +		c3cn_set_state(c3cn, C3CN_STATE_CLOSING);
> +		mk_close_req(c3cn);
> +	}
> +
> +unlock:
> +	old_state = c3cn->state;
> +	c3cn_hold(c3cn); /* must last past the potential destroy() */
> +
> +	spin_unlock_bh(&c3cn->lock); /* Final release in connection's lifetime. */
> +
> +	/*
> +	 * There are no more user references at this point.  Grab the
> +	 * connection lock and finish the close.
> +	 */
> +	local_bh_disable();
> +	spin_lock(&c3cn->lock);
> +
> +	/*
> +	 * Because the connection was orphaned before the spin_lock()
> +	 * either the backlog or a BH may have already destroyed it.
> +	 * Bail out if so.
> +	 */
> +	if (old_state != C3CN_STATE_CLOSE && c3cn->state == C3CN_STATE_CLOSE)
> +		goto out;
> +
> +	if (c3cn->state == C3CN_STATE_CLOSE)
> +		c3cn_destroy(c3cn);
> +
> +out:
> +	spin_unlock(&c3cn->lock);
> +	local_bh_enable();
> +	c3cn_put(c3cn);
> +}
> +
> +/*
> + * Destroy connection.  Purge the write queue and drop a reference on the
> + * connection.
> + */
> +static int c3cn_destroy(struct s3_conn *c3cn)
> +{
> +	c3cn_conn_debug("c3cn 0x%p.\n", c3cn);
> +
> +	s3_purge_write_queue(c3cn);
> +	c3cn_put(c3cn);
> +	return 0;
> +}
> +
> +/*
> + * Local utility routines used to implement primary cxgb3 API operations.
> + * ======================================================================
> + */
> +
> +static int s3_connect(struct s3_conn *);
> +static u32 s3_send_rx_credits(struct s3_conn *, u32, u32, int);
> +static void mk_act_open_req(struct s3_conn *, struct sk_buff *,
> +			    unsigned int, const struct l2t_entry *);
> +static void skb_entail(struct s3_conn *, struct sk_buff *, int);
> +
> +static inline void reset_wr_list(struct s3_conn *c3cn)
> +{
> +	c3cn->wr_pending_head = NULL;
> +}
> +
> +/*
> + * Add a WR to a connections's list of pending WRs.  This is a singly-linked
> + * list of sk_buffs operating as a FIFO.  The head is kept in wr_pending_head
> + * and the tail in wr_pending_tail.
> + */
> +static inline void enqueue_wr(struct s3_conn *c3cn,
> +			      struct sk_buff *skb)
> +{
> +	skb->sp = NULL;
> +
> +	/*
> +	 * We want to take an extra reference since both us and the driver
> +	 * need to free the packet before it's really freed.  We know there's
> +	 * just one user currently so we use atomic_set rather than skb_get
> +	 * to avoid the atomic op.
> +	 */
> +	atomic_set(&skb->users, 2);
> +
> +	if (!c3cn->wr_pending_head)
> +		c3cn->wr_pending_head = skb;
> +	else
> +		c3cn->wr_pending_tail->sp = (void *)skb;
> +	c3cn->wr_pending_tail = skb;
> +}
> +
> +/*
> + * The next two functions calculate the option 0 value for a connection.
> + */
> +static inline int compute_wscale(int win)
> +{
> +	int wscale = 0; 
> +	while (wscale < 14 && (65535<<wscale) < win)
> +		wscale++;
> +	return wscale;
> +}
> +
> +static inline unsigned int calc_opt0h(struct s3_conn *c3cn)
> +{
> +	int wscale = compute_wscale(rcv_win);
> +	return (V_KEEP_ALIVE(1) |
> +		F_TCAM_BYPASS |
> +		V_WND_SCALE(wscale) |
> +		V_MSS_IDX(c3cn->mss_idx));
> +}
> +
> +static inline unsigned int calc_opt0l(struct s3_conn *c3cn)
> +{
> +	return (V_ULP_MODE(ULP_MODE_ISCSI) |
> +		V_RCV_BUFSIZ(rcv_win>>10));	
> +}
> +
> +static inline void make_tx_data_wr(struct s3_conn *c3cn,
> +				   struct sk_buff *skb, int len)
> +{
> +	struct tx_data_wr *req;
> +
> +	skb_reset_transport_header(skb);
> +	req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req));
> +	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
> +	req->wr_lo = htonl(V_WR_TID(c3cn->tid));
> +	req->sndseq = htonl(c3cn->snd_nxt);
> +	/* len includes the length of any HW ULP additions */
> +	req->len = htonl(len);
> +	req->param = htonl(V_TX_PORT(c3cn->l2t->smt_idx));
> +	/* V_TX_ULP_SUBMODE sets both the mode and submode */
> +	req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(skb)) |
> +			   V_TX_SHOVE((skb_peek(&c3cn->write_queue) ? 0 : 1)));
> +
> +	if (!c3cn_flag(c3cn, C3CN_TX_DATA_SENT)) {
> +
> +		req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
> +				    V_TX_CPU_IDX(c3cn->qset));
> +
> +		/* Sendbuffer is in units of 32KB.
> +		 */
> +		req->param |= htonl(V_TX_SNDBUF(snd_win >> 15));
> +		c3cn_set_flag(c3cn, C3CN_TX_DATA_SENT);
> +	}
> +}
> +
> +static struct rtable *find_route(__be32 saddr, __be32 daddr,
> +				 __be16 sport, __be16 dport)
> +{
> +	struct rtable *rt;
> +	struct flowi fl = {
> +		.oif = 0,
> +		.nl_u = {
> +			 .ip4_u = {
> +				   .daddr = daddr,
> +				   .saddr = saddr,
> +				   .tos = 0 } },
> +		.proto = IPPROTO_TCP,
> +		.uli_u = {
> +			  .ports = {
> +				    .sport = sport,
> +				    .dport = dport } } };
> +
> +	if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0))
> +		return NULL;
> +	return rt;
> +}
> +
> +int cxgb3i_c3cn_connect(struct s3_conn *c3cn, struct sockaddr_in *usin)
> +{
> +	struct rtable *rt;
> +	int err;
> +
> +	if (usin->sin_family != AF_INET)
> +		return -EAFNOSUPPORT;
> +
> +	/* get a source port if one hasn't been provided */
> +	err = c3cn_get_port(c3cn);
> +	if (err)
> +		return err;
> +	c3cn_conn_debug("c3cn 0x%p get port %u.\n", c3cn, ntohs(c3cn->saddr.sin_port));
> +
> +	c3cn->daddr.sin_port = usin->sin_port;
> +	c3cn->daddr.sin_addr.s_addr = usin->sin_addr.s_addr;
> +
> +	rt = find_route(c3cn->saddr.sin_addr.s_addr,
> +			c3cn->daddr.sin_addr.s_addr,
> +			c3cn->saddr.sin_port,
> +			c3cn->daddr.sin_port);
> +	if (rt == NULL) {
> +		c3cn_conn_debug("NO route to 0x%x, port %u.\n", 
> +				c3cn->daddr.sin_addr.s_addr,
> +				ntohs(c3cn->daddr.sin_port));
> +		return -ENETUNREACH;
> +	}
> +
> +	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
> +		c3cn_conn_debug("multi-cast route to 0x%x, port %u.\n", 
> +				c3cn->daddr.sin_addr.s_addr,
> +				ntohs(c3cn->daddr.sin_port));
> +		ip_rt_put(rt);
> +		return -ENETUNREACH;
> +	}
> +
> +	if (!c3cn->saddr.sin_addr.s_addr)
> +		c3cn->saddr.sin_addr.s_addr = rt->rt_src;
> +
> +	c3cn_conn_debug("c3cn 0x%p -> SYN_SENT.\n", c3cn);
> +	c3cn_set_state(c3cn, C3CN_STATE_SYN_SENT);
> +
> +	/* now commit destination to connection */
> +	c3cn->dst_cache = &rt->u.dst;
> +
> +	if (s3_connect(c3cn))
> +		return 0;
> +	/*
> +	 * If we get here, we don't have an offload connection so simply
> +	 * return a failure.
> +	 */
> +	err = -ENOTSUPP;
> +
> +	/*
> +	 * This trashes the connection and releases the local port,
> +	 * if necessary.
> +	 */
> +	c3cn_conn_debug("c3cn 0x%p -> CLOSE.\n", c3cn);
> +	c3cn_set_state(c3cn, C3CN_STATE_CLOSE);
> +	ip_rt_put(rt);
> +	c3cn_put_port(c3cn);
> +	c3cn->daddr.sin_port = 0;
> +	return err;
> +}
> +
> +/*
> + * Set of states for which we should return RX credits.
> + */
> +#define CREDIT_RETURN_STATE (C3CN_STATE_ESTABLISHED)
> +
> +/*
> + * Called after some received data has been read.  It returns RX credits
> + * to the HW for the amount of data processed.
> + */
> +void cxgb3i_c3cn_rx_credits(struct s3_conn *c3cn, int copied)
> +{
> +	struct t3cdev *cdev;
> +	int must_send;
> +	u32 credits, dack = 0;
> +
> +	if (!c3cn_in_state(c3cn, CREDIT_RETURN_STATE))
> +		return;
> +
> +	credits = c3cn->copied_seq - c3cn->rcv_wup;
> +	if (unlikely(!credits))
> +		return;
> +
> +	cdev = c3cn->cdev;
> +
> +	if (unlikely(rx_credit_thres == 0))
> +		return;
> +
> +	dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
> +
> +	/*
> +	 * For coalescing to work effectively ensure the receive window has
> +	 * at least 16KB left.
> +	 */
> +	must_send = credits + 16384 >= rcv_win;
> +
> +	if (must_send || credits >= rx_credit_thres)
> +		c3cn->rcv_wup += s3_send_rx_credits(c3cn, credits, dack, must_send);
> +}
> +
> +/*
> + * Generic ARP failure handler that discards the buffer.
> + */
> +static void arp_failure_discard(struct t3cdev *cdev, struct sk_buff *skb)
> +{
> +	kfree_skb(skb);
> +}
> +
> +/*
> + * Prepends TX_DATA_WR or CPL_CLOSE_CON_REQ headers to buffers waiting in a
> + * connection's send queue and sends them on to T3.  Must be called with the
> + * connection's lock held.  Returns the amount of send buffer space that was
> + * freed as a result of sending queued data to T3.
> + */
> +static int s3_push_frames(struct s3_conn *c3cn, int req_completion)
> +{
> +	int total_size = 0;
> +	struct sk_buff *skb;
> +	struct t3cdev *cdev;
> +	struct cxgb3i_sdev_data *cdata;
> +
> +	if (unlikely(c3cn_in_state(c3cn,
> +				   C3CN_STATE_SYN_SENT | C3CN_STATE_CLOSE)))
> +		return 0;
> +
> +	/*
> +	 * We shouldn't really be called at all after an abort but check just
> +	 * in case.
> +	 */
> +	if (unlikely(c3cn_flag(c3cn, C3CN_ABORT_SHUTDOWN)))
> +		return 0;
> +
> +	cdev = c3cn->cdev;
> +	cdata = CXGB3_SDEV_DATA(cdev);
> +
> +	while (c3cn->wr_avail
> +	       && (skb = skb_peek(&c3cn->write_queue)) != NULL
> +	       && !c3cn_flag(c3cn, C3CN_TX_WAIT_IDLE)) {
> +
> +		int len = skb->len;	/* length before skb_push */
> +		int frags = skb_shinfo(skb)->nr_frags + (len != skb->data_len);
> +		int wrs_needed = skb_wrs[frags];
> +
> +		if (wrs_needed > 1 && len + sizeof(struct tx_data_wr) <= wrlen)
> +			wrs_needed = 1;
> +
> +		WARN_ON(frags >= ARRAY_SIZE(skb_wrs) || wrs_needed < 1);
> +		if (c3cn->wr_avail < wrs_needed)
> +			break;
> +
> +		__skb_unlink(skb, &c3cn->write_queue);
> +		skb->priority = CPL_PRIORITY_DATA;
> +		skb->csum = wrs_needed;	/* remember this until the WR_ACK */
> +		c3cn->wr_avail -= wrs_needed;
> +		c3cn->wr_unacked += wrs_needed;
> +		enqueue_wr(c3cn, skb);
> +
> +		if (likely(CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) {
> +			len += ulp_extra_len(skb);
> +			make_tx_data_wr(c3cn, skb, len);
> +			c3cn->snd_nxt += len;
> +			if ((req_completion
> +			     && c3cn->wr_unacked == wrs_needed)
> +			    || (CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_COMPL)
> +			    || c3cn->wr_unacked >= c3cn->wr_max / 2) {
> +				struct work_request_hdr *wr = cplhdr(skb);
> +
> +				wr->wr_hi |= htonl(F_WR_COMPL);
> +				c3cn->wr_unacked = 0;
> +			}
> +			CXGB3_SKB_CB(skb)->flags &= ~C3CB_FLAG_NEED_HDR;
> +		} else if (skb->data[0] == FW_WROPCODE_OFLD_CLOSE_CON)
> +			c3cn_set_flag(c3cn, C3CN_CLOSE_CON_REQUESTED);
> +
> +		total_size += skb->truesize;
> +		set_arp_failure_handler(skb, arp_failure_discard);
> +		l2t_send(cdev, skb, c3cn->l2t);
> +	}
> +	return total_size;
> +}
> +
> +/*
> + * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
> + * and send it along.
> + */
> +static void abort_arp_failure(struct t3cdev *cdev, struct sk_buff *skb)
> +{
> +	struct cpl_abort_req *req = cplhdr(skb);
> +
> +	req->cmd = CPL_ABORT_NO_RST;
> +	cxgb3_ofld_send(cdev, skb);
> +}
> +
> +/*
> + * Send an ABORT_REQ message.  Cannot fail.  This routine makes sure we do
> + * not send multiple ABORT_REQs for the same connection and also that we do
> + * not try to send a message after the connection has closed.  Returns 1 if
> + * an ABORT_REQ wasn't generated after all, 0 otherwise.
> + */
> +static int s3_send_reset(struct s3_conn *c3cn, int mode,
> +			 struct sk_buff *skb)
> +{
> +	struct cpl_abort_req *req;
> +	unsigned int tid = c3cn->tid;
> +
> +	if (unlikely(c3cn_flag(c3cn, C3CN_ABORT_SHUTDOWN) || !c3cn->cdev)) {
> +		if (skb)
> +			__kfree_skb(skb);
> +		return 1;
> +	}
> +
> +	c3cn_conn_debug("c3cn 0x%p, mode %d.\n", c3cn, mode);
> +
> +	c3cn_set_flag(c3cn, C3CN_ABORT_RPL_PENDING);
> +	c3cn_set_flag(c3cn, C3CN_ABORT_SHUTDOWN);
> +
> +	/* Purge the send queue so we don't send anything after an abort. */
> +	s3_purge_write_queue(c3cn);
> +
> +	if (!skb)
> +		skb = alloc_skb(sizeof(*req), GFP_KERNEL | __GFP_NOFAIL);
> +	skb->priority = CPL_PRIORITY_DATA;
> +	set_arp_failure_handler(skb, abort_arp_failure);
> +
> +	req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req));
> +	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
> +	req->wr.wr_lo = htonl(V_WR_TID(tid));
> +	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
> +	req->rsvd0 = htonl(c3cn->snd_nxt);
> +	req->rsvd1 = !c3cn_flag(c3cn, C3CN_TX_DATA_SENT);
> +	req->cmd = mode;
> +
> +	l2t_send(c3cn->cdev, skb, c3cn->l2t);
> +	return 0;
> +}
> +
> +/*
> + * Add a list of skbs to a connection send queue.  This interface is intended
> + * for use by in-kernel ULPs.  The skbs must comply with the max size limit of
> + * the device and have a headroom of at least TX_HEADER_LEN bytes.
> + */
> +int cxgb3i_c3cn_send_pdus(struct s3_conn *c3cn, struct sk_buff *skb, int flags)
> +{
> +	struct sk_buff *next;
> +	int err, copied = 0;
> +
> +	spin_lock_bh(&c3cn->lock);
> +
> +	if (!c3cn_in_state(c3cn, C3CN_STATE_ESTABLISHED)) {
> +		err = -EAGAIN;
> +		goto out_err;
> +	}
> +
> +	err = -EPIPE;
> +	if (c3cn->err || (c3cn->shutdown & C3CN_SEND_SHUTDOWN))
> +		goto out_err;
> +
> +	while (skb) {
> +		if (unlikely(skb_headroom(skb) < TX_HEADER_LEN)) {
> +			c3cn_tx_debug("c3cn 0x%p, skb head.\n", c3cn);
> +			err = -EINVAL;
> +			goto out_err;
> +		}
> +
> +		next = skb->next;
> +		skb->next = NULL;
> +		skb_entail(c3cn, skb, C3CB_FLAG_NO_APPEND | C3CB_FLAG_NEED_HDR);
> +		copied += skb->len;
> +		c3cn->write_seq += skb->len + ulp_extra_len(skb);
> +		skb = next;
> +	}
> +done:
> +	if (likely(skb_queue_len(&c3cn->write_queue)))
> +		s3_push_frames(c3cn, 1);
> +	spin_unlock_bh(&c3cn->lock);
> +	return copied;
> +
> +out_err:
> +	if (copied == 0 && err == -EPIPE)
> +		copied = c3cn->err ? c3cn->err : -EPIPE;
> +	goto done;
> +}
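
Usage sketch, to make sure I read the API right (this is mine, not
from the patch): the caller chains pdu skbs through skb->next, each
with TX_HEADER_LEN bytes of headroom, and gets back the number of
bytes queued:

	skb1->next = skb2;
	skb2->next = NULL;
	queued = cxgb3i_c3cn_send_pdus(c3cn, skb1, 0);

Is a return of 0 (e.g. the -EAGAIN case when the connection is not
yet ESTABLISHED) meant to be retried by the caller?
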
> +
> +/*
> + * Low-level utility routines for primary API functions.
> + * =====================================================
> + */
> +/* routines to implement CPL message processing */
> +static void c3cn_act_establish(struct s3_conn *, struct sk_buff *);
> +static void active_open_failed(struct s3_conn *, struct sk_buff *);
> +static void wr_ack(struct s3_conn *, struct sk_buff *);
> +static void do_peer_fin(struct s3_conn *, struct sk_buff *);
> +static void process_abort_req(struct s3_conn *, struct sk_buff *);
> +static void process_abort_rpl(struct s3_conn *, struct sk_buff *);
> +static void process_close_con_rpl(struct s3_conn *, struct sk_buff *);
> +static void process_rx_iscsi_hdr(struct s3_conn *, struct sk_buff *);
> +
> +static struct sk_buff *__get_cpl_reply_skb(struct sk_buff *, size_t, gfp_t);
> +
> +static int act_open(struct s3_conn *, struct net_device *);
> +static void fail_act_open(struct s3_conn *, int);
> +static void init_offload_conn(struct s3_conn *, struct t3cdev *,
> +			      struct dst_entry *);
> +
> +/*
> + * Insert a connection into the TID table and take an extra reference.
> + */
> +static inline void c3cn_insert_tid(struct cxgb3i_sdev_data *cdata,
> +				   struct s3_conn *c3cn,
> +				   unsigned int tid)
> +{
> +	c3cn_hold(c3cn);
> +	cxgb3_insert_tid(cdata->cdev, cdata->client, c3cn, tid);
> +}
> +
> +static inline void free_atid(struct t3cdev *cdev, unsigned int tid)
> +{
> +	struct s3_conn *c3cn = cxgb3_free_atid(cdev, tid);
> +	if (c3cn)
> +		c3cn_put(c3cn);
> +}
> +
> +/*
> + * This function is intended for allocations of small control messages.
> + * Such messages go out as immediate data and usually the packets are freed
> + * immediately.  We maintain a cache of one small sk_buff and use it whenever
> + * it is available (has a user count of 1).  Otherwise we get a fresh buffer.
> + */
> +#define CTRL_SKB_LEN 120
> +
> +static struct sk_buff *alloc_ctrl_skb(const struct s3_conn *c3cn,
> +				      int len)
> +{
> +	struct sk_buff *skb = c3cn->ctrl_skb_cache;
> +
> +	if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) {
> +		__skb_trim(skb, 0);
> +		atomic_set(&skb->users, 2);
> +	} else if (likely(!in_atomic()))
> +		skb = alloc_skb(len, GFP_ATOMIC | __GFP_NOFAIL);
> +	else
> +		skb = alloc_skb(len, GFP_ATOMIC);
> +	return skb;
> +}
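
If I understand the trick here: setting the user count to 2 hands one
reference to the send path while the cache keeps the other, so after
the message is sent and freed the count drops back to 1, skb_shared()
is false again and the buffer becomes reusable. The lifecycle as I
read it:

	skb = alloc_ctrl_skb(c3cn, len);  /* cached skb: users == 2 */
	cxgb3_ofld_send(cdev, skb);       /* xmit path drops one ref */
	/* later: users == 1, the next alloc_ctrl_skb() reuses it */
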
> +
> +/**
> + * cxgb3_egress_dev - return the cxgb3 egress device or NULL if the egress
> + *     device isn't one of our ports.
> + *
> + * @root_dev: the root device anchoring the search
> + * @c3cn: the connection used to determine egress port in bonding mode
> + * @context: in bonding mode, indicates a connection set up or failover
> + *
> + * Given a root network device, it returns the physical egress device that is a
> + * descendant of the root device.  The root device may be either a physical
> + * device, in which case it is the device returned, or a virtual device, such
> + * as a VLAN or bonding device.  In case of a bonding device the search
> + * considers the decisions of the bonding device given its mode to locate the
> + * correct egress device.
> + */
> +static struct net_device *cxgb3_egress_dev(struct net_device *root_dev,
> +					   struct s3_conn *c3cn,
> +					   int context)
> +{
> +	while (root_dev) {
> +		if (root_dev->priv_flags & IFF_802_1Q_VLAN)
> +			root_dev = vlan_dev_info(root_dev)->real_dev;
> +		else if (is_cxgb3_dev(root_dev))
> +			return root_dev;
> +		else
> +			return NULL;
> +	}
> +	return NULL;
> +}
> +
> +/*
> + * Return TRUE if we're able to establish an offload connection; otherwise
> + * return FALSE.
> + */
> +static int s3_connect(struct s3_conn *c3cn)
> +{
> +	struct net_device *dev = cxgb3_egress_dev(c3cn->dst_cache->dev,
> +						  c3cn, 0);
> +	if (dev == NULL) {
> +		c3cn_conn_debug("c3cn 0x%p, egress dev NULL.\n", c3cn);
> +		return 0;
> +	}
> +	return act_open(c3cn, dev) == 0;
> +}
> +
> +/*
> + * Handle an ARP failure for an active open.
> + */
> +static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
> +{
> +	struct s3_conn *c3cn = (struct s3_conn *)skb->sk;
> +
> +	c3cn_hold(c3cn);
> +	spin_lock(&c3cn->lock);
> +	if (c3cn->state == C3CN_STATE_SYN_SENT) {
> +		fail_act_open(c3cn, EHOSTUNREACH);
> +		__kfree_skb(skb);
> +	}
> +	spin_unlock(&c3cn->lock);
> +	c3cn_put(c3cn);
> +}
> +
> +/*
> + * Send an active open request.
> + */
> +static int act_open(struct s3_conn *c3cn, struct net_device *dev)
> +{
> +	struct cxgb3i_sdev_data *cdata = NDEV2CDATA(dev);
> +	struct t3cdev *cdev = cdata->cdev;
> +	struct dst_entry *dst = c3cn->dst_cache;
> +	struct sk_buff *skb;
> +
> +	c3cn_conn_debug("c3cn 0x%p.\n", c3cn);
> +	/*
> +	 * Initialize connection data.  Note that the flags and ULP mode are
> +	 * initialized higher up ...
> +	 */
> +	c3cn->dev = dev;
> +	c3cn->cdev = cdev;
> +	c3cn->tid = cxgb3_alloc_atid(cdev, cdata->client, c3cn);
> +	if (c3cn->tid < 0)
> +		goto out_err;
> +	c3cn->qset = 0;
> +	c3cn->l2t = t3_l2t_get(cdev, dst->neighbour, dev);
> +	if (!c3cn->l2t)
> +		goto free_tid;
> +
> +	skb = alloc_skb(sizeof(struct cpl_act_open_req),
> +			GFP_KERNEL | __GFP_NOFAIL);
> +	skb->sk = (struct sock *)c3cn;
> +	set_arp_failure_handler(skb, act_open_req_arp_failure);
> +
> +	c3cn_hold(c3cn);
> +
> +	init_offload_conn(c3cn, cdev, dst);
> +	c3cn->err = 0;
> +	c3cn_reset_flag(c3cn, C3CN_DONE);
> +
> +	mk_act_open_req(c3cn, skb, c3cn->tid, c3cn->l2t);
> +	l2t_send(cdev, skb, c3cn->l2t);
> +	return 0;
> +
> +free_tid:
> +	free_atid(cdev, c3cn->tid);
> +	c3cn->tid = 0;
> +out_err:
> +	return -1;
> +}
> +
> +/*
> + * Close a connection by sending a CPL_CLOSE_CON_REQ message.  Cannot fail
> + * under any circumstances.  We take the easy way out and always queue the
> + * message to the write_queue.  We can optimize the case where the queue is
> + * already empty though the optimization is probably not worth it.
> + */
> +static void mk_close_req(struct s3_conn *c3cn)
> +{
> +	struct sk_buff *skb;
> +	struct cpl_close_con_req *req;
> +	unsigned int tid = c3cn->tid;
> +
> +	c3cn_conn_debug("c3cn 0x%p.\n", c3cn);
> +
> +	skb = alloc_skb(sizeof(struct cpl_close_con_req),
> +			GFP_KERNEL | __GFP_NOFAIL);
> +	req = (struct cpl_close_con_req *)__skb_put(skb, sizeof(*req));
> +	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
> +	req->wr.wr_lo = htonl(V_WR_TID(tid));
> +	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
> +	req->rsvd = htonl(c3cn->write_seq);
> +
> +	skb_entail(c3cn, skb, C3CB_FLAG_NO_APPEND);
> +	if (c3cn->state != C3CN_STATE_SYN_SENT)
> +		s3_push_frames(c3cn, 1);
> +}
> +
> +static void skb_entail(struct s3_conn *c3cn, struct sk_buff *skb,
> +		       int flags)
> +{
> +	CXGB3_SKB_CB(skb)->seq = c3cn->write_seq;
> +	CXGB3_SKB_CB(skb)->flags = flags;
> +	__skb_queue_tail(&c3cn->write_queue, skb);
> +}
> +
> +/*
> + * Send RX credits through an RX_DATA_ACK CPL message.  If nofail is 0 we are
> + * permitted to return without sending the message in case we cannot allocate
> + * an sk_buff.  Returns the number of credits sent.
> + */
> +static u32 s3_send_rx_credits(struct s3_conn *c3cn, u32 credits, u32 dack,
> +			      int nofail)
> +{
> +	struct sk_buff *skb;
> +	struct cpl_rx_data_ack *req;
> +
> +	skb = (nofail ? alloc_ctrl_skb(c3cn, sizeof(*req))
> +	       : alloc_skb(sizeof(*req), GFP_ATOMIC));
> +	if (!skb)
> +		return 0;
> +
> +	req = (struct cpl_rx_data_ack *)__skb_put(skb, sizeof(*req));
> +	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
> +	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, c3cn->tid));
> +	req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
> +	skb->priority = CPL_PRIORITY_ACK;
> +	cxgb3_ofld_send(c3cn->cdev, skb);
> +	return credits;
> +}
> +
> +static void mk_act_open_req(struct s3_conn *c3cn, struct sk_buff *skb,
> +			    unsigned int atid, const struct l2t_entry *e)
> +{
> +	struct cpl_act_open_req *req;
> +
> +	c3cn_conn_debug("c3cn 0x%p, atid 0x%x.\n", c3cn, atid);
> +
> +	skb->priority = CPL_PRIORITY_SETUP;
> +	req = (struct cpl_act_open_req *)__skb_put(skb, sizeof(*req));
> +	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
> +	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
> +	req->local_port = c3cn->saddr.sin_port;
> +	req->peer_port = c3cn->daddr.sin_port;
> +	req->local_ip = c3cn->saddr.sin_addr.s_addr;
> +	req->peer_ip = c3cn->daddr.sin_addr.s_addr;
> +	req->opt0h = htonl(calc_opt0h(c3cn) | V_L2T_IDX(e->idx) |
> +			   V_TX_CHANNEL(e->smt_idx));
> +	req->opt0l = htonl(calc_opt0l(c3cn));
> +	req->params = 0;
> +}
> +
> +static inline void s3_purge_write_queue(struct s3_conn *c3cn)
> +{
> +	struct sk_buff *skb;
> +
> +	while ((skb = __skb_dequeue(&c3cn->write_queue)))
> +		__kfree_skb(skb);
> +}
> +
> +/*
> + * Definitions and declarations for CPL handler functions.
> + * =======================================================
> + */
> +
> +/*
> + * Similar to process_cpl_msg() but takes an extra connection reference around
> + * the call to the handler.  Should be used if the handler may drop a
> + * connection reference.
> + */
> +static inline void process_cpl_msg_ref(void (*fn) (struct s3_conn *,
> +						   struct sk_buff *),
> +				       struct s3_conn *c3cn,
> +				       struct sk_buff *skb)
> +{
> +	c3cn_hold(c3cn);
> +	process_cpl_msg(fn, c3cn, skb);
> +	c3cn_put(c3cn);
> +}
> +
> +/*
> + * Return whether a failed active open has allocated a TID
> + */
> +static inline int act_open_has_tid(int status)
> +{
> +	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
> +	    status != CPL_ERR_ARP_MISS;
> +}
> +
> +/*
> + * Returns true if a connection cannot accept new Rx data.
> + */
> +static inline int c3cn_no_receive(const struct s3_conn *c3cn)
> +{
> +	return (c3cn->shutdown & C3CN_RCV_SHUTDOWN);
> +}
> +
> +/*
> + * A helper function that aborts a connection and increments the given MIB
> + * counter.  The supplied skb is used to generate the ABORT_REQ message if
> + * possible.  Must be called with softirqs disabled.
> + */
> +static inline void abort_conn(struct s3_conn *c3cn,
> +			      struct sk_buff *skb)
> +{
> +	struct sk_buff *abort_skb;
> +
> +	abort_skb = __get_cpl_reply_skb(skb, sizeof(struct cpl_abort_req),
> +					GFP_ATOMIC);
> +	if (abort_skb)
> +		s3_send_reset(c3cn, CPL_ABORT_SEND_RST, abort_skb);
> +}
> +
> +/*
> + * Returns whether an ABORT_REQ_RSS message is negative advice.
> + */
> +static inline int is_neg_adv_abort(unsigned int status)
> +{
> +	return (status == CPL_ERR_RTX_NEG_ADVICE
> +		|| status == CPL_ERR_PERSIST_NEG_ADVICE);
> +}
> +
> +/*
> + * CPL handler functions.
> + * ======================
> + */
> +
> +/*
> + * Process a CPL_ACT_ESTABLISH message.
> + */
> +static int do_act_establish(struct t3cdev *cdev, struct sk_buff *skb,
> +			    void *ctx)
> +{
> +	struct cpl_act_establish *req = cplhdr(skb);
> +	unsigned int tid = GET_TID(req);
> +	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
> +	struct s3_conn *c3cn = (struct s3_conn *)ctx;
> +	struct cxgb3i_sdev_data *cdata = CXGB3_SDEV_DATA(cdev);
> +
> +	c3cn_conn_debug("c3cn 0x%p, tid 0x%x.\n", c3cn, tid);
> +	/*
> +	 * It's OK if the TID is currently in use, the owning connection may
> +	 * have backlogged its last CPL message(s).  Just take it away.
> +	 */
> +	c3cn->tid = tid;
> +	c3cn_insert_tid(cdata, c3cn, tid);
> +	free_atid(cdev, atid);
> +
> +	c3cn->qset = G_QNUM(ntohl(skb->csum));
> +
> +	process_cpl_msg(c3cn_act_establish, c3cn, skb);
> +	return 0;
> +}
> +
> +/*
> + * Process an ACT_OPEN_RPL CPL message.
> + */
> +static int do_act_open_rpl(struct t3cdev *cdev, struct sk_buff *skb, void *ctx)
> +{
> +	struct s3_conn *c3cn = (struct s3_conn *)ctx;
> +	struct cpl_act_open_rpl *rpl = cplhdr(skb);
> +
> +	c3cn_conn_debug("c3cn 0x%p, status 0x%x.\n", c3cn, rpl->status);
> +
> +	if (act_open_has_tid(rpl->status))
> +		cxgb3_queue_tid_release(cdev, GET_TID(rpl));
> +
> +	process_cpl_msg_ref(active_open_failed, c3cn, skb);
> +	return 0;
> +}
> +
> +/*
> + * Handler for RX_ISCSI_HDR CPL messages.
> + */
> +static int do_iscsi_hdr(struct t3cdev *t3dev, struct sk_buff *skb, void *ctx)
> +{
> +	struct s3_conn *c3cn = (struct s3_conn *)ctx;
> +	process_cpl_msg(process_rx_iscsi_hdr, c3cn, skb);
> +	return 0;
> +}
> +
> +/*
> + * Handler for TX_DATA_ACK CPL messages.
> + */
> +static int do_wr_ack(struct t3cdev *cdev, struct sk_buff *skb, void *ctx)
> +{
> +	struct s3_conn *c3cn = (struct s3_conn *)ctx;
> +
> +	process_cpl_msg(wr_ack, c3cn, skb);
> +	return 0;
> +}
> +
> +/*
> + * Handler for PEER_CLOSE CPL messages.
> + */
> +static int do_peer_close(struct t3cdev *cdev, struct sk_buff *skb, void *ctx)
> +{
> +	struct s3_conn *c3cn = (struct s3_conn *)ctx;
> +
> +	c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n",
> +			 c3cn, c3cn->state, c3cn->flags);
> +	process_cpl_msg_ref(do_peer_fin, c3cn, skb);
> +	return 0;
> +}
> +
> +/*
> + * Handle an ABORT_REQ_RSS CPL message.
> + */
> +static int do_abort_req(struct t3cdev *cdev, struct sk_buff *skb, void *ctx)
> +{
> +	const struct cpl_abort_req_rss *req = cplhdr(skb);
> +	struct s3_conn *c3cn = (struct s3_conn *)ctx;
> +
> +	if (is_neg_adv_abort(req->status)) {
> +		__kfree_skb(skb);
> +		return 0;
> +	}
> +
> +	c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n",
> +			 c3cn, c3cn->state, c3cn->flags);
> +
> +	process_cpl_msg_ref(process_abort_req, c3cn, skb);
> +	return 0;
> +}
> +
> +/*
> + * Handle an ABORT_RPL_RSS CPL message.
> + */
> +static int do_abort_rpl(struct t3cdev *cdev, struct sk_buff *skb, void *ctx)
> +{
> +	struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
> +	struct s3_conn *c3cn;
> +
> +	/*
> +	 * Ignore replies to post-close aborts indicating that the abort was
> +	 * requested too late.  These connections are terminated when we get
> +	 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
> +	 * arrives the TID is either no longer used or it has been recycled.
> +	 */
> +	if (rpl->status == CPL_ERR_ABORT_FAILED) {
> +discard:
> +		__kfree_skb(skb);
> +		return 0;
> +	}
> +
> +	c3cn = (struct s3_conn *)ctx;
> +	c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n",
> +			 c3cn, c3cn->state, c3cn->flags);
> +
> +	/*
> +	 * Sometimes we've already closed the connection, e.g., a post-close
> +	 * abort races with ABORT_REQ_RSS, the latter frees the connection
> +	 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
> +	 * but FW turns the ABORT_REQ into a regular one and so we get
> +	 * ABORT_RPL_RSS with status 0 and no connection.  Only on T3A.
> +	 */
> +	if (!c3cn)
> +		goto discard;
> +
> +	process_cpl_msg_ref(process_abort_rpl, c3cn, skb);
> +	return 0;
> +}
> +
> +/*
> + * Handler for CLOSE_CON_RPL CPL messages.
> + */
> +static int do_close_con_rpl(struct t3cdev *cdev, struct sk_buff *skb,
> +			    void *ctx)
> +{
> +	struct s3_conn *c3cn = (struct s3_conn *)ctx;
> +
> +	c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n",
> +			 c3cn, c3cn->state, c3cn->flags);
> +
> +	process_cpl_msg_ref(process_close_con_rpl, c3cn, skb);
> +	return 0;
> +}
> +
> +/*
> + * Definitions and declarations for CPL message processing.
> + * ========================================================
> + */
> +
> +static void make_established(struct s3_conn *, u32, unsigned int);
> +static void t3_release_offload_resources(struct s3_conn *);
> +static void act_open_retry_timer(unsigned long);
> +static void mk_act_open_req(struct s3_conn *, struct sk_buff *,
> +			    unsigned int, const struct l2t_entry *);
> +static int act_open_rpl_status_to_errno(int);
> +static void handle_excess_rx(struct s3_conn *, struct sk_buff *);
> +static int abort_status_to_errno(struct s3_conn *, int, int *);
> +static void send_abort_rpl(struct sk_buff *, struct t3cdev *, int);
> +static struct sk_buff *get_cpl_reply_skb(struct sk_buff *, size_t, gfp_t);
> +static void t3_defer_reply(struct sk_buff *, struct t3cdev *, defer_handler_t);
> +static void send_deferred_abort_rpl(struct t3cdev *, struct sk_buff *);
> +
> +/*
> + * Dequeue and return the first unacknowledged WR on a connection's pending
> + * list.
> + */
> +static inline struct sk_buff *dequeue_wr(struct s3_conn *c3cn)
> +{
> +	struct sk_buff *skb = c3cn->wr_pending_head;
> +
> +	if (likely(skb)) {
> +		/* Don't bother clearing the tail */
> +		c3cn->wr_pending_head = (struct sk_buff *)skb->sp;
> +		skb->sp = NULL;
> +	}
> +	return skb;
> +}
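
For reference, the pending-WR list is threaded through skb->sp, so I
assume the enqueue_wr() called from s3_push_frames() above (presumably
defined in the earlier part of this file) is roughly the mirror image
of this:

	static inline void enqueue_wr(struct s3_conn *c3cn,
				      struct sk_buff *skb)
	{
		skb->sp = NULL;
		if (!c3cn->wr_pending_head)
			c3cn->wr_pending_head = skb;
		else
			c3cn->wr_pending_tail->sp = (struct sec_path *)skb;
		c3cn->wr_pending_tail = skb;
	}

Overloading sp like that works, but please put a comment on the
wr_pending_head/wr_pending_tail fields in cxgb3i_offload.h.
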
> +
> +/*
> + * Return the first pending WR without removing it from the list.
> + */
> +static inline struct sk_buff *peek_wr(const struct s3_conn *c3cn)
> +{
> +	return c3cn->wr_pending_head;
> +}
> +
> +static inline void free_wr_skb(struct sk_buff *skb)
> +{
> +	kfree_skb(skb);
> +}
> +
> +static void purge_wr_queue(struct s3_conn *c3cn)
> +{
> +	struct sk_buff *skb;
> +	while ((skb = dequeue_wr(c3cn)) != NULL)
> +		free_wr_skb(skb);
> +}
> +
> +static inline void set_abort_rpl_wr(struct sk_buff *skb, unsigned int tid,
> +				    int cmd)
> +{
> +	struct cpl_abort_rpl *rpl = cplhdr(skb);
> +
> +	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
> +	rpl->wr.wr_lo = htonl(V_WR_TID(tid));
> +	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
> +	rpl->cmd = cmd;
> +}
> +
> +/*
> + * CPL message processing ...
> + * ==========================
> + */
> +
> +/*
> + * Updates connection state from an active establish CPL message.  Runs with
> + * the connection lock held.
> + */
> +static void c3cn_act_establish(struct s3_conn *c3cn,
> +			       struct sk_buff *skb)
> +{
> +	struct cpl_act_establish *req = cplhdr(skb);
> +	u32 rcv_isn = ntohl(req->rcv_isn);	/* real RCV_ISN + 1 */
> +
> +	if (unlikely(c3cn->state != C3CN_STATE_SYN_SENT))
> +		printk(KERN_ERR "TID %u expected SYN_SENT, found %d\n",
> +		       c3cn->tid, c3cn->state);
> +
> +	c3cn->copied_seq = c3cn->rcv_wup = c3cn->rcv_nxt = rcv_isn;
> +	make_established(c3cn, ntohl(req->snd_isn), ntohs(req->tcp_opt));
> +
> +	__kfree_skb(skb);
> +
> +	if (s3_push_frames(c3cn, 1))
> +		cxgb3i_conn_tx_open(c3cn);
> +}
> +
> +/*
> + * Handle active open failures.
> + */
> +static void active_open_failed(struct s3_conn *c3cn,
> +			       struct sk_buff *skb)
> +{
> +	struct cpl_act_open_rpl *rpl = cplhdr(skb);
> +
> +	if (rpl->status == CPL_ERR_CONN_EXIST &&
> +	    c3cn->retry_timer.function != act_open_retry_timer) {
> +		c3cn->retry_timer.function = act_open_retry_timer;
> +		c3cn_reset_timer(c3cn, &c3cn->retry_timer,
> +				 jiffies + HZ / 2);
> +	} else
> +		fail_act_open(c3cn, act_open_rpl_status_to_errno(rpl->status));
> +	__kfree_skb(skb);
> +}
> +
> +/*
> + * Process received pdu for a connection.
> + */
> +static void process_rx_iscsi_hdr(struct s3_conn *c3cn,
> +				 struct sk_buff *skb)
> +{
> +	struct cpl_iscsi_hdr *hdr_cpl = cplhdr(skb);
> +	struct cpl_iscsi_hdr_norss data_cpl;
> +	struct cpl_rx_data_ddp_norss ddp_cpl;
> +	unsigned int hdr_len, data_len, status;
> +	unsigned int len;
> +	int err;
> +
> +	if (unlikely(c3cn_no_receive(c3cn))) {
> +		handle_excess_rx(c3cn, skb);
> +		return;
> +	}
> +
> +	CXGB3_SKB_CB(skb)->seq = ntohl(hdr_cpl->seq);
> +	CXGB3_SKB_CB(skb)->flags = 0;
> +
> +	skb_reset_transport_header(skb);
> +	__skb_pull(skb, sizeof(struct cpl_iscsi_hdr));
> +
> +	len = hdr_len = ntohs(hdr_cpl->len);
> +	/* msg coalesce is off or not enough data received */
> +	if (skb->len <= hdr_len) {
> +		printk(KERN_ERR "%s: TID %u, ISCSI_HDR, skb len %u < %u.\n",
> +		       c3cn->cdev->name, c3cn->tid, skb->len, hdr_len);
> +		goto abort_conn;
> +	}
> +
> +	err = skb_copy_bits(skb, skb->len - sizeof(ddp_cpl), &ddp_cpl,
> +			    sizeof(ddp_cpl));
> +	if (err < 0)
> +		goto abort_conn;
> +
> +	skb_ulp_mode(skb) = ULP2_FLAG_DATA_READY;
> +	skb_ulp_pdulen(skb) = ntohs(ddp_cpl.len);
> +	skb_ulp_ddigest(skb) = ntohl(ddp_cpl.ulp_crc);
> +	status = ntohl(ddp_cpl.ddp_status);
> +
> +	if (status & (1 << RX_DDP_STATUS_HCRC_SHIFT))
> +		skb_ulp_mode(skb) |= ULP2_FLAG_HCRC_ERROR;
> +	if (status & (1 << RX_DDP_STATUS_DCRC_SHIFT))
> +		skb_ulp_mode(skb) |= ULP2_FLAG_DCRC_ERROR;
> +	if (status & (1 << RX_DDP_STATUS_PAD_SHIFT))
> +		skb_ulp_mode(skb) |= ULP2_FLAG_PAD_ERROR;
> +
> +	if (skb->len > (hdr_len + sizeof(ddp_cpl))) {
> +		err = skb_copy_bits(skb, hdr_len, &data_cpl, sizeof(data_cpl));
> +		if (err < 0)
> +			goto abort_conn;
> +		data_len = ntohs(data_cpl.len);
> +		len += sizeof(data_cpl) + data_len;
> +	} else if (status & (1 << RX_DDP_STATUS_DDP_SHIFT))
> +		skb_ulp_mode(skb) |= ULP2_FLAG_DATA_DDPED;
> +
> +	c3cn->rcv_nxt = ntohl(ddp_cpl.seq) + skb_ulp_pdulen(skb);
> +	__pskb_trim(skb, len);
> +	__skb_queue_tail(&c3cn->receive_queue, skb);
> +	cxgb3i_conn_pdu_ready(c3cn);
> +
> +	return;
> +
> +abort_conn:
> +	s3_send_reset(c3cn, CPL_ABORT_SEND_RST, NULL);
> +	__kfree_skb(skb);
> +}
> +
> +/*
> + * Process an acknowledgment of WR completion.  Advance snd_una and send the
> + * next batch of work requests from the write queue.
> + */
> +static void wr_ack(struct s3_conn *c3cn, struct sk_buff *skb)
> +{
> +	struct cpl_wr_ack *hdr = cplhdr(skb);
> +	unsigned int credits = ntohs(hdr->credits);
> +	u32 snd_una = ntohl(hdr->snd_una);
> +
> +	c3cn->wr_avail += credits;
> +	if (c3cn->wr_unacked > c3cn->wr_max - c3cn->wr_avail)
> +		c3cn->wr_unacked = c3cn->wr_max - c3cn->wr_avail;
> +
> +	while (credits) {
> +		struct sk_buff *p = peek_wr(c3cn);
> +
> +		if (unlikely(!p)) {
> +			printk(KERN_ERR "%u WR_ACK credits for TID %u with "
> +			       "nothing pending, state %u\n",
> +			       credits, c3cn->tid, c3cn->state);
> +			break;
> +		}
> +		if (unlikely(credits < p->csum)) {
> +			p->csum -= credits;
> +			break;
> +		} else {
> +			dequeue_wr(c3cn);
> +			credits -= p->csum;
> +			free_wr_skb(p);
> +		}
> +	}
> +
> +	if (unlikely(before(snd_una, c3cn->snd_una)))
> +		goto out_free;
> +
> +	if (c3cn->snd_una != snd_una) {
> +		c3cn->snd_una = snd_una;
> +		dst_confirm(c3cn->dst_cache);
> +		if (c3cn->snd_una == c3cn->snd_nxt)
> +			c3cn_reset_flag(c3cn, C3CN_TX_WAIT_IDLE);
> +	}
> +
> +	if (skb_queue_len(&c3cn->write_queue) && s3_push_frames(c3cn, 0))
> +		cxgb3i_conn_tx_open(c3cn);
> +out_free:
> +	__kfree_skb(skb);
> +}
> +
> +/*
> + * Handle a peer FIN.
> + */
> +static void do_peer_fin(struct s3_conn *c3cn, struct sk_buff *skb)
> +{
> +	int keep = 0;
> +
> +	if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING))
> +		goto out;
> +
> +	c3cn->shutdown |= C3CN_RCV_SHUTDOWN;
> +	c3cn_set_flag(c3cn, C3CN_DONE);
> +
> +	switch (c3cn->state) {
> +	case C3CN_STATE_ESTABLISHED:
> +		break;
> +	case C3CN_STATE_CLOSING:
> +		t3_release_offload_resources(c3cn);
> +		c3cn_done(c3cn);
> +		break;
> +	default:
> +		printk(KERN_ERR
> +		       "%s: TID %u received PEER_CLOSE in bad state %d\n",
> +		       c3cn->cdev->name, c3cn->tid, c3cn->state);
> +	}
> +
> +	cxgb3i_conn_closing(c3cn);
> +out:
> +	if (!keep)
> +		__kfree_skb(skb);
> +}
> +
> +/*
> + * Process abort requests.  If we are waiting for an ABORT_RPL we ignore this
> + * request except that we need to reply to it.
> + */
> +static void process_abort_req(struct s3_conn *c3cn,
> +			      struct sk_buff *skb)
> +{
> +	int rst_status = CPL_ABORT_NO_RST;
> +	const struct cpl_abort_req_rss *req = cplhdr(skb);
> +
> +	if (!c3cn_flag(c3cn, C3CN_ABORT_REQ_RCVD)) {
> +		c3cn_set_flag(c3cn, C3CN_ABORT_REQ_RCVD);
> +		c3cn_set_flag(c3cn, C3CN_ABORT_SHUTDOWN);
> +		__kfree_skb(skb);
> +		return;
> +	}
> +	c3cn_reset_flag(c3cn, C3CN_ABORT_REQ_RCVD);
> +
> +	/*
> +	 * Three cases to consider:
> +	 * a) We haven't sent an abort_req; close the connection.
> +	 * b) We have sent a post-close abort_req that will get to TP too late
> +	 *    and will generate a CPL_ERR_ABORT_FAILED reply.  The reply will
> +	 *    be ignored and the connection should be closed now.
> +	 * c) We have sent a regular abort_req that will get to TP too late.
> +	 *    That will generate an abort_rpl with status 0, wait for it.
> +	 */
> +	send_abort_rpl(skb, c3cn->cdev, rst_status);
> +
> +	if (!c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) {
> +		c3cn->err =
> +		    abort_status_to_errno(c3cn, req->status, &rst_status);
> +
> +		t3_release_offload_resources(c3cn);
> +		c3cn_done(c3cn);
> +	}
> +}
> +
> +/*
> + * Process abort replies.  We only process these messages if we anticipate
> + * them as the coordination between SW and HW in this area is somewhat lacking
> + * and sometimes we get ABORT_RPLs after we are done with the connection that
> + * originated the ABORT_REQ.
> + */
> +static void process_abort_rpl(struct s3_conn *c3cn,
> +			      struct sk_buff *skb)
> +{
> +	if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) {
> +		if (!c3cn_flag(c3cn, C3CN_ABORT_RPL_RCVD))
> +			c3cn_set_flag(c3cn, C3CN_ABORT_RPL_RCVD);
> +		else {
> +			c3cn_reset_flag(c3cn, C3CN_ABORT_RPL_RCVD);
> +			c3cn_reset_flag(c3cn, C3CN_ABORT_RPL_PENDING);
> +			BUG_ON(c3cn_flag(c3cn, C3CN_ABORT_REQ_RCVD));
> +			t3_release_offload_resources(c3cn);
> +			c3cn_done(c3cn);
> +		}
> +	}
> +	__kfree_skb(skb);
> +}
> +
> +/*
> + * Process a peer ACK to our FIN.
> + */
> +static void process_close_con_rpl(struct s3_conn *c3cn,
> +				  struct sk_buff *skb)
> +{
> +	struct cpl_close_con_rpl *rpl = cplhdr(skb);
> +
> +	c3cn->snd_una = ntohl(rpl->snd_nxt) - 1;	/* exclude FIN */
> +
> +	if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING))
> +		goto out;
> +
> +	if (c3cn->state == C3CN_STATE_CLOSING) {
> +		t3_release_offload_resources(c3cn);
> +		c3cn_done(c3cn);
> +	} else {
> +		printk(KERN_ERR
> +		       "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
> +		       c3cn->cdev->name, c3cn->tid, c3cn->state);
> +	}
> +out:
> +	kfree_skb(skb);
> +}
> +
> +/*
> + * Random utility functions for CPL message processing ...
> + * =======================================================
> + */
> +
> +/**
> + *	find_best_mtu - find the entry in the MTU table closest to an MTU
> + *	@d: TOM state
> + *	@mtu: the target MTU
> + *
> + *	Returns the index of the value in the MTU table that is closest to but
> + *	does not exceed the target MTU.
> + */
> +static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
> +{
> +	int i = 0;
> +
> +	while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
> +		++i;
> +	return i;
> +}
> +
> +static unsigned int select_mss(struct s3_conn *c3cn, unsigned int pmtu)
> +{
> +	unsigned int idx;
> +	struct dst_entry *dst = c3cn->dst_cache;
> +	struct t3cdev *cdev = c3cn->cdev;
> +	const struct t3c_data *td = T3C_DATA(cdev);
> +	u16 advmss = dst_metric(dst, RTAX_ADVMSS);
> +
> +	if (advmss > pmtu - 40)
> +		advmss = pmtu - 40;
> +	if (advmss < td->mtus[0] - 40)
> +		advmss = td->mtus[0] - 40;
> +	idx = find_best_mtu(td, advmss + 40);
> +	return idx;
> +}
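
Sanity check on the arithmetic: the 40 is the IPv4 + TCP header
overhead, so for a 1500-byte path MTU advmss is clamped to at most
1460, and what ends up in mss_idx is the index of the largest MTU
table entry not exceeding advmss + 40 -- an index, not an MSS value.
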
> +
> +static void fail_act_open(struct s3_conn *c3cn, int errno)
> +{
> +	c3cn->err = errno;
> +	t3_release_offload_resources(c3cn);
> +	c3cn_done(c3cn);
> +}
> +
> +/*
> + * Assign offload parameters to some connection fields.
> + */
> +static void init_offload_conn(struct s3_conn *c3cn,
> +			      struct t3cdev *cdev,
> +			      struct dst_entry *dst)
> +{
> +	BUG_ON(c3cn->cdev != cdev);
> +	c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs;
> +	c3cn->wr_unacked = 0;
> +	c3cn->mss_idx = select_mss(c3cn, dst_mtu(dst));
> +
> +	c3cn->ctrl_skb_cache = alloc_skb(CTRL_SKB_LEN, gfp_any());
> +	reset_wr_list(c3cn);
> +}
> +
> +static void act_open_retry_timer(unsigned long data)
> +{
> +	struct sk_buff *skb;
> +	struct s3_conn *c3cn = (struct s3_conn *)data;
> +
> +	spin_lock(&c3cn->lock);
> +	skb = alloc_skb(sizeof(struct cpl_act_open_req), GFP_ATOMIC);
> +	if (!skb)
> +		fail_act_open(c3cn, ENOMEM);
> +	else {
> +		skb->sk = (struct sock *)c3cn;
> +		set_arp_failure_handler(skb, act_open_req_arp_failure);
> +		mk_act_open_req(c3cn, skb, c3cn->tid, c3cn->l2t);
> +		l2t_send(c3cn->cdev, skb, c3cn->l2t);
> +	}
> +	spin_unlock(&c3cn->lock);
> +	c3cn_put(c3cn);
> +}
> +
> +/*
> + * Convert an ACT_OPEN_RPL status to a Linux errno.
> + */
> +static int act_open_rpl_status_to_errno(int status)
> +{
> +	switch (status) {
> +	case CPL_ERR_CONN_RESET:
> +		return ECONNREFUSED;
> +	case CPL_ERR_ARP_MISS:
> +		return EHOSTUNREACH;
> +	case CPL_ERR_CONN_TIMEDOUT:
> +		return ETIMEDOUT;
> +	case CPL_ERR_TCAM_FULL:
> +		return ENOMEM;
> +	case CPL_ERR_CONN_EXIST:
> +		printk(KERN_ERR "ACTIVE_OPEN_RPL: 4-tuple in use\n");
> +		return EADDRINUSE;
> +	default:
> +		return EIO;
> +	}
> +}
> +
> +/*
> + * Convert the status code of an ABORT_REQ into a Linux error code.  Also
> + * indicate whether RST should be sent in response.
> + */
> +static int abort_status_to_errno(struct s3_conn *c3cn,
> +				 int abort_reason, int *need_rst)
> +{
> +	switch (abort_reason) {
> +	case CPL_ERR_BAD_SYN: /* fall through */
> +	case CPL_ERR_CONN_RESET:
> +		return c3cn->state == C3CN_STATE_CLOSING ? EPIPE : ECONNRESET;
> +	case CPL_ERR_XMIT_TIMEDOUT:
> +	case CPL_ERR_PERSIST_TIMEDOUT:
> +	case CPL_ERR_FINWAIT2_TIMEDOUT:
> +	case CPL_ERR_KEEPALIVE_TIMEDOUT:
> +		return ETIMEDOUT;
> +	default:
> +		return EIO;
> +	}
> +}
> +
> +static void send_abort_rpl(struct sk_buff *skb, struct t3cdev *cdev,
> +			   int rst_status)
> +{
> +	struct sk_buff *reply_skb;
> +	struct cpl_abort_req_rss *req = cplhdr(skb);
> +
> +	reply_skb = get_cpl_reply_skb(skb, sizeof(struct cpl_abort_rpl),
> +				      gfp_any());
> +	if (!reply_skb) {
> +		/* Defer the reply.  Stick rst_status into req->cmd. */
> +		req->status = rst_status;
> +		t3_defer_reply(skb, cdev, send_deferred_abort_rpl);
> +		return;
> +	}
> +
> +	reply_skb->priority = CPL_PRIORITY_DATA;
> +	set_abort_rpl_wr(reply_skb, GET_TID(req), rst_status);
> +	kfree_skb(skb);
> +	cxgb3_ofld_send(cdev, reply_skb);
> +}
> +
> +/*
> + * Returns an sk_buff for a reply CPL message of size len.  If the input
> + * sk_buff has no other users it is trimmed and reused, otherwise a new buffer
> + * is allocated.  The input skb must be of size at least len.  Note that this
> + * operation does not destroy the original skb data even if it decides to reuse
> + * the buffer.
> + */
> +static struct sk_buff *get_cpl_reply_skb(struct sk_buff *skb, size_t len,
> +					 gfp_t gfp)
> +{
> +	if (likely(!skb_cloned(skb))) {
> +		BUG_ON(skb->len < len);
> +		__skb_trim(skb, len);
> +		skb_get(skb);
> +	} else {
> +		skb = alloc_skb(len, gfp);
> +		if (skb)
> +			__skb_put(skb, len);
> +	}
> +	return skb;
> +}
> +
> +/*
> + * Add an skb to the deferred skb queue for processing from process context.
> + */
> +static void t3_defer_reply(struct sk_buff *skb, struct t3cdev *cdev,
> +			   defer_handler_t handler)
> +{
> +	struct cxgb3i_sdev_data *cdata = CXGB3_SDEV_DATA(cdev);
> +
> +	DEFERRED_SKB_CB(skb)->handler = handler;
> +	spin_lock_bh(&cdata->deferq.lock);
> +	__skb_queue_tail(&cdata->deferq, skb);
> +	if (skb_queue_len(&cdata->deferq) == 1)
> +		schedule_work(&cdata->deferq_task);
> +	spin_unlock_bh(&cdata->deferq.lock);
> +}
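
(So the work is only scheduled on the empty -> non-empty transition;
process_deferq() below then drains the whole queue, dropping the lock
around each handler call since the handler may sleep -- e.g.
send_deferred_abort_rpl() allocates with GFP_KERNEL.)
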
> +
> +/*
> + * Process the defer queue.
> + */
> +static void process_deferq(struct work_struct *task_param)
> +{
> +	struct sk_buff *skb;
> +	struct cxgb3i_sdev_data *cdata = container_of(task_param,
> +						     struct cxgb3i_sdev_data,
> +						     deferq_task);
> +
> +	spin_lock_bh(&cdata->deferq.lock);
> +	while ((skb = __skb_dequeue(&cdata->deferq)) != NULL) {
> +		spin_unlock_bh(&cdata->deferq.lock);
> +		DEFERRED_SKB_CB(skb)->handler(cdata->cdev, skb);
> +		spin_lock_bh(&cdata->deferq.lock);
> +	}
> +	spin_unlock_bh(&cdata->deferq.lock);
> +}
> +
> +static void send_deferred_abort_rpl(struct t3cdev *cdev, struct sk_buff *skb)
> +{
> +	struct sk_buff *reply_skb;
> +	struct cpl_abort_req_rss *req = cplhdr(skb);
> +
> +	reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),
> +			      GFP_KERNEL | __GFP_NOFAIL);
> +	reply_skb->priority = CPL_PRIORITY_DATA;
> +	__skb_put(reply_skb, sizeof(struct cpl_abort_rpl));
> +	set_abort_rpl_wr(reply_skb, GET_TID(req), req->status);
> +	cxgb3_ofld_send(cdev, reply_skb);
> +	kfree_skb(skb);
> +}
> +
> +/*
> + * Release resources held by an offload connection (TID, L2T entry, etc.)
> + */
> +static void t3_release_offload_resources(struct s3_conn *c3cn)
> +{
> +	struct t3cdev *cdev = c3cn->cdev;
> +	unsigned int tid = c3cn->tid;
> +
> +	if (!cdev)
> +		return;
> +
> +	c3cn->qset = 0;
> +
> +	kfree_skb(c3cn->ctrl_skb_cache);
> +	c3cn->ctrl_skb_cache = NULL;
> +
> +	if (c3cn->wr_avail != c3cn->wr_max) {
> +		purge_wr_queue(c3cn);
> +		reset_wr_list(c3cn);
> +	}
> +
> +	if (c3cn->l2t) {
> +		l2t_release(L2DATA(cdev), c3cn->l2t);
> +		c3cn->l2t = NULL;
> +	}
> +
> +	if (c3cn->state == C3CN_STATE_SYN_SENT) /* we have ATID */
> +		free_atid(cdev, tid);
> +	else {		/* we have TID */
> +		cxgb3_remove_tid(cdev, (void *)c3cn, tid);
> +		c3cn_put(c3cn);
> +	}
> +
> +	c3cn->cdev = NULL;
> +}
> +
> +/*
> + * Handles Rx data that arrives in a state where the connection isn't
> + * accepting new data.
> + */
> +static void handle_excess_rx(struct s3_conn *c3cn, struct sk_buff *skb)
> +{
> +	if (!c3cn_flag(c3cn, C3CN_ABORT_SHUTDOWN))
> +		abort_conn(c3cn, skb);
> +
> +	kfree_skb(skb);
> +}
> +
> +/*
> + * Like get_cpl_reply_skb() but the returned buffer starts out empty.
> + */
> +static struct sk_buff *__get_cpl_reply_skb(struct sk_buff *skb, size_t len,
> +					   gfp_t gfp)
> +{
> +	if (likely(!skb_cloned(skb) && !skb->data_len)) {
> +		__skb_trim(skb, 0);
> +		skb_get(skb);
> +	} else
> +		skb = alloc_skb(len, gfp);
> +	return skb;
> +}
> +
> +/*
> + * Completes some final bits of initialization for just-established connections
> + * and changes their state to C3CN_STATE_ESTABLISHED.
> + *
> + * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
> + */
> +static void make_established(struct s3_conn *c3cn, u32 snd_isn,
> +			     unsigned int opt)
> +{
> +	c3cn->write_seq = c3cn->snd_nxt = c3cn->snd_una = snd_isn;
> +
> +	/*
> +	 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
> +	 * pass through opt0.
> +	 */
> +	if (rcv_win > (M_RCV_BUFSIZ << 10))
> +		c3cn->rcv_wup -= rcv_win - (M_RCV_BUFSIZ << 10);
> +
> +	dst_confirm(c3cn->dst_cache);
> +
> +	smp_mb();
> +	c3cn_set_state(c3cn, C3CN_STATE_ESTABLISHED);
> +}
> diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.h b/drivers/scsi/cxgb3i/cxgb3i_offload.h
> new file mode 100644
> index 0000000..98d5c7d
> --- /dev/null
> +++ b/drivers/scsi/cxgb3i/cxgb3i_offload.h
> @@ -0,0 +1,242 @@
> +/*
> + * Copyright (C) 2003-2008 Chelsio Communications.  All rights reserved.
> + *
> + * Written by Dimitris Michailidis (dm@...lsio.com)
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the LICENSE file included in this
> + * release for licensing terms and conditions.
> + */
> +
> +#ifndef _CXGB3I_OFFLOAD_H
> +#define _CXGB3I_OFFLOAD_H
> +
> +#include <linux/skbuff.h>
> +#include <net/tcp.h>
> +
> +#include "t3cdev.h"
> +#include "cxgb3_offload.h"
> +
> +#define cxgb3i_log_error(fmt...) printk(KERN_ERR "cxgb3i: ERR! " fmt)
> +#define cxgb3i_log_warn(fmt...)	 printk(KERN_WARNING "cxgb3i: WARN! " fmt)
> +#define cxgb3i_log_info(fmt...)  printk(KERN_INFO "cxgb3i: " fmt)
> +
> +#ifdef __DEBUG_CXGB3I__
> +#define cxgb3i_log_debug(fmt, args...) \
> +        printk(KERN_ERR "cxgb3i: %s - " fmt, __func__ , ## args)
> +#else
> +#define cxgb3i_log_debug(fmt...)
> +#endif
> +
> +/*
> + * Data structure to keep track of cxgb3 connection.
> + */
> +struct s3_conn {
> +	struct net_device *dev;
> +	struct t3cdev *cdev;
> +	unsigned long flags;
> +	int tid;
> +	int qset;
> +	int mss_idx;
> +	struct l2t_entry *l2t;
> +	int wr_max;
> +	int wr_avail;
> +	int wr_unacked;
> +	struct sk_buff *wr_pending_head;
> +	struct sk_buff *wr_pending_tail;
> +	struct sk_buff *ctrl_skb_cache;
> +
> +	spinlock_t lock;
> +	atomic_t refcnt;
> +	volatile unsigned int state;
> +	struct sockaddr_in saddr;
> +	struct sockaddr_in daddr;
> +	struct dst_entry *dst_cache;
> +	unsigned char shutdown;
> +	struct sk_buff_head receive_queue;
> +	struct sk_buff_head write_queue;
> +	struct timer_list retry_timer;
> +	int err;
> +	rwlock_t callback_lock;
> +	void *user_data;
> +
> +	u32 rcv_nxt;		/* What we want to receive next		*/
> +	u32 copied_seq;		/* Head of yet unread data		*/
> +	u32 rcv_wup;		/* rcv_nxt on last window update sent	*/
> +	u32 snd_nxt;		/* Next sequence we send		*/
> +	u32 snd_una;		/* First byte we want an ack for	*/
> +
> +	u32 write_seq;		/* Tail(+1) of data held in send buffer */
> +};
> +
> +/* Flags in c3cn->shutdown */
> +#define C3CN_SHUTDOWN_MASK	3
> +#define C3CN_RCV_SHUTDOWN	1
> +#define C3CN_SEND_SHUTDOWN	2
> +
> +/*
> + * connection state bitmap
> + */
> +#define C3CN_STATE_CLOSE	0x1
> +#define C3CN_STATE_SYN_SENT	0x2
> +#define C3CN_STATE_ESTABLISHED	0x4
> +#define C3CN_STATE_CLOSING	0x8
> +#define C3CN_STATE_ABORING	0x10
> +
> +#define C3CN_STATE_MASK		0xFF
> +#define C3CN_NEED_CLOSE		0x100
> +
> +/*
> + * Connection flags -- most of them track close-related events.
> + */
> +enum c3cn_flags {
> +	C3CN_ABORT_RPL_RCVD,	/* received one ABORT_RPL_RSS message */
> +	C3CN_ABORT_REQ_RCVD,	/* received one ABORT_REQ_RSS message */
> +	C3CN_TX_WAIT_IDLE,	/* suspend Tx until in-flight data is ACKed */
> +	C3CN_ABORT_SHUTDOWN,	/* shouldn't send more abort requests */
> +	C3CN_ABORT_RPL_PENDING,	/* expecting an abort reply */
> +	C3CN_CLOSE_CON_REQUESTED,	/* we've sent a close_conn_req */
> +	C3CN_TX_DATA_SENT,	/* already sent a TX_DATA WR */
> +
> +	C3CN_DONE,
> +};
> +
> +static inline void c3cn_set_flag(struct s3_conn *c3cn,
> +				 enum c3cn_flags flag)
> +{
> +	__set_bit(flag, &c3cn->flags);
> +}
> +
> +static inline void c3cn_reset_flag(struct s3_conn *c3cn,
> +				   enum c3cn_flags flag)
> +{
> +	__clear_bit(flag, &c3cn->flags);
> +}
> +
> +static inline int c3cn_flag(struct s3_conn *c3cn, enum c3cn_flags flag)
> +{
> +	if (c3cn == NULL)
> +		return 0;
> +	return test_bit(flag, &c3cn->flags);
> +}
> +
> +/*
> + * Per adapter data.  Linked off of each Ethernet device port on the adapter.
> + * Also available via the t3cdev structure since we have pointers to our port
> + * net_devices there ...
> + */
> +struct cxgb3i_sdev_data {
> +	struct list_head list;
> +	struct t3cdev *cdev;
> +	struct cxgb3_client *client;
> +	struct adap_ports *ports;
> +	unsigned int rx_page_size;
> +	struct sk_buff_head deferq;
> +	struct work_struct deferq_task;
> +};
> +#define NDEV2CDATA(ndev) (*(struct cxgb3i_sdev_data **)&(ndev)->ec_ptr)
> +#define CXGB3_SDEV_DATA(cdev) NDEV2CDATA((cdev)->lldev)
> +
> +static inline void c3cn_hold(struct s3_conn *c3cn)
> +{
> +	atomic_inc(&c3cn->refcnt);
> +}
> +
> +static inline void c3cn_put(struct s3_conn *c3cn)
> +{
> +	if (atomic_dec_and_test(&c3cn->refcnt))
> +		kfree(c3cn);
> +}
> +
> +void c3cn_close(struct s3_conn *);
> +static inline void c3cn_release(struct s3_conn *c3cn)
> +{
> +	c3cn_close(c3cn);
> +	c3cn_put(c3cn);
> +}
> +
> +/*
> + * Primary API routines.
> + */
> +
> +int cxgb3i_sdev_init(cxgb3_cpl_handler_func *);
> +void cxgb3i_sdev_add(struct t3cdev *, struct cxgb3_client *);
> +void cxgb3i_sdev_remove(struct t3cdev *);
> +
> +struct s3_conn *cxgb3i_c3cn_create(void);
> +int cxgb3i_c3cn_connect(struct s3_conn *, struct sockaddr_in *);
> +void cxgb3i_c3cn_rx_credits(struct s3_conn *, int);
> +int cxgb3i_c3cn_send_pdus(struct s3_conn *, struct sk_buff *, int);
> +
> +/*
> + * Definitions for sk_buff state and ULP mode management.
> + */
> +
> +struct cxgb3_skb_cb {
> +	__u8 flags;		/* C3CB_FLAG_* flags below */
> +	__u8 ulp_mode;		/* ULP mode/submode of sk_buff */
> +	__u32 seq;		/* sequence number */
> +	__u32 ddigest;		/* ULP rx_data_ddp selected field */
> +	__u32 pdulen;		/* ULP rx_data_ddp selected field */
> +	__u8 ulp_data[16];	/* scratch area for ULP */
> +};
> +
> +#define CXGB3_SKB_CB(skb)	((struct cxgb3_skb_cb *)&((skb)->cb[0]))
> +
> +#define skb_ulp_mode(skb)	(CXGB3_SKB_CB(skb)->ulp_mode)
> +#define skb_ulp_ddigest(skb)	(CXGB3_SKB_CB(skb)->ddigest)
> +#define skb_ulp_pdulen(skb)	(CXGB3_SKB_CB(skb)->pdulen)
> +#define skb_ulp_data(skb)	(CXGB3_SKB_CB(skb)->ulp_data)
> +
> +enum {
> +	C3CB_FLAG_NEED_HDR = 1 << 0,	/* packet needs a TX_DATA_WR header */
> +	C3CB_FLAG_NO_APPEND = 1 << 1,	/* don't grow this skb */
> +	C3CB_FLAG_BARRIER = 1 << 2,	/* set TX_WAIT_IDLE after sending */
> +	C3CB_FLAG_COMPL = 1 << 4,	/* request WR completion */
> +};
> +
> +/*
> + * Definitions for managing deferred CPL replies from process context.
> + */
> +
> +typedef void (*defer_handler_t) (struct t3cdev *, struct sk_buff *);
> +
> +struct deferred_skb_cb {
> +	defer_handler_t handler;
> +	struct t3cdev *cdev;
> +};
> +
> +#define DEFERRED_SKB_CB(skb) ((struct deferred_skb_cb *)(skb)->cb)
> +
> +/*
> + * Top-level CPL message processing used by most CPL messages that
> + * pertain to connections.
> + */
> +static inline void process_cpl_msg(void (*fn)(struct s3_conn *,
> +					      struct sk_buff *),
> +				   struct s3_conn *c3cn,
> +				   struct sk_buff *skb)
> +{
> +	spin_lock(&c3cn->lock);
> +	fn(c3cn, skb);
> +	spin_unlock(&c3cn->lock);
> +}
> +
> +/*
> + * Opaque version of structure the SGE stores at skb->head of TX_DATA packets
> + * and for which we must reserve space.
> + */
> +struct sge_opaque_hdr {
> +	void *dev;
> +	dma_addr_t addr[MAX_SKB_FRAGS + 1];
> +};
> +
> +/* for TX: a skb must have a headroom of at least TX_HEADER_LEN bytes */
> +#define TX_HEADER_LEN \
> +		(sizeof(struct tx_data_wr) + sizeof(struct sge_opaque_hdr))
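
So every Tx pdu skb needs this much headroom reserved before it is
handed to cxgb3i_c3cn_send_pdus(), along the lines of what
cxgb3i_conn_ulp2_xmit() does later in this patch:

	skb = alloc_skb(TX_HEADER_LEN + copylen, GFP_ATOMIC);
	if (skb)
		skb_reserve(skb, TX_HEADER_LEN);
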
> +
> +void *cxgb3i_alloc_big_mem(unsigned int);
> +void cxgb3i_free_big_mem(void *);
> +
> +#endif /* _CXGB3I_OFFLOAD_H */
> diff --git a/drivers/scsi/cxgb3i/cxgb3i_ulp2.c b/drivers/scsi/cxgb3i/cxgb3i_ulp2.c
> new file mode 100644
> index 0000000..313bb90
> --- /dev/null
> +++ b/drivers/scsi/cxgb3i/cxgb3i_ulp2.c
> @@ -0,0 +1,692 @@
> +/*
> + * cxgb3i_ddp.c: Chelsio S3xx iSCSI driver.
> + *
> + * Copyright (c) 2008 Chelsio Communications, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation.
> + *
> + * Written by: Karen Xie (kxie@...lsio.com)
> + */
> +
> +#include <linux/skbuff.h>
> +#include <scsi/scsi_cmnd.h>
> +#include <scsi/scsi_host.h>
> +#include <linux/crypto.h>
> +#include "../iscsi_tcp.h"
> +
> +#include "cxgb3i.h"
> +#include "cxgb3i_ulp2.h"
> +
> +#ifdef __DEBUG_CXGB3I_RX__
> +#define cxgb3i_rx_debug		cxgb3i_log_debug
> +#else
> +#define cxgb3i_rx_debug(fmt...)
> +#endif
> +
> +#ifdef __DEBUG_CXGB3I_TX__
> +#define cxgb3i_tx_debug		cxgb3i_log_debug
> +#else
> +#define cxgb3i_tx_debug(fmt...)
> +#endif
> +
> +#ifdef __DEBUG_CXGB3I_TAG__
> +#define cxgb3i_tag_debug	cxgb3i_log_debug
> +#else
> +#define cxgb3i_tag_debug(fmt...)
> +#endif
> +
> +#ifdef __DEBUG_CXGB3I_DDP__
> +#define cxgb3i_ddp_debug	cxgb3i_log_debug
> +#else
> +#define cxgb3i_ddp_debug(fmt...)
> +#endif
> +
> +static struct page *pad_page;
> +
> +#define ULP2_PGIDX_MAX		4
> +#define ULP2_4K_PAGE_SHIFT	12
> +#define ULP2_4K_PAGE_MASK	(~((1UL << ULP2_4K_PAGE_SHIFT) - 1))
> +static unsigned char ddp_page_order[ULP2_PGIDX_MAX];
> +static unsigned long ddp_page_size[ULP2_PGIDX_MAX];
> +static unsigned char ddp_page_shift[ULP2_PGIDX_MAX];
> +static unsigned char sw_tag_idx_bits;
> +static unsigned char sw_tag_age_bits;
> +
> +static void cxgb3i_ddp_page_init(void)
> +{
> +	int i;
> +	unsigned long n = PAGE_SIZE >> ULP2_4K_PAGE_SHIFT;
> +
> +	if (PAGE_SIZE & (~ULP2_4K_PAGE_MASK)) {
> +		cxgb3i_log_warn("PAGE_SIZE 0x%lx is not multiple of 4K, "
> +				"ddp disabled.\n", PAGE_SIZE);
> +		return;
> +	}
> +	n = __ilog2_u32(n);
> +	for (i = 0; i < ULP2_PGIDX_MAX; i++, n++) {
> +		ddp_page_order[i] = n;
> +		ddp_page_shift[i] = ULP2_4K_PAGE_SHIFT + n;
> +		ddp_page_size[i] = 1 << ddp_page_shift[i];
> +		cxgb3i_log_debug("%d, order %u, shift %u, size 0x%lx.\n", i,
> +				 ddp_page_order[i], ddp_page_shift[i],
> +				 ddp_page_size[i]);
> +	}
> +
> +	sw_tag_idx_bits = (__ilog2_u32(ISCSI_ITT_MASK)) + 1;
> +	sw_tag_age_bits = (__ilog2_u32(ISCSI_AGE_MASK)) + 1;
> +}
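
Working through cxgb3i_ddp_page_init() for the common case: with a 4K
PAGE_SIZE, n starts at 0 and the table comes out as orders
{0, 1, 2, 3}, i.e. ddp page sizes of 4K/8K/16K/32K; a 64K-page host
would get 64K/128K/256K/512K instead.
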
> +
> +static inline void ulp_mem_io_set_hdr(struct sk_buff *skb, unsigned int addr)
> +{
> +	struct ulp_mem_io *req = (struct ulp_mem_io *)skb->head;
> +	req->wr.wr_lo = 0;
> +	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
> +	req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(addr >> 5) |
> +				   V_ULPTX_CMD(ULP_MEM_WRITE));
> +	req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE >> 5) |
> +			 V_ULPTX_NFLITS((PPOD_SIZE >> 3) + 1));
> +}
> +
> +static int set_ddp_map(struct cxgb3i_adapter *snic, struct pagepod_hdr *hdr,
> +		       unsigned int idx, unsigned int npods,
> +		       struct scatterlist *sgl, unsigned int sgcnt)
> +{
> +	struct cxgb3i_ddp_info *ddp = &snic->ddp;
> +	struct scatterlist *sg = sgl;
> +	unsigned int pm_addr = (idx << PPOD_SIZE_SHIFT) + ddp->llimit;
> +	int i;
> +
> +	for (i = 0; i < npods; i++, pm_addr += PPOD_SIZE) {
> +		struct sk_buff *skb;
> +		struct pagepod *ppod;
> +		int j, k;
> +		skb = alloc_skb(sizeof(struct ulp_mem_io) + PPOD_SIZE,
> +				GFP_ATOMIC);
> +		if (!skb)
> +			return -ENOMEM;
> +		skb_put(skb, sizeof(struct ulp_mem_io) + PPOD_SIZE);
> +
> +		ulp_mem_io_set_hdr(skb, pm_addr);
> +		ppod = (struct pagepod *)(skb->head +
> +					  sizeof(struct ulp_mem_io));
> +		memcpy(&(ppod->hdr), hdr, sizeof(struct pagepod));
> +		for (j = 0, k = i * 4; j < 5; j++, k++) {
> +			if (k < sgcnt) {
> +				ppod->addr[j] = cpu_to_be64(sg_dma_address(sg));
> +				if (j < 4)
> +					sg = sg_next(sg);
> +			} else
> +				ppod->addr[j] = 0UL;
> +		}
> +
> +		skb->priority = CPL_PRIORITY_CONTROL;
> +		cxgb3_ofld_send(snic->tdev, skb);
> +	}
> +	return 0;
> +}
> +
> +static int clear_ddp_map(struct cxgb3i_adapter *snic, unsigned int idx,
> +			 unsigned int npods)
> +{
> +	struct cxgb3i_ddp_info *ddp = &snic->ddp;
> +	unsigned int pm_addr = (idx << PPOD_SIZE_SHIFT) + ddp->llimit;
> +	int i;
> +
> +	for (i = 0; i < npods; i++, pm_addr += PPOD_SIZE) {
> +		struct sk_buff *skb;
> +		skb = alloc_skb(sizeof(struct ulp_mem_io) + PPOD_SIZE,
> +				GFP_ATOMIC);
> +		if (!skb)
> +			return -ENOMEM;
> +		skb_put(skb, sizeof(struct ulp_mem_io) + PPOD_SIZE);
> +		memset((skb->head + sizeof(struct ulp_mem_io)), 0, PPOD_SIZE);
> +		ulp_mem_io_set_hdr(skb, pm_addr);
> +		skb->priority = CPL_PRIORITY_CONTROL;
> +		cxgb3_ofld_send(snic->tdev, skb);
> +	}
> +	return 0;
> +}
> +
> +static int cxgb3i_ddp_sgl_check(struct scatterlist *sgl, unsigned int sgcnt)
> +{
> +	struct scatterlist *sg;
> +	int i;
> +
> +	/* make sure the sgl is fit for ddp:
> +	 *	the first page may start at a non-zero offset, and
> +	 *	the last page may be partially used, but
> +	 *	every other page must be used completely
> +	 */
> +	for_each_sg(sgl, sg, sgcnt, i) {
> +		if ((i && sg->offset) ||
> +		    ((i != sgcnt - 1) &&
> +		     (sg->length + sg->offset) != PAGE_SIZE))
> +			return -EINVAL;

I just want to check that I understood correctly:
if we fail here, the request still goes through the regular SW stack,
just without HW offloading -- it does not fail completely, right?

I ask because in OSD we chose to append disjoint memory segments,
which works well with regular iscsi_tcp.

> +	}
> +
> +	return 0;
> +}
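
And to make the rule concrete, continuing my question above: an sgl
like {offset 512, rest of the page}, {full page}, {2K head of a page}
passes the check -- only the first entry may start mid-page and only
the last may end early -- while any middle entry that is not exactly
a full page sends the request down the non-ddp path.
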
> +
> +static inline int ddp_find_unused_entries(struct cxgb3i_ddp_info *ddp,
> +					  int start, int max, int count)
> +{
> +	unsigned int i, j;
> +
> +	spin_lock(&ddp->map_lock);
> +	for (i = start; i <= max;) {
> +		for (j = 0; j < count; j++) {
> +			if (ddp->map[i + j])
> +				break;
> +		}
> +		if (j == count) {
> +			memset(&ddp->map[i], 1, count);
> +			spin_unlock(&ddp->map_lock);
> +			return i;
> +		}
> +		i += j + 1;
> +	}
> +	spin_unlock(&ddp->map_lock);
> +	return -EBUSY;
> +}
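
(Nice detail: on a collision the scan restarts just past the busy
slot (i += j + 1) rather than at i + 1, so free slots that were
already examined are not re-tested.)
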
> +
> +static inline void ddp_unmark_entries(struct cxgb3i_ddp_info *ddp,
> +				      int start, int count)
> +{
> +	spin_lock(&ddp->map_lock);
> +	memset(&ddp->map[start], 0, count);
> +	spin_unlock(&ddp->map_lock);
> +}
> +
> +u32 cxgb3i_ddp_tag_reserve(struct cxgb3i_adapter *snic, unsigned int tid,
> +			   u32 sw_tag, unsigned int xferlen,
> +			   struct scatterlist *sgl, unsigned int sgcnt)
> +{
> +	struct cxgb3i_ddp_info *ddp = &snic->ddp;
> +	struct pagepod_hdr hdr;
> +	unsigned int npods;
> +	int idx = -1, idx_max;
> +	u32 tag;
> +	int err;
> +
> +	if (!ddp || !sgcnt || xferlen < PAGE_SIZE)
> +		return RESERVED_ITT;
> +
> +	err = cxgb3i_ddp_sgl_check(sgl, sgcnt);
> +	if (err < 0)
> +		return RESERVED_ITT;
> +
> +	npods = (sgcnt + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
> +	idx_max = ddp->nppods - npods + 1;
> +
> +	if (ddp->idx_last == ddp->nppods)
> +		idx = ddp_find_unused_entries(ddp, 0, idx_max, npods);
> +	else {
> +		idx = ddp_find_unused_entries(ddp, ddp->idx_last + 1, idx_max,
> +					      npods);
> +		if ((idx < 0) && (ddp->idx_last >= npods))
> +			idx = ddp_find_unused_entries(ddp, 0,
> +						      ddp->idx_last - npods + 1,
> +						      npods);
> +	}
> +	if (idx < 0)
> +		return RESERVED_ITT;
> +
> +	if (pci_map_sg(snic->pdev, sgl, sgcnt, PCI_DMA_FROMDEVICE) <= 0)
> +		goto unmark_entries;
> +
> +	tag = sw_tag | (idx << snic->tag_format.rsvd_shift);
> +
> +	hdr.rsvd = 0;
> +	hdr.vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
> +	hdr.pgsz_tag_clr = htonl(tag);
> +	hdr.maxoffset = htonl(xferlen);
> +	hdr.pgoffset = htonl(sgl->offset);
> +
> +	if (set_ddp_map(snic, &hdr, idx, npods, sgl, sgcnt) < 0)
> +		goto unmap_sgl;
> +
> +	ddp->idx_last = idx;
> +	cxgb3i_tag_debug("tid 0x%x, xfer %u, 0x%x -> ddp tag 0x%x (%u, %u).\n",
> +			 tid, xferlen, sw_tag, tag, idx, npods);
> +	return tag;
> +
> +unmap_sgl:
> +	pci_unmap_sg(snic->pdev, sgl, sgcnt, PCI_DMA_FROMDEVICE);
> +
> +unmark_entries:
> +	ddp_unmark_entries(ddp, idx, npods);
> +	return RESERVED_ITT;
> +}
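
To verify my understanding of the tag layout: the pagepod index is
packed above the initiator's sw tag, so with a hypothetical
rsvd_shift of 16:

	tag = sw_tag | (idx << 16);	/* reserve side */
	idx = (tag >> 16) & rsvd_mask;	/* release side */

which is why cxgb3i_ddp_tag_release() below can recover the pagepod
index from the tag alone.
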
> +
> +void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *snic, u32 tag,
> +			    struct scatterlist *sgl, unsigned int sgcnt)
> +{
> +	u32 idx = (tag >> snic->tag_format.rsvd_shift) &
> +	    snic->tag_format.rsvd_mask;
> +	unsigned int npods = (sgcnt + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
> +
> +	if (idx < snic->tag_format.rsvd_mask) {
> +		cxgb3i_tag_debug("ddp tag 0x%x, release idx 0x%x, npods %u.\n",
> +				 tag, idx, npods);
> +		clear_ddp_map(snic, idx, npods);
> +		ddp_unmark_entries(&snic->ddp, idx, npods);
> +		pci_unmap_sg(snic->pdev, sgl, sgcnt, PCI_DMA_FROMDEVICE);
> +	}
> +}
> +
> +int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *cconn, int hcrc, int dcrc)
> +{
> +	struct iscsi_tcp_conn *tcp_conn = cconn->conn->dd_data;
> +	struct s3_conn *c3cn = (struct s3_conn *)(tcp_conn->sock);
> +	struct sk_buff *skb = alloc_skb(sizeof(struct cpl_set_tcb_field),
> +					GFP_KERNEL | __GFP_NOFAIL);
> +	struct cpl_set_tcb_field *req;
> +	u32 submode = (hcrc ? 1 : 0) | (dcrc ? 2 : 0);
> +
> +	/* set up ulp submode and page size */
> +	req = (struct cpl_set_tcb_field *)skb_put(skb, sizeof(*req));
> +	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
> +	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, c3cn->tid));
> +	req->reply = V_NO_REPLY(1);
> +	req->cpu_idx = 0;
> +	req->word = htons(31);
> +	req->mask = cpu_to_be64(0xFF000000);
> +	/* the connection page size is always the same as ddp-pgsz0 */
> +	req->val = cpu_to_be64(submode << 24);
> +	skb->priority = CPL_PRIORITY_CONTROL;
> +
> +	cxgb3_ofld_send(c3cn->cdev, skb);
> +	return 0;
> +}
> +
> +static int cxgb3i_conn_read_pdu_skb(struct iscsi_conn *conn,
> +				    struct sk_buff *skb)
> +{
> +	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
> +	struct iscsi_segment *segment = &tcp_conn->in.segment;
> +	struct iscsi_hdr *hdr = (struct iscsi_hdr *)tcp_conn->in.hdr_buf;
> +	unsigned char *buf = (unsigned char *)hdr;
> +	unsigned int offset = sizeof(struct iscsi_hdr);
> +	int err;
> +
> +	cxgb3i_rx_debug("conn 0x%p, skb 0x%p, len %u, flag 0x%x.\n",
> +			conn, skb, skb->len, skb_ulp_mode(skb));
> +
> +	/* read bhs */
> +	err = skb_copy_bits(skb, 0, buf, sizeof(struct iscsi_hdr));
> +	if (err < 0)
> +		return err;
> +	segment->copied = sizeof(struct iscsi_hdr);
> +	/* read ahs */
> +	if (hdr->hlength) {
> +		unsigned int ahslen = hdr->hlength << 2;
> +		/* Make sure we don't overflow */
> +		if (sizeof(*hdr) + ahslen > sizeof(tcp_conn->in.hdr_buf))
> +			return -ISCSI_ERR_AHSLEN;
> +		err = skb_copy_bits(skb, offset, buf + offset, ahslen);
> +		if (err < 0)
> +			return err;
> +		offset += ahslen;
> +	}
> +	/* header digest */
> +	if (conn->hdrdgst_en)
> +		offset += ISCSI_DIGEST_SIZE;
> +
> +	/* check header digest */
> +	segment->status = (conn->hdrdgst_en &&
> +			   (skb_ulp_mode(skb) & ULP2_FLAG_HCRC_ERROR)) ?
> +	    ISCSI_SEGMENT_DGST_ERR : 0;
> +
> +	hdr->itt = ntohl(hdr->itt);
> +	segment->total_copied = segment->total_size;
> +	tcp_conn->in.hdr = hdr;
> +	err = iscsi_tcp_hdr_dissect(conn, hdr);
> +	if (err)
> +		return err;
> +
> +	if (tcp_conn->in.datalen) {
> +		segment = &tcp_conn->in.segment;
> +		segment->status = (conn->datadgst_en &&
> +				   (skb_ulp_mode(skb) & ULP2_FLAG_DCRC_ERROR)) ?
> +		    ISCSI_SEGMENT_DGST_ERR : 0;
> +		if (skb_ulp_mode(skb) & ULP2_FLAG_DATA_DDPED) {
> +			cxgb3i_ddp_debug("opcode 0x%x, data %u, ddp'ed.\n",
> +					 hdr->opcode & ISCSI_OPCODE_MASK,
> +					 tcp_conn->in.datalen);
> +			segment->total_copied = segment->total_size;
> +		} else {
> +			cxgb3i_ddp_debug("opcode 0x%x, data %u, not ddp'ed.\n",
> +					 hdr->opcode & ISCSI_OPCODE_MASK,
> +					 tcp_conn->in.datalen);
> +			offset += sizeof(struct cpl_iscsi_hdr_norss);
> +		}
> +		while (segment->total_copied < segment->total_size) {
> +			iscsi_tcp_segment_map(segment, 1);
> +			err = skb_copy_bits(skb, offset, segment->data,
> +					    segment->size);
> +			iscsi_tcp_segment_unmap(segment);
> +			if (err)
> +				return err;
> +			segment->total_copied += segment->size;
> +			offset += segment->size;
> +
> +			if (segment->total_copied < segment->total_size)
> +				iscsi_tcp_segment_init_sg(segment,
> +							  sg_next(segment->sg),
> +							  0);
> +		}
> +		err = segment->done(tcp_conn, segment);
> +	}
> +	return err;
> +}
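
To make sure I follow the receive offsets: the BHS is copied from skb
offset 0, the AHS (if any) follows it, 4 more bytes are skipped when
the header digest is enabled, and for a PDU whose data was *not*
ddp'ed the payload sits a further sizeof(struct cpl_iscsi_hdr_norss)
into the skb. For a command PDU with no AHS, for example:

  offset = 48					/* BHS */
	 + 0					/* no AHS */
	 + (conn->hdrdgst_en ? 4 : 0)		/* header digest */
	 + sizeof(struct cpl_iscsi_hdr_norss);	/* non-ddp'ed data only */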
> +
> +static inline void tx_skb_setmode(struct sk_buff *skb, int hcrc, int dcrc)
> +{
> +	u8 submode = 0;
> +	if (hcrc)
> +		submode |= 1;
> +	if (dcrc)
> +		submode |= 2;
> +	skb_ulp_mode(skb) = (ULP_MODE_ISCSI << 4) | submode;
> +}
> +
> +int cxgb3i_conn_ulp2_xmit(struct iscsi_conn *conn)
> +{
> +	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
> +	struct iscsi_segment *hdr_seg = &tcp_conn->out.segment;
> +	struct iscsi_segment *data_seg = &tcp_conn->out.data_segment;
> +	unsigned int hdrlen = hdr_seg->total_size;
> +	unsigned int datalen = data_seg->total_size;
> +	unsigned int padlen = iscsi_padding(datalen);
> +	unsigned int copymax = SKB_MAX_HEAD(TX_HEADER_LEN);
> +	unsigned int copylen;
> +	struct sk_buff *skb;
> +	unsigned char *dst;
> +	int err = -EAGAIN;
> +
> +	if (conn->suspend_tx)
> +		return 0;
> +
> +	if (data_seg->data && ((datalen + padlen) < copymax))
> +		copylen = hdrlen + datalen + padlen;
> +	else
> +		copylen = hdrlen;
> +
> +	/* supports max. 16K pdus, so one skb is enough to hold all the data */
> +	skb = alloc_skb(TX_HEADER_LEN + copylen, GFP_ATOMIC);
> +	if (!skb)
> +		return -EAGAIN;
> +
> +	skb_reserve(skb, TX_HEADER_LEN);
> +	skb_put(skb, copylen);
> +	dst = skb->data;
> +
> +	tx_skb_setmode(skb, conn->hdrdgst_en, datalen ? conn->datadgst_en : 0);
> +
> +	memcpy(dst, hdr_seg->data, hdrlen);
> +	dst += hdrlen;
> +
> +	if (!datalen)
> +		goto send_pdu;
> +
> +	if (data_seg->data) {
> +		/* data is in a linear buffer */
> +		if (copylen > hdrlen) {
> +			/* data fits in the skb's headroom */
> +			memcpy(dst, data_seg->data, datalen);
> +			dst += datalen;
> +			if (padlen)
> +				memset(dst, 0, padlen);
> +		} else {
> +			unsigned int offset = 0;
> +			while (datalen) {
> +				struct page *page =
> +				    alloc_pages(GFP_ATOMIC, 0);
> +				int idx = skb_shinfo(skb)->nr_frags;
> +				skb_frag_t *frag = &skb_shinfo(skb)->frags[idx];
> +
> +				if (!page)
> +					goto free_skb;
> +
> +				frag->page = page;
> +				frag->page_offset = 0;
> +				if (datalen > PAGE_SIZE)
> +					frag->size = PAGE_SIZE;
> +				else
> +					frag->size = datalen;
> +				memcpy(page_address(page),
> +				       data_seg->data + offset, frag->size);
> +
> +				skb_shinfo(skb)->nr_frags++;
> +				datalen -= frag->size;
> +				offset += frag->size;
> +			}
> +		}
> +	} else {
> +		struct scatterlist *sg = data_seg->sg;
> +		unsigned int offset = data_seg->sg_offset;
> +		while (datalen) {
> +			int idx = skb_shinfo(skb)->nr_frags;
> +			skb_frag_t *frag = &skb_shinfo(skb)->frags[idx];
> +			struct page *pg = sg_page(sg);
> +
> +			get_page(pg);
> +			frag->page = pg;
> +			frag->page_offset = offset + sg->offset;
> +			frag->size = min(sg->length, datalen);
> +
> +			offset = 0;
> +			skb_shinfo(skb)->nr_frags++;
> +			datalen -= frag->size;
> +			sg = sg_next(sg);
> +		}
> +	}
> +
> +	if (skb_shinfo(skb)->nr_frags) {
> +		if (padlen) {
> +			int idx = skb_shinfo(skb)->nr_frags;
> +			skb_frag_t *frag = &skb_shinfo(skb)->frags[idx];
> +			frag->page = pad_page;
> +			frag->page_offset = 0;
> +			frag->size = padlen;
> +			skb_shinfo(skb)->nr_frags++;
> +		}
> +		datalen = data_seg->total_size + padlen;
> +		skb->data_len += datalen;
> +		skb->truesize += datalen;
> +		skb->len += datalen;
> +	}
> +
> +send_pdu:
> +	err = cxgb3i_c3cn_send_pdus((struct s3_conn *)tcp_conn->sock,
> +				    skb, MSG_DONTWAIT | MSG_NOSIGNAL);
> +	if (err > 0) {
> +		int pdulen = hdrlen + datalen + padlen;
> +		if (conn->hdrdgst_en)
> +			pdulen += ISCSI_DIGEST_SIZE;
> +		if (datalen && conn->datadgst_en)
> +			pdulen += ISCSI_DIGEST_SIZE;
> +
> +		hdr_seg->total_copied = hdr_seg->total_size;
> +		if (datalen)
> +			data_seg->total_copied = data_seg->total_size;
> +		conn->txdata_octets += pdulen;
> +		return pdulen;
> +	}
> +
> +free_skb:
> +	kfree_skb(skb);
> +	if (err < 0 && err != -EAGAIN) {
> +		cxgb3i_log_error("conn 0x%p, xmit err %d.\n", conn, err);
> +		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
> +		return err;
> +	}
> +	return -EAGAIN;
> +}
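
Working through the length accounting at send_pdu for a hypothetical
48-byte BHS with a 1001-byte linear data segment and both digests on
(this takes the headroom-copy path above):

  padlen = iscsi_padding(1001)	= 3	/* pad data to 4-byte boundary */
  pdulen = 48 + 1001 + 3	= 1052
	 + 4			/* header digest */
	 + 4			/* data digest */
				= 1060	/* wire bytes */

One thing that looks off to me: in the frag paths, datalen is reset to
data_seg->total_size + padlen just before send_pdu, so
"pdulen = hdrlen + datalen + padlen" counts padlen twice there and
conn->txdata_octets gets inflated -- or am I misreading it?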
> +
> +int cxgb3i_ulp2_init(void)
> +{
> +	pad_page = alloc_page(GFP_KERNEL);
> +	if (!pad_page)
> +		return -ENOMEM;
> +	memset(page_address(pad_page), 0, PAGE_SIZE);
> +	cxgb3i_ddp_page_init();
> +	return 0;
> +}
> +
> +void cxgb3i_ulp2_cleanup(void)
> +{
> +	if (pad_page) {
> +		__free_page(pad_page);
> +		pad_page = NULL;
> +	}
> +}
> +
> +void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn)
> +{
> +	struct sk_buff *skb;
> +	unsigned int read = 0;
> +	struct iscsi_conn *conn = c3cn->user_data;
> +	int err = 0;
> +
> +	cxgb3i_rx_debug("cn 0x%p.\n", c3cn);
> +
> +	read_lock(&c3cn->callback_lock);
> +	if (unlikely(!conn || conn->suspend_rx)) {
> +		cxgb3i_rx_debug("conn 0x%p, id %d, suspend_rx %d!\n", 
> +				conn, conn ? conn->id : 0xFF,
> +				conn ? conn->suspend_rx : 0xFF);
> +		read_unlock(&c3cn->callback_lock);
> +		return;
> +	}
> +	skb = skb_peek(&c3cn->receive_queue);
> +	while (!err && skb) {
> +		__skb_unlink(skb, &c3cn->receive_queue);
> +		read += skb_ulp_pdulen(skb);
> +		err = cxgb3i_conn_read_pdu_skb(conn, skb);
> +		__kfree_skb(skb);
> +		skb = skb_peek(&c3cn->receive_queue);
> +	}
> +	read_unlock(&c3cn->callback_lock);
> +	if (c3cn) {
> +		c3cn->copied_seq += read;
> +		cxgb3i_c3cn_rx_credits(c3cn, read);
> +	}
> +	conn->rxdata_octets += read;
> +
> +	if (err) {
> +		cxgb3i_log_info("conn 0x%p rx failed err %d.\n", conn, err);
> +		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
> +	}
> +}
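
Nit: by the time the "if (c3cn)" at the bottom is reached, c3cn has
already been dereferenced (read_lock on c3cn->callback_lock, skb_peek
on its receive_queue), so the NULL check is dead and the body can be
unconditional. A minimal simplification, if I am reading it right:

  c3cn->copied_seq += read;
  cxgb3i_c3cn_rx_credits(c3cn, read);
  conn->rxdata_octets += read;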
> +
> +void cxgb3i_conn_tx_open(struct s3_conn *c3cn)
> +{
> +	struct iscsi_conn *conn = (struct iscsi_conn *)c3cn->user_data;
> +	struct iscsi_tcp_conn *tcp_conn;
> +	cxgb3i_tx_debug("cn 0x%p.\n", c3cn);
> +	if (conn) {
> +		cxgb3i_tx_debug("cn 0x%p, cid %d.\n", c3cn, conn->id);
> +		tcp_conn = conn->dd_data;
> +		scsi_queue_work(conn->session->host, &conn->xmitwork);
> +	}
> +}
> +
> +void cxgb3i_conn_closing(struct s3_conn *c3cn)
> +{
> +	struct iscsi_conn *conn;
> +	read_lock(&c3cn->callback_lock);
> +	conn = (struct iscsi_conn *)c3cn->user_data;
> +	if (conn && c3cn->state != C3CN_STATE_ESTABLISHED)
> +		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
> +	read_unlock(&c3cn->callback_lock);
> +}
> +
> +int cxgb3i_adapter_ulp_init(struct cxgb3i_adapter *snic)
> +{
> +	struct t3cdev *tdev = snic->tdev;
> +	struct cxgb3i_ddp_info *ddp = &snic->ddp;
> +	struct ulp_iscsi_info uinfo;
> +	unsigned int ppmax, bits, max_bits;
> +	int i, err;
> +
> +	spin_lock_init(&ddp->map_lock);
> +
> +	err = tdev->ctl(tdev, ULP_ISCSI_GET_PARAMS, &uinfo);
> +	if (err < 0) {
> +		cxgb3i_log_error("%s, failed to get iscsi param err=%d.\n",
> +				 tdev->name, err);
> +		return err;
> +	}
> +
> +	ppmax = (uinfo.ulimit - uinfo.llimit + 1) >> PPOD_SIZE_SHIFT;
> +	max_bits = min(PPOD_IDX_MAX_SIZE,
> +		       (32 - sw_tag_idx_bits - sw_tag_age_bits));
> +	bits = __ilog2_u32(ppmax) + 1;
> +	if (bits > max_bits)
> +		bits = max_bits;
> +	ppmax = (1 << bits) - 1;
> +
> +	snic->tx_max_size = uinfo.max_txsz;
> +	snic->rx_max_size = uinfo.max_rxsz;
> +	cxgb3i_log_debug("snic tx %u, rx %u.\n", snic->tx_max_size,
> +			 snic->rx_max_size);
> +	snic->tag_format.idx_bits = sw_tag_idx_bits;
> +	snic->tag_format.age_bits = sw_tag_age_bits;
> +	snic->tag_format.rsvd_bits = bits;
> +	snic->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
> +	snic->tag_format.rsvd_mask = (1 << snic->tag_format.rsvd_bits) - 1;
> +
> +	cxgb3i_log_debug("snic nppods %u, rsvd shift %u, bits %u, mask 0x%x.\n",
> +			 ppmax, snic->tag_format.rsvd_shift,
> +			 snic->tag_format.rsvd_bits,
> +			 snic->tag_format.rsvd_mask);
> +
> +	ddp->map = cxgb3i_alloc_big_mem(ppmax);
> +	if (!ddp->map) {
> +		cxgb3i_log_warn("snic unable to alloc ddp ppod 0x%u, "
> +				"ddp disabled.\n", ppmax);
> +		return 0;
> +	}
> +	ddp->llimit = uinfo.llimit;
> +	ddp->ulimit = uinfo.ulimit;
> +
> +	uinfo.tagmask =
> +	    snic->tag_format.rsvd_mask << snic->tag_format.rsvd_shift;
> +	for (i = 0; i < ULP2_PGIDX_MAX; i++)
> +		uinfo.pgsz_factor[i] = ddp_page_order[i];
> +
> +	err = tdev->ctl(tdev, ULP_ISCSI_SET_PARAMS, &uinfo);
> +	if (err < 0) {
> +		cxgb3i_log_warn("snic unable to set iscsi param err=%d, "
> +				"ddp disabled.\n", err);
> +		goto free_ppod_map;
> +	}
> +
> +	ddp->nppods = ppmax;
> +	ddp->idx_last = ppmax;
> +
> +	tdev->ulp_iscsi = ddp;
> +
> +	return 0;
> +
> +free_ppod_map:
> +	cxgb3i_free_big_mem(ddp->map);
> +	return 0;
> +}
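
Question on the pod-count math: take a 4MB pod region, so
ppmax = 4MB >> PPOD_SIZE_SHIFT = 65536; then
bits = __ilog2_u32(65536) + 1 = 17, and ppmax becomes
(1 << 17) - 1 = 131071, i.e. *more* pods than the region actually
holds. Shouldn't this round down rather than up, something like
(untested):

  bits = __ilog2_u32(ppmax);	/* round down, not up */
  if (bits > max_bits)
	bits = max_bits;
  ppmax = (1 << bits) - 1;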
> +
> +void cxgb3i_adapter_ulp_cleanup(struct cxgb3i_adapter *snic)
> +{
> +	u8 *map = snic->ddp.map;
> +	if (map) {
> +		snic->tdev->ulp_iscsi = NULL;
> +		spin_lock(&snic->lock);
> +		snic->ddp.map = NULL;
> +		spin_unlock(&snic->lock);
> +		cxgb3i_free_big_mem(map);
> +	}
> +}
> diff --git a/drivers/scsi/cxgb3i/cxgb3i_ulp2.h b/drivers/scsi/cxgb3i/cxgb3i_ulp2.h
> new file mode 100644
> index 0000000..e3f46dc
> --- /dev/null
> +++ b/drivers/scsi/cxgb3i/cxgb3i_ulp2.h
> @@ -0,0 +1,106 @@
> +/*
> + * cxgb3i_ulp2.h: Chelsio S3xx iSCSI driver.
> + *
> + * Copyright (c) 2008 Chelsio Communications, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation.
> + *
> + * Written by: Karen Xie (kxie@...lsio.com)
> + */
> +
> +#ifndef __CXGB3I_ULP2_H__
> +#define __CXGB3I_ULP2_H__
> +
> +#define ULP2_PDU_PAYLOAD_DFLT	(16224 - ISCSI_PDU_HEADER_MAX)
> +#define PPOD_PAGES_MAX		4
> +#define PPOD_PAGES_SHIFT	2	/* 4 pages per pod */
> +
> +struct pagepod_hdr {
> +	u32 vld_tid;
> +	u32 pgsz_tag_clr;
> +	u32 maxoffset;
> +	u32 pgoffset;
> +	u64 rsvd;
> +};
> +
> +struct pagepod {
> +	struct pagepod_hdr hdr;
> +	u64 addr[PPOD_PAGES_MAX + 1];
> +};
> +
> +#define PPOD_SIZE		sizeof(struct pagepod)	/* 64 */
> +#define PPOD_SIZE_SHIFT		6
> +
> +#define PPOD_COLOR_SHIFT	0
> +#define PPOD_COLOR_SIZE		6
> +#define PPOD_COLOR_MASK		((1 << PPOD_COLOR_SIZE) - 1)
> +
> +#define PPOD_IDX_SHIFT		PPOD_COLOR_SIZE
> +#define PPOD_IDX_MAX_SIZE	24
> +
> +#define S_PPOD_TID    0
> +#define M_PPOD_TID    0xFFFFFF
> +#define V_PPOD_TID(x) ((x) << S_PPOD_TID)
> +
> +#define S_PPOD_VALID    24
> +#define V_PPOD_VALID(x) ((x) << S_PPOD_VALID)
> +#define F_PPOD_VALID    V_PPOD_VALID(1U)
> +
> +#define S_PPOD_COLOR    0
> +#define M_PPOD_COLOR    0x3F
> +#define V_PPOD_COLOR(x) ((x) << S_PPOD_COLOR)
> +
> +#define S_PPOD_TAG    6
> +#define M_PPOD_TAG    0xFFFFFF
> +#define V_PPOD_TAG(x) ((x) << S_PPOD_TAG)
> +
> +#define S_PPOD_PGSZ    30
> +#define M_PPOD_PGSZ    0x3
> +#define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
> +
> +struct cpl_iscsi_hdr_norss {
> +	union opcode_tid ot;
> +	u16 pdu_len_ddp;
> +	u16 len;
> +	u32 seq;
> +	u16 urg;
> +	u8 rsvd;
> +	u8 status;
> +};
> +
> +struct cpl_rx_data_ddp_norss {
> +	union opcode_tid ot;
> +	u16 urg;
> +	u16 len;
> +	u32 seq;
> +	u32 nxt_seq;
> +	u32 ulp_crc;
> +	u32 ddp_status;
> +};
> +
> +#define RX_DDP_STATUS_IPP_SHIFT		27	/* invalid pagepod */
> +#define RX_DDP_STATUS_TID_SHIFT		26	/* tid mismatch */
> +#define RX_DDP_STATUS_COLOR_SHIFT	25	/* color mismatch */
> +#define RX_DDP_STATUS_OFFSET_SHIFT	24	/* offset mismatch */
> +#define RX_DDP_STATUS_ULIMIT_SHIFT	23	/* ulimit error */
> +#define RX_DDP_STATUS_TAG_SHIFT		22	/* tag mismatch */
> +#define RX_DDP_STATUS_DCRC_SHIFT	21	/* dcrc error */
> +#define RX_DDP_STATUS_HCRC_SHIFT	20	/* hcrc error */
> +#define RX_DDP_STATUS_PAD_SHIFT		19	/* pad error */
> +#define RX_DDP_STATUS_PPP_SHIFT		18	/* pagepod parity error */
> +#define RX_DDP_STATUS_LLIMIT_SHIFT	17	/* llimit error */
> +#define RX_DDP_STATUS_DDP_SHIFT		16	/* ddp'able */
> +#define RX_DDP_STATUS_PMM_SHIFT		15	/* pagepod mismatch */
> +
> +#define ULP2_FLAG_DATA_READY		0x1
> +#define ULP2_FLAG_DATA_DDPED		0x2
> +#define ULP2_FLAG_HCRC_ERROR		0x10
> +#define ULP2_FLAG_DCRC_ERROR		0x20
> +#define ULP2_FLAG_PAD_ERROR		0x40
> +
> +void cxgb3i_conn_closing(struct s3_conn *);
> +void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn);
> +void cxgb3i_conn_tx_open(struct s3_conn *c3cn);
> +#endif
> 
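And to summarize the reserved-tag layout the header implies, as I read
the S_/M_/V_ macros:

  bits [31:30]	pgsz	(V_PPOD_PGSZ,  2 bits)
  bits [29:6]	tag	(V_PPOD_TAG,  24 bits)
  bits [5:0]	color	(V_PPOD_COLOR, 6 bits)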

Is the HW bidi-ready, in theory? That is, does the firmware do its own
iscsi parsing, and would it get confused by iscsi-bidi commands?

Would you please be willing to run an OSD stack through this card, to
verify that BIDI and VARLEN work?
What you would need is to set up an OSD target on one machine and my
OSD Initiator on the machine with the card. (I'll send you all the
instructions.)

Alternatively, you could send me a card and some documentation, and I
can set it up here and do the testing and debugging.

And one last personal request: please don't send the RFC as one big
patch. Send it as a series of small, reviewable patches, for the
practical reason that a reply to one huge patch becomes too long and
hard to navigate when editing or reading it.

Thanks
Boaz