lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20081007030540.782.28987.stgit@gitlost.lost>
Date:	Mon, 06 Oct 2008 20:05:45 -0700
From:	Jeff Kirsher <jeffrey.t.kirsher@...el.com>
To:	jeff@...zik.org, davem@...emloft.net
Cc:	netdev@...r.kernel.org, peter.p.waskiewicz.jr@...el.com,
	Alexander Duyck <alexander.h.duyck@...el.com>,
	Jeff Kirsher <jeffrey.t.kirsher@...el.com>
Subject: [NET-NEXT PATCH 1/6] ixgbe: this patch adds support for DCB to the
	kernel and ixgbe driver

From: Alexander Duyck <alexander.h.duyck@...el.com>

Signed-off-by: Alexander Duyck <alexander.h.duyck@...el.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@...el.com>
---

 drivers/net/ixgbe/Makefile          |    3 
 drivers/net/ixgbe/ixgbe.h           |   26 +
 drivers/net/ixgbe/ixgbe_dcb.c       |  332 +++++++++++++++++
 drivers/net/ixgbe/ixgbe_dcb.h       |  158 ++++++++
 drivers/net/ixgbe/ixgbe_dcb_82598.c |  398 ++++++++++++++++++++
 drivers/net/ixgbe/ixgbe_dcb_82598.h |   94 +++++
 drivers/net/ixgbe/ixgbe_dcb_nl.c    |  352 ++++++++++++++++++
 drivers/net/ixgbe/ixgbe_ethtool.c   |   37 ++
 drivers/net/ixgbe/ixgbe_main.c      |  189 +++++++++-
 include/linux/dcbnl.h               |  230 ++++++++++++
 include/linux/netdevice.h           |    8 
 include/linux/rtnetlink.h           |    5 
 include/net/dcbnl.h                 |   44 ++
 net/Kconfig                         |    1 
 net/Makefile                        |    3 
 net/dcb/Kconfig                     |   12 +
 net/dcb/Makefile                    |    1 
 net/dcb/dcbnl.c                     |  682 +++++++++++++++++++++++++++++++++++
 18 files changed, 2555 insertions(+), 20 deletions(-)
 create mode 100644 drivers/net/ixgbe/ixgbe_dcb.c
 create mode 100644 drivers/net/ixgbe/ixgbe_dcb.h
 create mode 100644 drivers/net/ixgbe/ixgbe_dcb_82598.c
 create mode 100644 drivers/net/ixgbe/ixgbe_dcb_82598.h
 create mode 100644 drivers/net/ixgbe/ixgbe_dcb_nl.c
 create mode 100644 include/linux/dcbnl.h
 create mode 100644 include/net/dcbnl.h
 create mode 100644 net/dcb/Kconfig
 create mode 100644 net/dcb/Makefile
 create mode 100644 net/dcb/dcbnl.c

diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile
index ccd83d9..2a45fa0 100644
--- a/drivers/net/ixgbe/Makefile
+++ b/drivers/net/ixgbe/Makefile
@@ -33,4 +33,5 @@
 obj-$(CONFIG_IXGBE) += ixgbe.o
 
 ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \
-              ixgbe_82598.o ixgbe_phy.o
+              ixgbe_82598.o ixgbe_phy.o ixgbe_dcb.o ixgbe_dcb_82598.o \
+              ixgbe_dcb_nl.o
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 2198b77..b21a8c9 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -35,6 +35,7 @@
 
 #include "ixgbe_type.h"
 #include "ixgbe_common.h"
+#include "ixgbe_dcb.h"
 
 #if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
 #include <linux/dca.h>
@@ -84,6 +85,7 @@
 #define IXGBE_TX_FLAGS_TSO		(u32)(1 << 2)
 #define IXGBE_TX_FLAGS_IPV4		(u32)(1 << 3)
 #define IXGBE_TX_FLAGS_VLAN_MASK	0xffff0000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK	0x0000e000
 #define IXGBE_TX_FLAGS_VLAN_SHIFT	16
 
 #define IXGBE_MAX_LRO_DESCRIPTORS       8
@@ -134,7 +136,7 @@ struct ixgbe_ring {
 
 	u16 reg_idx; /* holds the special value that gets the hardware register
 		      * offset associated with this ring, which is different
-		      * for DCE and RSS modes */
+		      * for DCB and RSS modes */
 
 #if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
 	/* cpu for tx queue */
@@ -152,8 +154,10 @@ struct ixgbe_ring {
 	u16 rx_buf_len;
 };
 
+#define RING_F_DCB  0
 #define RING_F_VMDQ 1
 #define RING_F_RSS  2
+#define IXGBE_MAX_DCB_INDICES   8
 #define IXGBE_MAX_RSS_INDICES  16
 #define IXGBE_MAX_VMDQ_INDICES 16
 struct ixgbe_ring_feature {
@@ -164,6 +168,10 @@ struct ixgbe_ring_feature {
 #define MAX_RX_QUEUES 64
 #define MAX_TX_QUEUES 32
 
+#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
+			       ? 8 : 1)
+#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS
+
 /* MAX_MSIX_Q_VECTORS of these are allocated,
  * but we only use one per queue-specific vector.
  */
@@ -215,6 +223,9 @@ struct ixgbe_adapter {
 	struct work_struct reset_task;
 	struct ixgbe_q_vector q_vector[MAX_MSIX_Q_VECTORS];
 	char name[MAX_MSIX_COUNT][IFNAMSIZ + 5];
+	struct ixgbe_dcb_config dcb_cfg;
+	struct ixgbe_dcb_config temp_dcb_cfg;
+	u8 dcb_set_bitmap;
 
 	/* Interrupt Throttle Rate */
 	u32 itr_setting;
@@ -269,6 +280,7 @@ struct ixgbe_adapter {
 #define IXGBE_FLAG_VMDQ_ENABLED                 (u32)(1 << 19)
 #define IXGBE_FLAG_NEED_LINK_UPDATE             (u32)(1 << 22)
 #define IXGBE_FLAG_IN_WATCHDOG_TASK             (u32)(1 << 23)
+#define IXGBE_FLAG_DCB_ENABLED                  (u32)(1 << 24)
 
 /* default to trying for four seconds */
 #define IXGBE_TRY_LINK_TIMEOUT (4 * HZ)
@@ -312,6 +324,13 @@ enum ixgbe_boards {
 };
 
 extern struct ixgbe_info ixgbe_82598_info;
+#ifdef CONFIG_DCBNL
+extern struct dcbnl_rtnl_ops dcbnl_ops;
+extern int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
+                              struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max);
+#endif
+
+
 
 extern char ixgbe_driver_name[];
 extern const char ixgbe_driver_version[];
@@ -326,5 +345,8 @@ extern int ixgbe_setup_tx_resources(struct ixgbe_adapter *, struct ixgbe_ring *)
 extern void ixgbe_free_rx_resources(struct ixgbe_adapter *, struct ixgbe_ring *);
 extern void ixgbe_free_tx_resources(struct ixgbe_adapter *, struct ixgbe_ring *);
 extern void ixgbe_update_stats(struct ixgbe_adapter *adapter);
-
+extern void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter);
+extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
+void ixgbe_napi_add_all(struct ixgbe_adapter *adapter);
+void ixgbe_napi_del_all(struct ixgbe_adapter *adapter);
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ixgbe/ixgbe_dcb.c b/drivers/net/ixgbe/ixgbe_dcb.c
new file mode 100644
index 0000000..e2e28ac
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb.c
@@ -0,0 +1,332 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2007 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS <linux.nics@...el.com>
+  e1000-devel Mailing List <e1000-devel@...ts.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+#include "ixgbe_dcb.h"
+#include "ixgbe_dcb_82598.h"
+
+/**
+ * ixgbe_dcb_check_config - Check DCB settings against the DCB rules
+ * @dcb_config: Pointer to DCB config structure
+ *
+ * Validates the Tx and then the Rx settings; returns 0 or a DCB_ERR_* code.
+ * The following rules are checked:
+ * 1. The sum of bandwidth percentages of all Bandwidth Groups must total 100%.
+ * 2. The sum of bandwidth percentages of all Traffic Classes within a Bandwidth
+ *    Group must total 100 (groups without any traffic class are skipped).
+ * 3. A Traffic Class should not be set to both Link Strict Priority
+ *    and Group Strict Priority.
+ * 4. Link strict Bandwidth Groups can only have link strict traffic classes
+ *    with zero bandwidth.
+ */
+s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *dcb_config)
+{
+	struct tc_bw_alloc *p;
+	s32 ret_val = 0;
+	u8 i, j, bw_id;
+	u16 bw = 0, bw_sum[2][MAX_BW_GROUP]; /* u16: 8 x u8 may exceed 255 */
+	bool link_strict[2][MAX_BW_GROUP];
+
+	memset(bw_sum, 0, sizeof(bw_sum));
+	memset(link_strict, 0, sizeof(link_strict));
+
+	/* i: 0 = Tx (DCB_TX_CONFIG), then 1 = Rx (DCB_RX_CONFIG) */
+	for (i = 0; i < 2; i++) {
+		/* Check each traffic class for rule violation */
+		for (j = 0; j < MAX_TRAFFIC_CLASS; j++) {
+			p = &dcb_config->tc_config[j].path[i];
+
+			bw = p->bwg_percent;
+			bw_id = p->bwg_id;
+
+			if (bw_id >= MAX_BW_GROUP) {
+				ret_val = DCB_ERR_CONFIG;
+				goto err_config;
+			}
+			if (p->prio_type == prio_link) {
+				link_strict[i][bw_id] = true;
+				/* Link strict should have zero bandwidth */
+				if (bw) {
+					ret_val = DCB_ERR_LS_BW_NONZERO;
+					goto err_config;
+				}
+			} else if (!bw) {
+				/*
+				 * Traffic classes without link strict
+				 * should have non-zero bandwidth.
+				 */
+				ret_val = DCB_ERR_TC_BW_ZERO;
+				goto err_config;
+			}
+			bw_sum[i][bw_id] += bw;
+		}
+
+		bw = 0;
+
+		/* Check each bandwidth group for rule violation */
+		for (j = 0; j < MAX_BW_GROUP; j++) {
+			bw += dcb_config->bw_percentage[i][j];
+			/*
+			 * Sum of bandwidth percentages of all traffic classes
+			 * within a Bandwidth Group must total 100 except for
+			 * link strict group (zero bandwidth).
+			 */
+			if (link_strict[i][j]) {
+				if (bw_sum[i][j]) {
+					/*
+					 * Link strict group should have zero
+					 * bandwidth.
+					 */
+					ret_val = DCB_ERR_LS_BWG_NONZERO;
+					goto err_config;
+				}
+			} else if (bw_sum[i][j] != BW_PERCENT &&
+				   bw_sum[i][j] != 0) {
+				ret_val = DCB_ERR_TC_BW;
+				goto err_config;
+			}
+		}
+
+		if (bw != BW_PERCENT) {
+			ret_val = DCB_ERR_BW_GROUP;
+			goto err_config;
+		}
+	}
+
+err_config:
+	return ret_val;
+}
+
+/**
+ * ixgbe_dcb_calculate_tc_credits - Calculates traffic class credits
+ * @dcb_config: Struct containing DCB settings.
+ * @direction: Configuring either Tx (DCB_TX_CONFIG) or Rx (DCB_RX_CONFIG).
+ *
+ * Fills in link_percent and the refill/max credits of every traffic class.
+ * Call only after ixgbe_dcb_check_config() has accepted the settings.
+ * Returns 0, or DCB_ERR_CONFIG when @dcb_config is NULL.
+ */
+s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *dcb_config,
+                                   u8 direction)
+{
+	struct tc_bw_alloc *p;
+	s32 ret_val = 0;
+	/* Initialization values default for Tx settings */
+	u32 credit_refill       = 0;
+	u32 credit_max          = 0;
+	u16 link_percentage     = 0;
+	u8  bw_percent          = 0;
+	u8  i;
+
+	if (dcb_config == NULL) {
+		ret_val = DCB_ERR_CONFIG;
+		goto out;
+	}
+
+	/* Find out the link percentage for each TC first */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		p = &dcb_config->tc_config[i].path[direction];
+		bw_percent = dcb_config->bw_percentage[direction][p->bwg_id];
+
+		link_percentage = p->bwg_percent;
+		/* Integer division may round a small share to 0; use 1% then */
+		link_percentage = (link_percentage * bw_percent) / 100;
+		if (p->bwg_percent > 0 && link_percentage == 0)
+			link_percentage = 1;
+
+		/* Save link_percentage for reference */
+		p->link_percent = (u8)link_percentage;
+
+		/* Calculate credit refill and save it */
+		credit_refill = link_percentage * MINIMUM_CREDIT_REFILL;
+		p->data_credits_refill = (u16)credit_refill;
+
+		/* Calculate maximum credit for the TC */
+		credit_max = (link_percentage * MAX_CREDIT) / 100;
+
+		/*
+		 * Adjustment based on rule checking, if the percentage
+		 * of a TC is too small, the maximum credit may not be
+		 * enough to send out a jumbo frame in data plane arbitration.
+		 */
+		if (credit_max && (credit_max < MINIMUM_CREDIT_FOR_JUMBO))
+			credit_max = MINIMUM_CREDIT_FOR_JUMBO;
+
+		if (direction == DCB_TX_CONFIG) {
+			/*
+			 * Adjustment based on rule checking, if the
+			 * percentage of a TC is too small, the maximum
+			 * credit may not be enough to send out a TSO
+			 * packet in descriptor plane arbitration.
+			 */
+			if (credit_max &&
+			    (credit_max < MINIMUM_CREDIT_FOR_TSO))
+				credit_max = MINIMUM_CREDIT_FOR_TSO;
+
+			dcb_config->tc_config[i].desc_credits_max =
+				(u16)credit_max;
+		}
+
+		p->data_credits_max = (u16)credit_max;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * ixgbe_dcb_get_tc_stats - Returns status of each traffic class
+ * @hw: pointer to hardware structure
+ * @stats: pointer to statistics structure
+ * @tc_count:  Number of traffic classes to collect statistics for.
+ *
+ * Collects per-TC statistics on 82598EB; other MAC types return 0 untouched.
+ */
+s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
+                           u8 tc_count)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_get_tc_stats_82598(hw, stats, tc_count);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_get_pfc_stats - Returns CBFC status of each traffic class
+ * @hw: pointer to hardware structure
+ * @stats: pointer to statistics structure
+ * @tc_count:  Number of traffic classes to collect statistics for.
+ *
+ * Collects per-TC CBFC statistics on 82598EB; other MAC types return 0.
+ */
+s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
+                            u8 tc_count)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_get_pfc_stats_82598(hw, stats, tc_count);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_config_rx_arbiter - Config Rx arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configures the Rx data arbiter on 82598EB; other MAC types return 0 untouched.
+ */
+s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *hw,
+                                struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_config_tx_desc_arbiter - Config Tx Desc arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configures the Tx descriptor arbiter on 82598EB; other MAC types return 0.
+ */
+s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *hw,
+                                     struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_config_tx_data_arbiter - Config Tx data arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configures the Tx data arbiter on 82598EB; other MAC types return 0 untouched.
+ */
+s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *hw,
+                                     struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_config_pfc - Config priority flow control
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configures Priority Flow Control on 82598EB; other MAC types return 0.
+ */
+s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *hw,
+                         struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_config_pfc_82598(hw, dcb_config);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_config_tc_stats - Config traffic class statistics
+ * @hw: pointer to hardware structure
+ *
+ * Configure queue statistics registers so that all queues of a traffic class
+ * share one set of counters. Only 82598EB is handled; other MACs return 0.
+ */
+s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *hw)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_config_tc_stats_82598(hw);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_hw_config - Config and enable DCB
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Applies the DCB settings on 82598EB; other MAC types return 0 untouched.
+ */
+s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw,
+                        struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_hw_config_82598(hw, dcb_config);
+	return ret;
+}
+
diff --git a/drivers/net/ixgbe/ixgbe_dcb.h b/drivers/net/ixgbe/ixgbe_dcb.h
new file mode 100644
index 0000000..bc05c63
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb.h
@@ -0,0 +1,158 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2007 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS <linux.nics@...el.com>
+  e1000-devel Mailing List <e1000-devel@...ts.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _DCB_CONFIG_H_
+#define _DCB_CONFIG_H_
+
+#include "ixgbe_type.h"
+
+/* DCB data structures */
+
+#define IXGBE_MAX_PACKET_BUFFERS 8
+#define MAX_USER_PRIORITY        8
+#define MAX_TRAFFIC_CLASS        8
+#define MAX_BW_GROUP             8
+#define BW_PERCENT               100
+
+#define DCB_TX_CONFIG            0
+#define DCB_RX_CONFIG            1
+
+/* DCB error Codes */
+#define DCB_SUCCESS              0
+#define DCB_ERR_CONFIG           -1
+#define DCB_ERR_PARAM            -2
+
+/* Transmit and receive Errors */
+/* Error in bandwidth group allocation */
+#define DCB_ERR_BW_GROUP        -3
+/* Error in traffic class bandwidth allocation */
+#define DCB_ERR_TC_BW           -4
+/* Traffic class has both link strict and group strict enabled */
+#define DCB_ERR_LS_GS           -5
+/* Link strict traffic class has non zero bandwidth */
+#define DCB_ERR_LS_BW_NONZERO   -6
+/* Link strict bandwidth group has non zero bandwidth */
+#define DCB_ERR_LS_BWG_NONZERO  -7
+/*  Traffic class has zero bandwidth */
+#define DCB_ERR_TC_BW_ZERO      -8
+
+#define DCB_NOT_IMPLEMENTED      0x7FFFFFFF
+
+struct dcb_pfc_tc_debug {
+	u8  tc;
+	u8  pause_status;
+	u64 pause_quanta;
+};
+
+enum strict_prio_type {
+	prio_none = 0,
+	prio_group,
+	prio_link
+};
+
+/* Traffic class bandwidth allocation per direction */
+struct tc_bw_alloc {
+	u8 bwg_id;		  /* Bandwidth Group (BWG) ID */
+	u8 bwg_percent;		  /* % of BWG's bandwidth */
+	u8 link_percent;	  /* % of link bandwidth */
+	u8 up_to_tc_bitmap;	  /* User Priority to Traffic Class mapping */
+	u16 data_credits_refill;  /* Credit refill amount in 64B granularity */
+	u16 data_credits_max;	  /* Max credits for a configured packet buffer
+				   * in 64B granularity.*/
+	enum strict_prio_type prio_type; /* Link or Group Strict Priority */
+};
+
+enum dcb_pfc_type {
+	pfc_disabled = 0,
+	pfc_enabled_full,
+	pfc_enabled_tx,
+	pfc_enabled_rx
+};
+
+/* Traffic class configuration */
+struct tc_configuration {
+	struct tc_bw_alloc path[2]; /* One each for Tx/Rx */
+	enum dcb_pfc_type  dcb_pfc; /* Class based flow control setting */
+
+	u16 desc_credits_max; /* For Tx Descriptor arbitration */
+	u8 tc; /* Traffic class (TC) */
+};
+
+enum dcb_rx_pba_cfg {
+	pba_equal,     /* PBA[0-7] each use 64KB FIFO */
+	pba_80_48      /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */
+};
+
+struct ixgbe_dcb_config {
+	struct tc_configuration tc_config[MAX_TRAFFIC_CLASS];
+	u8     bw_percentage[2][MAX_BW_GROUP]; /* One each for Tx/Rx */
+
+	bool  round_robin_enable;
+
+	enum dcb_rx_pba_cfg rx_pba_cfg;
+
+	u32  dcb_cfg_version; /* Not used...OS-specific? */
+	u32  link_speed; /* For bandwidth allocation validation purpose */
+};
+
+
+/* DCB driver APIs */
+
+/* DCB rule checking function.*/
+s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *config);
+
+/* DCB credits calculation */
+s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *, u8);
+
+/* DCB PFC functions */
+s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *, struct ixgbe_dcb_config *g);
+s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
+
+/* DCB traffic class stats */
+s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *);
+s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
+
+/* DCB config arbiters */
+s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *,
+                                     struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *,
+                                     struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+/* DCB hw initialization */
+s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+/* DCB definitions for credit calculation (credits are in 64B units) */
+#define MAX_CREDIT_REFILL       511  /* 0x1FF * 64B = 32704B */
+#define MINIMUM_CREDIT_REFILL   5    /* 5*64B = 320B */
+#define MINIMUM_CREDIT_FOR_JUMBO 145  /* 145= UpperBound((9*1024+54)/64B) for 9KB jumbo frame */
+#define DCB_MAX_TSO_SIZE        (32*1024) /* MAX TSO packet size supported in DCB mode */
+#define MINIMUM_CREDIT_FOR_TSO  (DCB_MAX_TSO_SIZE/64 + 1) /* 513 for 32KB TSO packet */
+#define MAX_CREDIT              4095 /* Maximum credit supported: 4095 * 64B = 262080B (~256KB) */
+
+#endif /* _DCB_CONFIG_H */
diff --git a/drivers/net/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ixgbe/ixgbe_dcb_82598.c
new file mode 100644
index 0000000..fce6867
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb_82598.c
@@ -0,0 +1,398 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2007 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS <linux.nics@...el.com>
+  e1000-devel Mailing List <e1000-devel@...ts.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+#include "ixgbe_dcb.h"
+#include "ixgbe_dcb_82598.h"
+
+/**
+ * ixgbe_dcb_get_tc_stats_82598 - Return status data for each traffic class
+ * @hw: pointer to hardware structure
+ * @stats: pointer to statistics structure
+ * @tc_count:  Number of traffic classes to read (at most MAX_TRAFFIC_CLASS).
+ *
+ * Adds each TC's packet/byte counters to @stats; returns 0 or DCB_ERR_PARAM.
+ */
+s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *hw,
+                                 struct ixgbe_hw_stats *stats,
+                                 u8 tc_count)
+{
+	int tc;
+
+	if (tc_count > MAX_TRAFFIC_CLASS)
+		return DCB_ERR_PARAM;
+
+	/* Statistics pertaining to each traffic class */
+	for (tc = 0; tc < tc_count; tc++) {
+		/* Transmitted Packets */
+		stats->qptc[tc] += IXGBE_READ_REG(hw, IXGBE_QPTC(tc));
+		/* Transmitted Bytes */
+		stats->qbtc[tc] += IXGBE_READ_REG(hw, IXGBE_QBTC(tc));
+		/* Received Packets */
+		stats->qprc[tc] += IXGBE_READ_REG(hw, IXGBE_QPRC(tc));
+		/* Received Bytes */
+		stats->qbrc[tc] += IXGBE_READ_REG(hw, IXGBE_QBRC(tc));
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_get_pfc_stats_82598 - Returns CBFC status data
+ * @hw: pointer to hardware structure
+ * @stats: pointer to statistics structure
+ * @tc_count:  Number of traffic classes to read (at most MAX_TRAFFIC_CLASS).
+ *
+ * Adds each TC's priority XOFF counters to @stats; returns 0 or DCB_ERR_PARAM.
+ */
+s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *hw,
+                                  struct ixgbe_hw_stats *stats,
+                                  u8 tc_count)
+{
+	int tc;
+
+	if (tc_count > MAX_TRAFFIC_CLASS)
+		return DCB_ERR_PARAM;
+
+	for (tc = 0; tc < tc_count; tc++) {
+		/* Priority XOFF Transmitted */
+		stats->pxofftxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(tc));
+		/* Priority XOFF Received */
+		stats->pxoffrxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(tc));
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_packet_buffers_82598 - Configure packet buffers
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Sets Rx buffer sizes per rx_pba_cfg and all Tx buffers to IXGBE_TXPBSIZE_40KB.
+ */
+s32 ixgbe_dcb_config_packet_buffers_82598(struct ixgbe_hw *hw,
+                                          struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret_val = 0;
+	u32 value = IXGBE_RXPBSIZE_64KB;
+	u8  i = 0;
+
+	/* Setup Rx packet buffer sizes */
+	switch (dcb_config->rx_pba_cfg) {
+	case pba_80_48:
+		/* Setup the first four at 80KB */
+		value = IXGBE_RXPBSIZE_80KB;
+		for (; i < 4; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value);
+		/* Setup the last four at 48KB...don't re-init i */
+		value = IXGBE_RXPBSIZE_48KB;
+		/* Fall through: the loop below continues from i == 4 */
+	case pba_equal:
+	default:
+		for (; i < IXGBE_MAX_PACKET_BUFFERS; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value);
+
+		/* Setup Tx packet buffer sizes */
+		for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) {
+			IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i),
+					IXGBE_TXPBSIZE_40KB);
+		}
+		break;
+	}
+
+	return ret_val;
+}
+
+/**
+ * ixgbe_dcb_config_rx_arbiter_82598 - Config Rx data arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Rx Data Arbiter and credits for each traffic class; returns 0.
+ */
+s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw,
+                                      struct ixgbe_dcb_config *dcb_config)
+{
+	struct tc_bw_alloc    *p;
+	u32    reg           = 0;
+	u32    credit_refill = 0;
+	u32    credit_max    = 0;
+	u8     i             = 0;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RUPPBMR) | IXGBE_RUPPBMR_MQA;
+	IXGBE_WRITE_REG(hw, IXGBE_RUPPBMR, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
+	/* Enable Arbiter */
+	reg &= ~IXGBE_RMCS_ARBDIS;
+	/* Enable Receive Recycle within the BWG */
+	reg |= IXGBE_RMCS_RRM;
+	/* Enable Deficit Fixed Priority arbitration */
+	reg |= IXGBE_RMCS_DFP;
+
+	IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg);
+
+	/* Configure traffic class credits and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		p = &dcb_config->tc_config[i].path[DCB_RX_CONFIG];
+		credit_refill = p->data_credits_refill;
+		credit_max    = p->data_credits_max;
+
+		reg = credit_refill | (credit_max << IXGBE_RT2CR_MCL_SHIFT);
+
+		if (p->prio_type == prio_link)
+			reg |= IXGBE_RT2CR_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_RT2CR(i), reg);
+	}
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+	reg |= IXGBE_RDRXCTL_RDMTS_1_2;
+	reg |= IXGBE_RDRXCTL_MPBEN;
+	reg |= IXGBE_RDRXCTL_MCEN;
+	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+	/* Make sure there are enough descriptors before arbitration */
+	reg &= ~IXGBE_RXCTRL_DMBYPS;
+	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, reg);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tx_desc_arbiter_82598 - Config Tx Desc. arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Tx Descriptor Arbiter and credits for each traffic class; returns 0.
+ */
+s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw,
+                                           struct ixgbe_dcb_config *dcb_config)
+{
+	struct tc_bw_alloc *p;
+	u32    reg, max_credits;
+	u8     i;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_DPMCS);
+
+	/* Enable arbiter */
+	reg &= ~IXGBE_DPMCS_ARBDIS;
+	if (!(dcb_config->round_robin_enable)) {
+		/* Enable DFP and Recycle mode */
+		reg |= (IXGBE_DPMCS_TDPAC | IXGBE_DPMCS_TRM);
+	}
+	reg |= IXGBE_DPMCS_TSOEF;
+	/* Configure Max TSO packet size 34KB including payload and headers */
+	reg |= (0x4 << IXGBE_DPMCS_MTSOS_SHIFT);
+
+	IXGBE_WRITE_REG(hw, IXGBE_DPMCS, reg);
+
+	/* Per TC: max descriptor credits, refill credits, BWG and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG];
+		max_credits = dcb_config->tc_config[i].desc_credits_max;
+		reg = max_credits << IXGBE_TDTQ2TCCR_MCL_SHIFT;
+		reg |= p->data_credits_refill;
+		reg |= (u32)(p->bwg_id) << IXGBE_TDTQ2TCCR_BWG_SHIFT;
+
+		if (p->prio_type == prio_group)
+			reg |= IXGBE_TDTQ2TCCR_GSP;
+
+		if (p->prio_type == prio_link)
+			reg |= IXGBE_TDTQ2TCCR_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_TDTQ2TCCR(i), reg);
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tx_data_arbiter_82598 - Config Tx data arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Tx Data Arbiter and credits for each traffic class; returns 0.
+ */
+s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw,
+                                           struct ixgbe_dcb_config *dcb_config)
+{
+	struct tc_bw_alloc *p;
+	u32 reg;
+	u8 i;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_PDPMCS);
+	/* Enable Data Plane Arbiter */
+	reg &= ~IXGBE_PDPMCS_ARBDIS;
+	/* Enable DFP and Transmit Recycle Mode */
+	reg |= (IXGBE_PDPMCS_TPPAC | IXGBE_PDPMCS_TRM);
+
+	IXGBE_WRITE_REG(hw, IXGBE_PDPMCS, reg);
+
+	/* Per TC: refill credits, max data credits, BWG and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG];
+		reg = p->data_credits_refill;
+		reg |= (u32)(p->data_credits_max) << IXGBE_TDPT2TCCR_MCL_SHIFT;
+		reg |= (u32)(p->bwg_id) << IXGBE_TDPT2TCCR_BWG_SHIFT;
+
+		if (p->prio_type == prio_group)
+			reg |= IXGBE_TDPT2TCCR_GSP;
+
+		if (p->prio_type == prio_link)
+			reg |= IXGBE_TDPT2TCCR_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_TDPT2TCCR(i), reg);
+	}
+
+	/* Enable Tx packet buffer division */
+	reg = IXGBE_READ_REG(hw, IXGBE_DTXCTL);
+	reg |= IXGBE_DTXCTL_ENDBUBD;
+	IXGBE_WRITE_REG(hw, IXGBE_DTXCTL, reg);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_pfc_82598 - Config priority flow control
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Priority Flow Control for each traffic class; always returns 0.
+ */
+s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw,
+                               struct ixgbe_dcb_config *dcb_config)
+{
+	u32 reg, rx_pba_size;
+	u8  i;
+
+	/* Enable Transmit Priority Flow Control */
+	reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
+	reg &= ~IXGBE_RMCS_TFCE_802_3X;
+	/* correct the reporting of our flow control status */
+	hw->fc.type = ixgbe_fc_none;
+	reg |= IXGBE_RMCS_TFCE_PRIORITY;
+	IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg);
+
+	/* Enable Receive Priority Flow Control */
+	reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	reg &= ~IXGBE_FCTRL_RFCE;
+	reg |= IXGBE_FCTRL_RPFCE;
+	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg);
+
+	/*
+	 * Program both thresholds for every traffic class; the XONE/FCEN enable
+	 * bits are set only for pfc_enabled_tx and pfc_enabled_full classes.
+	 */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		if (dcb_config->rx_pba_cfg == pba_equal) {
+			rx_pba_size = IXGBE_RXPBSIZE_64KB;
+		} else {
+			rx_pba_size = (i < 4) ? IXGBE_RXPBSIZE_80KB
+					      : IXGBE_RXPBSIZE_48KB;
+		}
+
+		reg = ((rx_pba_size >> 5) &  0xFFF0);
+		if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx ||
+		    dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full)
+			reg |= IXGBE_FCRTL_XONE;
+
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), reg);
+
+		reg = ((rx_pba_size >> 2) & 0xFFF0);
+		if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx ||
+		    dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full)
+			reg |= IXGBE_FCRTH_FCEN;
+
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), reg);
+	}
+
+	/* Configure pause time: one FCTTV register per two traffic classes */
+	for (i = 0; i < (MAX_TRAFFIC_CLASS >> 1); i++)
+		IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), 0x68006800);
+
+	/* Configure flow control refresh threshold value */
+	IXGBE_WRITE_REG(hw, IXGBE_FCRTV, 0x3400);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tc_stats_82598 - Configure traffic class statistics
+ * @hw: pointer to hardware structure
+ *
+ * Configure queue statistics registers so that all queues belonging to the
+ * same traffic class use a single set of statistics counters; returns 0.
+ */
+s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *hw)
+{
+	u32 reg = 0;
+	u8  i   = 0;
+	u8  j   = 0;
+
+	/* Rx: RQSMR(2j) and RQSMR(2j+1) get j ORed into every byte */
+	for (i = 0, j = 0; i < 15 && j < 8; i = i + 2, j++) {
+		reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i));
+		reg |= ((0x1010101) * j);
+		IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i), reg);
+		reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i + 1));
+		reg |= ((0x1010101) * j);
+		IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i + 1), reg);
+	}
+	/* Tx: TQSMR(i) gets i ORed into every byte - 4 queues per register */
+	for (i = 0; i < 8; i++) {
+		reg = IXGBE_READ_REG(hw, IXGBE_TQSMR(i));
+		reg |= ((0x1010101) * i);
+		IXGBE_WRITE_REG(hw, IXGBE_TQSMR(i), reg);
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_hw_config_82598 - Config and enable DCB
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Runs every 82598 DCB setup step in order; their results are not checked.
+ */
+s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw,
+                              struct ixgbe_dcb_config *dcb_config)
+{
+	ixgbe_dcb_config_packet_buffers_82598(hw, dcb_config);
+	ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config);
+	ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config);
+	ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config);
+	ixgbe_dcb_config_pfc_82598(hw, dcb_config);
+	ixgbe_dcb_config_tc_stats_82598(hw);
+
+	return 0;
+}
diff --git a/drivers/net/ixgbe/ixgbe_dcb_82598.h b/drivers/net/ixgbe/ixgbe_dcb_82598.h
new file mode 100644
index 0000000..1e6a313
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb_82598.h
@@ -0,0 +1,94 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2007 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS <linux.nics@...el.com>
+  e1000-devel Mailing List <e1000-devel@...ts.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _DCB_82598_CONFIG_H_
+#define _DCB_82598_CONFIG_H_
+
+/* DCB register definitions */
+
+#define IXGBE_DPMCS_MTSOS_SHIFT 16
+#define IXGBE_DPMCS_TDPAC       0x00000001 /* 0 Round Robin, 1 DFP - Deficit Fixed Priority */
+#define IXGBE_DPMCS_TRM         0x00000010 /* Transmit Recycle Mode */
+#define IXGBE_DPMCS_ARBDIS      0x00000040 /* DCB arbiter disable */
+#define IXGBE_DPMCS_TSOEF       0x00080000 /* TSO Expand Factor: 0=x4, 1=x2 */
+
+#define IXGBE_RUPPBMR_MQA       0x80000000 /* Enable UP to queue mapping */
+
+#define IXGBE_RT2CR_MCL_SHIFT   12 /* Offset to Max Credit Limit setting */
+#define IXGBE_RT2CR_LSP         0x80000000 /* LSP enable bit */
+
+#define IXGBE_RDRXCTL_MPBEN     0x00000010 /* DMA config for multiple packet buffers enable */
+#define IXGBE_RDRXCTL_MCEN      0x00000040 /* DMA config for multiple cores (RSS) enable */
+
+#define IXGBE_TDTQ2TCCR_MCL_SHIFT   12
+#define IXGBE_TDTQ2TCCR_BWG_SHIFT   9
+#define IXGBE_TDTQ2TCCR_GSP     0x40000000
+#define IXGBE_TDTQ2TCCR_LSP     0x80000000
+
+#define IXGBE_TDPT2TCCR_MCL_SHIFT   12
+#define IXGBE_TDPT2TCCR_BWG_SHIFT   9
+#define IXGBE_TDPT2TCCR_GSP     0x40000000
+#define IXGBE_TDPT2TCCR_LSP     0x80000000
+
+#define IXGBE_PDPMCS_TPPAC      0x00000020 /* 0 Round Robin, 1 for DFP - Deficit Fixed Priority */
+#define IXGBE_PDPMCS_ARBDIS     0x00000040 /* Arbiter disable */
+#define IXGBE_PDPMCS_TRM        0x00000100 /* Transmit Recycle Mode enable */
+
+#define IXGBE_DTXCTL_ENDBUBD    0x00000004 /* Enable DBU buffer division */
+
+#define IXGBE_TXPBSIZE_40KB     0x0000A000 /* 40KB Packet Buffer */
+#define IXGBE_RXPBSIZE_48KB     0x0000C000 /* 48KB Packet Buffer */
+#define IXGBE_RXPBSIZE_64KB     0x00010000 /* 64KB Packet Buffer */
+#define IXGBE_RXPBSIZE_80KB     0x00014000 /* 80KB Packet Buffer */
+
+#define IXGBE_RDRXCTL_RDMTS_1_2 0x00000000
+
+/* DCB hardware-specific driver APIs */
+
+/* DCB PFC functions */
+s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *, struct ixgbe_hw_stats *,
+                                  u8);
+
+/* DCB traffic class stats */
+s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *);
+s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *, struct ixgbe_hw_stats *,
+                                 u8);
+
+/* DCB config arbiters */
+s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *,
+                                           struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *,
+                                           struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *,
+                                      struct ixgbe_dcb_config *);
+
+/* DCB hw initialization: runs all of the configuration steps above */
+s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+#endif /* _DCB_82598_CONFIG_H_ */
diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
new file mode 100644
index 0000000..5915ad3
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -0,0 +1,352 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2008 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS <linux.nics@...el.com>
+  e1000-devel Mailing List <e1000-devel@...ts.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe.h"
+#include <linux/dcbnl.h>
+
+/*
+ * Callbacks for DCB netlink in the kernel.
+ *
+ * The BIT_* values are flags OR-ed into adapter->dcb_set_bitmap by the
+ * "set" callbacks to record that part of the pending configuration
+ * (temp_dcb_cfg) differs from the active one (dcb_cfg).
+ */
+#define BIT_DCB_MODE	0x01
+#define BIT_PFC		0x02
+#define BIT_PG_RX	0x04
+#define BIT_PG_TX	0x08
+
+/**
+ * ixgbe_copy_dcb_cfg - Copy DCB settings from one configuration to another
+ * @src_dcb_cfg: configuration to read from
+ * @dst_dcb_cfg: configuration to write to
+ * @tc_max: number of traffic classes whose Tx/Rx path settings are copied
+ *
+ * Copies the Tx and Rx path settings of the first @tc_max traffic classes,
+ * then the Tx/Rx bandwidth group percentages for every index in the
+ * DCB_PG_ATTR_BW_ID_* range and the PFC setting for every index in the
+ * DCB_PFC_UP_ATTR_* range.
+ *
+ * Returns 0 on success or -EINVAL if either pointer is NULL.
+ */
+int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
+                       struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max)
+{
+	const int bwg_count = DCB_PG_ATTR_BW_ID_MAX - DCB_PG_ATTR_BW_ID_0;
+	const int pfc_count = DCB_PFC_UP_ATTR_MAX - DCB_PFC_UP_ATTR_0;
+	int n;
+
+	if (!src_dcb_cfg || !dst_dcb_cfg)
+		return -EINVAL;
+
+	/* Per traffic class path settings, Tx first and then Rx */
+	for (n = 0; n < tc_max; n++) {
+		struct tc_configuration *from = &src_dcb_cfg->tc_config[n];
+		struct tc_configuration *to = &dst_dcb_cfg->tc_config[n];
+
+		to->path[DCB_TX_CONFIG].prio_type =
+			from->path[DCB_TX_CONFIG].prio_type;
+		to->path[DCB_TX_CONFIG].bwg_id =
+			from->path[DCB_TX_CONFIG].bwg_id;
+		to->path[DCB_TX_CONFIG].bwg_percent =
+			from->path[DCB_TX_CONFIG].bwg_percent;
+		to->path[DCB_TX_CONFIG].up_to_tc_bitmap =
+			from->path[DCB_TX_CONFIG].up_to_tc_bitmap;
+
+		to->path[DCB_RX_CONFIG].prio_type =
+			from->path[DCB_RX_CONFIG].prio_type;
+		to->path[DCB_RX_CONFIG].bwg_id =
+			from->path[DCB_RX_CONFIG].bwg_id;
+		to->path[DCB_RX_CONFIG].bwg_percent =
+			from->path[DCB_RX_CONFIG].bwg_percent;
+		to->path[DCB_RX_CONFIG].up_to_tc_bitmap =
+			from->path[DCB_RX_CONFIG].up_to_tc_bitmap;
+	}
+
+	/* Bandwidth group percentages */
+	for (n = 0; n < bwg_count; n++) {
+		dst_dcb_cfg->bw_percentage[DCB_TX_CONFIG][n] =
+			src_dcb_cfg->bw_percentage[DCB_TX_CONFIG][n];
+		dst_dcb_cfg->bw_percentage[DCB_RX_CONFIG][n] =
+			src_dcb_cfg->bw_percentage[DCB_RX_CONFIG][n];
+	}
+
+	/* Priority flow control settings */
+	for (n = 0; n < pfc_count; n++)
+		dst_dcb_cfg->tc_config[n].dcb_pfc =
+			src_dcb_cfg->tc_config[n].dcb_pfc;
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcbnl_get_state - Report whether DCB mode is enabled
+ * @netdev: network interface device structure
+ *
+ * Returns 1 if IXGBE_FLAG_DCB_ENABLED is set in the adapter flags,
+ * 0 otherwise.
+ */
+static u8 ixgbe_dcbnl_get_state(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	DPRINTK(DRV, INFO, "Get DCB Admin Mode.\n");
+
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_select_queue - Tx queue selection hook used while DCB is on
+ * @dev: network interface device structure (unused)
+ * @skb: packet being queued (unused)
+ *
+ * Installed as netdev->select_queue by ixgbe_dcbnl_set_state() when DCB is
+ * turned on.  Always returns queue 0.
+ */
+static u16 ixgbe_dcb_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+	/* All traffic should default to class 0 */
+	return 0;
+}
+
+/*
+ * Tear down the current interrupt/queue layout, switch the adapter flags
+ * between RSS and DCB mode, and rebuild everything for the new mode.
+ */
+static void ixgbe_dcbnl_switch_mode(struct net_device *netdev, bool dcb_on)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (netdev->flags & IFF_UP)
+		netdev->stop(netdev);
+	ixgbe_reset_interrupt_capability(adapter);
+	ixgbe_napi_del_all(adapter);
+	kfree(adapter->tx_ring);
+	kfree(adapter->rx_ring);
+	adapter->tx_ring = NULL;
+	adapter->rx_ring = NULL;
+
+	if (dcb_on) {
+		netdev->select_queue = &ixgbe_dcb_select_queue;
+		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+		adapter->flags |= IXGBE_FLAG_DCB_ENABLED;
+	} else {
+		netdev->select_queue = NULL;
+		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
+		adapter->flags |= IXGBE_FLAG_RSS_ENABLED;
+	}
+
+	ixgbe_init_interrupt_scheme(adapter);
+	ixgbe_napi_add_all(adapter);
+	if (netdev->flags & IFF_UP)
+		netdev->open(netdev);
+}
+
+/**
+ * ixgbe_dcbnl_set_state - Turn DCB mode on or off
+ * @netdev: network interface device structure
+ * @state: non-zero to enable DCB, zero to disable it
+ *
+ * Does nothing when the adapter is already in the requested mode.
+ * Otherwise the interface is stopped (if it is up), the rings and
+ * interrupt scheme are rebuilt for the new mode, and the interface is
+ * reopened.
+ */
+static void ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	bool want_dcb = state > 0;
+	bool have_dcb = !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED);
+
+	DPRINTK(DRV, INFO, "Set DCB Admin Mode.\n");
+
+	if (want_dcb == have_dcb)
+		return;
+
+	ixgbe_dcbnl_switch_mode(netdev, want_dcb);
+}
+
+/**
+ * ixgbe_dcbnl_get_perm_hw_addr - Copy out the permanent MAC address
+ * @netdev: network interface device structure
+ * @perm_addr: destination buffer, at least netdev->addr_len bytes long
+ *
+ * Copies netdev->addr_len bytes of hw.mac.perm_addr into @perm_addr.
+ */
+static void ixgbe_dcbnl_get_perm_hw_addr(struct net_device *netdev,
+					 u8 *perm_addr)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int idx = 0;
+
+	while (idx < netdev->addr_len) {
+		perm_addr[idx] = adapter->hw.mac.perm_addr[idx];
+		idx++;
+	}
+}
+
+/**
+ * ixgbe_dcbnl_set_pg_tc_cfg_tx - Stage Tx priority group settings for a TC
+ * @netdev: network interface device structure
+ * @tc: traffic class index into tc_config[]
+ * @prio: new priority type, or DCB_ATTR_VALUE_UNDEFINED to keep it
+ * @bwg_id: new bandwidth group id, or DCB_ATTR_VALUE_UNDEFINED to keep it
+ * @bw_pct: new bandwidth percentage, or DCB_ATTR_VALUE_UNDEFINED to keep it
+ * @up_map: new user priority bitmap, or DCB_ATTR_VALUE_UNDEFINED to keep it
+ *
+ * Updates path[0] of the pending configuration (temp_dcb_cfg) and sets
+ * BIT_PG_TX in dcb_set_bitmap when the result differs from the active
+ * configuration (dcb_cfg).
+ */
+static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+                                         u8 prio, u8 bwg_id, u8 bw_pct,
+                                         u8 up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct tc_configuration *pending;
+	struct tc_configuration *active;
+
+	pending = &adapter->temp_dcb_cfg.tc_config[tc];
+	active = &adapter->dcb_cfg.tc_config[tc];
+
+	if (prio != DCB_ATTR_VALUE_UNDEFINED)
+		pending->path[0].prio_type = prio;
+	if (bwg_id != DCB_ATTR_VALUE_UNDEFINED)
+		pending->path[0].bwg_id = bwg_id;
+	if (bw_pct != DCB_ATTR_VALUE_UNDEFINED)
+		pending->path[0].bwg_percent = bw_pct;
+	if (up_map != DCB_ATTR_VALUE_UNDEFINED)
+		pending->path[0].up_to_tc_bitmap = up_map;
+
+	/* Nothing to flag when pending and active settings are identical */
+	if (pending->path[0].prio_type == active->path[0].prio_type &&
+	    pending->path[0].bwg_id == active->path[0].bwg_id &&
+	    pending->path[0].bwg_percent == active->path[0].bwg_percent &&
+	    pending->path[0].up_to_tc_bitmap ==
+	    active->path[0].up_to_tc_bitmap)
+		return;
+
+	adapter->dcb_set_bitmap |= BIT_PG_TX;
+}
+
+/**
+ * ixgbe_dcbnl_set_pg_bwg_cfg_tx - Stage a Tx bandwidth group percentage
+ * @netdev: network interface device structure
+ * @bwg_id: bandwidth group index into bw_percentage[0][]
+ * @bw_pct: bandwidth percentage to store for that group
+ *
+ * Stores @bw_pct in the pending configuration (temp_dcb_cfg) and sets
+ * BIT_PG_TX in dcb_set_bitmap when it differs from the active
+ * configuration (dcb_cfg).
+ */
+static void ixgbe_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
+                                          u8 bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] = bw_pct;
+
+	/*
+	 * This is the Tx setter, so flag the Tx priority group bit, in line
+	 * with ixgbe_dcbnl_set_pg_tc_cfg_tx(); BIT_PG_RX belongs to the Rx
+	 * setters.
+	 */
+	if (adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] !=
+	    adapter->dcb_cfg.bw_percentage[0][bwg_id])
+		adapter->dcb_set_bitmap |= BIT_PG_TX;
+}
+
+/**
+ * ixgbe_dcbnl_set_pg_tc_cfg_rx - Stage Rx priority group settings for a TC
+ * @netdev: network interface device structure
+ * @tc: traffic class index into tc_config[]
+ * @prio: new priority type, or DCB_ATTR_VALUE_UNDEFINED to keep it
+ * @bwg_id: new bandwidth group id, or DCB_ATTR_VALUE_UNDEFINED to keep it
+ * @bw_pct: new bandwidth percentage, or DCB_ATTR_VALUE_UNDEFINED to keep it
+ * @up_map: new user priority bitmap, or DCB_ATTR_VALUE_UNDEFINED to keep it
+ *
+ * Updates path[1] of the pending configuration (temp_dcb_cfg) and sets
+ * BIT_PG_RX in dcb_set_bitmap when the result differs from the active
+ * configuration (dcb_cfg).
+ */
+static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc,
+                                         u8 prio, u8 bwg_id, u8 bw_pct,
+                                         u8 up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct tc_configuration *pending;
+	struct tc_configuration *active;
+
+	pending = &adapter->temp_dcb_cfg.tc_config[tc];
+	active = &adapter->dcb_cfg.tc_config[tc];
+
+	if (prio != DCB_ATTR_VALUE_UNDEFINED)
+		pending->path[1].prio_type = prio;
+	if (bwg_id != DCB_ATTR_VALUE_UNDEFINED)
+		pending->path[1].bwg_id = bwg_id;
+	if (bw_pct != DCB_ATTR_VALUE_UNDEFINED)
+		pending->path[1].bwg_percent = bw_pct;
+	if (up_map != DCB_ATTR_VALUE_UNDEFINED)
+		pending->path[1].up_to_tc_bitmap = up_map;
+
+	/* Nothing to flag when pending and active settings are identical */
+	if (pending->path[1].prio_type == active->path[1].prio_type &&
+	    pending->path[1].bwg_id == active->path[1].bwg_id &&
+	    pending->path[1].bwg_percent == active->path[1].bwg_percent &&
+	    pending->path[1].up_to_tc_bitmap ==
+	    active->path[1].up_to_tc_bitmap)
+		return;
+
+	adapter->dcb_set_bitmap |= BIT_PG_RX;
+}
+
+/**
+ * ixgbe_dcbnl_set_pg_bwg_cfg_rx - Stage an Rx bandwidth group percentage
+ * @netdev: network interface device structure
+ * @bwg_id: bandwidth group index into bw_percentage[1][]
+ * @bw_pct: bandwidth percentage to store for that group
+ *
+ * Stores @bw_pct in the pending configuration (temp_dcb_cfg) and sets
+ * BIT_PG_RX in dcb_set_bitmap when it differs from the active
+ * configuration (dcb_cfg).
+ */
+static void ixgbe_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
+                                          u8 bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_dcb_config *pending = &adapter->temp_dcb_cfg;
+	struct ixgbe_dcb_config *active = &adapter->dcb_cfg;
+
+	pending->bw_percentage[1][bwg_id] = bw_pct;
+
+	if (pending->bw_percentage[1][bwg_id] ==
+	    active->bw_percentage[1][bwg_id])
+		return;
+
+	adapter->dcb_set_bitmap |= BIT_PG_RX;
+}
+
+/**
+ * ixgbe_dcbnl_get_pg_tc_cfg_tx - Read active Tx priority group settings
+ * @netdev: network interface device structure
+ * @tc: traffic class index into tc_config[]
+ * @prio: receives the priority type
+ * @bwg_id: receives the bandwidth group id
+ * @bw_pct: receives the bandwidth percentage
+ * @up_map: receives the user priority bitmap
+ *
+ * Values are taken from path[0] of the active configuration (dcb_cfg).
+ */
+static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+                                         u8 *prio, u8 *bwg_id, u8 *bw_pct,
+                                         u8 *up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct tc_configuration *active = &adapter->dcb_cfg.tc_config[tc];
+
+	*prio = active->path[0].prio_type;
+	*bwg_id = active->path[0].bwg_id;
+	*bw_pct = active->path[0].bwg_percent;
+	*up_map = active->path[0].up_to_tc_bitmap;
+}
+
+/**
+ * ixgbe_dcbnl_get_pg_bwg_cfg_tx - Read an active Tx bandwidth group share
+ * @netdev: network interface device structure
+ * @bwg_id: bandwidth group index into bw_percentage[0][]
+ * @bw_pct: receives the percentage from the active configuration (dcb_cfg)
+ */
+static void ixgbe_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
+                                          u8 *bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_dcb_config *active = &adapter->dcb_cfg;
+
+	*bw_pct = active->bw_percentage[0][bwg_id];
+}
+
+/**
+ * ixgbe_dcbnl_get_pg_tc_cfg_rx - Read active Rx priority group settings
+ * @netdev: network interface device structure
+ * @tc: traffic class index into tc_config[]
+ * @prio: receives the priority type
+ * @bwg_id: receives the bandwidth group id
+ * @bw_pct: receives the bandwidth percentage
+ * @up_map: receives the user priority bitmap
+ *
+ * Values are taken from path[1] of the active configuration (dcb_cfg).
+ */
+static void ixgbe_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int tc,
+                                         u8 *prio, u8 *bwg_id, u8 *bw_pct,
+                                         u8 *up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct tc_configuration *active = &adapter->dcb_cfg.tc_config[tc];
+
+	*prio = active->path[1].prio_type;
+	*bwg_id = active->path[1].bwg_id;
+	*bw_pct = active->path[1].bwg_percent;
+	*up_map = active->path[1].up_to_tc_bitmap;
+}
+
+/**
+ * ixgbe_dcbnl_get_pg_bwg_cfg_rx - Read an active Rx bandwidth group share
+ * @netdev: network interface device structure
+ * @bwg_id: bandwidth group index into bw_percentage[1][]
+ * @bw_pct: receives the percentage from the active configuration (dcb_cfg)
+ */
+static void ixgbe_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
+                                          u8 *bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_dcb_config *active = &adapter->dcb_cfg;
+
+	*bw_pct = active->bw_percentage[1][bwg_id];
+}
+
+/**
+ * ixgbe_dcbnl_set_pfc_cfg - Stage a PFC setting
+ * @netdev: network interface device structure
+ * @priority: index into tc_config[]
+ * @setting: PFC value to store
+ *
+ * Stores @setting in the pending configuration (temp_dcb_cfg) and sets
+ * BIT_PFC in dcb_set_bitmap when it differs from the active
+ * configuration (dcb_cfg).
+ */
+static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
+                                    u8 setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct tc_configuration *pending;
+	struct tc_configuration *active;
+
+	pending = &adapter->temp_dcb_cfg.tc_config[priority];
+	active = &adapter->dcb_cfg.tc_config[priority];
+
+	pending->dcb_pfc = setting;
+	if (pending->dcb_pfc == active->dcb_pfc)
+		return;
+
+	adapter->dcb_set_bitmap |= BIT_PFC;
+}
+
+/**
+ * ixgbe_dcbnl_get_pfc_cfg - Read an active PFC setting
+ * @netdev: network interface device structure
+ * @priority: index into tc_config[]
+ * @setting: receives dcb_pfc from the active configuration (dcb_cfg)
+ */
+static void ixgbe_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority,
+                                    u8 *setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct tc_configuration *active;
+
+	active = &adapter->dcb_cfg.tc_config[priority];
+	*setting = active->dcb_pfc;
+}
+
+/**
+ * ixgbe_dcbnl_set_all - Apply the pending DCB configuration
+ * @netdev: network interface device structure
+ *
+ * Copies temp_dcb_cfg into dcb_cfg and cycles the adapter through
+ * ixgbe_down()/ixgbe_up() while holding the __IXGBE_RESETTING bit.
+ *
+ * Returns 1 without touching anything when dcb_set_bitmap is empty,
+ * otherwise the result of ixgbe_copy_dcb_cfg() (0 on success).
+ */
+static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int ret;
+
+	/* No setter recorded a difference, so there is nothing to apply */
+	if (!adapter->dcb_set_bitmap)
+		return 1;
+
+	/* Sleep-poll until this caller owns the __IXGBE_RESETTING bit */
+	while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
+		msleep(1);
+
+	ret = ixgbe_copy_dcb_cfg(&adapter->temp_dcb_cfg, &adapter->dcb_cfg,
+				 adapter->ring_feature[RING_F_DCB].indices);
+	if (ret) {
+		/*
+		 * NOTE(review): ret is an int (-EINVAL on failure) but this
+		 * function returns u8, so a negative code is truncated to a
+		 * non-zero byte value.
+		 */
+		clear_bit(__IXGBE_RESETTING, &adapter->state);
+		return ret;
+	}
+
+	ixgbe_down(adapter);
+	ixgbe_up(adapter);
+	/* Pending and active configurations match again */
+	adapter->dcb_set_bitmap = 0x00;
+	clear_bit(__IXGBE_RESETTING, &adapter->state);
+	return ret;
+}
+
+/*
+ * DCB netlink callback table for this driver.  It has external linkage
+ * because ixgbe_main.c assigns its address to netdev->dcbnl_ops when
+ * CONFIG_DCBNL is defined.
+ */
+struct dcbnl_rtnl_ops dcbnl_ops = {
+	.getstate	= ixgbe_dcbnl_get_state,
+	.setstate	= ixgbe_dcbnl_set_state,
+	.getpermhwaddr	= ixgbe_dcbnl_get_perm_hw_addr,
+	.setpgtccfgtx	= ixgbe_dcbnl_set_pg_tc_cfg_tx,
+	.setpgbwgcfgtx	= ixgbe_dcbnl_set_pg_bwg_cfg_tx,
+	.setpgtccfgrx	= ixgbe_dcbnl_set_pg_tc_cfg_rx,
+	.setpgbwgcfgrx	= ixgbe_dcbnl_set_pg_bwg_cfg_rx,
+	.getpgtccfgtx	= ixgbe_dcbnl_get_pg_tc_cfg_tx,
+	.getpgbwgcfgtx	= ixgbe_dcbnl_get_pg_bwg_cfg_tx,
+	.getpgtccfgrx	= ixgbe_dcbnl_get_pg_tc_cfg_rx,
+	.getpgbwgcfgrx	= ixgbe_dcbnl_get_pg_bwg_cfg_rx,
+	.setpfccfg	= ixgbe_dcbnl_set_pfc_cfg,
+	.getpfccfg	= ixgbe_dcbnl_get_pfc_cfg,
+	.setall		= ixgbe_dcbnl_set_all
+};
+
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 81a9c4b..87dda98 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -97,9 +97,18 @@ static struct ixgbe_stats ixgbe_gstrings_stats[] = {
                 ((((struct ixgbe_adapter *)netdev->priv)->num_tx_queues + \
                  ((struct ixgbe_adapter *)netdev->priv)->num_rx_queues) * \
                  (sizeof(struct ixgbe_queue_stats) / sizeof(u64)))
-#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + IXGBE_QUEUE_STATS_LEN)
 #define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats)
-#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + IXGBE_QUEUE_STATS_LEN)
+#define IXGBE_PB_STATS_LEN ( \
+                 (((struct ixgbe_adapter *)netdev->priv)->flags & \
+                 IXGBE_FLAG_DCB_ENABLED) ? \
+                 (sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \
+                  sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \
+                  sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \
+                  sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \
+                  / sizeof(u64) : 0)
+#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \
+                         IXGBE_PB_STATS_LEN + \
+                         IXGBE_QUEUE_STATS_LEN)
 
 static int ixgbe_get_settings(struct net_device *netdev,
                               struct ethtool_cmd *ecmd)
@@ -804,6 +813,16 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev,
 			data[i + k] = queue_stat[k];
 		i += k;
 	}
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+		for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) {
+			data[i++] = adapter->stats.pxontxc[j];
+			data[i++] = adapter->stats.pxofftxc[j];
+		}
+		for (j = 0; j < MAX_RX_PACKET_BUFFERS; j++) {
+			data[i++] = adapter->stats.pxonrxc[j];
+			data[i++] = adapter->stats.pxoffrxc[j];
+		}
+	}
 }
 
 static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
@@ -832,6 +851,20 @@ static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
 			sprintf(p, "rx_queue_%u_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
+		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+			for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
+				sprintf(p, "tx_pb_%u_pxon", i);
+				p += ETH_GSTRING_LEN;
+				sprintf(p, "tx_pb_%u_pxoff", i);
+				p += ETH_GSTRING_LEN;
+			}
+			for (i = 0; i < MAX_RX_PACKET_BUFFERS; i++) {
+				sprintf(p, "rx_pb_%u_pxon", i);
+				p += ETH_GSTRING_LEN;
+				sprintf(p, "rx_pb_%u_pxoff", i);
+				p += ETH_GSTRING_LEN;
+			}
+		}
 		/* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */
 		break;
 	}
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index ca17af4..cbbfc8a 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -402,7 +402,7 @@ static void ixgbe_receive_skb(struct ixgbe_adapter *adapter,
 
 	if (adapter->netdev->features & NETIF_F_LRO &&
 	    skb->ip_summed == CHECKSUM_UNNECESSARY) {
-		if (adapter->vlgrp && is_vlan)
+		if (adapter->vlgrp && is_vlan && (tag != 0))
 			lro_vlan_hwaccel_receive_skb(&ring->lro_mgr, skb,
 			                             adapter->vlgrp, tag,
 			                             rx_desc);
@@ -411,12 +411,12 @@ static void ixgbe_receive_skb(struct ixgbe_adapter *adapter,
 		ring->lro_used = true;
 	} else {
 		if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL)) {
-			if (adapter->vlgrp && is_vlan)
+			if (adapter->vlgrp && is_vlan && (tag != 0))
 				vlan_hwaccel_receive_skb(skb, adapter->vlgrp, tag);
 			else
 				netif_receive_skb(skb);
 		} else {
-			if (adapter->vlgrp && is_vlan)
+			if (adapter->vlgrp && is_vlan && (tag != 0))
 				vlan_hwaccel_rx(skb, adapter->vlgrp, tag);
 			else
 				netif_rx(skb);
@@ -1654,10 +1654,12 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
 	 * effects of setting this bit are only that SRRCTL must be
 	 * fully programmed [0..15]
 	 */
-	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
-	rdrxctl |= IXGBE_RDRXCTL_MVMEN;
-	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
-
+	if (adapter->flags &
+	    (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_VMDQ_ENABLED)) {
+		rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+		rdrxctl |= IXGBE_RDRXCTL_MVMEN;
+		IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
+	}
 
 	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
 		/* Fill out redirection table */
@@ -1716,6 +1718,16 @@ static void ixgbe_vlan_rx_register(struct net_device *netdev,
 		ixgbe_irq_disable(adapter);
 	adapter->vlgrp = grp;
 
+	/*
+	 * For a DCB driver, always enable VLAN tag stripping so we can
+	 * still receive traffic from a DCB-enabled host even if we're
+	 * not in DCB mode.
+	 */
+	ctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_VLNCTRL);
+	ctrl |= IXGBE_VLNCTRL_VME;
+	ctrl &= ~IXGBE_VLNCTRL_CFIEN;
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_VLNCTRL, ctrl);
+
 	if (grp) {
 		/* enable VLAN tag insert/strip */
 		ctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_VLNCTRL);
@@ -1880,6 +1892,42 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter)
 	}
 }
 
+/*
+ * ixgbe_configure_dcb - Configure DCB hardware
+ * @adapter: ixgbe adapter struct
+ *
+ * This is called by the driver on open to configure the DCB hardware.
+ * This is also called by the DCB netlink (dcbnl) interface when
+ * reconfiguring the DCB state.
+ *
+ * The return values of the check/credit/hw-config helpers are not
+ * examined here.
+ */
+static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 txdctl, vlnctrl;
+	int i, j;
+
+	/* Validate the configuration and derive Tx and Rx credits from it */
+	ixgbe_dcb_check_config(&adapter->dcb_cfg);
+	ixgbe_dcb_calculate_tc_credits(&adapter->dcb_cfg, DCB_TX_CONFIG);
+	ixgbe_dcb_calculate_tc_credits(&adapter->dcb_cfg, DCB_RX_CONFIG);
+
+	/* reconfigure the hardware */
+	ixgbe_dcb_hw_config(&adapter->hw, &adapter->dcb_cfg);
+
+	/* Walk every Tx ring, addressing its TXDCTL register via reg_idx */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		j = adapter->tx_ring[i].reg_idx;
+		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(j));
+		/* PThresh workaround for Tx hang with DFP enabled. */
+		txdctl |= 32;
+		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(j), txdctl);
+	}
+	/* Enable VLAN tag insert/strip */
+	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+	vlnctrl |= IXGBE_VLNCTRL_VME | IXGBE_VLNCTRL_VFE;
+	vlnctrl &= ~IXGBE_VLNCTRL_CFIEN;
+	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+	/*
+	 * NOTE(review): presumably adds VLAN ID 0 to the VLAN filter table
+	 * so priority-tagged frames pass the filter enabled above; confirm
+	 * against the set_vfta() argument order.
+	 */
+	hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true);
+}
+
 static void ixgbe_configure(struct ixgbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
@@ -1888,6 +1936,12 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 	ixgbe_set_rx_mode(netdev);
 
 	ixgbe_restore_vlan(adapter);
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+		netif_set_gso_max_size(netdev, 32768);
+		ixgbe_configure_dcb(adapter);
+	} else {
+		netif_set_gso_max_size(netdev, 65536);
+	}
 
 	ixgbe_configure_tx(adapter);
 	ixgbe_configure_rx(adapter);
@@ -2234,6 +2288,11 @@ static void ixgbe_reset_task(struct work_struct *work)
 	struct ixgbe_adapter *adapter;
 	adapter = container_of(work, struct ixgbe_adapter, reset_task);
 
+	/* If we're already down or resetting, just bail */
+	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
+	    test_bit(__IXGBE_RESETTING, &adapter->state))
+		return;
+
 	adapter->tx_timeout_count++;
 
 	ixgbe_reinit_locked(adapter);
@@ -2243,15 +2302,31 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 {
 	int nrq = 1, ntq = 1;
 	int feature_mask = 0, rss_i, rss_m;
+	int dcb_i, dcb_m;
 
 	/* Number of supported queues */
 	switch (adapter->hw.mac.type) {
 	case ixgbe_mac_82598EB:
+		dcb_i = adapter->ring_feature[RING_F_DCB].indices;
+		dcb_m = 0;
 		rss_i = adapter->ring_feature[RING_F_RSS].indices;
 		rss_m = 0;
 		feature_mask |= IXGBE_FLAG_RSS_ENABLED;
+		feature_mask |= IXGBE_FLAG_DCB_ENABLED;
 
 		switch (adapter->flags & feature_mask) {
+		case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_DCB_ENABLED):
+			dcb_m = 0x7 << 3;
+			rss_i = min(8, rss_i);
+			rss_m = 0x7;
+			nrq = dcb_i * rss_i;
+			ntq = min(MAX_TX_QUEUES, dcb_i * rss_i);
+			break;
+		case (IXGBE_FLAG_DCB_ENABLED):
+			dcb_m = 0x7 << 3;
+			nrq = dcb_i;
+			ntq = dcb_i;
+			break;
 		case (IXGBE_FLAG_RSS_ENABLED):
 			rss_m = 0xF;
 			nrq = rss_i;
@@ -2259,6 +2334,8 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 			break;
 		case 0:
 		default:
+			dcb_i = 0;
+			dcb_m = 0;
 			rss_i = 0;
 			rss_m = 0;
 			nrq = 1;
@@ -2266,6 +2343,12 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 			break;
 		}
 
+		/* Sanity check, we should never have zero queues */
+		nrq = (nrq ?:1);
+		ntq = (ntq ?:1);
+
+		adapter->ring_feature[RING_F_DCB].indices = dcb_i;
+		adapter->ring_feature[RING_F_DCB].mask = dcb_m;
 		adapter->ring_feature[RING_F_RSS].indices = rss_i;
 		adapter->ring_feature[RING_F_RSS].mask = rss_m;
 		break;
@@ -2317,6 +2400,7 @@ static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
 		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
 		kfree(adapter->msix_entries);
 		adapter->msix_entries = NULL;
+		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
 		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
 		ixgbe_set_num_queues(adapter);
 	} else {
@@ -2336,15 +2420,42 @@ static void __devinit ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
 {
 	int feature_mask = 0, rss_i;
 	int i, txr_idx, rxr_idx;
+	int dcb_i;
 
 	/* Number of supported queues */
 	switch (adapter->hw.mac.type) {
 	case ixgbe_mac_82598EB:
+		dcb_i = adapter->ring_feature[RING_F_DCB].indices;
 		rss_i = adapter->ring_feature[RING_F_RSS].indices;
 		txr_idx = 0;
 		rxr_idx = 0;
+		feature_mask |= IXGBE_FLAG_DCB_ENABLED;
 		feature_mask |= IXGBE_FLAG_RSS_ENABLED;
 		switch (adapter->flags & feature_mask) {
+		case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_DCB_ENABLED):
+			for (i = 0; i < dcb_i; i++) {
+				int j;
+				/* Rx first */
+				for (j = 0; j < adapter->num_rx_queues; j++) {
+					adapter->rx_ring[rxr_idx].reg_idx =
+						i << 3 | j;
+					rxr_idx++;
+				}
+				/* Tx now */
+				for (j = 0; j < adapter->num_tx_queues; j++) {
+					adapter->tx_ring[txr_idx].reg_idx =
+						i << 2 | (j >> 1);
+					if (j & 1)
+						txr_idx++;
+				}
+			}
+		case (IXGBE_FLAG_DCB_ENABLED):
+			/* the number of queues is assumed to be symmetric */
+			for (i = 0; i < dcb_i; i++) {
+				adapter->rx_ring[i].reg_idx = i << 3;
+				adapter->tx_ring[i].reg_idx = i << 2;
+			}
+			break;
 		case (IXGBE_FLAG_RSS_ENABLED):
 			for (i = 0; i < adapter->num_rx_queues; i++)
 				adapter->rx_ring[i].reg_idx = i;
@@ -2369,7 +2480,7 @@ static void __devinit ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
  * number of queues at compile-time.  The polling_netdev array is
  * intended for Multiqueue, but should work fine with a single queue.
  **/
-static int __devinit ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
+static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
 {
 	int i;
 
@@ -2439,6 +2550,7 @@ static int __devinit ixgbe_set_interrupt_capability(struct ixgbe_adapter
 	adapter->msix_entries = kcalloc(v_budget,
 	                                sizeof(struct msix_entry), GFP_KERNEL);
 	if (!adapter->msix_entries) {
+		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
 		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
 		ixgbe_set_num_queues(adapter);
 		kfree(adapter->tx_ring);
@@ -2479,7 +2591,7 @@ out:
 	return err;
 }
 
-static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
+void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
 {
 	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
 		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
@@ -2503,7 +2615,7 @@ static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
  * - Hardware queue count (num_*_queues)
  *   - defined by miscellaneous hardware support/features (RSS, etc.)
  **/
-static int __devinit ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
+int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
 {
 	int err;
 
@@ -2551,6 +2663,8 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct pci_dev *pdev = adapter->pdev;
 	unsigned int rss;
+	int j;
+	struct tc_configuration *tc;
 
 	/* PCI config space info */
 
@@ -2564,6 +2678,26 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 	rss = min(IXGBE_MAX_RSS_INDICES, (int)num_online_cpus());
 	adapter->ring_feature[RING_F_RSS].indices = rss;
 	adapter->flags |= IXGBE_FLAG_RSS_ENABLED;
+	adapter->ring_feature[RING_F_DCB].indices = IXGBE_MAX_DCB_INDICES;
+
+	/* Configure DCB traffic classes */
+	for (j = 0; j < MAX_TRAFFIC_CLASS; j++) {
+		tc = &adapter->dcb_cfg.tc_config[j];
+		tc->path[DCB_TX_CONFIG].bwg_id = 0;
+		tc->path[DCB_TX_CONFIG].bwg_percent = 12 + (j & 1);
+		tc->path[DCB_RX_CONFIG].bwg_id = 0;
+		tc->path[DCB_RX_CONFIG].bwg_percent = 12 + (j & 1);
+		tc->dcb_pfc = pfc_disabled;
+	}
+	adapter->dcb_cfg.bw_percentage[DCB_TX_CONFIG][0] = 100;
+	adapter->dcb_cfg.bw_percentage[DCB_RX_CONFIG][0] = 100;
+	adapter->dcb_cfg.rx_pba_cfg = pba_equal;
+	adapter->dcb_cfg.round_robin_enable = false;
+	adapter->dcb_set_bitmap = 0x00;
+#ifdef CONFIG_DCBNL
+	ixgbe_copy_dcb_cfg(&adapter->dcb_cfg, &adapter->temp_dcb_cfg,
+	                   adapter->ring_feature[RING_F_DCB].indices);
+#endif
 
 	/* default flow control settings */
 	hw->fc.original_type = ixgbe_fc_none;
@@ -2938,7 +3072,7 @@ static int ixgbe_close(struct net_device *netdev)
  * @adapter: private struct
  * helper function to napi_add each possible q_vector->napi
  */
-static void ixgbe_napi_add_all(struct ixgbe_adapter *adapter)
+void ixgbe_napi_add_all(struct ixgbe_adapter *adapter)
 {
 	int q_idx, q_vectors;
 	int (*poll)(struct napi_struct *, int);
@@ -2959,7 +3093,7 @@ static void ixgbe_napi_add_all(struct ixgbe_adapter *adapter)
 	}
 }
 
-static void ixgbe_napi_del_all(struct ixgbe_adapter *adapter)
+void ixgbe_napi_del_all(struct ixgbe_adapter *adapter)
 {
 	int q_idx;
 	int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
@@ -3080,6 +3214,18 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 		adapter->stats.mpc[i] += mpc;
 		total_mpc += adapter->stats.mpc[i];
 		adapter->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
+		adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
+		adapter->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
+		adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
+		adapter->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
+		adapter->stats.pxonrxc[i] += IXGBE_READ_REG(hw,
+							    IXGBE_PXONRXC(i));
+		adapter->stats.pxontxc[i] += IXGBE_READ_REG(hw,
+							    IXGBE_PXONTXC(i));
+		adapter->stats.pxoffrxc[i] += IXGBE_READ_REG(hw,
+							     IXGBE_PXOFFRXC(i));
+		adapter->stats.pxofftxc[i] += IXGBE_READ_REG(hw,
+							     IXGBE_PXOFFTXC(i));
 	}
 	adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
 	/* work around hardware counting issue */
@@ -3577,6 +3723,14 @@ static int ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 
 	if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
 		tx_flags |= vlan_tx_tag_get(skb);
+		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+			tx_flags &= ~IXGBE_TX_FLAGS_VLAN_PRIO_MASK;
+			tx_flags |= (skb->queue_mapping << 13);
+		}
+		tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT;
+		tx_flags |= IXGBE_TX_FLAGS_VLAN;
+	} else if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+		tx_flags |= (skb->queue_mapping << 13);
 		tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT;
 		tx_flags |= IXGBE_TX_FLAGS_VLAN;
 	}
@@ -3843,6 +3997,13 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	netdev->vlan_features |= NETIF_F_IP_CSUM;
 	netdev->vlan_features |= NETIF_F_SG;
 
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+
+#ifdef CONFIG_DCBNL
+	netdev->dcbnl_ops = &dcbnl_ops;
+#endif
+
 	if (pci_using_dac)
 		netdev->features |= NETIF_F_HIGHDMA;
 
@@ -4099,7 +4260,6 @@ static struct pci_driver ixgbe_driver = {
  **/
 static int __init ixgbe_init_module(void)
 {
-	int ret;
 	printk(KERN_INFO "%s: %s - version %s\n", ixgbe_driver_name,
 	       ixgbe_driver_string, ixgbe_driver_version);
 
@@ -4109,8 +4269,7 @@ static int __init ixgbe_init_module(void)
 	dca_register_notify(&dca_notifier);
 
 #endif
-	ret = pci_register_driver(&ixgbe_driver);
-	return ret;
+	return pci_register_driver(&ixgbe_driver);
 }
 
 module_init(ixgbe_init_module);
diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
new file mode 100644
index 0000000..32d32c1
--- /dev/null
+++ b/include/linux/dcbnl.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu <lucy.liu@...el.com>
+ */
+
+#ifndef __LINUX_DCBNL_H__
+#define __LINUX_DCBNL_H__
+
+#define DCB_PROTO_VERSION 1
+
+/**
+ * struct dcbmsg - fixed header placed in front of the DCB netlink attributes
+ *
+ * @dcb_family: address family of the message
+ * @cmd: DCB command being requested or answered (see enum dcbnl_commands)
+ * @dcb_pad: padding that rounds the header up to four bytes
+ */
+struct dcbmsg {
+	unsigned char      dcb_family;
+	__u8               cmd;
+	__u16              dcb_pad;
+};
+
+/**
+ * enum dcbnl_commands - supported DCB commands
+ *
+ * @DCB_CMD_UNDEFINED: unspecified command to catch errors
+ * @DCB_CMD_GSTATE: request the state of DCB in the device
+ * @DCB_CMD_SSTATE: set the state of DCB in the device
+ * @DCB_CMD_PGTX_GCFG: request the priority group configuration for Tx
+ * @DCB_CMD_PGTX_SCFG: set the priority group configuration for Tx
+ * @DCB_CMD_PGRX_GCFG: request the priority group configuration for Rx
+ * @DCB_CMD_PGRX_SCFG: set the priority group configuration for Rx
+ * @DCB_CMD_PFC_GCFG: request the priority flow control configuration
+ * @DCB_CMD_PFC_SCFG: set the priority flow control configuration
+ * @DCB_CMD_SET_ALL: apply all changes to the underlying device
+ * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying
+ *                        device.  Only useful when using bonding.
+ * @DCB_CMD_MAX: highest command number currently defined
+ *
+ * The numeric values are carried in the cmd field of struct dcbmsg, so new
+ * commands must only be added in front of __DCB_CMD_ENUM_MAX.
+ */
+enum dcbnl_commands {
+	DCB_CMD_UNDEFINED,
+
+	DCB_CMD_GSTATE,
+	DCB_CMD_SSTATE,
+
+	DCB_CMD_PGTX_GCFG,
+	DCB_CMD_PGTX_SCFG,
+	DCB_CMD_PGRX_GCFG,
+	DCB_CMD_PGRX_SCFG,
+
+	DCB_CMD_PFC_GCFG,
+	DCB_CMD_PFC_SCFG,
+
+	DCB_CMD_SET_ALL,
+	DCB_CMD_GPERM_HWADDR,
+
+	__DCB_CMD_ENUM_MAX,
+	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
+};
+
+
+/**
+ * enum dcbnl_attrs - DCB top-level netlink attributes
+ *
+ * @DCB_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_ATTR_IFNAME: interface name of the underlying device (NLA_STRING)
+ * @DCB_ATTR_STATE: enable state of DCB in the device (NLA_U8)
+ * @DCB_ATTR_PFC_STATE: enable state of PFC in the device (NLA_U8)
+ * @DCB_ATTR_PFC_CFG: priority flow control configuration (NLA_NESTED)
+ * @DCB_ATTR_NUM_TC: number of traffic classes supported in the device (NLA_U8)
+ * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED)
+ * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8)
+ * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_FLAG in a
+ *                        request; the reply carries the raw address bytes)
+ * @DCB_ATTR_MAX: highest attribute number currently defined
+ */
+enum dcbnl_attrs {
+	DCB_ATTR_UNDEFINED,
+
+	DCB_ATTR_IFNAME,
+	DCB_ATTR_STATE,
+	DCB_ATTR_PFC_STATE,
+	DCB_ATTR_PFC_CFG,
+	DCB_ATTR_NUM_TC,
+	DCB_ATTR_PG_CFG,
+	DCB_ATTR_SET_ALL,
+	DCB_ATTR_PERM_HWADDR,
+
+	__DCB_ATTR_ENUM_MAX,
+	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_pfc_up_attrs - DCB Priority Flow Control user priority nested
+ *                           attrs
+ *
+ * @DCB_PFC_UP_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_PFC_UP_ATTR_0: Priority Flow Control value for User Priority 0 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_1: Priority Flow Control value for User Priority 1 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_2: Priority Flow Control value for User Priority 2 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_3: Priority Flow Control value for User Priority 3 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_4: Priority Flow Control value for User Priority 4 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_5: Priority Flow Control value for User Priority 5 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_6: Priority Flow Control value for User Priority 6 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_7: Priority Flow Control value for User Priority 7 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_ALL: apply to all priority flow control attrs (NLA_FLAG)
+ * @DCB_PFC_UP_ATTR_MAX: highest attribute number currently defined
+ *                       (equal to DCB_PFC_UP_ATTR_ALL)
+ *
+ */
+enum dcbnl_pfc_up_attrs {
+	DCB_PFC_UP_ATTR_UNDEFINED,
+
+	DCB_PFC_UP_ATTR_0,
+	DCB_PFC_UP_ATTR_1,
+	DCB_PFC_UP_ATTR_2,
+	DCB_PFC_UP_ATTR_3,
+	DCB_PFC_UP_ATTR_4,
+	DCB_PFC_UP_ATTR_5,
+	DCB_PFC_UP_ATTR_6,
+	DCB_PFC_UP_ATTR_7,
+	DCB_PFC_UP_ATTR_ALL,
+
+	__DCB_PFC_UP_ATTR_ENUM_MAX,
+	DCB_PFC_UP_ATTR_MAX = __DCB_PFC_UP_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_pg_attrs - DCB Priority Group attributes
+ *
+ * @DCB_PG_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_PG_ATTR_TC_0: Priority Group Traffic Class 0 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_1: Priority Group Traffic Class 1 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_2: Priority Group Traffic Class 2 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_3: Priority Group Traffic Class 3 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_4: Priority Group Traffic Class 4 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_5: Priority Group Traffic Class 5 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_6: Priority Group Traffic Class 6 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_7: Priority Group Traffic Class 7 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_MAX: marker that follows the per-traffic-class attributes;
+ *                      it is an enumerator of its own, one past
+ *                      DCB_PG_ATTR_TC_7
+ * @DCB_PG_ATTR_TC_ALL: apply to all traffic classes (NLA_NESTED)
+ * @DCB_PG_ATTR_BW_ID_0: Percent of link bandwidth for Priority Group 0 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_1: Percent of link bandwidth for Priority Group 1 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_2: Percent of link bandwidth for Priority Group 2 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_3: Percent of link bandwidth for Priority Group 3 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_4: Percent of link bandwidth for Priority Group 4 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_5: Percent of link bandwidth for Priority Group 5 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_6: Percent of link bandwidth for Priority Group 6 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_7: Percent of link bandwidth for Priority Group 7 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_MAX: marker that follows the per-group bandwidth
+ *                         attributes; it is an enumerator of its own, one
+ *                         past DCB_PG_ATTR_BW_ID_7
+ * @DCB_PG_ATTR_BW_ID_ALL: apply to all priority groups (NLA_FLAG)
+ * @DCB_PG_ATTR_MAX: highest attribute number currently defined
+ *                   (equal to DCB_PG_ATTR_BW_ID_ALL)
+ *
+ */
+enum dcbnl_pg_attrs {
+	DCB_PG_ATTR_UNDEFINED,
+
+	DCB_PG_ATTR_TC_0,
+	DCB_PG_ATTR_TC_1,
+	DCB_PG_ATTR_TC_2,
+	DCB_PG_ATTR_TC_3,
+	DCB_PG_ATTR_TC_4,
+	DCB_PG_ATTR_TC_5,
+	DCB_PG_ATTR_TC_6,
+	DCB_PG_ATTR_TC_7,
+	DCB_PG_ATTR_TC_MAX,
+	DCB_PG_ATTR_TC_ALL,
+
+	DCB_PG_ATTR_BW_ID_0,
+	DCB_PG_ATTR_BW_ID_1,
+	DCB_PG_ATTR_BW_ID_2,
+	DCB_PG_ATTR_BW_ID_3,
+	DCB_PG_ATTR_BW_ID_4,
+	DCB_PG_ATTR_BW_ID_5,
+	DCB_PG_ATTR_BW_ID_6,
+	DCB_PG_ATTR_BW_ID_7,
+	DCB_PG_ATTR_BW_ID_MAX,
+	DCB_PG_ATTR_BW_ID_ALL,
+
+	__DCB_PG_ATTR_ENUM_MAX,
+	DCB_PG_ATTR_MAX = __DCB_PG_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_tc_attrs - DCB Traffic Class attributes
+ *
+ * @DCB_TC_ATTR_PARAM_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_TC_ATTR_PARAM_PGID: (NLA_U8) Priority group the traffic class belongs to
+ *                          Valid values are:  0-7
+ * @DCB_TC_ATTR_PARAM_UP_MAPPING: (NLA_U8) Traffic class to user priority map
+ *                                Some devices may not support changing the
+ *                                user priority map of a TC.
+ * @DCB_TC_ATTR_PARAM_STRICT_PRIO: (NLA_U8) Strict priority setting
+ *                                 0 - none
+ *                                 1 - group strict
+ *                                 2 - link strict
+ * @DCB_TC_ATTR_PARAM_BW_PCT: optional - (NLA_U8) If supported by the device and
+ *                            not configured to use link strict priority,
+ *                            this is the percentage of bandwidth of the
+ *                            priority group this traffic class belongs to
+ * @DCB_TC_ATTR_PARAM_ALL: (NLA_FLAG) all traffic class parameters
+ * @DCB_TC_ATTR_PARAM_MAX: highest attribute number currently defined
+ *                         (equal to DCB_TC_ATTR_PARAM_ALL)
+ *
+ */
+enum dcbnl_tc_attrs {
+	DCB_TC_ATTR_PARAM_UNDEFINED,
+
+	DCB_TC_ATTR_PARAM_PGID,
+	DCB_TC_ATTR_PARAM_UP_MAPPING,
+	DCB_TC_ATTR_PARAM_STRICT_PRIO,
+	DCB_TC_ATTR_PARAM_BW_PCT,
+	DCB_TC_ATTR_PARAM_ALL,
+
+	__DCB_TC_ATTR_PARAM_ENUM_MAX,
+	DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcb_general_attr_values - general DCB attribute values
+ *
+ * @DCB_ATTR_VALUE_UNDEFINED: value used to indicate an attribute is not
+ *                            supported
+ *
+ */
+enum dcb_general_attr_values {
+	DCB_ATTR_VALUE_UNDEFINED = 0xff
+};
+
+
+#endif /* __LINUX_DCBNL_H__ */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9cfd20b..f010c1a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -42,6 +42,9 @@
 #include <linux/workqueue.h>
 
 #include <net/net_namespace.h>
+#ifdef CONFIG_DCBNL
+#include <net/dcbnl.h>
+#endif
 
 struct vlan_group;
 struct ethtool_ops;
@@ -751,6 +754,11 @@ struct net_device
 	/* for setting kernel sock attribute on TCP connection setup */
 #define GSO_MAX_SIZE		65536
 	unsigned int		gso_max_size;
+
+#ifdef CONFIG_DCBNL
+	/* Data Center Bridging netlink ops */
+	struct dcbnl_rtnl_ops *dcbnl_ops;
+#endif
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 2b3d51c..e88f705 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -107,6 +107,11 @@ enum {
 	RTM_GETADDRLABEL,
 #define RTM_GETADDRLABEL RTM_GETADDRLABEL
 
+	RTM_GETDCB = 78,
+#define RTM_GETDCB RTM_GETDCB
+	RTM_SETDCB,
+#define RTM_SETDCB RTM_SETDCB
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
new file mode 100644
index 0000000..0ef0c5a
--- /dev/null
+++ b/include/net/dcbnl.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu <lucy.liu@...el.com>
+ */
+
+#ifndef __NET_DCBNL_H__
+#define __NET_DCBNL_H__
+
+/* Only pointers to the device are used here, so a forward declaration is enough. */
+struct net_device;
+
+/*
+ * Ops struct for the netlink callbacks.  Used by DCB-enabled drivers through
+ * the netdevice struct.
+ *
+ * The DCB netlink core calls these as follows:
+ *
+ * getstate / setstate:   read or write the u8 carried in DCB_ATTR_STATE.
+ * getpermhwaddr:         fill @perm_addr; the core passes a buffer of
+ *                        MAX_ADDR_LEN bytes.
+ * setpgtccfg{tx,rx}:     configure traffic class @tc (0-7).  A parameter the
+ *                        request did not supply is passed as
+ *                        DCB_ATTR_VALUE_UNDEFINED.
+ * getpgtccfg{tx,rx}:     report the settings of traffic class @tc (0-7).  The
+ *                        core presets every output to
+ *                        DCB_ATTR_VALUE_UNDEFINED before the call.
+ * setpgbwgcfg{tx,rx} /
+ * getpgbwgcfg{tx,rx}:    write or read the bandwidth percentage of priority
+ *                        group @pgid (0-7).
+ * setpfccfg / getpfccfg: write or read the flow control setting of user
+ *                        priority @priority (0-7).
+ * setall:                the returned u8 is sent back to user space in
+ *                        DCB_ATTR_SET_ALL.
+ */
+struct dcbnl_rtnl_ops {
+	u8   (*getstate)(struct net_device *netdev);
+	void (*setstate)(struct net_device *netdev, u8 state);
+	void (*getpermhwaddr)(struct net_device *netdev, u8 *perm_addr);
+	void (*setpgtccfgtx)(struct net_device *netdev, int tc, u8 prio,
+			     u8 pgid, u8 bw_pct, u8 up_map);
+	void (*setpgbwgcfgtx)(struct net_device *netdev, int pgid, u8 bw_pct);
+	void (*setpgtccfgrx)(struct net_device *netdev, int tc, u8 prio,
+			     u8 pgid, u8 bw_pct, u8 up_map);
+	void (*setpgbwgcfgrx)(struct net_device *netdev, int pgid, u8 bw_pct);
+	void (*getpgtccfgtx)(struct net_device *netdev, int tc, u8 *prio,
+			     u8 *pgid, u8 *bw_pct, u8 *up_map);
+	void (*getpgbwgcfgtx)(struct net_device *netdev, int pgid, u8 *bw_pct);
+	void (*getpgtccfgrx)(struct net_device *netdev, int tc, u8 *prio,
+			     u8 *pgid, u8 *bw_pct, u8 *up_map);
+	void (*getpgbwgcfgrx)(struct net_device *netdev, int pgid, u8 *bw_pct);
+	void (*setpfccfg)(struct net_device *netdev, int priority, u8 setting);
+	void (*getpfccfg)(struct net_device *netdev, int priority, u8 *setting);
+	u8   (*setall)(struct net_device *netdev);
+};
+
+#endif /* __NET_DCBNL_H__ */
diff --git a/net/Kconfig b/net/Kconfig
index 9103a16..de441be 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -190,6 +190,7 @@ source "net/lapb/Kconfig"
 source "net/econet/Kconfig"
 source "net/wanrouter/Kconfig"
 source "net/sched/Kconfig"
+source "net/dcb/Kconfig"
 
 menu "Network testing"
 
diff --git a/net/Makefile b/net/Makefile
index acaf819..7718f19 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -56,6 +56,9 @@ obj-$(CONFIG_NETLABEL)		+= netlabel/
 obj-$(CONFIG_IUCV)		+= iucv/
 obj-$(CONFIG_RFKILL)		+= rfkill/
 obj-$(CONFIG_NET_9P)		+= 9p/
+ifeq ($(CONFIG_DCBNL),y)
+obj-$(CONFIG_DCB)		+= dcb/
+endif
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig
new file mode 100644
index 0000000..bdf3880
--- /dev/null
+++ b/net/dcb/Kconfig
@@ -0,0 +1,12 @@
+config DCB
+        tristate "Data Center Bridging support"
+
+config DCBNL
+	bool "Data Center Bridging netlink interface support"
+	depends on DCB
+	default n
+	---help---
+	  This option turns on the netlink interface
+	  (dcbnl) for Data Center Bridging capable devices.
+
+	  If unsure, say N.
diff --git a/net/dcb/Makefile b/net/dcb/Makefile
new file mode 100644
index 0000000..9930f4c
--- /dev/null
+++ b/net/dcb/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DCB) += dcbnl.o
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
new file mode 100644
index 0000000..f340d09
--- /dev/null
+++ b/net/dcb/dcbnl.c
@@ -0,0 +1,682 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu <lucy.liu@...el.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
+#include <net/rtnetlink.h>
+#include <linux/dcbnl.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+
+MODULE_AUTHOR("Lucy Liu, <lucy.liu@...el.com>");
+MODULE_DESCRIPTION("Data Center Bridging generic netlink interface");
+MODULE_LICENSE("GPL");
+
+/**************** DCB attribute policies *************************************/
+
+/*
+ * Validation policy for the top-level DCB attributes.  The table is only
+ * ever read by the netlink parser, so it is const.
+ */
+static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
+	[DCB_ATTR_IFNAME]      = {.type = NLA_STRING, .len = IFNAMSIZ - 1},
+	[DCB_ATTR_STATE]       = {.type = NLA_U8},
+	[DCB_ATTR_PFC_CFG]     = {.type = NLA_NESTED},
+	[DCB_ATTR_PG_CFG]      = {.type = NLA_NESTED},
+	[DCB_ATTR_SET_ALL]     = {.type = NLA_U8},
+	[DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG},
+};
+
+/*
+ * Validation policy for the attributes nested inside DCB_ATTR_PFC_CFG: one
+ * u8 per user priority plus a flag that selects all of them.  Read-only,
+ * hence const.
+ */
+static const struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = {
+	[DCB_PFC_UP_ATTR_0]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_1]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_2]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_3]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_4]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_5]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_6]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_7]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG},
+};
+
+/*
+ * Validation policy for the attributes nested inside DCB_ATTR_PG_CFG: a
+ * nested parameter set per traffic class and a u8 bandwidth value per
+ * priority group.  Read-only, hence const.
+ */
+static const struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = {
+	[DCB_PG_ATTR_TC_0]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_1]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_2]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_3]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_4]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_5]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_6]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_7]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_ALL]    = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_BW_ID_0]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_1]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_2]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_3]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_4]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_5]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_6]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_7]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_ALL] = {.type = NLA_FLAG},
+};
+
+/*
+ * Validation policy for the per-traffic-class parameters nested inside a
+ * DCB_PG_ATTR_TC_* attribute.  Read-only, hence const.
+ */
+static const struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = {
+	[DCB_TC_ATTR_PARAM_PGID]            = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_UP_MAPPING]      = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_STRICT_PRIO]     = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_BW_PCT]          = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_ALL]             = {.type = NLA_FLAG},
+};
+
+
+/**
+ * dcbnl_reply - send a DCB netlink reply that carries a single u8 attribute
+ * @value: payload of the attribute
+ * @event: netlink message type of the reply (RTM_GETDCB or RTM_SETDCB)
+ * @cmd: DCB command stored in the dcbmsg header
+ * @attr: attribute type that carries @value
+ * @pid: netlink pid of the requester
+ * @seq: sequence number copied into the reply
+ * @flags: netlink message flags copied into the reply
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
+		       u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct dcbmsg *dcb;
+	struct nlmsghdr *nlh;
+	int ret = -EINVAL;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		return ret;
+
+	/* NLMSG_NEW() jumps to nlmsg_failure when the header does not fit. */
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, event, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = cmd;
+	dcb->dcb_pad = 0;
+
+	ret = nla_put_u8(dcbnl_skb, attr, value);
+	if (ret)
+		goto err;
+
+	/* end the message, assign the nlmsg_len. */
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/*
+	 * The skb is handed over to the netlink layer here and is consumed
+	 * even when delivery fails, so it must not be freed again.
+	 */
+	return rtnl_unicast(dcbnl_skb, &init_net, pid);
+
+nlmsg_failure:
+err:
+	/* An sk_buff has to be released with kfree_skb(), never kfree(). */
+	kfree_skb(dcbnl_skb);
+	return ret;
+}
+
+/*
+ * DCB_CMD_GSTATE: ask the driver for its DCB state and send it back in a
+ * DCB_ATTR_STATE attribute.  Fails with -EINVAL when the driver has no
+ * getstate op.
+ */
+static int dcbnl_getstate(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	u8 state;
+
+	if (!netdev->dcbnl_ops->getstate)
+		return -EINVAL;
+
+	state = netdev->dcbnl_ops->getstate(netdev);
+
+	return dcbnl_reply(state, RTM_GETDCB, DCB_CMD_GSTATE, DCB_ATTR_STATE,
+			   pid, seq, flags);
+}
+
+/**
+ * dcbnl_getpfccfg - answer a DCB_CMD_PFC_GCFG request
+ * @netdev: device whose priority flow control settings are read
+ * @tb: parsed top-level attributes; DCB_ATTR_PFC_CFG selects the priorities
+ * @pid: netlink pid of the requester
+ * @seq: sequence number copied into the reply
+ * @flags: netlink message flags copied into the reply
+ *
+ * For every user priority named in the request (or all of them when
+ * DCB_PFC_UP_ATTR_ALL is present) the driver is queried and the value is
+ * returned inside a nested DCB_ATTR_PFC_CFG attribute.
+ *
+ * Returns 0 on success, -EINVAL on any failure.
+ */
+static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb,
+			   u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1], *nest;
+	u8 value;
+	int getall;
+	int i;
+
+	if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->getpfccfg)
+		return -EINVAL;
+
+	if (nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
+			     tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest))
+		return -EINVAL;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		return -EINVAL;
+
+	/* NLMSG_NEW() jumps to nlmsg_failure when the header does not fit. */
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_PFC_GCFG;
+	/* do not send uninitialized header bytes to user space */
+	dcb->dcb_pad = 0;
+
+	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PFC_CFG);
+	if (!nest)
+		goto err;
+
+	getall = data[DCB_PFC_UP_ATTR_ALL] != NULL;
+
+	for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
+		if (!getall && !data[i])
+			continue;
+
+		/* the op returns void, so preset a defined value */
+		value = DCB_ATTR_VALUE_UNDEFINED;
+		netdev->dcbnl_ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0,
+					     &value);
+
+		/* the whole skb is dropped on failure, no need to trim it */
+		if (nla_put_u8(dcbnl_skb, i, value))
+			goto err;
+	}
+	nla_nest_end(dcbnl_skb, nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/*
+	 * The skb is handed over to the netlink layer here and is consumed
+	 * even when delivery fails, so it must not be freed again.
+	 */
+	if (rtnl_unicast(dcbnl_skb, &init_net, pid))
+		return -EINVAL;
+
+	return 0;
+
+nlmsg_failure:
+err:
+	/* An sk_buff has to be released with kfree_skb(), never kfree(). */
+	kfree_skb(dcbnl_skb);
+	return -EINVAL;
+}
+
+/**
+ * dcbnl_getperm_hwaddr - answer a DCB_CMD_GPERM_HWADDR request
+ * @netdev: device whose permanent hardware address is read
+ * @tb: parsed top-level attributes (not used)
+ * @pid: netlink pid of the requester
+ * @seq: sequence number copied into the reply
+ * @flags: netlink message flags copied into the reply
+ *
+ * The reply carries MAX_ADDR_LEN bytes in DCB_ATTR_PERM_HWADDR; bytes the
+ * driver does not fill in are zero.
+ *
+ * Returns 0 on success, -EINVAL on any failure.
+ */
+static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb,
+				u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	u8 perm_addr[MAX_ADDR_LEN];
+
+	if (!netdev->dcbnl_ops->getpermhwaddr)
+		return -EINVAL;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		return -EINVAL;
+
+	/* NLMSG_NEW() jumps to nlmsg_failure when the header does not fit. */
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_GPERM_HWADDR;
+	/* do not send uninitialized header bytes to user space */
+	dcb->dcb_pad = 0;
+
+	/*
+	 * The whole buffer is copied into the reply below, so clear it first:
+	 * whatever the driver leaves untouched would otherwise expose kernel
+	 * stack contents.
+	 */
+	memset(perm_addr, 0, sizeof(perm_addr));
+	netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr);
+
+	if (nla_put(dcbnl_skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr),
+		    perm_addr))
+		goto err;
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/*
+	 * The skb is handed over to the netlink layer here and is consumed
+	 * even when delivery fails, so it must not be freed again.
+	 */
+	if (rtnl_unicast(dcbnl_skb, &init_net, pid))
+		return -EINVAL;
+
+	return 0;
+
+nlmsg_failure:
+err:
+	/* An sk_buff has to be released with kfree_skb(), never kfree(). */
+	kfree_skb(dcbnl_skb);
+	return -EINVAL;
+}
+
+/**
+ * __dcbnl_pg_getcfg - answer a priority group "get" request
+ * @netdev: device whose priority group configuration is read
+ * @tb: parsed top-level attributes; DCB_ATTR_PG_CFG selects what to report
+ * @pid: netlink pid of the requester
+ * @seq: sequence number copied into the reply
+ * @flags: netlink message flags copied into the reply
+ * @dir: 0 for the Tx configuration, non-zero for the Rx configuration
+ *
+ * Builds a nested DCB_ATTR_PG_CFG reply.  For each requested traffic class
+ * (or all of them when DCB_PG_ATTR_TC_ALL is present) the requested
+ * parameters are reported; for each requested priority group (or all of
+ * them when DCB_PG_ATTR_BW_ID_ALL is present) its bandwidth percentage is
+ * reported.  Values the driver does not fill in are returned as
+ * DCB_ATTR_VALUE_UNDEFINED.
+ *
+ * Returns 0 on success, -EINVAL on any failure.
+ */
+static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags, int dir)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *pg_nest, *param_nest, *data;
+	struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
+	struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
+	u8 prio, pgid, tc_pct, up_map;
+	int getall;
+	int i;
+
+	if (!tb[DCB_ATTR_PG_CFG] ||
+	    !netdev->dcbnl_ops->getpgtccfgtx ||
+	    !netdev->dcbnl_ops->getpgtccfgrx ||
+	    !netdev->dcbnl_ops->getpgbwgcfgtx ||
+	    !netdev->dcbnl_ops->getpgbwgcfgrx)
+		return -EINVAL;
+
+	if (nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
+			     tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest))
+		return -EINVAL;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		return -EINVAL;
+
+	/* NLMSG_NEW() jumps to nlmsg_failure when the header does not fit. */
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = (dir) ? DCB_CMD_PGRX_GCFG : DCB_CMD_PGTX_GCFG;
+	/* do not send uninitialized header bytes to user space */
+	dcb->dcb_pad = 0;
+
+	pg_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PG_CFG);
+	if (!pg_nest)
+		goto err;
+
+	for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
+		/* TC_ALL, when present, supplies the parameter list for every TC */
+		data = pg_tb[DCB_PG_ATTR_TC_ALL];
+		if (!data)
+			data = pg_tb[i];
+		if (!data)
+			continue;
+
+		if (nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
+				     data, dcbnl_tc_param_nest))
+			goto err;
+
+		param_nest = nla_nest_start(dcbnl_skb, i);
+		if (!param_nest)
+			goto err;
+
+		pgid = DCB_ATTR_VALUE_UNDEFINED;
+		prio = DCB_ATTR_VALUE_UNDEFINED;
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+		up_map = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->getpgtccfgrx(netdev,
+						i - DCB_PG_ATTR_TC_0, &prio,
+						&pgid, &tc_pct, &up_map);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->getpgtccfgtx(netdev,
+						i - DCB_PG_ATTR_TC_0, &prio,
+						&pgid, &tc_pct, &up_map);
+		}
+
+		/* PARAM_ALL asks for every parameter of the traffic class */
+		getall = param_tb[DCB_TC_ATTR_PARAM_ALL] != NULL;
+
+		if ((getall || param_tb[DCB_TC_ATTR_PARAM_PGID]) &&
+		    nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_PGID, pgid))
+			goto err;
+
+		if ((getall || param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]) &&
+		    nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_UP_MAPPING, up_map))
+			goto err;
+
+		if ((getall || param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]) &&
+		    nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_STRICT_PRIO, prio))
+			goto err;
+
+		if ((getall || param_tb[DCB_TC_ATTR_PARAM_BW_PCT]) &&
+		    nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_BW_PCT, tc_pct))
+			goto err;
+
+		nla_nest_end(dcbnl_skb, param_nest);
+	}
+
+	getall = pg_tb[DCB_PG_ATTR_BW_ID_ALL] != NULL;
+
+	for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
+		if (!getall && !pg_tb[i])
+			continue;
+
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->getpgbwgcfgrx(netdev,
+					i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->getpgbwgcfgtx(netdev,
+					i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+		}
+
+		if (nla_put_u8(dcbnl_skb, i, tc_pct))
+			goto err;
+	}
+
+	nla_nest_end(dcbnl_skb, pg_nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	/*
+	 * The skb is handed over to the netlink layer here and is consumed
+	 * even when delivery fails, so it must not be freed again.
+	 */
+	if (rtnl_unicast(dcbnl_skb, &init_net, pid))
+		return -EINVAL;
+
+	return 0;
+
+nlmsg_failure:
+err:
+	/*
+	 * The partly built message is thrown away as a whole, so open nests
+	 * need no cancelling.  An sk_buff has to be released with
+	 * kfree_skb(), never kfree().
+	 */
+	kfree_skb(dcbnl_skb);
+	return -EINVAL;
+}
+
+/* DCB_CMD_PGTX_GCFG: report the Tx priority group configuration. */
+static int dcbnl_pgtx_getcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	const int dir = 0;	/* 0 selects Tx */
+
+	return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, dir);
+}
+
+/* DCB_CMD_PGRX_GCFG: report the Rx priority group configuration. */
+static int dcbnl_pgrx_getcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	const int dir = 1;	/* non-zero selects Rx */
+
+	return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, dir);
+}
+
+/*
+ * DCB_CMD_SSTATE: pass the u8 from DCB_ATTR_STATE to the driver's setstate
+ * op, then acknowledge with a DCB_ATTR_STATE reply whose value is 0.
+ * Fails with -EINVAL when the attribute or the op is missing.
+ */
+static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *state_attr = tb[DCB_ATTR_STATE];
+
+	if (!state_attr || !netdev->dcbnl_ops->setstate)
+		return -EINVAL;
+
+	netdev->dcbnl_ops->setstate(netdev, nla_get_u8(state_attr));
+
+	return dcbnl_reply(0, RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE,
+			   pid, seq, flags);
+}
+
+/**
+ * dcbnl_setpfccfg - handle a DCB_CMD_PFC_SCFG request
+ * @netdev: device whose priority flow control settings are changed
+ * @tb: parsed top-level attributes; DCB_ATTR_PFC_CFG carries the settings
+ * @pid: netlink pid of the requester
+ * @seq: sequence number copied into the reply
+ * @flags: netlink message flags copied into the reply
+ *
+ * Every DCB_PFC_UP_ATTR_0..7 attribute present in the request is passed to
+ * the driver's setpfccfg op, then the request is acknowledged with a
+ * DCB_ATTR_PFC_CFG reply whose value is 0.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int dcbnl_setpfccfg(struct net_device *netdev, struct nlattr **tb,
+			   u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1];
+	int i;
+	int ret;
+	u8 value;
+
+	if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->setpfccfg)
+		return -EINVAL;
+
+	ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
+			       tb[DCB_ATTR_PFC_CFG],
+			       dcbnl_pfc_up_nest);
+	if (ret)
+		return ret;
+
+	for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
+		if (!data[i])
+			continue;
+
+		value = nla_get_u8(data[i]);
+
+		/*
+		 * Derive the priority from the table index.  The raw nla_type
+		 * field may carry flag bits on top of the attribute number,
+		 * which would hand the driver an out-of-range priority.
+		 */
+		netdev->dcbnl_ops->setpfccfg(netdev, i - DCB_PFC_UP_ATTR_0,
+					     value);
+	}
+
+	return dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SCFG, DCB_ATTR_PFC_CFG,
+			   pid, seq, flags);
+}
+
+/*
+ * DCB_CMD_SET_ALL: invoke the driver's setall op and send its u8 result back
+ * in a DCB_ATTR_SET_ALL attribute.  Fails with -EINVAL when the request
+ * lacks DCB_ATTR_SET_ALL or the driver has no setall op.
+ */
+static int dcbnl_setall(struct net_device *netdev, struct nlattr **tb,
+			u32 pid, u32 seq, u16 flags)
+{
+	u8 result;
+
+	if (!tb[DCB_ATTR_SET_ALL] || !netdev->dcbnl_ops->setall)
+		return -EINVAL;
+
+	result = netdev->dcbnl_ops->setall(netdev);
+
+	return dcbnl_reply(result, RTM_SETDCB, DCB_CMD_SET_ALL,
+			   DCB_ATTR_SET_ALL, pid, seq, flags);
+}
+
+/*
+ * Common worker for the priority group "set" requests.
+ *
+ * @dir selects the direction: 0 = Tx, non-zero = Rx.
+ *
+ * Each DCB_PG_ATTR_TC_0..7 attribute in the request is parsed and handed to
+ * the driver's setpgtccfg op; a parameter the request leaves out is passed
+ * as DCB_ATTR_VALUE_UNDEFINED.  Each DCB_PG_ATTR_BW_ID_0..7 attribute is
+ * handed to the setpgbwgcfg op.  The request is then acknowledged with a
+ * DCB_ATTR_PG_CFG reply whose value is 0.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags, int dir)
+{
+	struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
+	struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
+	struct nlattr *attr;
+	int err;
+	int idx;
+
+	if (!tb[DCB_ATTR_PG_CFG])
+		return -EINVAL;
+
+	/* all four setters are required, whichever direction is requested */
+	if (!ops->setpgtccfgtx || !ops->setpgtccfgrx ||
+	    !ops->setpgbwgcfgtx || !ops->setpgbwgcfgrx)
+		return -EINVAL;
+
+	err = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
+			       tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+	if (err)
+		return err;
+
+	for (idx = DCB_PG_ATTR_TC_0; idx <= DCB_PG_ATTR_TC_7; idx++) {
+		u8 strict = DCB_ATTR_VALUE_UNDEFINED;
+		u8 group = DCB_ATTR_VALUE_UNDEFINED;
+		u8 bw_pct = DCB_ATTR_VALUE_UNDEFINED;
+		u8 prio_map = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (!pg_tb[idx])
+			continue;
+
+		err = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
+				       pg_tb[idx], dcbnl_tc_param_nest);
+		if (err)
+			return err;
+
+		attr = param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO];
+		if (attr)
+			strict = nla_get_u8(attr);
+
+		attr = param_tb[DCB_TC_ATTR_PARAM_PGID];
+		if (attr)
+			group = nla_get_u8(attr);
+
+		attr = param_tb[DCB_TC_ATTR_PARAM_BW_PCT];
+		if (attr)
+			bw_pct = nla_get_u8(attr);
+
+		attr = param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING];
+		if (attr)
+			prio_map = nla_get_u8(attr);
+
+		if (dir)
+			ops->setpgtccfgrx(netdev, idx - DCB_PG_ATTR_TC_0,
+					  strict, group, bw_pct, prio_map);
+		else
+			ops->setpgtccfgtx(netdev, idx - DCB_PG_ATTR_TC_0,
+					  strict, group, bw_pct, prio_map);
+	}
+
+	for (idx = DCB_PG_ATTR_BW_ID_0; idx <= DCB_PG_ATTR_BW_ID_7; idx++) {
+		u8 bw_pct;
+
+		if (!pg_tb[idx])
+			continue;
+
+		bw_pct = nla_get_u8(pg_tb[idx]);
+
+		if (dir)
+			ops->setpgbwgcfgrx(netdev, idx - DCB_PG_ATTR_BW_ID_0,
+					   bw_pct);
+		else
+			ops->setpgbwgcfgtx(netdev, idx - DCB_PG_ATTR_BW_ID_0,
+					   bw_pct);
+	}
+
+	return dcbnl_reply(0, RTM_SETDCB,
+			   dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG,
+			   DCB_ATTR_PG_CFG, pid, seq, flags);
+}
+
+/* DCB_CMD_PGTX_SCFG: apply a Tx priority group configuration. */
+static int dcbnl_pgtx_setcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	const int dir = 0;	/* 0 selects Tx */
+
+	return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, dir);
+}
+
+/* DCB_CMD_PGRX_SCFG: apply an Rx priority group configuration. */
+static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb,
+			     u32 pid, u32 seq, u16 flags)
+{
+	const int dir = 1;	/* non-zero selects Rx */
+
+	return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, dir);
+}
+
+/**
+ * dcb_doit - rtnetlink handler for RTM_GETDCB and RTM_SETDCB
+ * @skb: socket buffer that carries the request
+ * @nlh: netlink header of the request
+ * @arg: unused
+ *
+ * Parses the top-level attributes, looks up the device named by
+ * DCB_ATTR_IFNAME in the initial network namespace and dispatches on the
+ * command stored in the dcbmsg header.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct net_device *netdev;
+	struct dcbmsg  *dcb = (struct dcbmsg *)NLMSG_DATA(nlh);
+	struct nlattr *tb[DCB_ATTR_MAX + 1];
+	char ifname[IFNAMSIZ];
+	u32 pid = NETLINK_CB(skb).pid;
+	u32 seq = nlh->nlmsg_seq;
+	u16 flags = nlh->nlmsg_flags;
+	int ret;
+
+	if (net != &init_net)
+		return -EINVAL;
+
+	ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
+			  dcbnl_rtnl_policy);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[DCB_ATTR_IFNAME])
+		return -EINVAL;
+
+	/*
+	 * An NLA_STRING payload need not be NUL terminated, so copy it into a
+	 * terminated buffer rather than handing the raw attribute data to the
+	 * device lookup.
+	 */
+	nla_strlcpy(ifname, tb[DCB_ATTR_IFNAME], IFNAMSIZ);
+
+	netdev = dev_get_by_name(&init_net, ifname);
+	if (!netdev)
+		return -EINVAL;
+
+	/* default result for devices without DCB ops and unknown commands */
+	ret = -EINVAL;
+
+	if (!netdev->dcbnl_ops)
+		goto out;
+
+	switch (dcb->cmd) {
+	case DCB_CMD_GSTATE:
+		ret = dcbnl_getstate(netdev, tb, pid, seq, flags);
+		break;
+	case DCB_CMD_PFC_GCFG:
+		ret = dcbnl_getpfccfg(netdev, tb, pid, seq, flags);
+		break;
+	case DCB_CMD_GPERM_HWADDR:
+		ret = dcbnl_getperm_hwaddr(netdev, tb, pid, seq, flags);
+		break;
+	case DCB_CMD_PGTX_GCFG:
+		ret = dcbnl_pgtx_getcfg(netdev, tb, pid, seq, flags);
+		break;
+	case DCB_CMD_PGRX_GCFG:
+		ret = dcbnl_pgrx_getcfg(netdev, tb, pid, seq, flags);
+		break;
+	case DCB_CMD_SSTATE:
+		ret = dcbnl_setstate(netdev, tb, pid, seq, flags);
+		break;
+	case DCB_CMD_PFC_SCFG:
+		ret = dcbnl_setpfccfg(netdev, tb, pid, seq, flags);
+		break;
+	case DCB_CMD_SET_ALL:
+		ret = dcbnl_setall(netdev, tb, pid, seq, flags);
+		break;
+	case DCB_CMD_PGTX_SCFG:
+		ret = dcbnl_pgtx_setcfg(netdev, tb, pid, seq, flags);
+		break;
+	case DCB_CMD_PGRX_SCFG:
+		ret = dcbnl_pgrx_setcfg(netdev, tb, pid, seq, flags);
+		break;
+	default:
+		break;
+	}
+
+out:
+	/* drop the reference taken by dev_get_by_name() */
+	dev_put(netdev);
+	return ret;
+}
+
+/* Register dcb_doit() as the rtnetlink handler for both DCB message types. */
+static int __init dcbnl_init(void)
+{
+	int msgtype;
+
+	/* RTM_GETDCB and RTM_SETDCB are consecutive message numbers */
+	for (msgtype = RTM_GETDCB; msgtype <= RTM_SETDCB; msgtype++)
+		rtnl_register(PF_UNSPEC, msgtype, dcb_doit, NULL);
+
+	return 0;
+}
+module_init(dcbnl_init);
+
+/* Remove the rtnetlink handlers installed by dcbnl_init(). */
+static void __exit dcbnl_exit(void)
+{
+	int msgtype;
+
+	/* RTM_GETDCB and RTM_SETDCB are consecutive message numbers */
+	for (msgtype = RTM_GETDCB; msgtype <= RTM_SETDCB; msgtype++)
+		rtnl_unregister(PF_UNSPEC, msgtype);
+}
+module_exit(dcbnl_exit);
+
+

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ