[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1319044264-779-6-git-send-email-vipul@chelsio.com>
Date: Wed, 19 Oct 2011 22:40:59 +0530
From: Vipul Pandya <vipul@...lsio.com>
To: linux-rdma@...r.kernel.org, netdev@...r.kernel.org
Cc: roland@...estorage.com, davem@...emloft.net, divy@...lsio.com,
dm@...lsio.com, kumaras@...lsio.com, swise@...ngridcomputing.com,
Vipul Pandya <vipul@...lsio.com>
Subject: [PATCH 05/10] RDMA/cxgb4: Add DB Overflow Avoidance.
- get FULL/EMPTY/DROP events from LLD
- on FULL event, disable normal user mode DB rings.
- add modify_qp semantics to allow user processes to call into
the kernel to ring doobells without overflowing.
Add DB Full/Empty/Drop stats.
Mark queues when created indicating the doorbell state.
If we're in the middle of db overflow avoidance, then newly created
queues should start out in this mode.
Signed-off-by: Vipul Pandya <vipul@...lsio.com>
Signed-off-by: Steve Wise <swise@...ngridcomputing.com>
---
drivers/infiniband/hw/cxgb4/device.c | 84 +++++++++++++++++++++++++++++--
drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 37 ++++++++++++--
drivers/infiniband/hw/cxgb4/qp.c | 51 +++++++++++++++++++-
3 files changed, 161 insertions(+), 11 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 8483111..9062ed9 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -44,6 +44,12 @@ MODULE_DESCRIPTION("Chelsio T4 RDMA Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
+struct uld_ctx {
+ struct list_head entry;
+ struct cxgb4_lld_info lldi;
+ struct c4iw_dev *dev;
+};
+
static LIST_HEAD(uld_ctx_list);
static DEFINE_MUTEX(dev_mutex);
@@ -263,6 +269,9 @@ static int stats_show(struct seq_file *seq, void *v)
seq_printf(seq, " OCQPMEM: %10llu %10llu %10llu\n",
dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
dev->rdev.stats.ocqp.max);
+ seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full);
+ seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
+ seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop);
return 0;
}
@@ -283,6 +292,9 @@ static ssize_t stats_clear(struct file *file, const char __user *buf,
dev->rdev.stats.pbl.max = 0;
dev->rdev.stats.rqt.max = 0;
dev->rdev.stats.ocqp.max = 0;
+ dev->rdev.stats.db_full = 0;
+ dev->rdev.stats.db_empty = 0;
+ dev->rdev.stats.db_drop = 0;
mutex_unlock(&dev->rdev.stats.lock);
return count;
}
@@ -443,12 +455,6 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
c4iw_destroy_resource(&rdev->resource);
}
-struct uld_ctx {
- struct list_head entry;
- struct cxgb4_lld_info lldi;
- struct c4iw_dev *dev;
-};
-
static void c4iw_dealloc(struct uld_ctx *ctx)
{
c4iw_rdev_close(&ctx->dev->rdev);
@@ -514,6 +520,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
idr_init(&devp->mmidr);
spin_lock_init(&devp->lock);
mutex_init(&devp->rdev.stats.lock);
+ mutex_init(&devp->db_mutex);
if (c4iw_debugfs_root) {
devp->debugfs_root = debugfs_create_dir(
@@ -659,11 +666,76 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
return 0;
}
+static int disable_qp_db(int id, void *p, void *data)
+{
+ struct c4iw_qp *qp = p;
+
+ t4_disable_wq_db(&qp->wq);
+ return 0;
+}
+
+static void stop_queues(struct uld_ctx *ctx)
+{
+ spin_lock_irq(&ctx->dev->lock);
+ ctx->dev->db_state = FLOW_CONTROL;
+ idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
+ spin_unlock_irq(&ctx->dev->lock);
+}
+
+static int enable_qp_db(int id, void *p, void *data)
+{
+ struct c4iw_qp *qp = p;
+
+ t4_enable_wq_db(&qp->wq);
+ return 0;
+}
+
+static void resume_queues(struct uld_ctx *ctx)
+{
+ spin_lock_irq(&ctx->dev->lock);
+ ctx->dev->db_state = NORMAL;
+ idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
+ spin_unlock_irq(&ctx->dev->lock);
+}
+
+static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
+{
+ struct uld_ctx *ctx = handle;
+
+ switch (control) {
+ case CXGB4_CONTROL_DB_FULL:
+ stop_queues(ctx);
+ mutex_lock(&ctx->dev->rdev.stats.lock);
+ ctx->dev->rdev.stats.db_full++;
+ mutex_unlock(&ctx->dev->rdev.stats.lock);
+ break;
+ case CXGB4_CONTROL_DB_EMPTY:
+ resume_queues(ctx);
+ mutex_lock(&ctx->dev->rdev.stats.lock);
+ ctx->dev->rdev.stats.db_empty++;
+ mutex_unlock(&ctx->dev->rdev.stats.lock);
+ break;
+ case CXGB4_CONTROL_DB_DROP:
+ printk(KERN_WARNING MOD "%s: Fatal DB DROP\n",
+ pci_name(ctx->lldi.pdev));
+ mutex_lock(&ctx->dev->rdev.stats.lock);
+ ctx->dev->rdev.stats.db_drop++;
+ mutex_unlock(&ctx->dev->rdev.stats.lock);
+ break;
+ default:
+ printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
+ pci_name(ctx->lldi.pdev), control);
+ break;
+ }
+ return 0;
+}
+
static struct cxgb4_uld_info c4iw_uld_info = {
.name = DRV_NAME,
.add = c4iw_uld_add,
.rx_handler = c4iw_uld_rx_handler,
.state_change = c4iw_uld_state_change,
+ .control = c4iw_uld_control,
};
static int __init c4iw_init_module(void)
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index ec7c848..1924c19 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -117,6 +117,9 @@ struct c4iw_stats {
struct c4iw_stat pbl;
struct c4iw_stat rqt;
struct c4iw_stat ocqp;
+ u64 db_full;
+ u64 db_empty;
+ u64 db_drop;
};
struct c4iw_rdev {
@@ -192,6 +195,12 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
return wr_waitp->ret;
}
+enum db_state {
+ NORMAL = 0,
+ FLOW_CONTROL = 1,
+ RECOVERY = 2
+};
+
struct c4iw_dev {
struct ib_device ibdev;
struct c4iw_rdev rdev;
@@ -200,7 +209,9 @@ struct c4iw_dev {
struct idr qpidr;
struct idr mmidr;
spinlock_t lock;
+ struct mutex db_mutex;
struct dentry *debugfs_root;
+ enum db_state db_state;
};
static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@ -228,8 +239,8 @@ static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid)
return idr_find(&rhp->mmidr, mmid);
}
-static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
- void *handle, u32 id)
+static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+ void *handle, u32 id, int lock)
{
int ret;
int newid;
@@ -237,15 +248,29 @@ static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
do {
if (!idr_pre_get(idr, GFP_KERNEL))
return -ENOMEM;
- spin_lock_irq(&rhp->lock);
+ if (lock)
+ spin_lock_irq(&rhp->lock);
ret = idr_get_new_above(idr, handle, id, &newid);
BUG_ON(newid != id);
- spin_unlock_irq(&rhp->lock);
+ if (lock)
+ spin_unlock_irq(&rhp->lock);
} while (ret == -EAGAIN);
return ret;
}
+static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+ void *handle, u32 id)
+{
+ return _insert_handle(rhp, idr, handle, id, 1);
+}
+
+static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr,
+ void *handle, u32 id)
+{
+ return _insert_handle(rhp, idr, handle, id, 0);
+}
+
static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id)
{
spin_lock_irq(&rhp->lock);
@@ -369,6 +394,8 @@ struct c4iw_qp_attributes {
struct c4iw_ep *llp_stream_handle;
u8 layer_etype;
u8 ecode;
+ u16 sq_db_inc;
+ u16 rq_db_inc;
};
struct c4iw_qp {
@@ -443,6 +470,8 @@ static inline void insert_mmap(struct c4iw_ucontext *ucontext,
enum c4iw_qp_attr_mask {
C4IW_QP_ATTR_NEXT_STATE = 1 << 0,
+ C4IW_QP_ATTR_SQ_DB = 1<<1,
+ C4IW_QP_ATTR_RQ_DB = 1<<2,
C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 74df98e..36fc94d 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -34,6 +34,10 @@
#include "iw_cxgb4.h"
+static int db_delay_usecs = 1;
+module_param(db_delay_usecs, int, 0644);
+MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");
+
static int ocqp_support = 1;
module_param(ocqp_support, int, 0644);
MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
@@ -1117,6 +1121,29 @@ out:
return ret;
}
+/*
+ * Called by the library when the qp has user dbs disabled due to
+ * a DB_FULL condition. This function will single-thread all user
+ * DB rings to avoid overflowing the hw db-fifo.
+ */
+static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
+{
+ int delay = db_delay_usecs;
+
+ mutex_lock(&qhp->rhp->db_mutex);
+ do {
+ if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < 768) {
+ writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db);
+ break;
+ }
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(usecs_to_jiffies(delay));
+ delay = min(delay << 1, 200000);
+ } while (1);
+ mutex_unlock(&qhp->rhp->db_mutex);
+ return 0;
+}
+
int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
enum c4iw_qp_attr_mask mask,
struct c4iw_qp_attributes *attrs,
@@ -1165,6 +1192,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
qhp->attr = newattr;
}
+ if (mask & C4IW_QP_ATTR_SQ_DB) {
+ ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc);
+ goto out;
+ }
+ if (mask & C4IW_QP_ATTR_RQ_DB) {
+ ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc);
+ goto out;
+ }
+
if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
goto out;
if (qhp->attr.state == attrs->next_state)
@@ -1454,7 +1490,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
init_waitqueue_head(&qhp->wait);
atomic_set(&qhp->refcnt, 1);
- ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+ spin_lock_irq(&rhp->lock);
+ if (rhp->db_state != NORMAL)
+ t4_disable_wq_db(&qhp->wq);
+ ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+ spin_unlock_irq(&rhp->lock);
if (ret)
goto err2;
@@ -1598,6 +1638,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;
+ /*
+ * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
+ * ringing the queue db when we're in DB_FULL mode.
+ */
+ attrs.sq_db_inc = attr->sq_psn;
+ attrs.rq_db_inc = attr->rq_psn;
+ mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
+ mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
+
return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
}
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists