lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Thu, 29 May 2008 15:26:54 +0530 From: Ramachandra K <ramachandra.kuchimanchi@...gic.com> To: rdreier@...co.com, general@...ts.openfabrics.org, netdev@...r.kernel.org Cc: poornima.kamath@...gic.com, amar.mudrankit@...gic.com Subject: [PATCH v3 06/13] QLogic VNIC: IB core stack interaction From: Ramachandra K <ramachandra.kuchimanchi@...gic.com> The patch implements the interaction of the QLogic VNIC driver with the underlying core infiniband stack. Signed-off-by: Ramachandra K <ramachandra.kuchimanchi@...gic.com> Signed-off-by: Poornima Kamath <poornima.kamath@...gic.com> Signed-off-by: Amar Mudrankit <amar.mudrankit@...gic.com> --- drivers/infiniband/ulp/qlgc_vnic/vnic_ib.c | 1043 ++++++++++++++++++++++++++++ drivers/infiniband/ulp/qlgc_vnic/vnic_ib.h | 206 ++++++ 2 files changed, 1249 insertions(+), 0 deletions(-) create mode 100644 drivers/infiniband/ulp/qlgc_vnic/vnic_ib.c create mode 100644 drivers/infiniband/ulp/qlgc_vnic/vnic_ib.h diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.c b/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.c new file mode 100644 index 0000000..c43e69e --- /dev/null +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.c @@ -0,0 +1,1043 @@ +/* + * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/string.h> +#include <linux/random.h> +#include <linux/netdevice.h> +#include <linux/list.h> + +#include "vnic_util.h" +#include "vnic_data.h" +#include "vnic_config.h" +#include "vnic_ib.h" +#include "vnic_viport.h" +#include "vnic_sys.h" +#include "vnic_main.h" +#include "vnic_stats.h" + +static int vnic_ib_inited; +static void vnic_add_one(struct ib_device *device); +static void vnic_remove_one(struct ib_device *device); +static int vnic_defer_completion(void *ptr); + +static int vnic_ib_mc_init_qp(struct mc_data *mc_data, + struct vnic_ib_config *config, + struct ib_pd *pd, + struct viport_config *viport_config); + +static struct ib_client vnic_client = { + .name = "vnic", + .add = vnic_add_one, + .remove = vnic_remove_one +}; + +struct ib_sa_client vnic_sa_client; + +int vnic_ib_init(void) +{ + int ret = -1; + + IB_FUNCTION("vnic_ib_init()\n"); + + /* class has to be registered before + * calling ib_register_client() because, that call + * will trigger vnic_add_port() which will register + * class_device for the port with the parent class + * as vnic_class + */ + ret = class_register(&vnic_class); + if (ret) { + printk(KERN_ERR PFX "couldn't register class" + " infiniband_qlgc_vnic; error %d", ret); + goto out; + } + + ib_sa_register_client(&vnic_sa_client); + ret = ib_register_client(&vnic_client); + if (ret) { + printk(KERN_ERR PFX "couldn't register IB client;" + " error %d", ret); + goto err_ib_reg; + } + + interface_dev.dev.class = &vnic_class; + interface_dev.dev.release = vnic_release_dev; + snprintf(interface_dev.dev.bus_id, + BUS_ID_SIZE, "interfaces"); + init_completion(&interface_dev.released); + ret = device_register(&interface_dev.dev); + if (ret) { + printk(KERN_ERR PFX "couldn't register class interfaces;" + " error %d", ret); + goto err_class_dev; + } + ret = device_create_file(&interface_dev.dev, + &dev_attr_delete_vnic); + if (ret) { + printk(KERN_ERR PFX "couldn't create class file" + " 'delete_vnic'; error %d", ret); + goto err_class_file; + } + + vnic_ib_inited = 1; + + return ret; +err_class_file: + device_unregister(&interface_dev.dev); +err_class_dev: + ib_unregister_client(&vnic_client); +err_ib_reg: + ib_sa_unregister_client(&vnic_sa_client); + class_unregister(&vnic_class); +out: + return ret; +} + +static struct vnic_ib_port *vnic_add_port(struct vnic_ib_device *device, + u8 port_num) +{ + struct vnic_ib_port *port; + + port = kzalloc(sizeof *port, GFP_KERNEL); + if (!port) + return NULL; + + init_completion(&port->pdev_info.released); + port->dev = device; + port->port_num = port_num; + + port->pdev_info.dev.class = &vnic_class; + port->pdev_info.dev.parent = NULL; + port->pdev_info.dev.release = vnic_release_dev; + snprintf(port->pdev_info.dev.bus_id, BUS_ID_SIZE, + "vnic-%s-%d", device->dev->name, port_num); + + if (device_register(&port->pdev_info.dev)) + goto free_port; + + if (device_create_file(&port->pdev_info.dev, + &dev_attr_create_primary)) + goto err_class; + if (device_create_file(&port->pdev_info.dev, + &dev_attr_create_secondary)) + goto err_class; + + return port; +err_class: + device_unregister(&port->pdev_info.dev); +free_port: + kfree(port); + + return NULL; +} + +static void vnic_add_one(struct ib_device *device) +{ + struct vnic_ib_device *vnic_dev; + struct vnic_ib_port *port; + int s, e, p; + + vnic_dev = kmalloc(sizeof *vnic_dev, GFP_KERNEL); + if (!vnic_dev) + return; + + vnic_dev->dev = device; + INIT_LIST_HEAD(&vnic_dev->port_list); + + if (device->node_type == RDMA_NODE_IB_SWITCH) { + s = 0; + e = 0; + + } else { + s = 1; + e = device->phys_port_cnt; + + } + + for (p = s; p <= e; p++) { + port = vnic_add_port(vnic_dev, p); + if (port) + list_add_tail(&port->list, &vnic_dev->port_list); + } + + ib_set_client_data(device, &vnic_client, vnic_dev); + +} + +static void vnic_remove_one(struct ib_device *device) +{ + struct vnic_ib_device *vnic_dev; + struct vnic_ib_port *port, *tmp_port; + + vnic_dev = ib_get_client_data(device, &vnic_client); + list_for_each_entry_safe(port, tmp_port, + &vnic_dev->port_list, list) { + device_unregister(&port->pdev_info.dev); + /* + * wait for sysfs entries to go away, so that no new vnics + * are created + */ + wait_for_completion(&port->pdev_info.released); + kfree(port); + + } + kfree(vnic_dev); + + /* TODO Only those vnic interfaces associated with + * the HCA whose remove event is called should be freed + * Currently all the vnic interfaces are freed + */ + + while (!list_empty(&vnic_list)) { + struct vnic *vnic = + list_entry(vnic_list.next, struct vnic, list_ptrs); + vnic_free(vnic); + } + + vnic_npevent_cleanup(); + viport_cleanup(); + +} + +void vnic_ib_cleanup(void) +{ + IB_FUNCTION("vnic_ib_cleanup()\n"); + + if (!vnic_ib_inited) + return; + + device_unregister(&interface_dev.dev); + wait_for_completion(&interface_dev.released); + + ib_unregister_client(&vnic_client); + ib_sa_unregister_client(&vnic_sa_client); + class_unregister(&vnic_class); +} + +static void vnic_path_rec_completion(int status, + struct ib_sa_path_rec *pathrec, + void *context) +{ + struct vnic_ib_path_info *p = context; + p->status = status; + if (!status) + p->path = *pathrec; + + complete(&p->done); +} + +int vnic_ib_get_path(struct netpath *netpath, struct vnic *vnic) +{ + struct viport_config *config = netpath->viport->config; + int ret = 0; + + init_completion(&config->path_info.done); + IB_INFO("Using SA path rec get time out value of %d\n", + config->sa_path_rec_get_timeout); + config->path_info.path_query_id = + ib_sa_path_rec_get(&vnic_sa_client, + config->ibdev, + config->port, + &config->path_info.path, + IB_SA_PATH_REC_DGID | + IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_NUMB_PATH | + IB_SA_PATH_REC_PKEY, + config->sa_path_rec_get_timeout, + GFP_KERNEL, + vnic_path_rec_completion, + &config->path_info, + &config->path_info.path_query); + + if (config->path_info.path_query_id < 0) { + IB_ERROR("SA path record query failed; error %d\n", + config->path_info.path_query_id); + ret = config->path_info.path_query_id; + goto out; + } + + wait_for_completion(&config->path_info.done); + + if (config->path_info.status < 0) { + printk(KERN_WARNING PFX "connection not available to dgid " + "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x", + (int)be16_to_cpu(*(__be16 *) &config->path_info.path. + dgid.raw[0]), + (int)be16_to_cpu(*(__be16 *) &config->path_info.path. + dgid.raw[2]), + (int)be16_to_cpu(*(__be16 *) &config->path_info.path. + dgid.raw[4]), + (int)be16_to_cpu(*(__be16 *) &config->path_info.path. + dgid.raw[6]), + (int)be16_to_cpu(*(__be16 *) &config->path_info.path. + dgid.raw[8]), + (int)be16_to_cpu(*(__be16 *) &config->path_info.path. + dgid.raw[10]), + (int)be16_to_cpu(*(__be16 *) &config->path_info.path. + dgid.raw[12]), + (int)be16_to_cpu(*(__be16 *) &config->path_info.path. + dgid.raw[14])); + + if (config->path_info.status == -ETIMEDOUT) + printk(KERN_INFO " path query timed out\n"); + else if (config->path_info.status == -EIO) + printk(KERN_INFO " path query sending error\n"); + else + printk(KERN_INFO " error %d\n", + config->path_info.status); + + ret = config->path_info.status; + } +out: + if (ret) + netpath_timer(netpath, vnic->config->no_path_timeout); + + return ret; +} + +static inline void vnic_ib_handle_completions(struct ib_wc *wc, + struct vnic_ib_conn *ib_conn, + u32 *comp_num, + cycles_t *comp_time) +{ + struct io *io; + + io = (struct io *)(wc->wr_id); + vnic_ib_comp_stats(ib_conn, comp_num); + if (wc->status) { + IB_INFO("completion error wc.status %d" + " wc.opcode %d vendor err 0x%x\n", + wc->status, wc->opcode, wc->vendor_err); + } else if (io) { + vnic_ib_io_stats(io, ib_conn, *comp_time); + if (io->type == RECV_UD) { + struct ud_recv_io *recv_io = + container_of(io, struct ud_recv_io, io); + recv_io->len = wc->byte_len; + } + if (io->routine) + (*io->routine) (io); + } +} + +static void ib_qp_event(struct ib_event *event, void *context) +{ + IB_ERROR("QP event %d\n", event->event); +} + +static void vnic_ib_completion(struct ib_cq *cq, void *ptr) +{ + struct vnic_ib_conn *ib_conn = ptr; + unsigned long flags; + int compl_received; + struct ib_wc wc; + cycles_t comp_time; + u32 comp_num = 0; + + /* for multicast, cm_id is NULL, so skip that test */ + if (ib_conn->cm_id && + (ib_conn->state != IB_CONN_CONNECTED)) + return; + + /* Check if completion processing is taking place in thread + * If not then process completions in this handler, + * else set compl_received if not set, to indicate that + * there are more completions to process in thread. + */ + + spin_lock_irqsave(&ib_conn->compl_received_lock, flags); + compl_received = ib_conn->compl_received; + spin_unlock_irqrestore(&ib_conn->compl_received_lock, flags); + + if (ib_conn->in_thread || compl_received) { + if (!compl_received) { + spin_lock_irqsave(&ib_conn->compl_received_lock, flags); + ib_conn->compl_received = 1; + spin_unlock_irqrestore(&ib_conn->compl_received_lock, + flags); + } + wake_up(&(ib_conn->callback_wait_queue)); + } else { + vnic_ib_note_comptime_stats(&comp_time); + vnic_ib_callback_stats(ib_conn); + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + while (ib_poll_cq(cq, 1, &wc) > 0) { + vnic_ib_handle_completions(&wc, ib_conn, &comp_num, + &comp_time); + if (ib_conn->cm_id && + ib_conn->state != IB_CONN_CONNECTED) + break; + + /* If we get more completions than the completion limit + * defer completion to the thread + */ + if ((!ib_conn->in_thread) && + (comp_num >= ib_conn->ib_config->completion_limit)) { + ib_conn->in_thread = 1; + spin_lock_irqsave( + &ib_conn->compl_received_lock, flags); + ib_conn->compl_received = 1; + spin_unlock_irqrestore( + &ib_conn->compl_received_lock, flags); + wake_up(&(ib_conn->callback_wait_queue)); + break; + } + + } + vnic_ib_maxio_stats(ib_conn, comp_num); + } +} + +static int vnic_ib_mod_qp_to_rts(struct ib_cm_id *cm_id, + struct vnic_ib_conn *ib_conn) +{ + int attr_mask = 0; + int ret; + struct ib_qp_attr *qp_attr = NULL; + + qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) + return -ENOMEM; + + qp_attr->qp_state = IB_QPS_RTR; + + ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (ret) + goto out; + + ret = ib_modify_qp(ib_conn->qp, qp_attr, attr_mask); + if (ret) + goto out; + + IB_INFO("QP RTR\n"); + + qp_attr->qp_state = IB_QPS_RTS; + + ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (ret) + goto out; + + ret = ib_modify_qp(ib_conn->qp, qp_attr, attr_mask); + if (ret) + goto out; + + IB_INFO("QP RTS\n"); + + ret = ib_send_cm_rtu(cm_id, NULL, 0); + if (ret) + goto out; +out: + kfree(qp_attr); + return ret; +} + +int vnic_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +{ + struct vnic_ib_conn *ib_conn = cm_id->context; + struct viport *viport = ib_conn->viport; + int err = 0; + + switch (event->event) { + case IB_CM_REQ_ERROR: + IB_ERROR("sending CM REQ failed\n"); + err = 1; + viport->retry = 1; + break; + case IB_CM_REP_RECEIVED: + IB_INFO("CM REP recvd\n"); + if (vnic_ib_mod_qp_to_rts(cm_id, ib_conn)) + err = 1; + else { + ib_conn->state = IB_CONN_CONNECTED; + vnic_ib_connected_time_stats(ib_conn); + IB_INFO("RTU SENT\n"); + } + break; + case IB_CM_REJ_RECEIVED: + printk(KERN_ERR PFX " CM rejected control connection\n"); + if (event->param.rej_rcvd.reason == + IB_CM_REJ_INVALID_SERVICE_ID) + printk(KERN_ERR "reason: invalid service ID. " + "IOCGUID value specified may be incorrect\n"); + else + printk(KERN_ERR "reason code : 0x%x\n", + event->param.rej_rcvd.reason); + + err = 1; + viport->retry = 1; + break; + case IB_CM_MRA_RECEIVED: + IB_INFO("CM MRA received\n"); + break; + + case IB_CM_DREP_RECEIVED: + IB_INFO("CM DREP recvd\n"); + ib_conn->state = IB_CONN_DISCONNECTED; + break; + + case IB_CM_TIMEWAIT_EXIT: + IB_ERROR("CM timewait exit\n"); + err = 1; + break; + + default: + IB_INFO("unhandled CM event %d\n", event->event); + break; + + } + + if (err) { + ib_conn->state = IB_CONN_DISCONNECTED; + viport_failure(viport); + } + + viport_kick(viport); + return 0; +} + + +int vnic_ib_cm_connect(struct vnic_ib_conn *ib_conn) +{ + struct ib_cm_req_param *req = NULL; + struct viport *viport; + int ret = -1; + + if (!vnic_ib_conn_initted(ib_conn)) { + IB_ERROR("IB Connection out of state for CM connect (%d)\n", + ib_conn->state); + return -EINVAL; + } + + vnic_ib_conntime_stats(ib_conn); + req = kzalloc(sizeof *req, GFP_KERNEL); + if (!req) + return -ENOMEM; + + viport = ib_conn->viport; + + req->primary_path = &viport->config->path_info.path; + req->alternate_path = NULL; + req->qp_num = ib_conn->qp->qp_num; + req->qp_type = ib_conn->qp->qp_type; + req->service_id = ib_conn->ib_config->service_id; + req->private_data = &ib_conn->ib_config->conn_data; + req->private_data_len = sizeof(struct vnic_connection_data); + req->flow_control = 1; + + get_random_bytes(&req->starting_psn, 4); + req->starting_psn &= 0xffffff; + + /* + * Both responder_resources and initiator_depth are set to zero + * as we do not need RDMA read. + * + * They also must be set to zero, otherwise data connections + * are rejected by VEx. + */ + req->responder_resources = 0; + req->initiator_depth = 0; + req->remote_cm_response_timeout = 20; + req->local_cm_response_timeout = 20; + req->retry_count = ib_conn->ib_config->retry_count; + req->rnr_retry_count = ib_conn->ib_config->rnr_retry_count; + req->max_cm_retries = 15; + + ib_conn->state = IB_CONN_CONNECTING; + + ret = ib_send_cm_req(ib_conn->cm_id, req); + + kfree(req); + + if (ret) { + IB_ERROR("CM REQ sending failed; error %d \n", ret); + ib_conn->state = IB_CONN_DISCONNECTED; + } + + return ret; +} + +static int vnic_ib_init_qp(struct vnic_ib_conn *ib_conn, + struct vnic_ib_config *config, + struct ib_pd *pd, + struct viport_config *viport_config) +{ + struct ib_qp_init_attr *init_attr; + struct ib_qp_attr *attr; + int ret; + + init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); + if (!init_attr) + return -ENOMEM; + + init_attr->event_handler = ib_qp_event; + init_attr->cap.max_send_wr = config->num_sends; + init_attr->cap.max_recv_wr = config->num_recvs; + init_attr->cap.max_recv_sge = config->recv_scatter; + init_attr->cap.max_send_sge = config->send_gather; + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->qp_type = IB_QPT_RC; + init_attr->send_cq = ib_conn->cq; + init_attr->recv_cq = ib_conn->cq; + + ib_conn->qp = ib_create_qp(pd, init_attr); + + if (IS_ERR(ib_conn->qp)) { + ret = -1; + IB_ERROR("could not create QP\n"); + goto free_init_attr; + } + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) { + ret = -ENOMEM; + goto destroy_qp; + } + + ret = ib_find_pkey(viport_config->ibdev, viport_config->port, + be16_to_cpu(viport_config->path_info.path.pkey), + &attr->pkey_index); + if (ret) { + printk(KERN_WARNING PFX "ib_find_pkey() failed; " + "error %d\n", ret); + goto freeattr; + } + + attr->qp_state = IB_QPS_INIT; + attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; + attr->port_num = viport_config->port; + + ret = ib_modify_qp(ib_conn->qp, attr, + IB_QP_STATE | + IB_QP_PKEY_INDEX | + IB_QP_ACCESS_FLAGS | IB_QP_PORT); + if (ret) { + printk(KERN_WARNING PFX "could not modify QP; error %d \n", + ret); + goto freeattr; + } + + kfree(attr); + kfree(init_attr); + return ret; + +freeattr: + kfree(attr); +destroy_qp: + ib_destroy_qp(ib_conn->qp); +free_init_attr: + kfree(init_attr); + return ret; +} + +int vnic_ib_conn_init(struct vnic_ib_conn *ib_conn, struct viport *viport, + struct ib_pd *pd, struct vnic_ib_config *config) +{ + struct viport_config *viport_config = viport->config; + int ret = -1; + unsigned int cq_size = config->num_sends + config->num_recvs; + + + if (!vnic_ib_conn_uninitted(ib_conn)) { + IB_ERROR("IB Connection out of state for init (%d)\n", + ib_conn->state); + return -EINVAL; + } + + ib_conn->cq = ib_create_cq(viport_config->ibdev, vnic_ib_completion, +#ifdef BUILD_FOR_OFED_1_2 + NULL, ib_conn, cq_size); +#else + NULL, ib_conn, cq_size, 0); +#endif + if (IS_ERR(ib_conn->cq)) { + IB_ERROR("could not create CQ\n"); + goto out; + } + + IB_INFO("cq created %p %d\n", ib_conn->cq, cq_size); + ib_req_notify_cq(ib_conn->cq, IB_CQ_NEXT_COMP); + init_waitqueue_head(&(ib_conn->callback_wait_queue)); + init_completion(&(ib_conn->callback_thread_exit)); + + spin_lock_init(&ib_conn->compl_received_lock); + + ib_conn->callback_thread = kthread_run(vnic_defer_completion, ib_conn, + "qlgc_vnic_def_compl"); + if (IS_ERR(ib_conn->callback_thread)) { + IB_ERROR("Could not create vnic_callback_thread;" + " error %d\n", (int) PTR_ERR(ib_conn->callback_thread)); + ib_conn->callback_thread = NULL; + goto destroy_cq; + } + + ret = vnic_ib_init_qp(ib_conn, config, pd, viport_config); + + if (ret) + goto destroy_thread; + + spin_lock_init(&ib_conn->conn_lock); + ib_conn->state = IB_CONN_INITTED; + + return ret; + +destroy_thread: + completion_callback_cleanup(ib_conn); +destroy_cq: + ib_destroy_cq(ib_conn->cq); +out: + return ret; +} + +int vnic_ib_post_recv(struct vnic_ib_conn *ib_conn, struct io *io) +{ + cycles_t post_time; + struct ib_recv_wr *bad_wr; + int ret = -1; + unsigned long flags; + + IB_FUNCTION("vnic_ib_post_recv()\n"); + + spin_lock_irqsave(&ib_conn->conn_lock, flags); + + if (!vnic_ib_conn_initted(ib_conn) && + !vnic_ib_conn_connected(ib_conn)) { + ret = -EINVAL; + goto out; + } + + vnic_ib_pre_rcvpost_stats(ib_conn, io, &post_time); + io->type = RECV; + ret = ib_post_recv(ib_conn->qp, &io->rwr, &bad_wr); + if (ret) { + IB_ERROR("error in posting rcv wr; error %d\n", ret); + ib_conn->state = IB_CONN_ERRORED; + goto out; + } + + vnic_ib_post_rcvpost_stats(ib_conn, post_time); +out: + spin_unlock_irqrestore(&ib_conn->conn_lock, flags); + return ret; + +} + +int vnic_ib_post_send(struct vnic_ib_conn *ib_conn, struct io *io) +{ + cycles_t post_time; + unsigned long flags; + struct ib_send_wr *bad_wr; + int ret = -1; + + IB_FUNCTION("vnic_ib_post_send()\n"); + + spin_lock_irqsave(&ib_conn->conn_lock, flags); + if (!vnic_ib_conn_connected(ib_conn)) { + IB_ERROR("IB Connection out of state for" + " posting sends (%d)\n", ib_conn->state); + goto out; + } + + vnic_ib_pre_sendpost_stats(io, &post_time); + if (io->swr.opcode == IB_WR_RDMA_WRITE) + io->type = RDMA; + else + io->type = SEND; + + ret = ib_post_send(ib_conn->qp, &io->swr, &bad_wr); + if (ret) { + IB_ERROR("error in posting send wr; error %d\n", ret); + ib_conn->state = IB_CONN_ERRORED; + goto out; + } + + vnic_ib_post_sendpost_stats(ib_conn, io, post_time); +out: + spin_unlock_irqrestore(&ib_conn->conn_lock, flags); + return ret; +} + +static int vnic_defer_completion(void *ptr) +{ + struct vnic_ib_conn *ib_conn = ptr; + struct ib_wc wc; + struct ib_cq *cq = ib_conn->cq; + cycles_t comp_time; + u32 comp_num = 0; + unsigned long flags; + + while (!ib_conn->callback_thread_end) { + wait_event_interruptible(ib_conn->callback_wait_queue, + ib_conn->compl_received || + ib_conn->callback_thread_end); + ib_conn->in_thread = 1; + spin_lock_irqsave(&ib_conn->compl_received_lock, flags); + ib_conn->compl_received = 0; + spin_unlock_irqrestore(&ib_conn->compl_received_lock, flags); + if (ib_conn->cm_id && + ib_conn->state != IB_CONN_CONNECTED) + goto out_thread; + + vnic_ib_note_comptime_stats(&comp_time); + vnic_ib_callback_stats(ib_conn); + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + while (ib_poll_cq(cq, 1, &wc) > 0) { + vnic_ib_handle_completions(&wc, ib_conn, &comp_num, + &comp_time); + if (ib_conn->cm_id && + ib_conn->state != IB_CONN_CONNECTED) + break; + } + vnic_ib_maxio_stats(ib_conn, comp_num); +out_thread: + ib_conn->in_thread = 0; + } + complete_and_exit(&(ib_conn->callback_thread_exit), 0); + return 0; +} + +void completion_callback_cleanup(struct vnic_ib_conn *ib_conn) +{ + if (ib_conn->callback_thread) { + ib_conn->callback_thread_end = 1; + wake_up(&(ib_conn->callback_wait_queue)); + wait_for_completion(&(ib_conn->callback_thread_exit)); + ib_conn->callback_thread = NULL; + } +} + +int vnic_ib_mc_init(struct mc_data *mc_data, struct viport *viport, + struct ib_pd *pd, struct vnic_ib_config *config) +{ + struct viport_config *viport_config = viport->config; + int ret = -1; + unsigned int cq_size = config->num_recvs; /* recvs only */ + + IB_FUNCTION("vnic_ib_mc_init\n"); + + mc_data->ib_conn.cq = ib_create_cq(viport_config->ibdev, vnic_ib_completion, +#ifdef BUILD_FOR_OFED_1_2 + NULL, &mc_data->ib_conn, cq_size); +#else + NULL, &mc_data->ib_conn, cq_size, 0); +#endif + if (IS_ERR(mc_data->ib_conn.cq)) { + IB_ERROR("ib_create_cq failed\n"); + goto out; + } + IB_INFO("mc cq created %p %d\n", mc_data->ib_conn.cq, cq_size); + + ret = ib_req_notify_cq(mc_data->ib_conn.cq, IB_CQ_NEXT_COMP); + if (ret) { + IB_ERROR("ib_req_notify_cq failed %x \n", ret); + goto destroy_cq; + } + + init_waitqueue_head(&(mc_data->ib_conn.callback_wait_queue)); + init_completion(&(mc_data->ib_conn.callback_thread_exit)); + + spin_lock_init(&mc_data->ib_conn.compl_received_lock); + mc_data->ib_conn.callback_thread = kthread_run(vnic_defer_completion, + &mc_data->ib_conn, + "qlgc_vnic_mc_def_compl"); + if (IS_ERR(mc_data->ib_conn.callback_thread)) { + IB_ERROR("Could not create vnic_callback_thread for MULTICAST;" + " error %d\n", + (int) PTR_ERR(mc_data->ib_conn.callback_thread)); + mc_data->ib_conn.callback_thread = NULL; + goto destroy_cq; + } + IB_INFO("callback_thread created\n"); + + ret = vnic_ib_mc_init_qp(mc_data, config, pd, viport_config); + if (ret) + goto destroy_thread; + + spin_lock_init(&mc_data->ib_conn.conn_lock); + mc_data->ib_conn.state = IB_CONN_INITTED; /* stays in this state */ + + return ret; + +destroy_thread: + completion_callback_cleanup(&mc_data->ib_conn); +destroy_cq: + ib_destroy_cq(mc_data->ib_conn.cq); + mc_data->ib_conn.cq = (struct ib_cq *)ERR_PTR(-EINVAL); +out: + return ret; +} + +static int vnic_ib_mc_init_qp(struct mc_data *mc_data, + struct vnic_ib_config *config, + struct ib_pd *pd, + struct viport_config *viport_config) +{ + struct ib_qp_init_attr *init_attr; + struct ib_qp_attr *qp_attr; + int ret; + + IB_FUNCTION("vnic_ib_mc_init_qp\n"); + + if (!mc_data->ib_conn.cq) { + IB_ERROR("cq is null\n"); + return -ENOMEM; + } + + init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); + if (!init_attr) { + IB_ERROR("failed to alloc init_attr\n"); + return -ENOMEM; + } + + init_attr->cap.max_recv_wr = config->num_recvs; + init_attr->cap.max_send_wr = 1; + init_attr->cap.max_recv_sge = 2; + init_attr->cap.max_send_sge = 1; + + /* Completion for all work requests. */ + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + + init_attr->qp_type = IB_QPT_UD; + + init_attr->send_cq = mc_data->ib_conn.cq; + init_attr->recv_cq = mc_data->ib_conn.cq; + + IB_INFO("creating qp %d \n", config->num_recvs); + + mc_data->ib_conn.qp = ib_create_qp(pd, init_attr); + + if (IS_ERR(mc_data->ib_conn.qp)) { + ret = -1; + IB_ERROR("could not create QP\n"); + goto free_init_attr; + } + + qp_attr = kzalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) { + ret = -ENOMEM; + goto destroy_qp; + } + + qp_attr->qp_state = IB_QPS_INIT; + qp_attr->port_num = viport_config->port; + qp_attr->qkey = IOC_NUMBER(be64_to_cpu(viport_config->ioc_guid)); + qp_attr->pkey_index = 0; + /* cannot set access flags for UD qp + qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; */ + + IB_INFO("port_num:%d qkey:%d pkey:%d\n", qp_attr->port_num, + qp_attr->qkey, qp_attr->pkey_index); + ret = ib_modify_qp(mc_data->ib_conn.qp, qp_attr, + IB_QP_STATE | + IB_QP_PKEY_INDEX | + IB_QP_QKEY | + + /* cannot set this for UD + IB_QP_ACCESS_FLAGS | */ + + IB_QP_PORT); + if (ret) { + IB_ERROR("ib_modify_qp to INIT failed %d \n", ret); + goto free_qp_attr; + } + + kfree(qp_attr); + kfree(init_attr); + return ret; + +free_qp_attr: + kfree(qp_attr); +destroy_qp: + ib_destroy_qp(mc_data->ib_conn.qp); + mc_data->ib_conn.qp = ERR_PTR(-EINVAL); +free_init_attr: + kfree(init_attr); + return ret; +} + +int vnic_ib_mc_mod_qp_to_rts(struct ib_qp *qp) +{ + int ret; + struct ib_qp_attr *qp_attr = NULL; + + IB_FUNCTION("vnic_ib_mc_mod_qp_to_rts\n"); + qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) + return -ENOMEM; + + memset(qp_attr, 0, sizeof *qp_attr); + qp_attr->qp_state = IB_QPS_RTR; + + ret = ib_modify_qp(qp, qp_attr, IB_QP_STATE); + if (ret) { + IB_ERROR("ib_modify_qp to RTR failed %d\n", ret); + goto out; + } + IB_INFO("MC QP RTR\n"); + + memset(qp_attr, 0, sizeof *qp_attr); + qp_attr->qp_state = IB_QPS_RTS; + qp_attr->sq_psn = 0; + + ret = ib_modify_qp(qp, qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) { + IB_ERROR("ib_modify_qp to RTS failed %d\n", ret); + goto out; + } + IB_INFO("MC QP RTS\n"); + + return 0; + +out: + kfree(qp_attr); + return -1; +} + +int vnic_ib_mc_post_recv(struct mc_data *mc_data, struct io *io) +{ + cycles_t post_time; + struct ib_recv_wr *bad_wr; + int ret = -1; + + IB_FUNCTION("vnic_ib_mc_post_recv()\n"); + + vnic_ib_pre_rcvpost_stats(&mc_data->ib_conn, io, &post_time); + io->type = RECV_UD; + ret = ib_post_recv(mc_data->ib_conn.qp, &io->rwr, &bad_wr); + if (ret) { + IB_ERROR("error in posting rcv wr; error %d\n", ret); + goto out; + } + vnic_ib_post_rcvpost_stats(&mc_data->ib_conn, post_time); + +out: + return ret; +} diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.h new file mode 100644 index 0000000..ebf9ef5 --- /dev/null +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.h @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef VNIC_IB_H_INCLUDED +#define VNIC_IB_H_INCLUDED + +#include <linux/timex.h> +#include <linux/completion.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_pack.h> +#include <rdma/ib_sa.h> +#include <rdma/ib_cm.h> + +#include "vnic_sys.h" +#include "vnic_netpath.h" +#define PFX "qlgc_vnic: " + +struct io; +typedef void (comp_routine_t) (struct io *io); + +enum vnic_ib_conn_state { + IB_CONN_UNINITTED = 0, + IB_CONN_INITTED = 1, + IB_CONN_CONNECTING = 2, + IB_CONN_CONNECTED = 3, + IB_CONN_DISCONNECTED = 4, + IB_CONN_ERRORED = 5 +}; + +struct vnic_ib_conn { + struct viport *viport; + struct vnic_ib_config *ib_config; + spinlock_t conn_lock; + enum vnic_ib_conn_state state; + struct ib_qp *qp; + struct ib_cq *cq; + struct ib_cm_id *cm_id; + int callback_thread_end; + struct task_struct *callback_thread; + wait_queue_head_t callback_wait_queue; + u32 in_thread; + u32 compl_received; + struct completion callback_thread_exit; + spinlock_t compl_received_lock; +#ifdef CONFIG_INFINIBAND_QLGC_VNIC_STATS + struct { + cycles_t connection_time; + cycles_t rdma_post_time; + u32 rdma_post_ios; + cycles_t rdma_comp_time; + u32 rdma_comp_ios; + cycles_t send_post_time; + u32 send_post_ios; + cycles_t send_comp_time; + u32 send_comp_ios; + cycles_t recv_post_time; + u32 recv_post_ios; + cycles_t recv_comp_time; + u32 recv_comp_ios; + u32 num_ios; + u32 num_callbacks; + u32 max_ios; + } statistics; +#endif /* CONFIG_INFINIBAND_QLGC_VNIC_STATS */ +}; + +struct vnic_ib_path_info { + struct ib_sa_path_rec path; + struct ib_sa_query *path_query; + int path_query_id; + int status; + struct completion done; +}; + +struct vnic_ib_device { + struct ib_device *dev; + struct list_head port_list; +}; + +struct vnic_ib_port { + struct vnic_ib_device *dev; + u8 port_num; + struct dev_info pdev_info; + struct list_head list; +}; + +struct io { + struct list_head list_ptrs; + struct viport *viport; + comp_routine_t *routine; + struct ib_recv_wr rwr; + struct ib_send_wr swr; +#ifdef CONFIG_INFINIBAND_QLGC_VNIC_STATS + cycles_t time; +#endif /* CONFIG_INFINIBAND_QLGC_VNIC_STATS */ + enum {RECV, RDMA, SEND, RECV_UD} type; +}; + +struct rdma_io { + struct io io; + struct ib_sge list[2]; + u16 index; + u16 len; + u8 *data; + dma_addr_t data_dma; + struct sk_buff *skb; + dma_addr_t skb_data_dma; + struct viport_trailer *trailer; + dma_addr_t trailer_dma; +}; + +struct send_io { + struct io io; + struct ib_sge list; + u8 *virtual_addr; +}; + +struct recv_io { + struct io io; + struct ib_sge list; + u8 *virtual_addr; +}; + +struct ud_recv_io { + struct io io; + u16 len; + dma_addr_t skb_data_dma; + struct ib_sge list[2]; /* one for grh and other for rest of pkt. */ + struct sk_buff *skb; +}; + +int vnic_ib_init(void); +void vnic_ib_cleanup(void); + +struct vnic; +int vnic_ib_get_path(struct netpath *netpath, struct vnic *vnic); +int vnic_ib_conn_init(struct vnic_ib_conn *ib_conn, struct viport *viport, + struct ib_pd *pd, struct vnic_ib_config *config); + +int vnic_ib_post_recv(struct vnic_ib_conn *ib_conn, struct io *io); +int vnic_ib_post_send(struct vnic_ib_conn *ib_conn, struct io *io); +int vnic_ib_cm_connect(struct vnic_ib_conn *ib_conn); +int vnic_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); + +#define vnic_ib_conn_uninitted(ib_conn) \ + ((ib_conn)->state == IB_CONN_UNINITTED) +#define vnic_ib_conn_initted(ib_conn) \ + ((ib_conn)->state == IB_CONN_INITTED) +#define vnic_ib_conn_connecting(ib_conn) \ + ((ib_conn)->state == IB_CONN_CONNECTING) +#define vnic_ib_conn_connected(ib_conn) \ + ((ib_conn)->state == IB_CONN_CONNECTED) +#define vnic_ib_conn_disconnected(ib_conn) \ + ((ib_conn)->state == IB_CONN_DISCONNECTED) + +#define MCAST_GROUP_INVALID 0x00 /* viport failed to join or left mc group */ +#define MCAST_GROUP_JOINING 0x01 /* wait for completion */ +#define MCAST_GROUP_JOINED 0x02 /* join process completed successfully */ + +/* vnic_sa_client is used to register with sa once. It is needed to join and + * leave multicast groups. + */ +extern struct ib_sa_client vnic_sa_client; + +/* The following functions are using initialize and handle multicast + * components. + */ +struct mc_data; /* forward declaration */ +/* Initialize all necessary mc components */ +int vnic_ib_mc_init(struct mc_data *mc_data, struct viport *viport, + struct ib_pd *pd, struct vnic_ib_config *config); +/* Put multicast qp in RTS */ +int vnic_ib_mc_mod_qp_to_rts(struct ib_qp *qp); +/* Post multicast receive buffers */ +int vnic_ib_mc_post_recv(struct mc_data *mc_data, struct io *io); + +#endif /* VNIC_IB_H_INCLUDED */ -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists