lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 29 Sep 2015 18:16:25 -0700
From:	Ashutosh Dixit <ashutosh.dixit@...el.com>
To:	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	linux-kernel@...r.kernel.org
Cc:	Arnd Bergmann <arnd@...db.de>,
	Ashutosh Dixit <ashutosh.dixit@...el.com>,
	Sudeep Dutt <sudeep.dutt@...el.com>,
	Nikhil Rao <nikhil.rao@...el.com>
Subject: [PATCH char-misc-next v2 22/22] misc: mic: SCIF RMA nodeqp and minor miscellaneous changes

From: Sudeep Dutt <sudeep.dutt@...el.com>

This patch adds the SCIF kernel node QP control messages required to
enable SCIF RMAs. Examples of such node QP control messages include
registration, unregistration, remote memory allocation requests,
remote memory unmap and SCIF remote fence requests.

The patch also updates the SCIF driver with minor changes required to
enable SCIF RMAs by adding the new files to the build, initializing
RMA specific information during SCIF endpoint creation, reserving SCIF
DMA channels, initializing SCIF RMA specific global data structures,
adding the IOCTL hooks required for SCIF RMAs and updating RMA
specific debugfs hooks.

Reviewed-by: Ashutosh Dixit <ashutosh.dixit@...el.com>
Reviewed-by: Nikhil Rao <nikhil.rao@...el.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@...el.com>
---
 drivers/misc/mic/Kconfig              |   1 +
 drivers/misc/mic/scif/Makefile        |   5 +
 drivers/misc/mic/scif/scif_api.c      |  33 +++++--
 drivers/misc/mic/scif/scif_debugfs.c  |  85 ++++++++++++++++-
 drivers/misc/mic/scif/scif_epd.c      |  26 +++---
 drivers/misc/mic/scif/scif_epd.h      |  28 ++++++
 drivers/misc/mic/scif/scif_fd.c       | 169 +++++++++++++++++++++++++++++++++-
 drivers/misc/mic/scif/scif_main.c     |  23 ++++-
 drivers/misc/mic/scif/scif_main.h     |  30 +++++-
 drivers/misc/mic/scif/scif_map.h      |  25 ++++-
 drivers/misc/mic/scif/scif_nm.c       |  10 +-
 drivers/misc/mic/scif/scif_nodeqp.c   |  65 +++++++++++--
 drivers/misc/mic/scif/scif_nodeqp.h   |  42 ++++++++-
 drivers/misc/mic/scif/scif_peer_bus.c |  14 +++
 14 files changed, 516 insertions(+), 40 deletions(-)

diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
index 1488232..60376fb 100644
--- a/drivers/misc/mic/Kconfig
+++ b/drivers/misc/mic/Kconfig
@@ -75,6 +75,7 @@ comment "SCIF Driver"
 config SCIF
 	tristate "SCIF Driver"
 	depends on 64BIT && PCI && X86 && SCIF_BUS
+	select IOMMU_IOVA
 	help
 	  This enables SCIF Driver support for the Intel Many Integrated
 	  Core (MIC) family of PCIe form factor coprocessor devices that
diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile
index bf10bb7..29cfc3e 100644
--- a/drivers/misc/mic/scif/Makefile
+++ b/drivers/misc/mic/scif/Makefile
@@ -13,3 +13,8 @@ scif-objs += scif_epd.o
 scif-objs += scif_rb.o
 scif-objs += scif_nodeqp.o
 scif-objs += scif_nm.o
+scif-objs += scif_dma.o
+scif-objs += scif_fence.o
+scif-objs += scif_mmap.o
+scif-objs += scif_rma.o
+scif-objs += scif_rma_list.o
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
index b47d56d..ddc9e4b 100644
--- a/drivers/misc/mic/scif/scif_api.c
+++ b/drivers/misc/mic/scif/scif_api.c
@@ -70,6 +70,7 @@ scif_epd_t scif_open(void)
 	mutex_init(&ep->sendlock);
 	mutex_init(&ep->recvlock);
 
+	scif_rma_ep_init(ep);
 	ep->state = SCIFEP_UNBOUND;
 	dev_dbg(scif_info.mdev.this_device,
 		"SCIFAPI open: ep %p success\n", ep);
@@ -184,8 +185,11 @@ int scif_close(scif_epd_t epd)
 
 	switch (oldstate) {
 	case SCIFEP_ZOMBIE:
+		dev_err(scif_info.mdev.this_device,
+			"SCIFAPI close: zombie state unexpected\n");
 	case SCIFEP_DISCONNECTED:
 		spin_unlock(&ep->lock);
+		scif_unregister_all_windows(epd);
 		/* Remove from the disconnected list */
 		mutex_lock(&scif_info.connlock);
 		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
@@ -207,6 +211,7 @@ int scif_close(scif_epd_t epd)
 	case SCIFEP_CLOSING:
 	{
 		spin_unlock(&ep->lock);
+		scif_unregister_all_windows(epd);
 		scif_disconnect_ep(ep);
 		break;
 	}
@@ -218,7 +223,7 @@ int scif_close(scif_epd_t epd)
 		struct scif_endpt *aep;
 
 		spin_unlock(&ep->lock);
-		spin_lock(&scif_info.eplock);
+		mutex_lock(&scif_info.eplock);
 
 		/* remove from listen list */
 		list_for_each_safe(pos, tmpq, &scif_info.listen) {
@@ -240,7 +245,7 @@ int scif_close(scif_epd_t epd)
 					break;
 				}
 			}
-			spin_unlock(&scif_info.eplock);
+			mutex_unlock(&scif_info.eplock);
 			mutex_lock(&scif_info.connlock);
 			list_for_each_safe(pos, tmpq, &scif_info.connected) {
 				tmpep = list_entry(pos,
@@ -260,13 +265,13 @@ int scif_close(scif_epd_t epd)
 			}
 			mutex_unlock(&scif_info.connlock);
 			scif_teardown_ep(aep);
-			spin_lock(&scif_info.eplock);
+			mutex_lock(&scif_info.eplock);
 			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
 			ep->acceptcnt--;
 		}
 
 		spin_lock(&ep->lock);
-		spin_unlock(&scif_info.eplock);
+		mutex_unlock(&scif_info.eplock);
 
 		/* Remove and reject any pending connection requests. */
 		while (ep->conreqcnt) {
@@ -428,9 +433,9 @@ int scif_listen(scif_epd_t epd, int backlog)
 	scif_teardown_ep(ep);
 	ep->qp_info.qp = NULL;
 
-	spin_lock(&scif_info.eplock);
+	mutex_lock(&scif_info.eplock);
 	list_add_tail(&ep->list, &scif_info.listen);
-	spin_unlock(&scif_info.eplock);
+	mutex_unlock(&scif_info.eplock);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(scif_listen);
@@ -469,6 +474,13 @@ static int scif_conn_func(struct scif_endpt *ep)
 	struct scifmsg msg;
 	struct device *spdev;
 
+	err = scif_reserve_dma_chan(ep);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		ep->state = SCIFEP_BOUND;
+		goto connect_error_simple;
+	}
 	/* Initiate the first part of the endpoint QP setup */
 	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
 				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
@@ -804,6 +816,15 @@ retry_connection:
 	cep->remote_dev = &scif_dev[peer->node];
 	cep->remote_ep = conreq->msg.payload[0];
 
+	scif_rma_ep_init(cep);
+
+	err = scif_reserve_dma_chan(cep);
+	if (err) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto scif_accept_error_qpalloc;
+	}
+
 	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
 	if (!cep->qp_info.qp) {
 		err = -ENOMEM;
diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c
index 51f14e2..6884dad 100644
--- a/drivers/misc/mic/scif/scif_debugfs.c
+++ b/drivers/misc/mic/scif/scif_debugfs.c
@@ -62,10 +62,87 @@ static const struct file_operations scif_dev_ops = {
 	.release = scif_dev_test_release
 };
 
-void __init scif_init_debugfs(void)
+static void scif_display_window(struct scif_window *window, struct seq_file *s)
+{
+	int j;
+	struct scatterlist *sg;
+	scif_pinned_pages_t pin = window->pinned_pages;
+
+	seq_printf(s, "window %p type %d temp %d offset 0x%llx ",
+		   window, window->type, window->temp, window->offset);
+	seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ",
+		   window->nr_pages, window->nr_contig_chunks, window->prot);
+	seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ",
+		   window->ref_count, window->magic, window->peer_window);
+	seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n",
+		   window->unreg_state, window->va_for_temp);
+
+	for (j = 0; j < window->nr_contig_chunks; j++)
+		seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j,
+			   window->dma_addr[j], window->num_pages[j]);
+
+	if (window->type == SCIF_WINDOW_SELF && pin)
+		for (j = 0; j < window->nr_pages; j++)
+			seq_printf(s, "page[%d] = pinned_pages %p address %p\n",
+				   j, pin->pages[j],
+				   page_address(pin->pages[j]));
+
+	if (window->st)
+		for_each_sg(window->st->sgl, sg, window->st->nents, j)
+			seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n",
+				   j, sg_dma_address(sg), sg_dma_len(sg));
+}
+
+static void scif_display_all_windows(struct list_head *head, struct seq_file *s)
 {
-	struct dentry *d;
+	struct list_head *item;
+	struct scif_window *window;
 
+	list_for_each(item, head) {
+		window = list_entry(item, struct scif_window, list);
+		scif_display_window(window, s);
+	}
+}
+
+static int scif_rma_test(struct seq_file *s, void *unused)
+{
+	struct scif_endpt *ep;
+	struct list_head *pos;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each(pos, &scif_info.connected) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		seq_printf(s, "ep %p self windows\n", ep);
+		mutex_lock(&ep->rma_info.rma_lock);
+		scif_display_all_windows(&ep->rma_info.reg_list, s);
+		seq_printf(s, "ep %p remote windows\n", ep);
+		scif_display_all_windows(&ep->rma_info.remote_reg_list, s);
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+	mutex_unlock(&scif_info.connlock);
+	return 0;
+}
+
+static int scif_rma_test_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, scif_rma_test, inode->i_private);
+}
+
+static int scif_rma_test_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations scif_rma_ops = {
+	.owner   = THIS_MODULE,
+	.open    = scif_rma_test_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = scif_rma_test_release
+};
+
+void __init scif_init_debugfs(void)
+{
 	scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
 	if (!scif_dbg) {
 		dev_err(scif_info.mdev.this_device,
@@ -73,8 +150,8 @@ void __init scif_init_debugfs(void)
 		return;
 	}
 
-	d = debugfs_create_file("scif_dev", 0444, scif_dbg,
-				NULL, &scif_dev_ops);
+	debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops);
+	debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops);
 	debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log);
 	debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable);
 }
diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c
index b4bfbb0..00e5d6d 100644
--- a/drivers/misc/mic/scif/scif_epd.c
+++ b/drivers/misc/mic/scif/scif_epd.c
@@ -65,14 +65,14 @@ void scif_teardown_ep(void *endpt)
 void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held)
 {
 	if (!eplock_held)
-		spin_lock(&scif_info.eplock);
+		mutex_lock(&scif_info.eplock);
 	spin_lock(&ep->lock);
 	ep->state = SCIFEP_ZOMBIE;
 	spin_unlock(&ep->lock);
 	list_add_tail(&ep->list, &scif_info.zombie);
 	scif_info.nr_zombies++;
 	if (!eplock_held)
-		spin_unlock(&scif_info.eplock);
+		mutex_unlock(&scif_info.eplock);
 	schedule_work(&scif_info.misc_work);
 }
 
@@ -81,16 +81,15 @@ static struct scif_endpt *scif_find_listen_ep(u16 port)
 	struct scif_endpt *ep = NULL;
 	struct list_head *pos, *tmpq;
 
-	spin_lock(&scif_info.eplock);
+	mutex_lock(&scif_info.eplock);
 	list_for_each_safe(pos, tmpq, &scif_info.listen) {
 		ep = list_entry(pos, struct scif_endpt, list);
 		if (ep->port.port == port) {
-			spin_lock(&ep->lock);
-			spin_unlock(&scif_info.eplock);
+			mutex_unlock(&scif_info.eplock);
 			return ep;
 		}
 	}
-	spin_unlock(&scif_info.eplock);
+	mutex_unlock(&scif_info.eplock);
 	return NULL;
 }
 
@@ -99,14 +98,17 @@ void scif_cleanup_zombie_epd(void)
 	struct list_head *pos, *tmpq;
 	struct scif_endpt *ep;
 
-	spin_lock(&scif_info.eplock);
+	mutex_lock(&scif_info.eplock);
 	list_for_each_safe(pos, tmpq, &scif_info.zombie) {
 		ep = list_entry(pos, struct scif_endpt, list);
-		list_del(pos);
-		scif_info.nr_zombies--;
-		kfree(ep);
+		if (scif_rma_ep_can_uninit(ep)) {
+			list_del(pos);
+			scif_info.nr_zombies--;
+			put_iova_domain(&ep->rma_info.iovad);
+			kfree(ep);
+		}
 	}
-	spin_unlock(&scif_info.eplock);
+	mutex_unlock(&scif_info.eplock);
 }
 
 /**
@@ -137,6 +139,8 @@ void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg)
 	if (!ep)
 		/*  Send reject due to no listening ports */
 		goto conreq_sendrej_free;
+	else
+		spin_lock(&ep->lock);
 
 	if (ep->backlog <= ep->conreqcnt) {
 		/*  Send reject due to too many pending requests */
diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h
index 4dc4ccd..1771d7a 100644
--- a/drivers/misc/mic/scif/scif_epd.h
+++ b/drivers/misc/mic/scif/scif_epd.h
@@ -98,6 +98,8 @@ struct scif_endpt_qp_info {
  * @conn_async_state: Async connection
  * @conn_pend_wq: Used by poll while waiting for incoming connections
  * @conn_list: List of async connection requests
+ * @rma_info: Information for triggering SCIF RMA and DMA operations
+ * @mmu_list: link to list of MMU notifier cleanup work
  * @anon: anonymous file for use in kernel mode scif poll
  */
 struct scif_endpt {
@@ -129,6 +131,8 @@ struct scif_endpt {
 	int conn_async_state;
 	wait_queue_head_t conn_pend_wq;
 	struct list_head conn_list;
+	struct scif_endpt_rma_info rma_info;
+	struct list_head mmu_list;
 	struct file *anon;
 };
 
@@ -137,6 +141,27 @@ static inline int scifdev_alive(struct scif_endpt *ep)
 	return _scifdev_alive(ep->remote_dev);
 }
 
+/*
+ * scif_verify_epd:
+ * ep: SCIF endpoint
+ *
+ * Checks several generic error conditions and returns the
+ * appropriate error.
+ */
+static inline int scif_verify_epd(struct scif_endpt *ep)
+{
+	if (ep->state == SCIFEP_DISCONNECTED)
+		return -ECONNRESET;
+
+	if (ep->state != SCIFEP_CONNECTED)
+		return -ENOTCONN;
+
+	if (!scifdev_alive(ep))
+		return -ENODEV;
+
+	return 0;
+}
+
 static inline int scif_anon_inode_getfile(scif_epd_t epd)
 {
 	epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0);
@@ -177,6 +202,9 @@ void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg);
 void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg);
 int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block);
 int __scif_flush(scif_epd_t epd);
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd);
 unsigned int __scif_pollfd(struct file *f, poll_table *wait,
 			   struct scif_endpt *ep);
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+		     int map_flags, scif_pinned_pages_t *pages);
 #endif /* SCIF_EPD_H */
diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c
index 24e47f7..f7e8261 100644
--- a/drivers/misc/mic/scif/scif_fd.c
+++ b/drivers/misc/mic/scif/scif_fd.c
@@ -34,6 +34,13 @@ static int scif_fdclose(struct inode *inode, struct file *f)
 	return scif_close(priv);
 }
 
+static int scif_fdmmap(struct file *f, struct vm_area_struct *vma)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return scif_mmap(vma, priv);
+}
+
 static unsigned int scif_fdpoll(struct file *f, poll_table *wait)
 {
 	struct scif_endpt *priv = f->private_data;
@@ -147,12 +154,12 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		 * Add to the list of user mode eps where the second half
 		 * of the accept is not yet completed.
 		 */
-		spin_lock(&scif_info.eplock);
+		mutex_lock(&scif_info.eplock);
 		list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept);
 		list_add_tail(&((*ep)->liacceptlist), &priv->li_accept);
 		(*ep)->listenep = priv;
 		priv->acceptcnt++;
-		spin_unlock(&scif_info.eplock);
+		mutex_unlock(&scif_info.eplock);
 
 		return 0;
 	}
@@ -170,7 +177,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EFAULT;
 
 		/* Remove form the user accept queue */
-		spin_lock(&scif_info.eplock);
+		mutex_lock(&scif_info.eplock);
 		list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
 			tmpep = list_entry(pos,
 					   struct scif_endpt, miacceptlist);
@@ -182,7 +189,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		}
 
 		if (!fep) {
-			spin_unlock(&scif_info.eplock);
+			mutex_unlock(&scif_info.eplock);
 			return -ENOENT;
 		}
 
@@ -197,7 +204,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			}
 		}
 
-		spin_unlock(&scif_info.eplock);
+		mutex_unlock(&scif_info.eplock);
 
 		/* Free the resources automatically created from the open. */
 		scif_anon_inode_fput(priv);
@@ -298,6 +305,157 @@ getnodes_err1:
 getnodes_err2:
 		return err;
 	}
+	case SCIF_REG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_reg reg;
+		off_t ret;
+
+		if (copy_from_user(&reg, argp, sizeof(reg))) {
+			err = -EFAULT;
+			goto reg_err;
+		}
+		if (reg.flags & SCIF_MAP_KERNEL) {
+			err = -EINVAL;
+			goto reg_err;
+		}
+		ret = scif_register(priv, (void *)reg.addr, reg.len,
+				    reg.offset, reg.prot, reg.flags);
+		if (ret < 0) {
+			err = (int)ret;
+			goto reg_err;
+		}
+
+		if (copy_to_user(&((struct scifioctl_reg __user *)argp)
+				 ->out_offset, &ret, sizeof(reg.out_offset))) {
+			err = -EFAULT;
+			goto reg_err;
+		}
+		err = 0;
+reg_err:
+		scif_err_debug(err, "scif_register");
+		return err;
+	}
+	case SCIF_UNREG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_unreg unreg;
+
+		if (copy_from_user(&unreg, argp, sizeof(unreg))) {
+			err = -EFAULT;
+			goto unreg_err;
+		}
+		err = scif_unregister(priv, unreg.offset, unreg.len);
+unreg_err:
+		scif_err_debug(err, "scif_unregister");
+		return err;
+	}
+	case SCIF_READFROM:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto readfrom_err;
+		}
+		err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset,
+				    copy.flags);
+readfrom_err:
+		scif_err_debug(err, "scif_readfrom");
+		return err;
+	}
+	case SCIF_WRITETO:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto writeto_err;
+		}
+		err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset,
+				   copy.flags);
+writeto_err:
+		scif_err_debug(err, "scif_writeto");
+		return err;
+	}
+	case SCIF_VREADFROM:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto vreadfrom_err;
+		}
+		err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len,
+				     copy.roffset, copy.flags);
+vreadfrom_err:
+		scif_err_debug(err, "scif_vreadfrom");
+		return err;
+	}
+	case SCIF_VWRITETO:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto vwriteto_err;
+		}
+		err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len,
+				    copy.roffset, copy.flags);
+vwriteto_err:
+		scif_err_debug(err, "scif_vwriteto");
+		return err;
+	}
+	case SCIF_FENCE_MARK:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_fence_mark mark;
+		int tmp_mark = 0;
+
+		if (copy_from_user(&mark, argp, sizeof(mark))) {
+			err = -EFAULT;
+			goto fence_mark_err;
+		}
+		err = scif_fence_mark(priv, mark.flags, &tmp_mark);
+		if (err)
+			goto fence_mark_err;
+		if (copy_to_user((void __user *)mark.mark, &tmp_mark,
+				 sizeof(tmp_mark))) {
+			err = -EFAULT;
+			goto fence_mark_err;
+		}
+fence_mark_err:
+		scif_err_debug(err, "scif_fence_mark");
+		return err;
+	}
+	case SCIF_FENCE_WAIT:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		err = scif_fence_wait(priv, arg);
+		scif_err_debug(err, "scif_fence_wait");
+		return err;
+	}
+	case SCIF_FENCE_SIGNAL:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_fence_signal signal;
+
+		if (copy_from_user(&signal, argp, sizeof(signal))) {
+			err = -EFAULT;
+			goto fence_signal_err;
+		}
+
+		err = scif_fence_signal(priv, signal.loff, signal.lval,
+					signal.roff, signal.rval, signal.flags);
+fence_signal_err:
+		scif_err_debug(err, "scif_fence_signal");
+		return err;
+	}
 	}
 	return -EINVAL;
 }
@@ -306,6 +464,7 @@ const struct file_operations scif_fops = {
 	.open = scif_fdopen,
 	.release = scif_fdclose,
 	.unlocked_ioctl = scif_fdioctl,
+	.mmap = scif_fdmmap,
 	.poll = scif_fdpoll,
 	.flush = scif_fdflush,
 	.owner = THIS_MODULE,
diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c
index f90bd06..36d847a 100644
--- a/drivers/misc/mic/scif/scif_main.c
+++ b/drivers/misc/mic/scif/scif_main.c
@@ -34,6 +34,7 @@ struct scif_info scif_info = {
 };
 
 struct scif_dev *scif_dev;
+struct kmem_cache *unaligned_cache;
 static atomic_t g_loopb_cnt;
 
 /* Runs in the context of intr_wq */
@@ -263,27 +264,44 @@ static int _scif_init(void)
 {
 	int rc;
 
-	spin_lock_init(&scif_info.eplock);
+	mutex_init(&scif_info.eplock);
+	spin_lock_init(&scif_info.rmalock);
 	spin_lock_init(&scif_info.nb_connect_lock);
 	spin_lock_init(&scif_info.port_lock);
 	mutex_init(&scif_info.conflock);
 	mutex_init(&scif_info.connlock);
+	mutex_init(&scif_info.fencelock);
 	INIT_LIST_HEAD(&scif_info.uaccept);
 	INIT_LIST_HEAD(&scif_info.listen);
 	INIT_LIST_HEAD(&scif_info.zombie);
 	INIT_LIST_HEAD(&scif_info.connected);
 	INIT_LIST_HEAD(&scif_info.disconnected);
+	INIT_LIST_HEAD(&scif_info.rma);
+	INIT_LIST_HEAD(&scif_info.rma_tc);
+	INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup);
+	INIT_LIST_HEAD(&scif_info.fence);
 	INIT_LIST_HEAD(&scif_info.nb_connect_list);
 	init_waitqueue_head(&scif_info.exitwq);
+	scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT;
 	scif_info.en_msg_log = 0;
 	scif_info.p2p_enable = 1;
 	rc = scif_setup_scifdev();
 	if (rc)
 		goto error;
+	unaligned_cache = kmem_cache_create("Unaligned_DMA",
+					    SCIF_KMEM_UNALIGNED_BUF_SIZE,
+					    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!unaligned_cache) {
+		rc = -ENOMEM;
+		goto free_sdev;
+	}
 	INIT_WORK(&scif_info.misc_work, scif_misc_handler);
+	INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler);
 	INIT_WORK(&scif_info.conn_work, scif_conn_handler);
 	idr_init(&scif_ports);
 	return 0;
+free_sdev:
+	scif_destroy_scifdev();
 error:
 	return rc;
 }
@@ -291,6 +309,7 @@ error:
 static void _scif_exit(void)
 {
 	idr_destroy(&scif_ports);
+	kmem_cache_destroy(unaligned_cache);
 	scif_destroy_scifdev();
 }
 
@@ -300,6 +319,7 @@ static int __init scif_init(void)
 	int rc;
 
 	_scif_init();
+	iova_cache_get();
 	rc = scif_peer_bus_init();
 	if (rc)
 		goto exit;
@@ -326,6 +346,7 @@ static void __exit scif_exit(void)
 	misc_deregister(&scif_info.mdev);
 	scif_unregister_driver(&scif_driver);
 	scif_peer_bus_exit();
+	iova_cache_put();
 	_scif_exit();
 }
 
diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h
index b0795b0..a08f0b6 100644
--- a/drivers/misc/mic/scif/scif_main.h
+++ b/drivers/misc/mic/scif/scif_main.h
@@ -22,16 +22,18 @@
 #include <linux/pci.h>
 #include <linux/miscdevice.h>
 #include <linux/dmaengine.h>
+#include <linux/iova.h>
 #include <linux/anon_inodes.h>
 #include <linux/file.h>
+#include <linux/vmalloc.h>
 #include <linux/scif.h>
-
 #include "../common/mic_dev.h"
 
 #define SCIF_MGMT_NODE 0
 #define SCIF_DEFAULT_WATCHDOG_TO 30
 #define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ)
 #define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ)
+#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000
 
 /*
  * Generic state used for certain node QP message exchanges
@@ -74,13 +76,21 @@ enum scif_msg_state {
  * @loopb_work: Used for submitting work to loopb_wq
  * @loopb_recv_q: List of messages received on the loopb_wq
  * @card_initiated_exit: set when the card has initiated the exit
+ * @rmalock: Synchronize access to RMA operations
+ * @fencelock: Synchronize access to list of remote fences requested.
+ * @rma: List of temporary registered windows to be destroyed.
+ * @rma_tc: List of temporary registered & cached Windows to be destroyed
+ * @fence: List of remote fence requests
+ * @mmu_notif_work: Work for registration caching MMU notifier workqueue
+ * @mmu_notif_cleanup: List of temporary cached windows for reg cache
+ * @rma_tc_limit: RMA temporary cache limit
  */
 struct scif_info {
 	u8 nodeid;
 	u8 maxid;
 	u8 total;
 	u32 nr_zombies;
-	spinlock_t eplock;
+	struct mutex eplock;
 	struct mutex connlock;
 	spinlock_t nb_connect_lock;
 	spinlock_t port_lock;
@@ -103,6 +113,14 @@ struct scif_info {
 	struct work_struct loopb_work;
 	struct list_head loopb_recv_q;
 	bool card_initiated_exit;
+	spinlock_t rmalock;
+	struct mutex fencelock;
+	struct list_head rma;
+	struct list_head rma_tc;
+	struct list_head fence;
+	struct work_struct mmu_notif_work;
+	struct list_head mmu_notif_cleanup;
+	unsigned long rma_tc_limit;
 };
 
 /*
@@ -153,6 +171,8 @@ struct scif_p2p_info {
  * @disconn_rescnt: Keeps track of number of node remove requests sent
  * @exit: Status of exit message
  * @qp_dma_addr: Queue pair DMA address passed to the peer
+ * @dma_ch_idx: Round robin index for DMA channels
+ * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's
 */
 struct scif_dev {
 	u8 node;
@@ -179,8 +199,12 @@ struct scif_dev {
 	atomic_t disconn_rescnt;
 	enum scif_msg_state exit;
 	dma_addr_t qp_dma_addr;
+	int dma_ch_idx;
+	struct dma_pool *signal_pool;
 };
 
+extern bool scif_reg_cache_enable;
+extern bool scif_ulimit_check;
 extern struct scif_info scif_info;
 extern struct idr scif_ports;
 extern struct bus_type scif_peer_bus;
@@ -192,6 +216,8 @@ extern const struct file_operations scif_anon_fops;
 #define SCIF_NODE_QP_SIZE 0x10000
 
 #include "scif_nodeqp.h"
+#include "scif_rma.h"
+#include "scif_rma_list.h"
 
 /*
  * scifdev_self:
diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h
index 20e50b4..3e86360 100644
--- a/drivers/misc/mic/scif/scif_map.h
+++ b/drivers/misc/mic/scif/scif_map.h
@@ -80,7 +80,7 @@ scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev,
 		  size_t size)
 {
 	if (!scifdev_self(scifdev)) {
-		if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr)
+		if (scifdev_is_p2p(scifdev))
 			local = local - scifdev->base_addr;
 		dma_unmap_single(&scifdev->sdev->dev, local,
 				 size, DMA_BIDIRECTIONAL);
@@ -110,4 +110,27 @@ scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev)
 		sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt);
 	}
 }
+
+static __always_inline int
+scif_map_page(dma_addr_t *dma_handle, struct page *page,
+	      struct scif_dev *scifdev)
+{
+	int err = 0;
+
+	if (scifdev_self(scifdev)) {
+		*dma_handle = page_to_phys(page);
+	} else {
+		struct scif_hw_dev *sdev = scifdev->sdev;
+		*dma_handle = dma_map_page(&sdev->dev,
+					   page, 0x0, PAGE_SIZE,
+					   DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&sdev->dev, *dma_handle))
+			err = -ENOMEM;
+		else if (scifdev_is_p2p(scifdev))
+			*dma_handle = *dma_handle + scifdev->base_addr;
+	}
+	if (err)
+		*dma_handle = 0;
+	return err;
+}
 #endif  /* SCIF_MAP_H */
diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c
index f1b1b97..79f26a0 100644
--- a/drivers/misc/mic/scif/scif_nm.c
+++ b/drivers/misc/mic/scif/scif_nm.c
@@ -34,6 +34,7 @@ static void scif_invalidate_ep(int node)
 	list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
 		ep = list_entry(pos, struct scif_endpt, list);
 		if (ep->remote_dev->node == node) {
+			scif_unmap_all_windows(ep);
 			spin_lock(&ep->lock);
 			scif_cleanup_ep_qp(ep);
 			spin_unlock(&ep->lock);
@@ -50,6 +51,7 @@ static void scif_invalidate_ep(int node)
 			wake_up_interruptible(&ep->sendwq);
 			wake_up_interruptible(&ep->recvwq);
 			spin_unlock(&ep->lock);
+			scif_unmap_all_windows(ep);
 		}
 	}
 	mutex_unlock(&scif_info.connlock);
@@ -61,8 +63,8 @@ void scif_free_qp(struct scif_dev *scifdev)
 
 	if (!qp)
 		return;
-	scif_free_coherent((void *)qp->inbound_q.rb_base,
-			   qp->local_buf, scifdev, qp->inbound_q.size);
+	scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size);
+	kfree(qp->inbound_q.rb_base);
 	scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp));
 	kfree(scifdev->qpairs);
 	scifdev->qpairs = NULL;
@@ -125,8 +127,12 @@ void scif_cleanup_scifdev(struct scif_dev *dev)
 		}
 		scif_destroy_intr_wq(dev);
 	}
+	flush_work(&scif_info.misc_work);
 	scif_destroy_p2p(dev);
 	scif_invalidate_ep(dev->node);
+	scif_zap_mmaps(dev->node);
+	scif_cleanup_rma_for_zombies(dev->node);
+	flush_work(&scif_info.misc_work);
 	scif_send_acks(dev);
 	if (!dev->node && scif_info.card_initiated_exit) {
 		/*
diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c
index 5cdb9f0..7180d56 100644
--- a/drivers/misc/mic/scif/scif_nodeqp.c
+++ b/drivers/misc/mic/scif/scif_nodeqp.c
@@ -105,18 +105,22 @@
 int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
 			  int local_size, struct scif_dev *scifdev)
 {
-	void *local_q = NULL;
+	void *local_q = qp->inbound_q.rb_base;
 	int err = 0;
 	u32 tmp_rd = 0;
 
 	spin_lock_init(&qp->send_lock);
 	spin_lock_init(&qp->recv_lock);
 
-	local_q = kzalloc(local_size, GFP_KERNEL);
+	/* Allocate rb only if not already allocated */
 	if (!local_q) {
-		err = -ENOMEM;
-		return err;
+		local_q = kzalloc(local_size, GFP_KERNEL);
+		if (!local_q) {
+			err = -ENOMEM;
+			return err;
+		}
 	}
+
 	err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
 	if (err)
 		goto kfree;
@@ -552,7 +556,29 @@ static char *message_types[] = {"BAD",
 				"DISCNT_ACK",
 				"CLIENT_SENT",
 				"CLIENT_RCVD",
-				"SCIF_GET_NODE_INFO"};
+				"SCIF_GET_NODE_INFO",
+				"REGISTER",
+				"REGISTER_ACK",
+				"REGISTER_NACK",
+				"UNREGISTER",
+				"UNREGISTER_ACK",
+				"UNREGISTER_NACK",
+				"ALLOC_REQ",
+				"ALLOC_GNT",
+				"ALLOC_REJ",
+				"FREE_PHYS",
+				"FREE_VIRT",
+				"MUNMAP",
+				"MARK",
+				"MARK_ACK",
+				"MARK_NACK",
+				"WAIT",
+				"WAIT_ACK",
+				"WAIT_NACK",
+				"SIGNAL_LOCAL",
+				"SIGNAL_REMOTE",
+				"SIG_ACK",
+				"SIG_NACK"};
 
 static void
 scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg,
@@ -646,10 +672,16 @@ int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
  *
  * Work queue handler for servicing miscellaneous SCIF tasks.
  * Examples include:
- * 1) Cleanup of zombie endpoints.
+ * 1) Remote fence requests.
+ * 2) Destruction of temporary registered windows
+ *    created during scif_vreadfrom()/scif_vwriteto().
+ * 3) Cleanup of zombie endpoints.
  */
 void scif_misc_handler(struct work_struct *work)
 {
+	scif_rma_handle_remote_fences();
+	scif_rma_destroy_windows();
+	scif_rma_destroy_tcw_invalid();
 	scif_cleanup_zombie_epd();
 }
 
@@ -995,6 +1027,27 @@ static void (*scif_intr_func[SCIF_MAX_MSG + 1])
 	scif_clientsend,	/* SCIF_CLIENT_SENT */
 	scif_clientrcvd,	/* SCIF_CLIENT_RCVD */
 	scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */
+	scif_recv_reg,		/* SCIF_REGISTER */
+	scif_recv_reg_ack,	/* SCIF_REGISTER_ACK */
+	scif_recv_reg_nack,	/* SCIF_REGISTER_NACK */
+	scif_recv_unreg,	/* SCIF_UNREGISTER */
+	scif_recv_unreg_ack,	/* SCIF_UNREGISTER_ACK */
+	scif_recv_unreg_nack,	/* SCIF_UNREGISTER_NACK */
+	scif_alloc_req,		/* SCIF_ALLOC_REQ */
+	scif_alloc_gnt_rej,	/* SCIF_ALLOC_GNT */
+	scif_alloc_gnt_rej,	/* SCIF_ALLOC_REJ */
+	scif_free_virt,		/* SCIF_FREE_VIRT */
+	scif_recv_munmap,	/* SCIF_MUNMAP */
+	scif_recv_mark,		/* SCIF_MARK */
+	scif_recv_mark_resp,	/* SCIF_MARK_ACK */
+	scif_recv_mark_resp,	/* SCIF_MARK_NACK */
+	scif_recv_wait,		/* SCIF_WAIT */
+	scif_recv_wait_resp,	/* SCIF_WAIT_ACK */
+	scif_recv_wait_resp,	/* SCIF_WAIT_NACK */
+	scif_recv_sig_local,	/* SCIF_SIG_LOCAL */
+	scif_recv_sig_remote,	/* SCIF_SIG_REMOTE */
+	scif_recv_sig_resp,	/* SCIF_SIG_ACK */
+	scif_recv_sig_resp,	/* SCIF_SIG_NACK */
 };
 
 /**
diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h
index 6c0ed67..9589627 100644
--- a/drivers/misc/mic/scif/scif_nodeqp.h
+++ b/drivers/misc/mic/scif/scif_nodeqp.h
@@ -74,7 +74,28 @@
 #define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */
 #define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */
 #define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/
-#define SCIF_MAX_MSG SCIF_GET_NODE_INFO
+#define SCIF_REGISTER 19 /* Tell peer about a new registered window */
+#define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */
+#define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */
+#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */
+#define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */
+#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */
+#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */
+#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */
+#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */
+#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */
+#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */
+#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */
+#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */
+#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */
+#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */
+#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */
+#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */
+#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */
+#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */
+#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */
+#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */
+#define SCIF_MAX_MSG SCIF_SIG_NACK
 
 /*
  * struct scifmsg - Node QP message format
@@ -92,6 +113,24 @@ struct scifmsg {
 } __packed;
 
 /*
+ * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request
+ * the remote note to allocate memory
+ *
+ * phys_addr: Physical address of the buffer
+ * vaddr: Virtual address of the buffer
+ * size: Size of the buffer
+ * state: Current state
+ * allocwq: wait queue for status
+ */
+struct scif_allocmsg {
+	dma_addr_t phys_addr;
+	unsigned long vaddr;
+	size_t size;
+	enum scif_msg_state state;
+	wait_queue_head_t allocwq;
+};
+
+/*
  * struct scif_qp - Node Queue Pair
  *
  * Interesting structure -- a little difficult because we can only
@@ -158,7 +197,6 @@ int scif_setup_qp_connect_response(struct scif_dev *scifdev,
 int scif_setup_loopback_qp(struct scif_dev *scifdev);
 int scif_destroy_loopback_qp(struct scif_dev *scifdev);
 void scif_poll_qp_state(struct work_struct *work);
-void scif_qp_response_ack(struct work_struct *work);
 void scif_destroy_p2p(struct scif_dev *scifdev);
 void scif_send_exit(struct scif_dev *scifdev);
 static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev)
diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c
index 547bf7b..6ffa3bd 100644
--- a/drivers/misc/mic/scif/scif_peer_bus.c
+++ b/drivers/misc/mic/scif/scif_peer_bus.c
@@ -72,6 +72,7 @@ err:
 static int scif_peer_add_device(struct scif_dev *scifdev)
 {
 	struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev);
+	char pool_name[16];
 	int ret;
 
 	ret = device_add(&spdev->dev);
@@ -81,8 +82,21 @@ static int scif_peer_add_device(struct scif_dev *scifdev)
 			"dnode %d: peer device_add failed\n", scifdev->node);
 		goto put_spdev;
 	}
+
+	scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode);
+	scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev,
+						sizeof(struct scif_status), 1,
+						0);
+	if (!scifdev->signal_pool) {
+		dev_err(&scifdev->sdev->dev,
+			"dnode %d: dmam_pool_create failed\n", scifdev->node);
+		ret = -ENOMEM;
+		goto del_spdev;
+	}
 	dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode);
 	return 0;
+del_spdev:
+	device_del(&spdev->dev);
 put_spdev:
 	RCU_INIT_POINTER(scifdev->spdev, NULL);
 	synchronize_rcu();
-- 
2.0.0.rc3.2.g998f840

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ