[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.DEB.2.10.1706141219350.12156@sstabellini-ThinkPad-X260>
Date: Wed, 14 Jun 2017 12:27:14 -0700 (PDT)
From: Stefano Stabellini <sstabellini@...nel.org>
To: Juergen Gross <jgross@...e.com>
cc: Stefano Stabellini <sstabellini@...nel.org>,
xen-devel@...ts.xen.org, linux-kernel@...r.kernel.org,
boris.ostrovsky@...cle.com,
Stefano Stabellini <stefano@...reto.com>
Subject: Re: [PATCH v3 11/18] xen/pvcalls: implement accept command
On Wed, 14 Jun 2017, Juergen Gross wrote:
> On 14/06/17 02:47, Stefano Stabellini wrote:
> > On Tue, 13 Jun 2017, Juergen Gross wrote:
> >> On 02/06/17 21:31, Stefano Stabellini wrote:
> >>> Implement the accept command by calling inet_accept. To avoid blocking
> >>> in the kernel, call inet_accept(O_NONBLOCK) from a workqueue, which gets
> >>> scheduled on sk_data_ready (for a passive socket, it means that there
> >>> are connections to accept).
> >>>
> >>> Use the reqcopy field to store the request. Accept the new socket from
> >>> the delayed work function, create a new sock_mapping for it, map
> >>> the indexes page and data ring, and reply to the other end. Allocate an
> >>> ioworker for the socket.
> >>>
> >>> Only support one outstanding blocking accept request for every socket at
> >>> any time.
> >>>
> >>> Add a field to sock_mapping to remember the passive socket from which an
> >>> active socket was created.
> >>>
> >>> Signed-off-by: Stefano Stabellini <stefano@...reto.com>
> >>> CC: boris.ostrovsky@...cle.com
> >>> CC: jgross@...e.com
> >>> ---
> >>> drivers/xen/pvcalls-back.c | 109 ++++++++++++++++++++++++++++++++++++++++++++-
> >>> 1 file changed, 108 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
> >>> index a75586e..f1173f4 100644
> >>> --- a/drivers/xen/pvcalls-back.c
> >>> +++ b/drivers/xen/pvcalls-back.c
> >>> @@ -65,6 +65,7 @@ struct pvcalls_ioworker {
> >>> struct sock_mapping {
> >>> struct list_head list;
> >>> struct pvcalls_fedata *priv;
> >>> + struct sockpass_mapping *sockpass;
> >>> struct socket *sock;
> >>> uint64_t id;
> >>> grant_ref_t ref;
> >>> @@ -275,10 +276,79 @@ static int pvcalls_back_release(struct xenbus_device *dev,
> >>>
> >>> static void __pvcalls_back_accept(struct work_struct *work)
> >>> {
> >>> + struct sockpass_mapping *mappass = container_of(
> >>> + work, struct sockpass_mapping, register_work);
> >>> + struct sock_mapping *map;
> >>> + struct pvcalls_ioworker *iow;
> >>> + struct pvcalls_fedata *priv;
> >>> + struct socket *sock;
> >>> + struct xen_pvcalls_response *rsp;
> >>> + struct xen_pvcalls_request *req;
> >>> + int notify;
> >>> + int ret = -EINVAL;
> >>> + unsigned long flags;
> >>> +
> >>> + priv = mappass->priv;
> >>> + /* We only need to check the value of "cmd" atomically on read. */
> >>> + spin_lock_irqsave(&mappass->copy_lock, flags);
> >>> + req = &mappass->reqcopy;
> >>> + if (req->cmd != PVCALLS_ACCEPT) {
> >>> + spin_unlock_irqrestore(&mappass->copy_lock, flags);
> >>> + return;
> >>> + }
> >>> + spin_unlock_irqrestore(&mappass->copy_lock, flags);
> >>
> >> What about:
> >> req = &mappass->reqcopy;
> >> if (ACCESS_ONCE(req->cmd) != PVCALLS_ACCEPT)
> >> return;
> >>
> >> I can't see the need for taking a lock here.
> >
> > Sure, good idea
> >
> >
> >>> +
> >>> + sock = sock_alloc();
> >>> + if (sock == NULL)
> >>> + goto out_error;
> >>> + sock->type = mappass->sock->type;
> >>> + sock->ops = mappass->sock->ops;
> >>> +
> >>> + ret = inet_accept(mappass->sock, sock, O_NONBLOCK, true);
> >>> + if (ret == -EAGAIN) {
> >>> + sock_release(sock);
> >>> + goto out_error;
> >>> + }
> >>> +
> >>> + map = pvcalls_new_active_socket(priv,
> >>> + req->u.accept.id_new,
> >>> + req->u.accept.ref,
> >>> + req->u.accept.evtchn,
> >>> + sock);
> >>> + if (!map) {
> >>> + sock_release(sock);
> >>> + goto out_error;
> >>> + }
> >>> +
> >>> + map->sockpass = mappass;
> >>> + iow = &map->ioworker;
> >>> + atomic_inc(&map->read);
> >>> + atomic_inc(&map->io);
> >>> + queue_work_on(iow->cpu, iow->wq, &iow->register_work);
> >>> +
> >>> +out_error:
> >>> + rsp = RING_GET_RESPONSE(&priv->ring, priv->ring.rsp_prod_pvt++);
> >>> + rsp->req_id = req->req_id;
> >>> + rsp->cmd = req->cmd;
> >>> + rsp->u.accept.id = req->u.accept.id;
> >>> + rsp->ret = ret;
> >>> + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&priv->ring, notify);
> >>> + if (notify)
> >>> + notify_remote_via_irq(priv->irq);
> >>> +
> >>> + spin_lock_irqsave(&mappass->copy_lock, flags);
> >>> + mappass->reqcopy.cmd = 0;
> >>> + spin_unlock_irqrestore(&mappass->copy_lock, flags);
> >>
> >> ACCESS_ONCE(mappass->reqcopy.cmd) = 0;
> >
> > OK
> >
> >
> >>> }
> >>>
> >>> static void pvcalls_pass_sk_data_ready(struct sock *sock)
> >>> {
> >>> + struct sockpass_mapping *mappass = sock->sk_user_data;
> >>> +
> >>> + if (mappass == NULL)
> >>> + return;
> >>> +
> >>> + queue_work(mappass->wq, &mappass->register_work);
> >>> }
> >>>
> >>> static int pvcalls_back_bind(struct xenbus_device *dev,
> >>> @@ -380,7 +450,44 @@ static int pvcalls_back_listen(struct xenbus_device *dev,
> >>> static int pvcalls_back_accept(struct xenbus_device *dev,
> >>> struct xen_pvcalls_request *req)
> >>> {
> >>> - return 0;
> >>> + struct pvcalls_fedata *priv;
> >>> + struct sockpass_mapping *mappass;
> >>> + int ret = -EINVAL;
> >>> + struct xen_pvcalls_response *rsp;
> >>> + unsigned long flags;
> >>> +
> >>> + priv = dev_get_drvdata(&dev->dev);
> >>> +
> >>> + mappass = radix_tree_lookup(&priv->socketpass_mappings,
> >>> + req->u.accept.id);
> >>> + if (mappass == NULL)
> >>> + goto out_error;
> >>> +
> >>> + /*
> >>> + * Limitation of the current implementation: only support one
> >>> + * concurrent accept or poll call on one socket.
> >>> + */
> >>> + spin_lock_irqsave(&mappass->copy_lock, flags);
> >>> + if (mappass->reqcopy.cmd != 0) {
> >>> + spin_unlock_irqrestore(&mappass->copy_lock, flags);
> >>> + ret = -EINTR;
> >>> + goto out_error;
> >>> + }
> >>> +
> >>> + mappass->reqcopy = *req;
> >>
> >> This time you need the lock, however you should use:
> >>
> >> ACCESS_ONCE(mappass->reqcopy) = *req;
> >
> > I don't think that guarantees atomic accesses to the cmd field of the
> > struct. Shouldn't this be:
> >
> > ACCESS_ONCE(mappass->reqcopy.cmd) = req->cmd;
> > mappass->reqcopy = *req;
>
> Hmm, what if the frontend changes cmd between those two accesses?
This cannot happen because req is a copy of the guest request here.
However, it is possible that __pvcalls_back_accept is racing against
pvcalls_back_accept. In that case, I would need to make sure not only
that cmd is written atomically but also, now that I think about it,
that cmd is written *after* the rest of reqcopy: otherwise
__pvcalls_back_accept could see a partially updated reqcopy.
It would be possible to do this with atomic accesses and barriers, but
I am thinking that it is not worth the effort. I am tempted to roll back
to the previous version with spinlocks.
> You either need another local buffer or you have to copy cmd via
> ACCESS_ONCE() and the rest of *req separately (seems not to be
> that hard: it's just cmd, req_id and u).
>
> BTW: Maybe you should use READ_ONCE() and WRITE_ONCE() instead of
> ACCESS_ONCE(), as those seem to be preferred nowadays.
Powered by blists - more mailing lists