[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f7cc2d60-5fd0-491e-7097-51b55d22500f@oracle.com>
Date: Fri, 6 Oct 2017 16:36:24 -0400
From: Boris Ostrovsky <boris.ostrovsky@...cle.com>
To: Stefano Stabellini <sstabellini@...nel.org>
Cc: xen-devel@...ts.xen.org, linux-kernel@...r.kernel.org,
jgross@...e.com, Stefano Stabellini <stefano@...reto.com>
Subject: Re: [PATCH v4 04/13] xen/pvcalls: implement socket command and handle events
On 10/06/2017 02:38 PM, Stefano Stabellini wrote:
> On Thu, 21 Sep 2017, Boris Ostrovsky wrote:
>>> +
>>> +static inline int get_request(struct pvcalls_bedata *bedata, int *req_id)
>>> +{
>>> + *req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
>>> + if (RING_FULL(&bedata->ring) ||
>>> + READ_ONCE(bedata->rsp[*req_id].req_id) != PVCALLS_INVALID_ID)
>>> + return -EAGAIN;
>>> + return 0;
>>> +}
>>> +
>>> static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
>>> {
>>> + struct xenbus_device *dev = dev_id;
>>> + struct pvcalls_bedata *bedata;
>>> + struct xen_pvcalls_response *rsp;
>>> + uint8_t *src, *dst;
>>> + int req_id = 0, more = 0, done = 0;
>>> +
>>> + if (dev == NULL)
>>> + return IRQ_HANDLED;
>>> +
>>> + pvcalls_enter;
>>> + bedata = dev_get_drvdata(&dev->dev);
>>> + if (bedata == NULL) {
>>> + pvcalls_exit;
>>> + return IRQ_HANDLED;
>>> + }
>>> +
>>> +again:
>>> + while (RING_HAS_UNCONSUMED_RESPONSES(&bedata->ring)) {
>>> + rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
>>> +
>>> + req_id = rsp->req_id;
>>> + dst = (uint8_t *)&bedata->rsp[req_id] + sizeof(rsp->req_id);
>>> + src = (uint8_t *)rsp + sizeof(rsp->req_id);
>>> + memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
>>> + /*
>>> + * First copy the rest of the data, then req_id. It is
>>> + * paired with the barrier when accessing bedata->rsp.
>>> + */
>>> + smp_wmb();
>>> + WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
>>> +
>>> + done = 1;
>>> + bedata->ring.rsp_cons++;
>>> + }
>>> +
>>> + RING_FINAL_CHECK_FOR_RESPONSES(&bedata->ring, more);
>>> + if (more)
>>> + goto again;
>>> + if (done)
>>> + wake_up(&bedata->inflight_req);
>>> + pvcalls_exit;
>>> return IRQ_HANDLED;
>>> }
>>>
>>> +
>>> + bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
>>> +
>>> + map = kzalloc(sizeof(*map), GFP_KERNEL);
>>> + if (map == NULL) {
>>> + pvcalls_exit;
>>> + return -ENOMEM;
>>> + }
>>> +
>>> + spin_lock(&bedata->socket_lock);
>>> +
>>> + ret = get_request(bedata, &req_id);
>>> + if (ret < 0) {
>>> + kfree(map);
>>> + spin_unlock(&bedata->socket_lock);
>>> + pvcalls_exit;
>>> + return ret;
>>> + }
>>> +
>>> + /*
>>> + * sock->sk->sk_send_head is not used for ip sockets: reuse the
>>> + * field to store a pointer to the struct sock_mapping
>>> + * corresponding to the socket. This way, we can easily get the
>>> + * struct sock_mapping from the struct socket.
>>> + */
>>> + sock->sk->sk_send_head = (void *)map;
>>> + list_add_tail(&map->list, &bedata->socket_mappings);
>>> +
>>> + req = RING_GET_REQUEST(&bedata->ring, req_id);
>>> + req->req_id = req_id;
>>> + req->cmd = PVCALLS_SOCKET;
>>> + req->u.socket.id = (uint64_t) map;
>>> + req->u.socket.domain = AF_INET;
>>> + req->u.socket.type = SOCK_STREAM;
>>> + req->u.socket.protocol = IPPROTO_IP;
>>> +
>>> + bedata->ring.req_prod_pvt++;
>>> + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
>>> + spin_unlock(&bedata->socket_lock);
>>> + if (notify)
>>> + notify_remote_via_irq(bedata->irq);
>>> +
>>> + wait_event(bedata->inflight_req,
>>> + READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
>>> +
>>> + ret = bedata->rsp[req_id].ret;
>>> + /* read ret, then set this rsp slot to be reused */
>>> + smp_mb();
>>> + WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
>> Now that I looked at what *_ONCE macros do I am not sure this is needed, given
>> the smp_mb() above. In pvcalls_front_event_handler() too. And, in fact, in
>> get_request() as well.
>>
>> (Again, this will probably be also applicable to subsequent patches)
> Yes, I think you are right, although I think it is much easier to use
> *_ONCE everywhere we don't want reads/writes to be mangled. Nonetheless,
> I removed WRITE_ONCE from places where the write is done immediately
> after a barrier. I kept *_ONCE elsewhere.
Adding them when not necessary makes code more difficult to read (and
possibly larger).
Juergen is out now but it would be good to hear what he thinks when he
gets back.
-boris
Powered by blists - more mailing lists