[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <6877621c-f129-1fc7-d366-14e5499c8d8b@kernel.dk>
Date: Mon, 8 Oct 2018 14:06:43 -0600
From: Jens Axboe <axboe@...nel.dk>
To: David Miller <davem@...emloft.net>, keescook@...omium.org
Cc: linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
sparclinux@...r.kernel.org
Subject: Re: [PATCH sparc-next] sunvdc: Remove VLA usage
On 10/8/18 1:16 PM, Jens Axboe wrote:
> On 10/8/18 12:10 PM, David Miller wrote:
>> From: Kees Cook <keescook@...omium.org>
>> Date: Mon, 8 Oct 2018 08:46:51 -0700
>>
>>> In the quest to remove all stack VLA usage from the kernel[1], this moves
>>> the math for cookies calculation into macros and allocates a fixed size
>>> array for the maximum number of cookies and adds a runtime sanity check.
>>> (Note that the size was always fixed, but just hidden from the compiler.)
>>>
>>> [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
>>>
>>> Cc: Jens Axboe <axboe@...nel.dk>
>>> Cc: linux-block@...r.kernel.org
>>> Signed-off-by: Kees Cook <keescook@...omium.org>
>>
>> Applied.
>
> FWIW, you can add my reviewed-by if you haven't already queued it up.
>
> On the topic of vdc, do you have a way to test it? I converted it to
> use blk-mq, to make some progress on killing the legacy IO path.
> See below, would be great if someone was able to test this...
Improved version below, changes the reset timer to delayed work
instead - if not, we can't block waiting for the drain.
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index f68e9baffad7..40e1f2028906 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -17,6 +17,7 @@
#include <linux/init.h>
#include <linux/list.h>
#include <linux/scatterlist.h>
+#include <linux/blk-mq.h>
#include <asm/vio.h>
#include <asm/ldc.h>
@@ -66,9 +67,10 @@ struct vdc_port {
u64 max_xfer_size;
u32 vdisk_block_size;
+ u32 drain;
u64 ldc_timeout;
- struct timer_list ldc_reset_timer;
+ struct delayed_work ldc_reset_timer_work;
struct work_struct ldc_reset_work;
/* The server fills these in for us in the disk attribute
@@ -80,12 +82,14 @@ struct vdc_port {
u8 vdisk_mtype;
u32 vdisk_phys_blksz;
+ struct blk_mq_tag_set tag_set;
+
char disk_name[32];
};
static void vdc_ldc_reset(struct vdc_port *port);
static void vdc_ldc_reset_work(struct work_struct *work);
-static void vdc_ldc_reset_timer(struct timer_list *t);
+static void vdc_ldc_reset_timer_work(struct work_struct *work);
static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
{
@@ -175,11 +179,8 @@ static void vdc_blk_queue_start(struct vdc_port *port)
* handshake completes, so check for initial handshake before we've
* allocated a disk.
*/
- if (port->disk && blk_queue_stopped(port->disk->queue) &&
- vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50) {
- blk_start_queue(port->disk->queue);
- }
-
+ if (port->disk && vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50)
+ blk_mq_start_hw_queues(port->disk->queue);
}
static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
@@ -197,7 +198,7 @@ static void vdc_handshake_complete(struct vio_driver_state *vio)
{
struct vdc_port *port = to_vdc_port(vio);
- del_timer(&port->ldc_reset_timer);
+ cancel_delayed_work(&port->ldc_reset_timer_work);
vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
vdc_blk_queue_start(port);
}
@@ -320,7 +321,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
rqe->req = NULL;
- __blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
+ blk_mq_end_request(req, desc->status ? BLK_STS_IOERR : 0);
vdc_blk_queue_start(port);
}
@@ -525,29 +526,41 @@ static int __send_request(struct request *req)
return err;
}
-static void do_vdc_request(struct request_queue *rq)
+static blk_status_t vdc_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
{
- struct request *req;
+ struct vdc_port *port = hctx->queue->queuedata;
+ struct vio_dring_state *dr;
+ unsigned long flags;
- while ((req = blk_peek_request(rq)) != NULL) {
- struct vdc_port *port;
- struct vio_dring_state *dr;
+ dr = &port->vio.drings[VIO_DRIVER_TX_RING];
- port = req->rq_disk->private_data;
- dr = &port->vio.drings[VIO_DRIVER_TX_RING];
- if (unlikely(vdc_tx_dring_avail(dr) < 1))
- goto wait;
+ blk_mq_start_request(bd->rq);
- blk_start_request(req);
+ spin_lock_irqsave(&port->vio.lock, flags);
- if (__send_request(req) < 0) {
- blk_requeue_request(rq, req);
-wait:
- /* Avoid pointless unplugs. */
- blk_stop_queue(rq);
- break;
- }
+ /*
+ * Doing drain, just end the request in error
+ */
+ if (unlikely(port->drain)) {
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ return BLK_STS_IOERR;
+ }
+
+ if (unlikely(vdc_tx_dring_avail(dr) < 1))
+ goto wait;
+
+ if (__send_request(bd->rq) < 0) {
+ blk_mq_requeue_request(bd->rq, false);
+ goto wait;
}
+
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ return BLK_STS_OK;
+wait:
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ blk_mq_stop_hw_queue(hctx);
+ return BLK_STS_RESOURCE;
}
static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
@@ -759,6 +772,43 @@ static void vdc_port_down(struct vdc_port *port)
vio_ldc_free(&port->vio);
}
+static const struct blk_mq_ops vdc_mq_ops = {
+ .queue_rq = vdc_queue_rq,
+};
+
+static void cleanup_queue(struct request_queue *q)
+{
+ blk_mq_free_tag_set(q->tag_set);
+ blk_cleanup_queue(q);
+}
+
+static struct request_queue *init_queue(struct vdc_port *port)
+{
+ struct blk_mq_tag_set *set = &port->tag_set;
+ struct request_queue *q;
+ int ret;
+
+ memset(set, 0, sizeof(*set));
+ set->ops = &vdc_mq_ops;
+ set->nr_hw_queues = 1;
+ set->queue_depth = VDC_TX_RING_SIZE;
+ set->numa_node = NUMA_NO_NODE;
+ set->flags = BLK_MQ_F_SHOULD_MERGE;
+
+ ret = blk_mq_alloc_tag_set(set);
+ if (ret)
+ return ERR_PTR(ret);
+
+ q = blk_mq_init_queue(set);
+ if (IS_ERR(q)) {
+ blk_mq_free_tag_set(set);
+ return q;
+ }
+
+ q->queuedata = port;
+ return q;
+}
+
static int probe_disk(struct vdc_port *port)
{
struct request_queue *q;
@@ -796,17 +846,17 @@ static int probe_disk(struct vdc_port *port)
(u64)geom.num_sec);
}
- q = blk_init_queue(do_vdc_request, &port->vio.lock);
- if (!q) {
+ q = init_queue(port);
+ if (IS_ERR(q)) {
printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
port->vio.name);
- return -ENOMEM;
+ return PTR_ERR(q);
}
g = alloc_disk(1 << PARTITION_SHIFT);
if (!g) {
printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
port->vio.name);
- blk_cleanup_queue(q);
+ cleanup_queue(q);
return -ENOMEM;
}
@@ -981,7 +1031,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
*/
ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL);
port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0;
- timer_setup(&port->ldc_reset_timer, vdc_ldc_reset_timer, 0);
+ INIT_DELAYED_WORK(&port->ldc_reset_timer_work, vdc_ldc_reset_timer_work);
INIT_WORK(&port->ldc_reset_work, vdc_ldc_reset_work);
err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
@@ -1034,18 +1084,14 @@ static int vdc_port_remove(struct vio_dev *vdev)
struct vdc_port *port = dev_get_drvdata(&vdev->dev);
if (port) {
- unsigned long flags;
-
- spin_lock_irqsave(&port->vio.lock, flags);
- blk_stop_queue(port->disk->queue);
- spin_unlock_irqrestore(&port->vio.lock, flags);
+ blk_mq_stop_hw_queues(port->disk->queue);
flush_work(&port->ldc_reset_work);
- del_timer_sync(&port->ldc_reset_timer);
+ cancel_delayed_work_sync(&port->ldc_reset_timer_work);
del_timer_sync(&port->vio.timer);
del_gendisk(port->disk);
- blk_cleanup_queue(port->disk->queue);
+ cleanup_queue(port->disk->queue);
put_disk(port->disk);
port->disk = NULL;
@@ -1080,32 +1126,46 @@ static void vdc_requeue_inflight(struct vdc_port *port)
}
rqe->req = NULL;
- blk_requeue_request(port->disk->queue, req);
+ blk_mq_requeue_request(req, false);
}
}
static void vdc_queue_drain(struct vdc_port *port)
{
- struct request *req;
+ struct request_queue *q = port->disk->queue;
- while ((req = blk_fetch_request(port->disk->queue)) != NULL)
- __blk_end_request_all(req, BLK_STS_IOERR);
+ /*
+ * Mark the queue as draining, then freeze/quiesce to ensure
+ * that all existing requests are seen in ->queue_rq() and killed
+ */
+ port->drain = 1;
+ spin_unlock_irq(&port->vio.lock);
+
+ blk_mq_freeze_queue(q);
+ blk_mq_quiesce_queue(q);
+
+ spin_lock_irq(&port->vio.lock);
+ port->drain = 0;
+ blk_mq_unquiesce_queue(q);
+ blk_mq_unfreeze_queue(q);
}
-static void vdc_ldc_reset_timer(struct timer_list *t)
+static void vdc_ldc_reset_timer_work(struct work_struct *work)
{
- struct vdc_port *port = from_timer(port, t, ldc_reset_timer);
- struct vio_driver_state *vio = &port->vio;
- unsigned long flags;
+ struct vdc_port *port;
+ struct vio_driver_state *vio;
- spin_lock_irqsave(&vio->lock, flags);
+ port = container_of(work, struct vdc_port, ldc_reset_timer_work.work);
+ vio = &port->vio;
+
+ spin_lock_irq(&vio->lock);
if (!(port->vio.hs_state & VIO_HS_COMPLETE)) {
pr_warn(PFX "%s ldc down %llu seconds, draining queue\n",
port->disk_name, port->ldc_timeout);
vdc_queue_drain(port);
vdc_blk_queue_start(port);
}
- spin_unlock_irqrestore(&vio->lock, flags);
+ spin_unlock_irq(&vio->lock);
}
static void vdc_ldc_reset_work(struct work_struct *work)
@@ -1129,7 +1189,7 @@ static void vdc_ldc_reset(struct vdc_port *port)
assert_spin_locked(&port->vio.lock);
pr_warn(PFX "%s ldc link reset\n", port->disk_name);
- blk_stop_queue(port->disk->queue);
+ blk_mq_stop_hw_queues(port->disk->queue);
vdc_requeue_inflight(port);
vdc_port_down(port);
@@ -1146,7 +1206,7 @@ static void vdc_ldc_reset(struct vdc_port *port)
}
if (port->ldc_timeout)
- mod_timer(&port->ldc_reset_timer,
+ mod_delayed_work(system_wq, &port->ldc_reset_timer_work,
round_jiffies(jiffies + HZ * port->ldc_timeout));
mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ));
return;
--
Jens Axboe
Powered by blists - more mailing lists