Message-Id: <20230313111019.1521680-10-john.g.garry@oracle.com>
Date:   Mon, 13 Mar 2023 11:10:19 +0000
From:   John Garry <john.g.garry@...cle.com>
To:     jejb@...ux.ibm.com, martin.petersen@...cle.com, hch@....de,
        dgilbert@...erlog.com, hare@...e.com, ming.lei@...hat.com,
        bvanassche@....org
Cc:     linux-scsi@...r.kernel.org, linux-kernel@...r.kernel.org,
        John Garry <john.g.garry@...cle.com>
Subject: [PATCH RFC 9/9] scsi: scsi_debug: Drop sdebug_queue concept

The struct sdebug_queue is used to manage tags and to hold the associated
queued command entry pointer (for that tag).

Since the tagset iterators are now used for functions like
sdebug_blk_mq_poll(), there is no real need to manage these queues
ourselves - blk-mq already provides what we need for managing tags and
requests.

However, the sdebug_queue concept was still being used in
max_queue_store() to find tags which need to be "retired" when lowering
sdebug_max_queue. That method of reducing sdebug_max_queue separately
from shost->can_queue is fundamentally broken.

The shost->can_queue value is initially used to set the per-HW queue
context tag depth in the block layer. This ensures that the shost is not
sent more commands than it can handle. However, lowering sdebug_max_queue
separately from that depth means that the shost can easily be overloaded.
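
As a rough illustration (not part of this patch), the SCSI midlayer
derives the blk-mq tag set depth from shost->can_queue when the host's
tag set is allocated. The sketch below is hypothetical, loosely modelled
on scsi_mq_setup_tags() naming, with the other tag_set fields omitted;
details may differ between kernel versions:

	/* hypothetical sketch, not from this patch */
	static int example_setup_tags(struct Scsi_Host *shost)
	{
		struct blk_mq_tag_set *tag_set = &shost->tag_set;

		tag_set->nr_hw_queues = shost->nr_hw_queues ? : 1;
		/* every HW queue context gets a tag depth of can_queue */
		tag_set->queue_depth = shost->can_queue;

		/* ops, numa_node, cmd_size etc. omitted in this sketch */
		return blk_mq_alloc_tag_set(tag_set);
	}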

Indeed, since sdebug_q_arr is shared among all shosts, it is possible to
consume all tags in sdebug_q_arr while no shost is considered overloaded
by the SCSI midlayer/block layer, i.e. shosts may be sent more commands
than they can consume.

Resolve this by changing the per-HW queue context tag depth whenever
sdebug_max_queue is changed.
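
In outline (this is just the max_queue_store() hunk from the diff below,
condensed), lowering sdebug_max_queue now resizes the per-HW queue tag
space of each emulated host, roughly:

	struct sdebug_host_info *sdhp;

	list_for_each_entry(sdhp, &sdebug_host_list, host_list) {
		struct blk_mq_tag_set *tag_set = &sdhp->shost->tag_set;
		int i;

		for (i = 0; i < tag_set->nr_hw_queues; i++)
			/* set the per-hctx tag depth to the new value n */
			sbitmap_queue_resize(&tag_set->tags[i]->bitmap_tags, n);
	}

Blocking/quiescing of the hosts around the resize is omitted here; see
the full diff.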

The submit queue tag info shown in scsi_debug_show_info() is dropped -
tag info can instead be obtained under the /sys/kernel/debug/block/ path.

Signed-off-by: John Garry <john.g.garry@...cle.com>
---
We should not be calling sbitmap_queue_resize() as done below - any better ideas?

 drivers/scsi/scsi_debug.c | 246 +++++++-------------------------------
 1 file changed, 42 insertions(+), 204 deletions(-)

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 7ef195072d97..2bfb6db538fc 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -341,8 +341,6 @@ struct sdebug_defer {
 	struct hrtimer hrt;
 	struct execute_work ew;
 	ktime_t cmpl_ts;/* time since boot to complete this cmd */
-	int sqa_idx;	/* index of sdebug_queue array */
-	int hc_idx;	/* hostwide tag index */
 	int issuing_cpu;
 	bool aborted;	/* true when blk_abort_request() already called */
 	enum sdeb_defer_type defer_t;
@@ -360,12 +358,6 @@ struct sdebug_scsi_cmd {
 	spinlock_t   lock;
 };
 
-struct sdebug_queue {
-	struct sdebug_queued_cmd *qc_arr[SDEBUG_CANQUEUE];
-	unsigned long in_use_bm[SDEBUG_CANQUEUE_WORDS];
-	spinlock_t qc_lock;
-};
-
 static atomic_t sdebug_cmnd_count;   /* number of incoming commands */
 static atomic_t sdebug_completions;  /* count of deferred completions */
 static atomic_t sdebug_miss_cpus;    /* submission + completion cpus differ */
@@ -848,8 +840,8 @@ static int sdeb_zbc_max_open = DEF_ZBC_MAX_OPEN_ZONES;
 static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES;
 
 static int submit_queues = DEF_SUBMIT_QUEUES;  /* > 1 for multi-queue (mq) */
+
 static int poll_queues = 2; /* iouring iopoll interface.*/
-static struct sdebug_queue *sdebug_q_arr;  /* ptr to array of submit queues */
 
 static DEFINE_RWLOCK(atomic_rw);
 static DEFINE_RWLOCK(atomic_rw2);
@@ -4904,20 +4896,6 @@ static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 	return res;
 }
 
-static struct sdebug_queue *get_queue(struct scsi_cmnd *cmnd)
-{
-	u16 hwq;
-	u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(cmnd));
-
-	hwq = blk_mq_unique_tag_to_hwq(tag);
-
-	pr_debug("tag=%#x, hwq=%d\n", tag, hwq);
-	if (WARN_ON_ONCE(hwq >= submit_queues))
-		hwq = 0;
-
-	return sdebug_q_arr + hwq;
-}
-
 static u32 get_tag(struct scsi_cmnd *cmnd)
 {
 	return blk_mq_unique_tag(scsi_cmd_to_rq(cmnd));
@@ -4927,68 +4905,30 @@ static u32 get_tag(struct scsi_cmnd *cmnd)
 static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
 {
 	struct sdebug_queued_cmd *sqcp = container_of(sd_dp, struct sdebug_queued_cmd, sd_dp);
-	int qc_idx;
-	int retiring = 0;
-	unsigned long flags, iflags;
+	unsigned long flags;
 	struct scsi_cmnd *scp = sqcp->scmd;
 	struct sdebug_scsi_cmd *sdsc;
 	bool aborted;
-	struct sdebug_queue *sqp;
 
-	qc_idx = sd_dp->sqa_idx;
 	if (sdebug_statistics) {
 		atomic_inc(&sdebug_completions);
 		if (raw_smp_processor_id() != sd_dp->issuing_cpu)
 			atomic_inc(&sdebug_miss_cpus);
 	}
+
 	if (!scp) {
 		pr_err("scmd=NULL\n");
 		goto out;
 	}
-	if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE))) {
-		pr_err("wild qc_idx=%d\n", qc_idx);
-		goto out;
-	}
 
 	sdsc = scsi_cmd_priv(scp);
-	sqp = get_queue(scp);
-	spin_lock_irqsave(&sqp->qc_lock, iflags);
 	spin_lock_irqsave(&sdsc->lock, flags);
 	aborted = sd_dp->aborted;
 	if (unlikely(aborted))
 		sd_dp->aborted = false;
 	ASSIGN_QEUEUED_CMD(scp, NULL);
 
-	if (unlikely(atomic_read(&retired_max_queue) > 0))
-		retiring = 1;
-
-	sqp->qc_arr[qc_idx] = NULL;
-	if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
-		spin_unlock_irqrestore(&sdsc->lock, flags);
-		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-		pr_err("Unexpected completion qc_idx=%d\n", qc_idx);
-		goto out;
-	}
-
-	if (unlikely(retiring)) {	/* user has reduced max_queue */
-		int k, retval;
-
-		retval = atomic_read(&retired_max_queue);
-		if (qc_idx >= retval) {
-			spin_unlock_irqrestore(&sdsc->lock, flags);
-			spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-			pr_err("index %d too large\n", retval);
-			goto out;
-		}
-		k = find_last_bit(sqp->in_use_bm, retval);
-		if ((k < sdebug_max_queue) || (k == retval))
-			atomic_set(&retired_max_queue, 0);
-		else
-			atomic_set(&retired_max_queue, k + 1);
-	}
-
 	spin_unlock_irqrestore(&sdsc->lock, flags);
-	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 
 	if (aborted) {
 		pr_info("bypassing scsi_done() due to aborted cmd, kicking-off EH\n");
@@ -5275,21 +5215,18 @@ static bool stop_qc_helper(struct sdebug_defer *sd_dp,
 }
 
 
-static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd, int *sqa_idx)
+static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd)
 {
 	enum sdeb_defer_type l_defer_t;
-	struct sdebug_queued_cmd *sqcp;
 	struct sdebug_defer *sd_dp;
 	struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
+	struct sdebug_queued_cmd *sqcp = TO_QEUEUED_CMD(cmnd);
 
 	lockdep_assert_held(&sdsc->lock);
 
-	sqcp = TO_QEUEUED_CMD(cmnd);
 	if (!sqcp)
 		return false;
 	sd_dp = &sqcp->sd_dp;
-	if (sqa_idx)
-		*sqa_idx = sd_dp->sqa_idx;
 	l_defer_t = READ_ONCE(sd_dp->defer_t);
 	ASSIGN_QEUEUED_CMD(cmnd, NULL);
 
@@ -5305,22 +5242,13 @@ static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd, int *sqa_idx)
 static bool scsi_debug_abort_cmnd(struct scsi_cmnd *cmnd)
 {
 	struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
-	struct sdebug_queue *sqp = get_queue(cmnd);
-	unsigned long flags, iflags;
-	int k = -1;
+	unsigned long flags;
 	bool res;
 
 	spin_lock_irqsave(&sdsc->lock, flags);
-	res = scsi_debug_stop_cmnd(cmnd, &k);
+	res = scsi_debug_stop_cmnd(cmnd);
 	spin_unlock_irqrestore(&sdsc->lock, flags);
 
-	if (k >= 0) {
-		spin_lock_irqsave(&sqp->qc_lock, iflags);
-		clear_bit(k, sqp->in_use_bm);
-		sqp->qc_arr[k] = NULL;
-		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-	}
-
 	return res;
 }
 
@@ -5578,7 +5506,6 @@ struct sdebug_queued_cmd *sdebug_alloc_queued_cmd(struct scsi_cmnd *scmd)
 	INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete);
 
 	sqcp->scmd = scmd;
-	sd_dp->sqa_idx = -1;
 
 	return sqcp;
 }
@@ -5597,13 +5524,11 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 	struct request *rq = scsi_cmd_to_rq(cmnd);
 	bool polled = rq->cmd_flags & REQ_POLLED;
 	struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
-	unsigned long iflags, flags;
+	unsigned long flags;
 	u64 ns_from_boot = 0;
-	struct sdebug_queue *sqp;
 	struct sdebug_queued_cmd *sqcp;
 	struct scsi_device *sdp;
 	struct sdebug_defer *sd_dp;
-	int k;
 
 	if (unlikely(devip == NULL)) {
 		if (scsi_result == 0)
@@ -5615,8 +5540,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 	if (delta_jiff == 0)
 		goto respond_in_thread;
 
-	sqp = get_queue(cmnd);
-	spin_lock_irqsave(&sqp->qc_lock, iflags);
 
 	if (unlikely(sdebug_every_nth && (SDEBUG_OPT_RARE_TSF & sdebug_opts) &&
 		     (scsi_result == 0))) {
@@ -5635,33 +5558,12 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 		}
 	}
 
-	k = find_first_zero_bit(sqp->in_use_bm, sdebug_max_queue);
-	if (unlikely(k >= sdebug_max_queue)) {
-		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-		if (scsi_result)
-			goto respond_in_thread;
-		scsi_result = device_qfull_result;
-		if (SDEBUG_OPT_Q_NOISE & sdebug_opts)
-			sdev_printk(KERN_INFO, sdp, "%s: max_queue=%d exceeded: TASK SET FULL\n",
-				    __func__, sdebug_max_queue);
-		goto respond_in_thread;
-	}
-	set_bit(k, sqp->in_use_bm);
-
 	sqcp = sdebug_alloc_queued_cmd(cmnd);
 	if (!sqcp) {
-		clear_bit(k, sqp->in_use_bm);
-		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+		pr_err("%s no alloc\n", __func__);
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 	sd_dp = &sqcp->sd_dp;
-	sd_dp->sqa_idx = k;
-	sqp->qc_arr[k] = sqcp;
-	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-
-	/* Set the hostwide tag */
-	if (sdebug_host_max_queue)
-		sd_dp->hc_idx = get_tag(cmnd);
 
 	if (polled)
 		ns_from_boot = ktime_get_boottime_ns();
@@ -5708,10 +5610,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 				u64 d = ktime_get_boottime_ns() - ns_from_boot;
 
 				if (kt <= d) {	/* elapsed duration >= kt */
-					spin_lock_irqsave(&sqp->qc_lock, iflags);
-					sqp->qc_arr[k] = NULL;
-					clear_bit(k, sqp->in_use_bm);
-					spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 					/* call scsi_done() from this thread */
 					sdebug_free_queued_cmd(sqcp);
 					scsi_done(cmnd);
@@ -5974,8 +5872,7 @@ static int scsi_debug_write_info(struct Scsi_Host *host, char *buffer,
  * output are not atomics so might be inaccurate in a busy system. */
 static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host)
 {
-	int f, j, l;
-	struct sdebug_queue *sqp;
+	int j;
 	struct sdebug_host_info *sdhp;
 
 	seq_printf(m, "scsi_debug adapter driver, version %s [%s]\n",
@@ -6003,16 +5900,6 @@ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host)
 		   atomic_read(&sdebug_a_tsf),
 		   atomic_read(&sdeb_mq_poll_count));
 
-	seq_printf(m, "submit_queues=%d\n", submit_queues);
-	for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
-		seq_printf(m, "  queue %d:\n", j);
-		f = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
-		if (f != sdebug_max_queue) {
-			l = find_last_bit(sqp->in_use_bm, sdebug_max_queue);
-			seq_printf(m, "    in_use_bm BUSY: %s: %d,%d\n",
-				   "first,last bits", f, l);
-		}
-	}
 
 	seq_printf(m, "this host_no=%d\n", host->host_no);
 	if (!xa_empty(per_store_ap)) {
@@ -6428,30 +6315,36 @@ static ssize_t max_queue_show(struct device_driver *ddp, char *buf)
 static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
 			       size_t count)
 {
-	int j, n, k, a;
-	struct sdebug_queue *sqp;
+	int n;
 
 	if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) &&
-	    (n <= SDEBUG_CANQUEUE) &&
-	    (sdebug_host_max_queue == 0)) {
+	    (n <= SDEBUG_CANQUEUE)) {
+		struct sdebug_host_info *sdhp;
+
 		mutex_lock(&sdebug_host_list_mutex);
-		block_unblock_all_queues(true);
-		k = 0;
-		for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
-		     ++j, ++sqp) {
-			a = find_last_bit(sqp->in_use_bm, SDEBUG_CANQUEUE);
-			if (a > k)
-				k = a;
+		list_for_each_entry(sdhp, &sdebug_host_list, host_list) {
+			struct Scsi_Host *shost = sdhp->shost;
+			struct blk_mq_tag_set *tag_set = &shost->tag_set;
+			struct scsi_device *sdev;
+			int i;
+
+			scsi_block_requests(shost);
+			shost_for_each_device(sdev, shost)
+				scsi_device_quiesce(sdev);
+
+			for (i = 0; i < tag_set->nr_hw_queues; i++) {
+				struct blk_mq_tags *tags = tag_set->tags[i];
+
+				/* yuck, we should not do this */
+				sbitmap_queue_resize(&tags->bitmap_tags, n);
+			}
+
+			shost_for_each_device(sdev, shost)
+				scsi_device_resume(sdev);
+			scsi_unblock_requests(shost);
 		}
-		sdebug_max_queue = n;
-		if (k == SDEBUG_CANQUEUE)
-			atomic_set(&retired_max_queue, 0);
-		else if (k >= n)
-			atomic_set(&retired_max_queue, k + 1);
-		else
-			atomic_set(&retired_max_queue, 0);
-		block_unblock_all_queues(false);
 		mutex_unlock(&sdebug_host_list_mutex);
+		sdebug_max_queue = n;
 		return count;
 	}
 	return -EINVAL;
@@ -6975,13 +6868,6 @@ static int __init scsi_debug_init(void)
 			sdebug_max_queue);
 	}
 
-	sdebug_q_arr = kcalloc(submit_queues, sizeof(struct sdebug_queue),
-			       GFP_KERNEL);
-	if (sdebug_q_arr == NULL)
-		return -ENOMEM;
-	for (k = 0; k < submit_queues; ++k)
-		spin_lock_init(&sdebug_q_arr[k].qc_lock);
-
 	/*
 	 * check for host managed zoned block device specified with
 	 * ptype=0x14 or zbc=XXX.
@@ -6990,10 +6876,8 @@ static int __init scsi_debug_init(void)
 		sdeb_zbc_model = BLK_ZONED_HM;
 	} else if (sdeb_zbc_model_s && *sdeb_zbc_model_s) {
 		k = sdeb_zbc_model_str(sdeb_zbc_model_s);
-		if (k < 0) {
-			ret = k;
-			goto free_q_arr;
-		}
+		if (k < 0)
+			return k;
 		sdeb_zbc_model = k;
 		switch (sdeb_zbc_model) {
 		case BLK_ZONED_NONE:
@@ -7005,8 +6889,7 @@ static int __init scsi_debug_init(void)
 			break;
 		default:
 			pr_err("Invalid ZBC model\n");
-			ret = -EINVAL;
-			goto free_q_arr;
+			return -EINVAL;
 		}
 	}
 	if (sdeb_zbc_model != BLK_ZONED_NONE) {
@@ -7053,17 +6936,14 @@ static int __init scsi_debug_init(void)
 		    sdebug_unmap_granularity <=
 		    sdebug_unmap_alignment) {
 			pr_err("ERR: unmap_granularity <= unmap_alignment\n");
-			ret = -EINVAL;
-			goto free_q_arr;
+			return -EINVAL;
 		}
 	}
 	xa_init_flags(per_store_ap, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
 	if (want_store) {
 		idx = sdebug_add_store();
-		if (idx < 0) {
-			ret = idx;
-			goto free_q_arr;
-		}
+		if (idx < 0)
+			return idx;
 	}
 
 	pseudo_primary = root_device_register("pseudo_0");
@@ -7120,8 +7000,6 @@ static int __init scsi_debug_init(void)
 	root_device_unregister(pseudo_primary);
 free_vm:
 	sdebug_erase_store(idx, NULL);
-free_q_arr:
-	kfree(sdebug_q_arr);
 	return ret;
 }
 
@@ -7138,7 +7016,6 @@ static void __exit scsi_debug_exit(void)
 
 	sdebug_erase_all_stores(false);
 	xa_destroy(per_store_ap);
-	kfree(sdebug_q_arr);
 }
 
 device_initcall(scsi_debug_init);
@@ -7514,11 +7391,8 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
 	u32 unique_tag = blk_mq_unique_tag(rq);
 	u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag);
 	struct sdebug_queued_cmd *sqcp;
-	struct sdebug_queue *sqp;
 	unsigned long flags;
 	int queue_num = data->queue_num;
-	bool retiring = false;
-	int qc_idx;
 	ktime_t time;
 
 	/* We're only interested in one queue for this iteration */
@@ -7537,7 +7411,7 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
 		spin_unlock_irqrestore(&sdsc->lock, flags);
 		return true;
 	}
-	sqp = sdebug_q_arr + queue_num;
+
 	sd_dp = &sqcp->sd_dp;
 
 
@@ -7552,36 +7426,6 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
 		return true;
 	}
 
-	if (unlikely(atomic_read(&retired_max_queue) > 0))
-		retiring = true;
-
-	qc_idx = sd_dp->sqa_idx;
-	sqp->qc_arr[qc_idx] = NULL;
-	if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
-		spin_unlock_irqrestore(&sdsc->lock, flags);
-		pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%u\n",
-			sqp, queue_num, qc_idx);
-		sdebug_free_queued_cmd(sqcp);
-		return true;
-	}
-
-	if (unlikely(retiring)) {	/* user has reduced max_queue */
-		int k, retval = atomic_read(&retired_max_queue);
-
-		if (qc_idx >= retval) {
-			pr_err("index %d too large\n", retval);
-			spin_unlock_irqrestore(&sdsc->lock, flags);
-			sdebug_free_queued_cmd(sqcp);
-			return true;
-		}
-
-		k = find_last_bit(sqp->in_use_bm, retval);
-		if ((k < sdebug_max_queue) || (k == retval))
-			atomic_set(&retired_max_queue, 0);
-		else
-			atomic_set(&retired_max_queue, k + 1);
-	}
-
 	ASSIGN_QEUEUED_CMD(cmd, NULL);
 	spin_unlock_irqrestore(&sdsc->lock, flags);
 
@@ -7601,20 +7445,14 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
 static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
 {
 	int num_entries = 0;
-	unsigned long iflags;
-	struct sdebug_queue *sqp;
 	struct sdebug_blk_mq_poll_data data = {
 		.queue_num = queue_num,
 		.num_entries = &num_entries,
 	};
-	sqp = sdebug_q_arr + queue_num;
-
-	spin_lock_irqsave(&sqp->qc_lock, iflags);
 
 	blk_mq_tagset_busy_iter(&shost->tag_set, sdebug_blk_mq_poll_iter,
 				&data);
 
-	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 	if (num_entries > 0)
 		atomic_add(num_entries, &sdeb_mq_poll_count);
 	return num_entries;
-- 
2.35.3
