From 007d773af7b65a1f1ca543f031ca58b3afa5b7d9 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy
Date: Thu, 19 Jul 2018 12:42:00 +0000
Subject: [PATCH 1/1] blk-mq: fix RDMA queue/cpu mapping assignments for mq

blk_mq_rdma_map_queues() currently assigns every CPU in a vector's
affinity mask to that vector's queue, so when the affinity masks of
different vectors overlap, a later queue can steal the CPUs of an
earlier one and leave some hw queues with no CPU mapped to them.

Rework the mapping: first guarantee each queue at least one CPU from
its affinity mask, then spread the remaining affine CPUs, and finally
fall back to the naive sequential/sibling mapping for any queue or CPU
that is still unmapped. To allow that, factor the per-CPU part of
blk_mq_map_queues() out into an exported helper,
blk_mq_map_queue_to_cpu().

Signed-off-by: Max Gurtovoy
Signed-off-by: Israel Rukshin
---
 block/blk-mq-cpumap.c  | 41 ++++++++++++++----------
 block/blk-mq-rdma.c    | 84 ++++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/blk-mq.h |  1 +
 3 files changed, 103 insertions(+), 23 deletions(-)

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 3eb169f..02b888f 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -30,29 +30,36 @@ static int get_first_sibling(unsigned int cpu)
 	return cpu;
 }
 
-int blk_mq_map_queues(struct blk_mq_tag_set *set)
+void blk_mq_map_queue_to_cpu(struct blk_mq_tag_set *set, unsigned int cpu)
 {
 	unsigned int *map = set->mq_map;
 	unsigned int nr_queues = set->nr_hw_queues;
-	unsigned int cpu, first_sibling;
+	unsigned int first_sibling;
 
-	for_each_possible_cpu(cpu) {
-		/*
-		 * First do sequential mapping between CPUs and queues.
-		 * In case we still have CPUs to map, and we have some number of
-		 * threads per cores then map sibling threads to the same queue for
-		 * performace optimizations.
-		 */
-		if (cpu < nr_queues) {
+	/*
+	 * First do sequential mapping between CPUs and queues.
+	 * In case we still have CPUs to map, and we have some number of
+	 * threads per core, then map sibling threads to the same queue for
+	 * performance optimizations.
+	 */
+	if (cpu < nr_queues) {
+		map[cpu] = cpu_to_queue_index(nr_queues, cpu);
+	} else {
+		first_sibling = get_first_sibling(cpu);
+		if (first_sibling == cpu)
 			map[cpu] = cpu_to_queue_index(nr_queues, cpu);
-		} else {
-			first_sibling = get_first_sibling(cpu);
-			if (first_sibling == cpu)
-				map[cpu] = cpu_to_queue_index(nr_queues, cpu);
-			else
-				map[cpu] = map[first_sibling];
-		}
+		else
+			map[cpu] = map[first_sibling];
 	}
+}
+EXPORT_SYMBOL_GPL(blk_mq_map_queue_to_cpu);
+
+int blk_mq_map_queues(struct blk_mq_tag_set *set)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		blk_mq_map_queue_to_cpu(set, cpu);
 
 	return 0;
 }
diff --git a/block/blk-mq-rdma.c b/block/blk-mq-rdma.c
index 996167f..621d5f0 100644
--- a/block/blk-mq-rdma.c
+++ b/block/blk-mq-rdma.c
@@ -14,6 +14,33 @@
 #include <linux/blk-mq-rdma.h>
 #include <rdma/ib_verbs.h>
 
+static int blk_mq_rdma_map_queues_by_affinity(struct blk_mq_tag_set *set,
+		struct ib_device *dev, int first_vec, int max_mapping)
+{
+	const struct cpumask *mask;
+	unsigned int queue, cpu;
+	int num_of_mapping = 0;
+
+	for (queue = 0; queue < set->nr_hw_queues; queue++) {
+		if (num_of_mapping == max_mapping)
+			return num_of_mapping;
+
+		mask = ib_get_vector_affinity(dev, first_vec + queue);
+		if (!mask)
+			return -1;
+
+		for_each_cpu(cpu, mask) {
+			if (set->mq_map[cpu] == UINT_MAX) {
+				set->mq_map[cpu] = queue;
+				num_of_mapping++;
+				/* Each queue mapped to 1 cpu */
+				break;
+			}
+		}
+	}
+	return num_of_mapping;
+}
+
 /**
  * blk_mq_rdma_map_queues - provide a default queue mapping for rdma device
  * @set:	tagset to provide the mapping for
@@ -32,18 +59,63 @@
 int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set,
 		struct ib_device *dev, int first_vec)
 {
-	const struct cpumask *mask;
 	unsigned int queue, cpu;
+	bool mapped;
+	int remaining_cpus = num_possible_cpus();
+	int unmapped_queues = set->nr_hw_queues;
+	int ret;
 
-	for (queue = 0; queue < set->nr_hw_queues; queue++) {
-		mask = ib_get_vector_affinity(dev, first_vec + queue);
-		if (!mask)
+	/* reset all CPUs mapping */
+	for_each_possible_cpu(cpu)
+		set->mq_map[cpu] = UINT_MAX;
+
+	/* Try to map the queues according to affinity */
+	ret = blk_mq_rdma_map_queues_by_affinity(set, dev, first_vec,
+			remaining_cpus);
+	if (ret == -1)
+		goto fallback;
+	remaining_cpus -= ret;
+	unmapped_queues -= ret;
+
+	/* Map queues with more than one cpu according to affinity */
+	while (remaining_cpus > unmapped_queues) {
+		ret = blk_mq_rdma_map_queues_by_affinity(set, dev, first_vec,
+				remaining_cpus - unmapped_queues);
+		if (ret == -1)
 			goto fallback;
+		if (!ret)
+			break;
+		remaining_cpus -= ret;
+	}
 
-		for_each_cpu(cpu, mask)
-			set->mq_map[cpu] = queue;
+	/* Map the unmapped queues in a naive way */
+	for (queue = 0; queue < set->nr_hw_queues; queue++) {
+		mapped = false;
+		for_each_possible_cpu(cpu) {
+			if (set->mq_map[cpu] == queue) {
+				mapped = true;
+				break;
+			}
+		}
+		if (!mapped) {
+			for_each_possible_cpu(cpu) {
+				if (set->mq_map[cpu] == UINT_MAX) {
+					set->mq_map[cpu] = queue;
+					mapped = true;
+					break;
+				}
+			}
+		}
+		/* This case should never happen */
+		if (WARN_ON_ONCE(!mapped))
+			goto fallback;
 	}
 
+	/* set all the rest of the CPUs */
+	for_each_possible_cpu(cpu)
+		if (set->mq_map[cpu] == UINT_MAX)
+			blk_mq_map_queue_to_cpu(set, cpu);
+
 	return 0;
 
 fallback:
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e3147eb..d6cd114 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -282,6 +282,7 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
 				     unsigned long timeout);
 
+void blk_mq_map_queue_to_cpu(struct blk_mq_tag_set *set, unsigned int cpu);
 int blk_mq_map_queues(struct blk_mq_tag_set *set);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
--
1.8.3.1
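
Note for reviewers (not part of the patch): the snippet below is a small,
hypothetical user-space model of the new assignment policy, useful for
playing with different affinity layouts without building a kernel. The
affinity table, NR_CPUS_MODEL/NR_QUEUES_MODEL and the plain-modulo
cpu_to_queue_index() are invented for the example, and both the
SMT-sibling handling of blk_mq_map_queue_to_cpu() and the second pass
that spreads additional affine CPUs are omitted for brevity, so treat
it only as a sketch of the mapping order: reset, one CPU per queue by
affinity, naive fill for still-unmapped queues, sequential fill for
leftover CPUs.

/*
 * Hypothetical user-space model of the mapping policy introduced by this
 * patch -- NOT kernel code.  NR_CPUS_MODEL, NR_QUEUES_MODEL, the affinity
 * table and the plain-modulo cpu_to_queue_index() are invented for the
 * example; the SMT-sibling logic and the "spread extra affine CPUs" pass
 * of the real patch are omitted for brevity.
 */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS_MODEL	8
#define NR_QUEUES_MODEL	3

/* queue -> CPUs reported by ib_get_vector_affinity(), as a bitmap */
static const unsigned long affinity[NR_QUEUES_MODEL] = {
	0x03,	/* queue 0: CPUs 0-1 */
	0x0c,	/* queue 1: CPUs 2-3 */
	0x0c,	/* queue 2: CPUs 2-3 (overlaps queue 1 on purpose) */
};

static unsigned int mq_map[NR_CPUS_MODEL];

static unsigned int cpu_to_queue_index(unsigned int nr_queues, unsigned int cpu)
{
	return cpu % nr_queues;		/* assumed: plain modulo spread */
}

int main(void)
{
	unsigned int cpu, queue;

	/* 1) reset all CPU mappings */
	for (cpu = 0; cpu < NR_CPUS_MODEL; cpu++)
		mq_map[cpu] = UINT_MAX;

	/* 2) give every queue at least one CPU from its affinity mask */
	for (queue = 0; queue < NR_QUEUES_MODEL; queue++)
		for (cpu = 0; cpu < NR_CPUS_MODEL; cpu++)
			if ((affinity[queue] & (1UL << cpu)) &&
			    mq_map[cpu] == UINT_MAX) {
				mq_map[cpu] = queue;
				break;
			}

	/* 3) naive fill for any queue that is still unmapped */
	for (queue = 0; queue < NR_QUEUES_MODEL; queue++) {
		bool mapped = false;

		for (cpu = 0; cpu < NR_CPUS_MODEL && !mapped; cpu++)
			mapped = (mq_map[cpu] == queue);
		for (cpu = 0; cpu < NR_CPUS_MODEL && !mapped; cpu++)
			if (mq_map[cpu] == UINT_MAX) {
				mq_map[cpu] = queue;
				mapped = true;
			}
	}

	/* 4) sequential fallback for leftover CPUs, like blk_mq_map_queues() */
	for (cpu = 0; cpu < NR_CPUS_MODEL; cpu++)
		if (mq_map[cpu] == UINT_MAX)
			mq_map[cpu] = cpu_to_queue_index(NR_QUEUES_MODEL, cpu);

	for (cpu = 0; cpu < NR_CPUS_MODEL; cpu++)
		printf("cpu %u -> queue %u\n", cpu, mq_map[cpu]);
	return 0;
}

Building it with any C compiler and editing the affinity table shows how
overlapping masks are resolved: with the table above, queue 2 ends up on
CPU 3 instead of overwriting queue 1 on CPU 2, and CPUs 4-7 are spread by
the modulo fallback.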