lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190605232348.6452-9-saeedm@mellanox.com>
Date:   Wed, 5 Jun 2019 23:24:50 +0000
From:   Saeed Mahameed <saeedm@...lanox.com>
To:     "David S. Miller" <davem@...emloft.net>,
        Jason Gunthorpe <jgg@...lanox.com>,
        Doug Ledford <dledford@...hat.com>
CC:     Michael Chan <michael.chan@...adcom.com>,
        Andy Gospodarek <andy@...yhouse.net>,
        Tal Gilboa <talgi@...lanox.com>,
        "linux-rdma@...r.kernel.org" <linux-rdma@...r.kernel.org>,
        "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        Yamin Friedman <yaminf@...lanox.com>,
        Max Gurtovoy <maxg@...lanox.com>,
        Saeed Mahameed <saeedm@...lanox.com>
Subject: [for-next 8/9] linux/dim: Implement rdma_dim

From: Yamin Friedman <yaminf@...lanox.com>

rdma_dim implements a different algorithm from net_dim and is based on
completions, which is how interrupt moderation can be implemented in RDMA.
The algorithm optimizes for the number of completions and the ratio
between completions and events.
It also has a feature that quickly reduces the moderation level, to
avoid long latencies, when the traffic changes in such a way that high
moderation is no longer required.

rdma_dim(), declared in rdma_dim.h, will be called from the ib_core module.

Signed-off-by: Yamin Friedman <yaminf@...lanox.com>
Reviewed-by: Max Gurtovoy <maxg@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
 MAINTAINERS              |   1 +
 include/linux/rdma_dim.h |  28 +++++++
 lib/dim/Makefile         |   7 +-
 lib/dim/rdma_dim.c       | 162 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/rdma_dim.h
 create mode 100644 lib/dim/rdma_dim.c

diff --git a/MAINTAINERS b/MAINTAINERS
index cb621d5cf223..86e4698ab390 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5589,6 +5589,7 @@ DYNAMIC INTERRUPT MODERATION
 M:	Tal Gilboa <talgi@...lanox.com>
 S:	Maintained
 F:	include/linux/net_dim.h
+F:	include/linux/rdma_dim.h
 F:	include/linux/dim.h
 F:	lib/dim/
 
diff --git a/include/linux/rdma_dim.h b/include/linux/rdma_dim.h
new file mode 100644
index 000000000000..0623ea5a1e78
--- /dev/null
+++ b/include/linux/rdma_dim.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef RDMA_DIM_H
+#define RDMA_DIM_H
+
+#include <linux/module.h>
+#include <linux/dim.h>
+
+#define RDMA_DIM_PARAMS_NUM_PROFILES 9
+#define RDMA_DIM_START_PROFILE 0
+
+static const struct dim_cq_moder /* one entry per rdma_dim profile */
+rdma_dim_prof[RDMA_DIM_PARAMS_NUM_PROFILES] = {
+	{1,   0, 1,  0},
+	{1,   0, 4,  0},
+	{2,   0, 4,  0},
+	{2,   0, 8,  0},
+	{4,   0, 8,  0},
+	{16,  0, 8,  0},
+	{16,  0, 16, 0},
+	{32,  0, 16, 0},
+	{32,  0, 32, 0},
+};
+
+void rdma_dim(struct dim *dim, u64 completions);
+
+#endif /* RDMA_DIM_H */
diff --git a/lib/dim/Makefile b/lib/dim/Makefile
index 160afe288df0..73ddd0c64661 100644
--- a/lib/dim/Makefile
+++ b/lib/dim/Makefile
@@ -2,8 +2,13 @@
 # DIM Dynamic Interrupt Moderation library
 #
 
-obj-$(CONFIG_DIMLIB) = net_dim.o
+obj-$(CONFIG_DIMLIB) += net_dim.o
+obj-$(CONFIG_DIMLIB) += rdma_dim.o
 
 net_dim-y = \
 	dim.o		\
 	net_dim.o
+
+rdma_dim-y = \
+	dim.o		\
+	rdma_dim.o
diff --git a/lib/dim/rdma_dim.c b/lib/dim/rdma_dim.c
new file mode 100644
index 000000000000..503881ec5614
--- /dev/null
+++ b/lib/dim/rdma_dim.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <linux/rdma_dim.h>
+
+/**
+ * rdma_dim_step - Moves the moderation profile one step.
+ * @dim: The moderation struct.
+ *
+ * Moves @dim->profile_ix one step in the direction of @dim->tune_state.
+ * The parking states do not move and still report DIM_STEPPED.
+ * Return: DIM_ON_EDGE, without moving, if already at the edge; else DIM_STEPPED.
+ */
+static int rdma_dim_step(struct dim *dim)
+{
+	switch (dim->tune_state) {
+	case DIM_PARKING_ON_TOP:
+		/* fall through */
+	case DIM_PARKING_TIRED:
+		break;
+	case DIM_GOING_RIGHT:
+		if (dim->profile_ix == (RDMA_DIM_PARAMS_NUM_PROFILES - 1))
+			return DIM_ON_EDGE;
+		dim->profile_ix++;
+		dim->steps_right++;
+		break;
+	case DIM_GOING_LEFT:
+		if (dim->profile_ix == 0)
+			return DIM_ON_EDGE;
+		dim->profile_ix--;
+		dim->steps_left++;
+		break;
+	}
+
+	return DIM_STEPPED;
+}
+
+/**
+ * rdma_dim_stats_compare - Compares the current stats to the previous stats.
+ * @curr: The current dim stats.
+ * @prev: The previous dim stats.
+ *
+ * Compares cpms first; only if that difference is not significant (per
+ * IS_SIGNIFICANT_DIFF) is cpe_ratio compared the same way.
+ * Return: DIM_STATS_BETTER or DIM_STATS_WORSE when the deciding value rose or
+ * fell; DIM_STATS_SAME if neither differs or @prev->cpms is zero (first stat).
+ */
+static int rdma_dim_stats_compare(struct dim_stats *curr,
+				  struct dim_stats *prev)
+{
+	/* first stat */
+	if (!prev->cpms)
+		return DIM_STATS_SAME;
+
+	if (IS_SIGNIFICANT_DIFF(curr->cpms, prev->cpms))
+		return (curr->cpms > prev->cpms) ? DIM_STATS_BETTER :
+						DIM_STATS_WORSE;
+
+	if (IS_SIGNIFICANT_DIFF(curr->cpe_ratio, prev->cpe_ratio))
+		return (curr->cpe_ratio > prev->cpe_ratio) ? DIM_STATS_BETTER :
+						DIM_STATS_WORSE;
+
+	return DIM_STATS_SAME;
+}
+
+/**
+ * rdma_dim_decision - Decides the next moderation level.
+ * @curr_stats: The current dim stats; saved as @dim->prev_stats on exit.
+ * @dim: The moderation struct.
+ *
+ * While going left or right: worse stats turn (dim_turn) then step, better
+ * stats step, unchanged stats with cpe_ratio <= 50 * profile_ix reset the
+ * profile to 0 to keep latency low. Return: true if profile_ix changed.
+ */
+static bool rdma_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
+{
+	int prev_ix = dim->profile_ix;
+	int stats_res;
+	int step_res;
+
+	switch (dim->tune_state) {
+	case DIM_PARKING_ON_TOP:
+		/* fall through */
+	case DIM_PARKING_TIRED:
+		break;
+	case DIM_GOING_RIGHT:
+		/* fall through */
+	case DIM_GOING_LEFT:
+		stats_res = rdma_dim_stats_compare(curr_stats,
+						   &dim->prev_stats);
+
+		switch (stats_res) {
+		case DIM_STATS_SAME:
+			if (curr_stats->cpe_ratio <= 50 * prev_ix)
+				dim->profile_ix = 0;
+			break;
+		case DIM_STATS_WORSE:
+			dim_turn(dim);
+			/* fall through */
+		case DIM_STATS_BETTER:
+			step_res = rdma_dim_step(dim);
+			if (step_res == DIM_ON_EDGE)
+				dim_turn(dim);
+			break;
+		}
+		break;
+	}
+
+	dim->prev_stats = *curr_stats;
+
+	return dim->profile_ix != prev_ix;
+}
+
+/**
+ * rdma_dim - Runs the adaptive moderation.
+ * @dim: The moderation struct.
+ * @completions: The number of completions collected in this round.
+ *
+ * Each call adds @completions to the measuring sample and counts one new
+ * event. Once DIM_NEVENTS events have passed since the start sample, the
+ * stats are calculated and a decision is made; a changed profile schedules
+ * @dim->work, otherwise a new measurement starts from the current sample.
+ */
+void rdma_dim(struct dim *dim, u64 completions)
+{
+	struct dim_stats curr_stats;
+	u32 nevents;
+	struct dim_sample *curr_sample = &dim->measuring_sample;
+
+	dim_update_sample_with_comps(curr_sample->event_ctr + 1,
+				     curr_sample->pkt_ctr,
+				     curr_sample->byte_ctr,
+				     curr_sample->comp_ctr + completions,
+				     &dim->measuring_sample);
+
+	switch (dim->state) {
+	case DIM_MEASURE_IN_PROGRESS:
+		nevents = curr_sample->event_ctr - dim->start_sample.event_ctr;
+		if (nevents < DIM_NEVENTS)
+			break;
+		dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats);
+		if (rdma_dim_decision(&curr_stats, dim)) {
+			dim->state = DIM_APPLY_NEW_PROFILE;
+			schedule_work(&dim->work);
+			break;
+		}
+		/* fall through */
+	case DIM_START_MEASURE:
+		dim->state = DIM_MEASURE_IN_PROGRESS;
+		dim_update_sample_with_comps(curr_sample->event_ctr,
+					     curr_sample->pkt_ctr,
+					     curr_sample->byte_ctr,
+					     curr_sample->comp_ctr,
+					     &dim->start_sample);
+		break;
+	case DIM_APPLY_NEW_PROFILE:
+		break;
+	}
+}
+EXPORT_SYMBOL(rdma_dim);
-- 
2.21.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ