lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220106160652.821176-8-idosch@nvidia.com>
Date:   Thu,  6 Jan 2022 18:06:51 +0200
From:   Ido Schimmel <idosch@...dia.com>
To:     netdev@...r.kernel.org
Cc:     davem@...emloft.net, kuba@...nel.org, petrm@...dia.com,
        amcohen@...dia.com, mlxsw@...dia.com,
        Ido Schimmel <idosch@...dia.com>
Subject: [PATCH net-next 7/8] mlxsw: spectrum_acl_bloom_filter: Add support for Spectrum-4 calculation

From: Amit Cohen <amcohen@...dia.com>

Spectrum-4 will calculate hash function for bloom filter differently
from the existing ASICs.

First, two hash functions will be used to calculate 16 bits result.
The final result will be combination of the two results - 6 bits which
are result of CRC-6 will be used as MSB and 10 bits which are result of
CRC-10 will be used as LSB.

Second, while in Spectrum{2,3}, there is a padding in each chunk, so the
chunks use a sequence of whole bytes, in Spectrum-4 there is no padding,
so each chunk use 20 bytes minus 2 bits, so it is necessary to align the
chunks to be without holes.

Add dedicated 'mlxsw_sp_acl_bf_ops' for Spectrum-4 and add the required
tables for CRC calculations.

All the details are documented as part of the code for future use.

Signed-off-by: Amit Cohen <amcohen@...dia.com>
Reviewed-by: Petr Machata <petrm@...dia.com>
Signed-off-by: Ido Schimmel <idosch@...dia.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.h    |   1 +
 .../mlxsw/spectrum_acl_bloom_filter.c         | 267 ++++++++++++++++--
 2 files changed, 252 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index e7da6c83c442..bb2442e1f705 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -1111,6 +1111,7 @@ extern const struct mlxsw_afk_ops mlxsw_sp4_afk_ops;
 
 /* spectrum_acl_bloom_filter.c */
 extern const struct mlxsw_sp_acl_bf_ops mlxsw_sp2_acl_bf_ops;
+extern const struct mlxsw_sp_acl_bf_ops mlxsw_sp4_acl_bf_ops;
 
 /* spectrum_matchall.c */
 struct mlxsw_sp_mall_ops {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
index c6dab9615a0a..e2aced7ab454 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
@@ -17,9 +17,9 @@ struct mlxsw_sp_acl_bf {
 };
 
 /* Bloom filter uses a crc-16 hash over chunks of data which contain 4 key
- * blocks, eRP ID and region ID. In Spectrum-2, region key is combined of up to
- * 12 key blocks, so there can be up to 3 chunks in the Bloom filter key,
- * depending on the actual number of key blocks used in the region.
+ * blocks, eRP ID and region ID. In Spectrum-2 and above, region key is combined
+ * of up to 12 key blocks, so there can be up to 3 chunks in the Bloom filter
+ * key, depending on the actual number of key blocks used in the region.
  * The layout of the Bloom filter key is as follows:
  *
  * +-------------------------+------------------------+------------------------+
@@ -27,6 +27,8 @@ struct mlxsw_sp_acl_bf {
  * +-------------------------+------------------------+------------------------+
  */
 #define MLXSW_BLOOM_KEY_CHUNKS 3
+
+/* Spectrum-2 and Spectrum-3 chunks */
 #define MLXSW_SP2_BLOOM_KEY_LEN 69
 
 /* Each chunk size is 23 bytes. 18 bytes of it contain 4 key blocks, each is
@@ -51,19 +53,9 @@ struct mlxsw_sp_acl_bf {
  */
 #define MLXSW_SP2_BLOOM_CHUNK_KEY_OFFSET 5
 
-/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8,
- * and we need to populate it with 4 key blocks copied from the entry encoded
- * key. Since the encoded key contains a padding, key block 11 starts at offset
- * 2. block 7 that is used in chunk 1 starts at offset 20 as 4 key blocks take
- * 18 bytes.
- * This array defines key offsets for easy access when copying key blocks from
- * entry key to Bloom filter chunk.
- */
-static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38};
-
-/* This table is just the CRC of each possible byte. It is
- * computed, Msbit first, for the Bloom filter polynomial
- * which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and
+/* This table is just the CRC of each possible byte which is used for
+ * Spectrum-{2-3}. It is computed, Msbit first, for the Bloom filter
+ * polynomial which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and
  * the implicit x^16).
  */
 static const u16 mlxsw_sp2_acl_bf_crc16_tab[256] = {
@@ -101,6 +93,127 @@ static const u16 mlxsw_sp2_acl_bf_crc16_tab[256] = {
 0x0c4c, 0x8965, 0x8337, 0x061e, 0x9793, 0x12ba, 0x18e8, 0x9dc1,
 };
 
+/* Spectrum-4 chunks */
+#define MLXSW_SP4_BLOOM_KEY_LEN 60
+
+/* In Spectrum-4, there is no padding. Each chunk size is 20 bytes.
+ * 18 bytes of it contain 4 key blocks, each is 36 bits, and 2 bytes which hold
+ * eRP ID and region ID.
+ * The layout of each chunk is as follows:
+ *
+ * +----------------------+-----------------------------------+
+ * |        2 bytes       |              18 bytes             |
+ * +-----------+----------+-----------------------------------+
+ * |  157:148  | 147:144  |               143:0               |
+ * +---------+-----------+----------+-------------------------+
+ * | region ID |  eRP ID  |      4 Key blocks (18 Bytes)      |
+ * +-----------+----------+-----------------------------------+
+ */
+
+#define MLXSW_SP4_BLOOM_CHUNK_PAD_BYTES 0
+#define MLXSW_SP4_BLOOM_CHUNK_KEY_BYTES 18
+#define MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES 20
+
+/* The offset of the key block within a chunk is 2 bytes as it comes after
+ * 16 bits of region ID and eRP ID.
+ */
+#define MLXSW_SP4_BLOOM_CHUNK_KEY_OFFSET 2
+
+/* For Spectrum-4, two hash functions are used, CRC-10 and CRC-6 based.
+ * The result is combination of the two calculations -
+ * 6 bit column are MSB (result of CRC-6),
+ * 10 bit row are LSB (result of CRC-10).
+ */
+
+/* This table is just the CRC of each possible byte which is used for
+ * Spectrum-4. It is computed, Msbit first, for the Bloom filter
+ * polynomial which is 0x1b (1 + x^1 + x^3 + x^4 and the implicit x^10).
+ */
+static const u16 mlxsw_sp4_acl_bf_crc10_tab[256] = {
+0x0000, 0x001b, 0x0036, 0x002d, 0x006c, 0x0077, 0x005a, 0x0041,
+0x00d8, 0x00c3, 0x00ee, 0x00f5, 0x00b4, 0x00af, 0x0082, 0x0099,
+0x01b0, 0x01ab, 0x0186, 0x019d, 0x01dc, 0x01c7, 0x01ea, 0x01f1,
+0x0168, 0x0173, 0x015e, 0x0145, 0x0104, 0x011f, 0x0132, 0x0129,
+0x0360, 0x037b, 0x0356, 0x034d, 0x030c, 0x0317, 0x033a, 0x0321,
+0x03b8, 0x03a3, 0x038e, 0x0395, 0x03d4, 0x03cf, 0x03e2, 0x03f9,
+0x02d0, 0x02cb, 0x02e6, 0x02fd, 0x02bc, 0x02a7, 0x028a, 0x0291,
+0x0208, 0x0213, 0x023e, 0x0225, 0x0264, 0x027f, 0x0252, 0x0249,
+0x02db, 0x02c0, 0x02ed, 0x02f6, 0x02b7, 0x02ac, 0x0281, 0x029a,
+0x0203, 0x0218, 0x0235, 0x022e, 0x026f, 0x0274, 0x0259, 0x0242,
+0x036b, 0x0370, 0x035d, 0x0346, 0x0307, 0x031c, 0x0331, 0x032a,
+0x03b3, 0x03a8, 0x0385, 0x039e, 0x03df, 0x03c4, 0x03e9, 0x03f2,
+0x01bb, 0x01a0, 0x018d, 0x0196, 0x01d7, 0x01cc, 0x01e1, 0x01fa,
+0x0163, 0x0178, 0x0155, 0x014e, 0x010f, 0x0114, 0x0139, 0x0122,
+0x000b, 0x0010, 0x003d, 0x0026, 0x0067, 0x007c, 0x0051, 0x004a,
+0x00d3, 0x00c8, 0x00e5, 0x00fe, 0x00bf, 0x00a4, 0x0089, 0x0092,
+0x01ad, 0x01b6, 0x019b, 0x0180, 0x01c1, 0x01da, 0x01f7, 0x01ec,
+0x0175, 0x016e, 0x0143, 0x0158, 0x0119, 0x0102, 0x012f, 0x0134,
+0x001d, 0x0006, 0x002b, 0x0030, 0x0071, 0x006a, 0x0047, 0x005c,
+0x00c5, 0x00de, 0x00f3, 0x00e8, 0x00a9, 0x00b2, 0x009f, 0x0084,
+0x02cd, 0x02d6, 0x02fb, 0x02e0, 0x02a1, 0x02ba, 0x0297, 0x028c,
+0x0215, 0x020e, 0x0223, 0x0238, 0x0279, 0x0262, 0x024f, 0x0254,
+0x037d, 0x0366, 0x034b, 0x0350, 0x0311, 0x030a, 0x0327, 0x033c,
+0x03a5, 0x03be, 0x0393, 0x0388, 0x03c9, 0x03d2, 0x03ff, 0x03e4,
+0x0376, 0x036d, 0x0340, 0x035b, 0x031a, 0x0301, 0x032c, 0x0337,
+0x03ae, 0x03b5, 0x0398, 0x0383, 0x03c2, 0x03d9, 0x03f4, 0x03ef,
+0x02c6, 0x02dd, 0x02f0, 0x02eb, 0x02aa, 0x02b1, 0x029c, 0x0287,
+0x021e, 0x0205, 0x0228, 0x0233, 0x0272, 0x0269, 0x0244, 0x025f,
+0x0016, 0x000d, 0x0020, 0x003b, 0x007a, 0x0061, 0x004c, 0x0057,
+0x00ce, 0x00d5, 0x00f8, 0x00e3, 0x00a2, 0x00b9, 0x0094, 0x008f,
+0x01a6, 0x01bd, 0x0190, 0x018b, 0x01ca, 0x01d1, 0x01fc, 0x01e7,
+0x017e, 0x0165, 0x0148, 0x0153, 0x0112, 0x0109, 0x0124, 0x013f,
+};
+
+/* This table is just the CRC of each possible byte which is used for
+ * Spectrum-4. It is computed, Msbit first, for the Bloom filter
+ * polynomial which is 0x2d (1 + x^2+ x^3 + x^5 and the implicit x^6).
+ */
+static const u8 mlxsw_sp4_acl_bf_crc6_tab[256] = {
+0x00, 0x2d, 0x37, 0x1a, 0x03, 0x2e, 0x34, 0x19,
+0x06, 0x2b, 0x31, 0x1c, 0x05, 0x28, 0x32, 0x1f,
+0x0c, 0x21, 0x3b, 0x16, 0x0f, 0x22, 0x38, 0x15,
+0x0a, 0x27, 0x3d, 0x10, 0x09, 0x24, 0x3e, 0x13,
+0x18, 0x35, 0x2f, 0x02, 0x1b, 0x36, 0x2c, 0x01,
+0x1e, 0x33, 0x29, 0x04, 0x1d, 0x30, 0x2a, 0x07,
+0x14, 0x39, 0x23, 0x0e, 0x17, 0x3a, 0x20, 0x0d,
+0x12, 0x3f, 0x25, 0x08, 0x11, 0x3c, 0x26, 0x0b,
+0x30, 0x1d, 0x07, 0x2a, 0x33, 0x1e, 0x04, 0x29,
+0x36, 0x1b, 0x01, 0x2c, 0x35, 0x18, 0x02, 0x2f,
+0x3c, 0x11, 0x0b, 0x26, 0x3f, 0x12, 0x08, 0x25,
+0x3a, 0x17, 0x0d, 0x20, 0x39, 0x14, 0x0e, 0x23,
+0x28, 0x05, 0x1f, 0x32, 0x2b, 0x06, 0x1c, 0x31,
+0x2e, 0x03, 0x19, 0x34, 0x2d, 0x00, 0x1a, 0x37,
+0x24, 0x09, 0x13, 0x3e, 0x27, 0x0a, 0x10, 0x3d,
+0x22, 0x0f, 0x15, 0x38, 0x21, 0x0c, 0x16, 0x3b,
+0x0d, 0x20, 0x3a, 0x17, 0x0e, 0x23, 0x39, 0x14,
+0x0b, 0x26, 0x3c, 0x11, 0x08, 0x25, 0x3f, 0x12,
+0x01, 0x2c, 0x36, 0x1b, 0x02, 0x2f, 0x35, 0x18,
+0x07, 0x2a, 0x30, 0x1d, 0x04, 0x29, 0x33, 0x1e,
+0x15, 0x38, 0x22, 0x0f, 0x16, 0x3b, 0x21, 0x0c,
+0x13, 0x3e, 0x24, 0x09, 0x10, 0x3d, 0x27, 0x0a,
+0x19, 0x34, 0x2e, 0x03, 0x1a, 0x37, 0x2d, 0x00,
+0x1f, 0x32, 0x28, 0x05, 0x1c, 0x31, 0x2b, 0x06,
+0x3d, 0x10, 0x0a, 0x27, 0x3e, 0x13, 0x09, 0x24,
+0x3b, 0x16, 0x0c, 0x21, 0x38, 0x15, 0x0f, 0x22,
+0x31, 0x1c, 0x06, 0x2b, 0x32, 0x1f, 0x05, 0x28,
+0x37, 0x1a, 0x00, 0x2d, 0x34, 0x19, 0x03, 0x2e,
+0x25, 0x08, 0x12, 0x3f, 0x26, 0x0b, 0x11, 0x3c,
+0x23, 0x0e, 0x14, 0x39, 0x20, 0x0d, 0x17, 0x3a,
+0x29, 0x04, 0x1e, 0x33, 0x2a, 0x07, 0x1d, 0x30,
+0x2f, 0x02, 0x18, 0x35, 0x2c, 0x01, 0x1b, 0x36,
+};
+
+/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8,
+ * and we need to populate it with 4 key blocks copied from the entry encoded
+ * key. The original keys layout is same for Spectrum-{2,3,4}.
+ * Since the encoded key contains a 2 bytes padding, key block 11 starts at
+ * offset 2. block 7 that is used in chunk 1 starts at offset 20 as 4 key blocks
+ * take 18 bytes. See 'MLXSW_SP2_AFK_BLOCK_LAYOUT' for more details.
+ * This array defines key offsets for easy access when copying key blocks from
+ * entry key to Bloom filter chunk.
+ */
+static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38};
+
 static u16 mlxsw_sp2_acl_bf_crc16_byte(u16 crc, u8 c)
 {
 	return (crc << 8) ^ mlxsw_sp2_acl_bf_crc16_tab[(crc >> 8) ^ c];
@@ -168,6 +281,124 @@ mlxsw_sp2_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf,
 	return mlxsw_sp2_acl_bf_crc(bf_key, bf_size);
 }
 
+static u16 mlxsw_sp4_acl_bf_crc10_byte(u16 crc, u8 c)
+{
+	u8 index = ((crc >> 2) ^ c) & 0xff;
+
+	return ((crc << 8) ^ mlxsw_sp4_acl_bf_crc10_tab[index]) & 0x3ff;
+}
+
+static u16 mlxsw_sp4_acl_bf_crc6_byte(u16 crc, u8 c)
+{
+	u8 index = (crc ^ c) & 0xff;
+
+	return ((crc << 6) ^ (mlxsw_sp4_acl_bf_crc6_tab[index] << 2)) & 0xfc;
+}
+
+static u16 mlxsw_sp4_acl_bf_crc(const u8 *buffer, size_t len)
+{
+	u16 crc_row = 0, crc_col = 0;
+
+	while (len--) {
+		crc_row = mlxsw_sp4_acl_bf_crc10_byte(crc_row, *buffer);
+		crc_col = mlxsw_sp4_acl_bf_crc6_byte(crc_col, *buffer);
+		buffer++;
+	}
+
+	crc_col >>= 2;
+
+	/* 6 bit column are MSB, 10 bit row are LSB */
+	return (crc_col << 10) | crc_row;
+}
+
+static void right_shift_array(char *arr, u8 len, u8 shift_bits)
+{
+	u8 byte_mask = 0xff >> shift_bits;
+	int i;
+
+	if (WARN_ON(!shift_bits || shift_bits >= 8))
+		return;
+
+	for (i = len - 1; i >= 0; i--) {
+		/* The first iteration looks like out-of-bounds access,
+		 * but actually references a buffer that the array is shifted
+		 * into. This move is legal as we never send the last chunk to
+		 * this function.
+		 */
+		arr[i + 1] &= byte_mask;
+		arr[i + 1] |= arr[i] << (8 - shift_bits);
+		arr[i] = arr[i] >> shift_bits;
+	}
+}
+
+static void mlxsw_sp4_bf_key_shift_chunks(u8 chunk_count, char *output)
+{
+	/* The chunks are suppoosed to be continuous, with no padding.
+	 * Since region ID and eRP ID use 14 bits, and not fully 2 bytes,
+	 * and in Spectrum-4 there is no padding, it is necessary to shift some
+	 * chunks 2 bits right.
+	 */
+	switch (chunk_count) {
+	case 2:
+		/* The chunks are copied as follow:
+		 * +-------------+-----------------+
+		 * | Chunk 0     |   Chunk 1       |
+		 * | IDs  | keys |(**) IDs  | keys |
+		 * +-------------+-----------------+
+		 * In (**), there are two unused bits, therefore, chunk 0 needs
+		 * to be shifted two bits right.
+		 */
+		right_shift_array(output, MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 2);
+		break;
+	case 3:
+		/* The chunks are copied as follow:
+		 * +-------------+-----------------+-----------------+
+		 * | Chunk 0     |   Chunk 1       |   Chunk 2       |
+		 * | IDs  | keys |(**) IDs  | keys |(**) IDs  | keys |
+		 * +-------------+-----------------+-----------------+
+		 * In (**), there are two unused bits, therefore, chunk 1 needs
+		 * to be shifted two bits right and chunk 0 needs to be shifted
+		 * four bits right.
+		 */
+		right_shift_array(output + MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES,
+				  MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 2);
+		right_shift_array(output, MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 4);
+		break;
+	default:
+		WARN_ON(chunk_count > MLXSW_BLOOM_KEY_CHUNKS);
+	}
+}
+
+static void
+mlxsw_sp4_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion,
+			    struct mlxsw_sp_acl_atcam_entry *aentry,
+			    char *output, u8 *len)
+{
+	struct mlxsw_afk_key_info *key_info = aregion->region->key_info;
+	u8 block_count = mlxsw_afk_key_info_blocks_count_get(key_info);
+	u8 chunk_count = 1 + ((block_count - 1) >> 2);
+
+	__mlxsw_sp_acl_bf_key_encode(aregion, aentry, output, len,
+				     MLXSW_BLOOM_KEY_CHUNKS,
+				     MLXSW_SP4_BLOOM_CHUNK_PAD_BYTES,
+				     MLXSW_SP4_BLOOM_CHUNK_KEY_OFFSET,
+				     MLXSW_SP4_BLOOM_CHUNK_KEY_BYTES,
+				     MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES);
+	mlxsw_sp4_bf_key_shift_chunks(chunk_count, output);
+}
+
+static unsigned int
+mlxsw_sp4_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf,
+			   struct mlxsw_sp_acl_atcam_region *aregion,
+			   struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+	char bf_key[MLXSW_SP4_BLOOM_KEY_LEN] = {};
+	u8 bf_size;
+
+	mlxsw_sp4_acl_bf_key_encode(aregion, aentry, bf_key, &bf_size);
+	return mlxsw_sp4_acl_bf_crc(bf_key, bf_size);
+}
+
 static unsigned int
 mlxsw_sp_acl_bf_rule_count_index_get(struct mlxsw_sp_acl_bf *bf,
 				     unsigned int erp_bank,
@@ -285,3 +516,7 @@ void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf)
 const struct mlxsw_sp_acl_bf_ops mlxsw_sp2_acl_bf_ops = {
 	.index_get = mlxsw_sp2_acl_bf_index_get,
 };
+
+const struct mlxsw_sp_acl_bf_ops mlxsw_sp4_acl_bf_ops = {
+	.index_get = mlxsw_sp4_acl_bf_index_get,
+};
-- 
2.33.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ