lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <676630bb-5000-49ba-ba1b-7a9a6e538e5e@amd.com>
Date: Tue, 2 Apr 2024 23:56:17 -0400
From: Yazen Ghannam <yazen.ghannam@....com>
To: John Allen <john.allen@....com>, bp@...en8.de,
 linux-edac@...r.kernel.org, tony.luck@...el.com
Cc: yazen.ghannam@....com, linux-kernel@...r.kernel.org,
 avadhut.naik@....com, muralidhara.mk@....com
Subject: Re: [PATCH v2 4/4] RAS/AMD/ATL: Implement DF 4.5 NP2 denormalization



On 3/27/24 14:52, John Allen wrote:
> Unlike with previous Data Fabric versions, with Data Fabric 4.5, there

Highlight that this is just for non-power-of-2 cases.

> are bits of the system physical address that can't be reconstructed from

"...can't be fully reconstructed..."

> the normalized address. Using NPS0_24CHAN_1K_HASH as an example, the
> normalized address consists of bits [63:13] (divided by 3), bits
> [11:10], and bits [7:0] of the system physical address.
> 
> In this case, the remainder from the divide by 3 and bits 8, 9, and 12

I think an example like this is best kept in the code comments. And the
commit message can have a more high-level description.

"Some bits will be indeterminate, i.e., they will have multiple possible
solutions."

> are missing. To determine the proper combination of missing system
> physical address bits, iterate through each possible combination of
> these bits, normalize the resulting system physical address, and compare
> to the original address that is being translated. If the addresses
> match, then the correct permutation of bits has been found.
> 
> Signed-off-by: John Allen <john.allen@....com>
> ---
> v2:
>    - Move map validation to patch 3/4.
> ---
>   drivers/ras/amd/atl/denormalize.c | 530 ++++++++++++++++++++++++++++++
>   drivers/ras/amd/atl/internal.h    |  40 +++
>   2 files changed, 570 insertions(+)
> 
> diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c
> index e279224288d6..b03bba851e14 100644
> --- a/drivers/ras/amd/atl/denormalize.c
> +++ b/drivers/ras/amd/atl/denormalize.c
> @@ -448,6 +448,105 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
>   	return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id;
>   }
>   
> +static u64 get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx *ctx,
> +							struct df4p5_denorm_ctx *denorm_ctx)
> +{
> +	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T;
> +	bool hash_pa8, hash_pa9, hash_pa12, hash_pa13;
> +	u64 cs_id = 0;
> +
> +	hash_ctl_64k	= FIELD_GET(DF4_HASH_CTL_64K,  ctx->map.ctl);
> +	hash_ctl_2M	= FIELD_GET(DF4_HASH_CTL_2M,   ctx->map.ctl);
> +	hash_ctl_1G	= FIELD_GET(DF4_HASH_CTL_1G,   ctx->map.ctl);
> +	hash_ctl_1T	= FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
> +
> +	hash_pa8  = FIELD_GET(BIT_ULL(8),  denorm_ctx->current_spa);
> +	hash_pa8 ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
> +	hash_pa8 ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
> +	hash_pa8 ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
> +	hash_pa8 ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
> +	hash_pa8 ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> +	hash_pa9  = FIELD_GET(BIT_ULL(9),  denorm_ctx->current_spa);
> +	hash_pa9 ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
> +	hash_pa9 ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
> +	hash_pa9 ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
> +	hash_pa9 ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> +	hash_pa12  = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
> +	hash_pa12 ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
> +	hash_pa12 ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
> +	hash_pa12 ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
> +	hash_pa12 ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> +	hash_pa13  = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
> +	hash_pa13 ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
> +	hash_pa13 ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
> +	hash_pa13 ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
> +	hash_pa13 ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> +	switch (ctx->map.intlv_mode) {
> +	case DF4p5_NPS0_24CHAN_1K_HASH:
> +		cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
> +		cs_id %= denorm_ctx->mod_value;
> +		cs_id <<= 2;
> +		cs_id |= (hash_pa9 | (hash_pa12 << 1));
> +		cs_id |= hash_pa8 << df_cfg.socket_id_shift;
> +		break;
> +	case DF4p5_NPS0_24CHAN_2K_HASH:
> +		cs_id = FIELD_GET(GENMASK_ULL(63, 14), denorm_ctx->current_spa) << 4;
> +		cs_id %= denorm_ctx->mod_value;
> +		cs_id <<= 2;
> +		cs_id |= (hash_pa12 | (hash_pa13 << 1));
> +		cs_id |= hash_pa8 << df_cfg.socket_id_shift;
> +		break;
> +	case DF4p5_NPS1_12CHAN_1K_HASH:
> +		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
> +		cs_id %= denorm_ctx->mod_value;
> +		cs_id <<= 2;
> +		cs_id |= (hash_pa8 | (hash_pa9 << 1));
> +		break;
> +	case DF4p5_NPS1_12CHAN_2K_HASH:
> +		cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
> +		cs_id %= denorm_ctx->mod_value;
> +		cs_id <<= 2;
> +		cs_id |= (hash_pa8 | (hash_pa12 << 1));
> +		break;
> +	case DF4p5_NPS2_6CHAN_1K_HASH:
> +	case DF4p5_NPS1_10CHAN_1K_HASH:
> +		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
> +		cs_id |= (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) << 1);
> +		cs_id %= denorm_ctx->mod_value;
> +		cs_id <<= 1;
> +		cs_id |= hash_pa8;
> +		break;
> +	case DF4p5_NPS2_6CHAN_2K_HASH:
> +	case DF4p5_NPS1_10CHAN_2K_HASH:
> +		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
> +		cs_id %= denorm_ctx->mod_value;
> +		cs_id <<= 1;
> +		cs_id |= hash_pa8;
> +		break;
> +	case DF4p5_NPS4_3CHAN_1K_HASH:
> +	case DF4p5_NPS2_5CHAN_1K_HASH:
> +		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
> +		cs_id |= FIELD_GET(GENMASK_ULL(9, 8), denorm_ctx->current_spa);
> +		cs_id %= denorm_ctx->mod_value;
> +		break;
> +	case DF4p5_NPS4_3CHAN_2K_HASH:
> +	case DF4p5_NPS2_5CHAN_2K_HASH:
> +		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
> +		cs_id |= FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) << 1;
> +		cs_id %= denorm_ctx->mod_value;
> +		break;
> +	default:
> +		atl_debug_on_bad_intlv_mode(ctx);
> +		return 0;
> +	}
> +

Using u64 type for cs_id makes sense to handle all the bit operations
above. But the final value should fit within a u16.

So I think this function should return a u16. And there should be a
final check here to make sure that temporary u64 value fits within a
u16. Otherwise, the math above could have a bug.

> +	return cs_id;
> +}
> +
>   static int denorm_addr_common(struct addr_ctx *ctx)
>   {
>   	u64 denorm_addr;
> @@ -699,6 +798,424 @@ static int denorm_addr_df4_np2(struct addr_ctx *ctx)
>   	return 0;
>   }
>   
> +static u64 normalize_addr_df4p5_np2(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx,
> +				    u64 addr)
> +{
> +	u64 temp_addr_a, temp_addr_b;
> +
> +	temp_addr_a = 0;
> +	temp_addr_b = 0;

Initialize these when defining.

> +
> +	switch (ctx->map.intlv_mode) {
> +	case DF4p5_NPS0_24CHAN_1K_HASH:
> +	case DF4p5_NPS1_12CHAN_1K_HASH:
> +	case DF4p5_NPS2_6CHAN_1K_HASH:
> +	case DF4p5_NPS4_3CHAN_1K_HASH:
> +	case DF4p5_NPS1_10CHAN_1K_HASH:
> +	case DF4p5_NPS2_5CHAN_1K_HASH:
> +		temp_addr_a = FIELD_GET(GENMASK_ULL(11, 10), addr) << 8;
> +		break;
> +	case DF4p5_NPS0_24CHAN_2K_HASH:
> +	case DF4p5_NPS1_12CHAN_2K_HASH:
> +	case DF4p5_NPS2_6CHAN_2K_HASH:
> +	case DF4p5_NPS4_3CHAN_2K_HASH:
> +	case DF4p5_NPS1_10CHAN_2K_HASH:
> +	case DF4p5_NPS2_5CHAN_2K_HASH:
> +		temp_addr_a = FIELD_GET(GENMASK_ULL(11, 9), addr) << 8;
> +		break;
> +	default:
> +		atl_debug_on_bad_intlv_mode(ctx);
> +		return 0;
> +	}
> +
> +	switch (ctx->map.intlv_mode) {
> +	case DF4p5_NPS0_24CHAN_1K_HASH:
> +		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
> +		temp_addr_b <<= 10;
> +		break;
> +	case DF4p5_NPS0_24CHAN_2K_HASH:
> +		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 14), addr) / denorm_ctx->mod_value;
> +		temp_addr_b <<= 11;
> +		break;
> +	case DF4p5_NPS1_12CHAN_1K_HASH:
> +		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
> +		temp_addr_b <<= 10;
> +		break;
> +	case DF4p5_NPS1_12CHAN_2K_HASH:
> +		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
> +		temp_addr_b <<= 11;
> +		break;
> +	case DF4p5_NPS2_6CHAN_1K_HASH:
> +	case DF4p5_NPS1_10CHAN_1K_HASH:
> +		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
> +		temp_addr_b |= FIELD_GET(BIT_ULL(9), addr);
> +		temp_addr_b /= denorm_ctx->mod_value;
> +		temp_addr_b <<= 10;
> +		break;
> +	case DF4p5_NPS2_6CHAN_2K_HASH:
> +	case DF4p5_NPS1_10CHAN_2K_HASH:
> +		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
> +		temp_addr_b <<= 11;
> +		break;
> +	case DF4p5_NPS4_3CHAN_1K_HASH:
> +	case DF4p5_NPS2_5CHAN_1K_HASH:
> +		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 2;
> +		temp_addr_b |= FIELD_GET(GENMASK_ULL(9, 8), addr);
> +		temp_addr_b /= denorm_ctx->mod_value;
> +		temp_addr_b <<= 10;
> +		break;
> +	case DF4p5_NPS4_3CHAN_2K_HASH:
> +	case DF4p5_NPS2_5CHAN_2K_HASH:
> +		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
> +		temp_addr_b |= FIELD_GET(BIT_ULL(8), addr);
> +		temp_addr_b /= denorm_ctx->mod_value;
> +		temp_addr_b <<= 11;
> +		break;
> +	default:
> +		atl_debug_on_bad_intlv_mode(ctx);
> +		return 0;
> +	}
> +
> +	return denorm_ctx->base_denorm_addr | temp_addr_a | temp_addr_b;
> +}
> +
> +static void recalculate_hashed_bits_df4p5_np2(struct addr_ctx *ctx,
> +					      struct df4p5_denorm_ctx *denorm_ctx)
> +{
> +	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T, hashed_bit;
> +
> +	if (!denorm_ctx->rehash_vector)
> +		return;
> +
> +	hash_ctl_64k	= FIELD_GET(DF4_HASH_CTL_64K,  ctx->map.ctl);
> +	hash_ctl_2M	= FIELD_GET(DF4_HASH_CTL_2M,   ctx->map.ctl);
> +	hash_ctl_1G	= FIELD_GET(DF4_HASH_CTL_1G,   ctx->map.ctl);
> +	hash_ctl_1T	= FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
> +
> +	if (denorm_ctx->rehash_vector & BIT_ULL(8)) {
> +		hashed_bit  = FIELD_GET(BIT_ULL(8),  denorm_ctx->current_spa);
> +		hashed_bit ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
> +		hashed_bit ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> +		if (FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) != hashed_bit)
> +			denorm_ctx->current_spa ^= BIT_ULL(8);
> +	}
> +
> +	if (denorm_ctx->rehash_vector & BIT_ULL(9)) {
> +		hashed_bit  = FIELD_GET(BIT_ULL(9),  denorm_ctx->current_spa);
> +		hashed_bit ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> +		if (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) != hashed_bit)
> +			denorm_ctx->current_spa ^= BIT_ULL(9);
> +	}
> +
> +	if (denorm_ctx->rehash_vector & BIT_ULL(12)) {
> +		hashed_bit  = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
> +		hashed_bit ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> +		if (FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa) != hashed_bit)
> +			denorm_ctx->current_spa ^= BIT_ULL(12);
> +	}
> +
> +	if (denorm_ctx->rehash_vector & BIT_ULL(13)) {
> +		hashed_bit  = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
> +		hashed_bit ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
> +		hashed_bit ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> +		if (FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa) != hashed_bit)
> +			denorm_ctx->current_spa ^= BIT_ULL(13);
> +	}
> +}
> +
> +static bool check_logical_coh_st_fabric_id(struct addr_ctx *ctx,

Rename check->match to highlight that the check condition is a match.

> +					   struct df4p5_denorm_ctx *denorm_ctx)
> +{
> +	unsigned int logical_coh_st_fabric_id;

Local variables don't need such descriptive names. Just use "id".

Also, denorm_ctx->coh_st_fabric_id is a u16. So the types should match
unless there's a reason to use something different.

> +
> +	/*
> +	 * The logical CS fabric ID of the permutation must be calculated from the
> +	 * current SPA with the base and with the MMIO hole.
> +	 */
> +	logical_coh_st_fabric_id = get_logical_coh_st_fabric_id_for_current_spa(ctx, denorm_ctx);
> +
> +	atl_debug(ctx, "Checking calculated logical coherent station fabric id:\n");
> +	atl_debug(ctx, "  calculated fabric id         = 0x%x\n", logical_coh_st_fabric_id);
> +	atl_debug(ctx, "  expected fabric id           = 0x%x\n", denorm_ctx->coh_st_fabric_id);
> +
> +	if (denorm_ctx->coh_st_fabric_id != logical_coh_st_fabric_id)
> +		return false;
> +
> +	return true;

	return denorm_ctx->coh_st_fabric_id == id;

> +}
> +
> +static bool check_norm_addr(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)

check->match

> +{
> +	u64 current_spa_without_base = remove_base_and_hole(ctx, denorm_ctx->current_spa);
> +	u64 norm_addr;

Just need one variable and reuse it.

	u64 addr = remove_base_and_hole();

> +
> +	/*
> +	 * The normalized address must be calculated with the current SPA without
> +	 * the base and without the MMIO hole.
> +	 */
> +	norm_addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, current_spa_without_base);

	addr = normalize...(..., addr);

> +
> +	atl_debug(ctx, "Checking calculated normalized address:\n");
> +	atl_debug(ctx, "  calculated normalized addr = 0x%016llx\n", norm_addr);
> +	atl_debug(ctx, "  expected normalized addr   = 0x%016llx\n", ctx->ret_addr);
> +
> +	if (norm_addr != ctx->ret_addr)
> +		return false;
> +
> +	return true;

	return ctx->ret_addr == addr;

> +}
> +
> +static int check_permutations(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
> +{
> +	u64 test_perm, temp_addr, denorm_addr, num_perms;
> +	unsigned int dropped_remainder;
> +
> +	denorm_ctx->div_addr *= denorm_ctx->mod_value;
> +
> +	/*
> +	 * The high order bits of num_permutations represent the permutations
> +	 * of the dropped remainder. This will be either 0-3 or 0-5 depending
> +	 * on the interleave mode. The low order bits represent the
> +	 * permutations of other "lost" bits which will be any combination of
> +	 * 1, 2, or 3 bits depending on the interleave mode.
> +	 */
> +	num_perms = denorm_ctx->mod_value << denorm_ctx->perm_shift;
> +
> +	for (test_perm = 0; test_perm < num_perms; test_perm++) {
> +		denorm_addr = denorm_ctx->base_denorm_addr;
> +		dropped_remainder = test_perm >> denorm_ctx->perm_shift;
> +		temp_addr = denorm_ctx->div_addr + dropped_remainder;
> +
> +		switch (ctx->map.intlv_mode) {
> +		case DF4p5_NPS0_24CHAN_2K_HASH:
> +			denorm_addr |= temp_addr << 14;
> +			break;
> +		case DF4p5_NPS0_24CHAN_1K_HASH:
> +		case DF4p5_NPS1_12CHAN_2K_HASH:
> +			denorm_addr |= temp_addr << 13;
> +			break;
> +		case DF4p5_NPS1_12CHAN_1K_HASH:
> +		case DF4p5_NPS2_6CHAN_2K_HASH:
> +		case DF4p5_NPS1_10CHAN_2K_HASH:
> +			denorm_addr |= temp_addr << 12;
> +			break;
> +		case DF4p5_NPS2_6CHAN_1K_HASH:
> +		case DF4p5_NPS1_10CHAN_1K_HASH:
> +			denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 9;
> +			denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
> +			break;
> +		case DF4p5_NPS4_3CHAN_1K_HASH:
> +		case DF4p5_NPS2_5CHAN_1K_HASH:
> +			denorm_addr |= FIELD_GET(GENMASK_ULL(1, 0), temp_addr) << 8;
> +			denorm_addr |= FIELD_GET(GENMASK_ULL(63, 2), (temp_addr)) << 12;
> +			break;
> +		case DF4p5_NPS4_3CHAN_2K_HASH:
> +		case DF4p5_NPS2_5CHAN_2K_HASH:
> +			denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 8;
> +			denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
> +			break;
> +		default:
> +			atl_debug_on_bad_intlv_mode(ctx);
> +			return -EINVAL;
> +		}
> +
> +		switch (ctx->map.intlv_mode) {
> +		case DF4p5_NPS0_24CHAN_1K_HASH:
> +			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
> +			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
> +			denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 12;
> +			break;
> +		case DF4p5_NPS0_24CHAN_2K_HASH:
> +			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
> +			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
> +			denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 13;
> +			break;
> +		case DF4p5_NPS1_12CHAN_2K_HASH:
> +			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
> +			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
> +			break;
> +		case DF4p5_NPS1_12CHAN_1K_HASH:
> +		case DF4p5_NPS4_3CHAN_1K_HASH:
> +		case DF4p5_NPS2_5CHAN_1K_HASH:
> +			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
> +			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
> +			break;
> +		case DF4p5_NPS2_6CHAN_1K_HASH:
> +		case DF4p5_NPS2_6CHAN_2K_HASH:
> +		case DF4p5_NPS4_3CHAN_2K_HASH:
> +		case DF4p5_NPS1_10CHAN_1K_HASH:
> +		case DF4p5_NPS1_10CHAN_2K_HASH:
> +		case DF4p5_NPS2_5CHAN_2K_HASH:
> +			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
> +			break;
> +		default:
> +			atl_debug_on_bad_intlv_mode(ctx);
> +			return -EINVAL;
> +		}
> +
> +		denorm_ctx->current_spa = add_base_and_hole(ctx, denorm_addr);
> +		recalculate_hashed_bits_df4p5_np2(ctx, denorm_ctx);
> +
> +		atl_debug(ctx, "Checking potential system physical address 0x%016llx\n",
> +			  denorm_ctx->current_spa);
> +
> +		if (!check_logical_coh_st_fabric_id(ctx, denorm_ctx))
> +			continue;
> +
> +		if (!check_norm_addr(ctx, denorm_ctx))
> +			continue;
> +
> +		if (denorm_ctx->resolved_spa == INVALID_SPA ||
> +		    denorm_ctx->current_spa > denorm_ctx->resolved_spa)
> +			denorm_ctx->resolved_spa = denorm_ctx->current_spa;
> +	}
> +
> +	if (denorm_ctx->resolved_spa == INVALID_SPA) {
> +		atl_debug(ctx, "Failed to find valid SPA for normalized address 0x%016llx\n",
> +			  ctx->ret_addr);
> +		return -EINVAL;
> +	}
> +
> +	/* Return the resolved SPA without the base, without the MMIO hole */
> +	ctx->ret_addr = remove_base_and_hole(ctx, denorm_ctx->resolved_spa);
> +
> +	return 0;
> +}
> +
> +static int init_df4p5_denorm_ctx(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
> +{
> +	denorm_ctx->current_spa = INVALID_SPA;
> +	denorm_ctx->resolved_spa = INVALID_SPA;
> +
> +	switch (ctx->map.intlv_mode) {
> +	case DF4p5_NPS0_24CHAN_1K_HASH:
> +		denorm_ctx->perm_shift    = 3;
> +		denorm_ctx->rehash_vector = BIT(8) | BIT(9) | BIT(12);
> +		break;
> +	case DF4p5_NPS0_24CHAN_2K_HASH:
> +		denorm_ctx->perm_shift    = 3;
> +		denorm_ctx->rehash_vector = BIT(8) | BIT(12) | BIT(13);
> +		break;
> +	case DF4p5_NPS1_12CHAN_1K_HASH:
> +		denorm_ctx->perm_shift    = 2;
> +		denorm_ctx->rehash_vector = BIT(8);
> +		break;
> +	case DF4p5_NPS1_12CHAN_2K_HASH:
> +		denorm_ctx->perm_shift    = 2;
> +		denorm_ctx->rehash_vector = BIT(8) | BIT(12);
> +		break;
> +	case DF4p5_NPS2_6CHAN_1K_HASH:
> +	case DF4p5_NPS2_6CHAN_2K_HASH:
> +	case DF4p5_NPS1_10CHAN_1K_HASH:
> +	case DF4p5_NPS1_10CHAN_2K_HASH:
> +		denorm_ctx->perm_shift    = 1;
> +		denorm_ctx->rehash_vector = BIT(8);
> +		break;
> +	case DF4p5_NPS4_3CHAN_1K_HASH:
> +	case DF4p5_NPS2_5CHAN_1K_HASH:
> +		denorm_ctx->perm_shift    = 2;
> +		denorm_ctx->rehash_vector = 0;
> +		break;
> +	case DF4p5_NPS4_3CHAN_2K_HASH:
> +	case DF4p5_NPS2_5CHAN_2K_HASH:
> +		denorm_ctx->perm_shift    = 1;
> +		denorm_ctx->rehash_vector = 0;
> +		break;
> +	default:
> +		atl_debug_on_bad_intlv_mode(ctx);
> +		return -EINVAL;
> +	}
> +
> +	denorm_ctx->base_denorm_addr = FIELD_GET(GENMASK_ULL(7, 0), ctx->ret_addr);
> +
> +	switch (ctx->map.intlv_mode) {
> +	case DF4p5_NPS0_24CHAN_1K_HASH:
> +	case DF4p5_NPS1_12CHAN_1K_HASH:
> +	case DF4p5_NPS2_6CHAN_1K_HASH:
> +	case DF4p5_NPS4_3CHAN_1K_HASH:
> +	case DF4p5_NPS1_10CHAN_1K_HASH:
> +	case DF4p5_NPS2_5CHAN_1K_HASH:
> +		denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(9, 8), ctx->ret_addr) << 10;
> +		denorm_ctx->div_addr          = FIELD_GET(GENMASK_ULL(63, 10), ctx->ret_addr);
> +		break;
> +	case DF4p5_NPS0_24CHAN_2K_HASH:
> +	case DF4p5_NPS1_12CHAN_2K_HASH:
> +	case DF4p5_NPS2_6CHAN_2K_HASH:
> +	case DF4p5_NPS4_3CHAN_2K_HASH:
> +	case DF4p5_NPS1_10CHAN_2K_HASH:
> +	case DF4p5_NPS2_5CHAN_2K_HASH:
> +		denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(10, 8), ctx->ret_addr) << 9;
> +		denorm_ctx->div_addr          = FIELD_GET(GENMASK_ULL(63, 11), ctx->ret_addr);
> +		break;
> +	default:
> +		atl_debug_on_bad_intlv_mode(ctx);
> +		return -EINVAL;
> +	}
> +
> +	if (ctx->map.num_intlv_chan % 3 == 0)
> +		denorm_ctx->mod_value = 3;
> +	else
> +		denorm_ctx->mod_value = 5;
> +
> +	denorm_ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx) - get_dst_fabric_id(ctx);
> +
> +	atl_debug(ctx, "Initialized df4p5_denorm_ctx:");
> +	atl_debug(ctx, "  mod_value         = %d", denorm_ctx->mod_value);
> +	atl_debug(ctx, "  perm_shift        = %d", denorm_ctx->perm_shift);
> +	atl_debug(ctx, "  rehash_vector     = 0x%x", denorm_ctx->rehash_vector);
> +	atl_debug(ctx, "  base_denorm_addr  = 0x%016llx", denorm_ctx->base_denorm_addr);
> +	atl_debug(ctx, "  div_addr          = 0x%016llx", denorm_ctx->div_addr);
> +	atl_debug(ctx, "  coh_st_fabric_id  = 0x%x", denorm_ctx->coh_st_fabric_id);
> +
> +	return 0;
> +}
> +
> +/*
> + * For DF 4.5, parts of the physical address can be directly pulled from the
> + * normalized address. The exact bits will differ between interleave modes, but
> + * using NPS0_24CHAN_1K_HASH as an example, the normalized address consists of
> + * bits [63:13] (divided by 3), bits [11:10], and bits [7:0] of the system
> + * physical address.
> + *
> + * In this case, there is no way to reconstruct the missing bits (bits 8, 9,
> + * and 12) from the normalized address. Additionally, when bits [63:13] are
> + * divided by 3, the remainder is dropped. Determine the proper combination of
> + * "lost" bits and dropped remainder by iterating through each possible
> + * permutation of these bits and then normalizing the generated system physical
> + * addresses. If the normalized address matches the address we are trying to
> + * translate, then we have found the correct permutation of bits.
> + */
> +static int denorm_addr_df4p5_np2(struct addr_ctx *ctx)
> +{
> +	struct df4p5_denorm_ctx denorm_ctx;
> +	int ret = 0;
> +
> +	memset(&denorm_ctx, 0, sizeof(denorm_ctx));
> +
> +	atl_debug(ctx, "Denormalizing DF 4.5 normalized address 0x%016llx", ctx->ret_addr);
> +
> +	ret = init_df4p5_denorm_ctx(ctx, &denorm_ctx);
> +	if (ret)
> +		return ret;
> +
> +	return check_permutations(ctx, &denorm_ctx);
> +}
> +
>   int denormalize_address(struct addr_ctx *ctx)
>   {
>   	switch (ctx->map.intlv_mode) {
> @@ -710,6 +1227,19 @@ int denormalize_address(struct addr_ctx *ctx)
>   	case DF4_NPS2_5CHAN_HASH:
>   	case DF4_NPS1_10CHAN_HASH:
>   		return denorm_addr_df4_np2(ctx);
> +	case DF4p5_NPS0_24CHAN_1K_HASH:
> +	case DF4p5_NPS4_3CHAN_1K_HASH:
> +	case DF4p5_NPS2_6CHAN_1K_HASH:
> +	case DF4p5_NPS1_12CHAN_1K_HASH:
> +	case DF4p5_NPS2_5CHAN_1K_HASH:
> +	case DF4p5_NPS1_10CHAN_1K_HASH:
> +	case DF4p5_NPS4_3CHAN_2K_HASH:
> +	case DF4p5_NPS2_6CHAN_2K_HASH:
> +	case DF4p5_NPS1_12CHAN_2K_HASH:
> +	case DF4p5_NPS0_24CHAN_2K_HASH:
> +	case DF4p5_NPS2_5CHAN_2K_HASH:
> +	case DF4p5_NPS1_10CHAN_2K_HASH:
> +		return denorm_addr_df4p5_np2(ctx);
>   	case DF3_6CHAN:
>   		return denorm_addr_df3_6chan(ctx);
>   	default:
> diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
> index 05b870fcb24e..946e36c053c5 100644
> --- a/drivers/ras/amd/atl/internal.h
> +++ b/drivers/ras/amd/atl/internal.h
> @@ -34,6 +34,8 @@
>   #define DF_DRAM_BASE_LIMIT_LSB		28
>   #define MI300_DRAM_LIMIT_LSB		20
>   
> +#define INVALID_SPA ~0ULL
> +
>   enum df_revisions {
>   	UNKNOWN,
>   	DF2,
> @@ -90,6 +92,44 @@ enum intlv_modes {
>   	DF4p5_NPS1_10CHAN_2K_HASH	= 0x49,
>   };
>   
> +struct df4p5_denorm_ctx {
> +	/* perm_shift: Indicates the number of "lost" bits. This will be 1, 2, or 3. */
> +	u8 perm_shift;
> +
> +	/* rehash_vector: A mask indicating the bits that need to be rehashed. */
> +	u16 rehash_vector;
> +
> +	/*
> +	 * mod_value: Represents the value that the high bits of the normalized
> +	 * address are divided by during normalization. This value will be 3
> +	 * for interleave modes with a number of channels divisible by 3 or the
> +	 * value will be 5 for interleave modes with a number of channels
> +	 * divisible by 5. Power-of-two interleave modes are handled
> +	 * separately.
> +	 */
> +	u8 mod_value;
> +
> +	/*
> +	 * base_denorm_addr: Represents the bits that can be directly pulled
> +	 * from the normalized address. In each case, pass through bits [7:0]
> +	 * of the normalized address. The other bits depend on the interleave
> +	 * bit position which will be bit 10 for 1K interleave stripe cases and
> +	 * bit 11 for 2K interleave stripe cases.
> +	 */
> +	u64 base_denorm_addr;
> +
> +	/*
> +	 * div_addr: Represents the high bits of the physical address that have
> +	 * been divided by the mod_value.
> +	 */
> +	u64 div_addr;
> +
> +	u64 current_spa;
> +	u64 resolved_spa;
> +
> +	u16 coh_st_fabric_id;
> +};
> +
>   struct df_flags {
>   	__u8	legacy_ficaa		: 1,
>   		socket_id_shift_quirk	: 1,

Thanks,
Yazen

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ