lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150914202123.GA27964@google.com>
Date:	Mon, 14 Sep 2015 15:21:23 -0500
From:	Bjorn Helgaas <bhelgaas@...gle.com>
To:	Yinghai Lu <yinghai@...nel.org>
Cc:	David Miller <davem@...emloft.net>,
	Benjamin Herrenschmidt <benh@...nel.crashing.org>,
	Wei Yang <weiyang@...ux.vnet.ibm.com>, TJ <linux@....tj>,
	Yijing Wang <wangyijing@...wei.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	linux-pci@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v4 04/52] PCI: Optimize bus min_align/size calculation
 during sizing

I spent a couple hours trying to understand the v3 version of this
patch, and I gave you some detailed examples and questions.  If you
want me to look at v4, I expect you to try to clarify the changelog to
help answer those questions.

The goal is that someone with reasonable familiarity with Linux and
PCI would be able to read the changelogs and understand what happened,
without having to search out all the email discussions.

On Thu, Aug 20, 2015 at 11:20:19PM -0700, Yinghai Lu wrote:
> Current code try to get align as small as possible and use that to
> align final size. But it does not handle resource that size is bigger
> than align in optimal way, kernel only use max align for them.
> 
> For example:
>  when we have resources with align/size: 1M/2M, 512M/512M,
>    bus resource min_align/size0 will be 512M/1024M,
>    but optimal value should be 256M/768M.
> 
> For following cases that we have resource size that is bigger
> than resource alignment:
> 1. SRIOV bar.
> 2. PCI bridges with children that need several MMIOs that are more than 1M.
> 
> We can keep on trying to allocate children devices resources under range
> [offset, offset + aligned_size) and offset is aligned with half_align.
> If sucesses, we can use that half_align as new min_align.
> 
> After this patch, we get:
>  align/size: 1M/2M, 2M/4M, 4M/8M, 8M/16M
>  new min_align/min_size: 4M/32M, and old is 8M/32M
> 
>  align/size: 1M/2M, 2M/4M, 4M/8M
>  new min_align/min_size: 2M/14M, and old is 4M/16M
> 
>  align/size: 1M/2M, 512M/512M
>  new min_align/min_size: 256M/768M, and old is 512M/1024M
> 
> The real result from one system with one pcie card that has
> four functions that support sriov:
>  children resources with align/size:
>    00800000/00800000
>    00800000/00800000
>    00800000/00800000
>    00800000/00800000
>    00010000/00200000
>    00010000/00200000
>    00010000/00200000
>    00010000/00200000
>    00008000/00008000
>    00008000/00008000
>    00008000/00008000
>    00008000/00008000
>    00004000/00080000
>    00004000/00080000
>    00004000/00080000
>    00004000/00080000
> the for the bridge
> with orig code we have min_align/min_size: 00400000/02c00000
> and with this patch we have min_align/min_size: 00100000/02b00000
> So align will be 1M instead of 4M and even have smaller size.
> 
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=81431
> Reported-by: TJ <linux@....tj>
> Signed-off-by: Yinghai Lu <yinghai@...nel.org>
> ---
>  drivers/pci/setup-bus.c | 195 ++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 157 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> index aea35b6..861fe68 100644
> --- a/drivers/pci/setup-bus.c
> +++ b/drivers/pci/setup-bus.c
> @@ -30,6 +30,34 @@
>  
>  unsigned int pci_flags;
>  
> +static inline bool is_before(resource_size_t align1, resource_size_t size1,
> +			     resource_size_t align2, resource_size_t size2)
> +{
> +	resource_size_t size1_left, size2_left;
> +
> +	/* big align is before small align */
> +	if (align1 > align2)
> +		return true;
> +
> +	/*
> +	 * for same align:
> +	 *   aligned is before not aligned
> +	 *   for not aligned, big remainder is before small remainder
> +	 */
> +	if (align1 == align2) {
> +		size1_left = size1 & (align1 - 1);
> +		if (!size1_left)
> +			size1_left = align1;
> +		size2_left = size2 & (align2 - 1);
> +		if (!size2_left)
> +			size2_left = align2;
> +		if (size1_left > size2_left)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
>  struct pci_dev_resource {
>  	struct list_head list;
>  	struct resource *res;
> @@ -998,26 +1026,125 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
>  	}
>  }
>  
> -static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
> -						  int max_order)
> +struct align_test_res {
> +	struct list_head list;
> +	struct resource res;
> +	resource_size_t size;
> +	resource_size_t align;
> +};
> +
> +static void free_align_test_list(struct list_head *head)
>  {
> -	resource_size_t align = 0;
> -	resource_size_t min_align = 0;
> -	int order;
> +	struct align_test_res *p, *tmp;
>  
> -	for (order = 0; order <= max_order; order++) {
> -		resource_size_t align1 = 1;
> +	list_for_each_entry_safe(p, tmp, head, list) {
> +		list_del(&p->list);
> +		kfree(p);
> +	}
> +}
>  
> -		align1 <<= (order + 20);
> +static int add_to_align_test_list(struct list_head *head,
> +				  resource_size_t align, resource_size_t size)
> +{
> +	struct align_test_res *tmp;
> +
> +	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
> +	if (!tmp)
> +		return -ENOMEM;
> +
> +	tmp->align = align;
> +	tmp->size = size;
> +
> +	list_add_tail(&tmp->list, head);
> +
> +	return 0;
> +}
> +
> +static void sort_align_test(struct list_head *head)
> +{
> +	struct align_test_res *res1, *tmp_res, *res2;
>  
> -		if (!align)
> -			min_align = align1;
> -		else if (ALIGN(align + min_align, min_align) < align1)
> -			min_align = align1 >> 1;
> -		align += aligns[order];
> +	list_for_each_entry_safe(res1, tmp_res, head, list) {
> +		/* reorder it */
> +		list_for_each_entry(res2, head, list) {
> +			if (res2 == res1)
> +				break;
> +
> +			if (is_before(res1->align, res1->size,
> +				      res2->align, res2->size)) {
> +				list_move_tail(&res1->list, &res2->list);
> +				break;
> +			}
> +		}
> +	}
> +}
> +
> +static bool is_align_size_good(struct list_head *head,
> +			resource_size_t min_align, resource_size_t size,
> +			resource_size_t start)
> +{
> +	struct align_test_res *p;
> +	struct resource root;
> +
> +	memset(&root, 0, sizeof(root));
> +	root.start = start;
> +	root.end = start + size - 1;
> +
> +	list_for_each_entry(p, head, list)
> +		memset(&p->res, 0, sizeof(p->res));
> +
> +	list_for_each_entry(p, head, list)
> +		if (allocate_resource(&root, &p->res, p->size,
> +				0, (resource_size_t)-1ULL,
> +				p->align, NULL, NULL))
> +			return false;
> +
> +	return true;
> +}
> +
> +static resource_size_t calculate_mem_align(struct list_head *head,
> +				resource_size_t max_align, resource_size_t size,
> +				resource_size_t align_low)
> +{
> +	struct align_test_res *p;
> +	resource_size_t min_align, good_align, aligned_size, start;
> +	int count = 0;
> +
> +	if (max_align <= align_low) {
> +		good_align = align_low;
> +		goto out;
>  	}
>  
> -	return min_align;
> +	good_align = max_align;
> +
> +	list_for_each_entry(p, head, list)
> +		count++;
> +
> +	if (count <= 1)
> +		goto out;
> +
> +	sort_align_test(head);
> +
> +	do {
> +		/* check if we can use smaller align */
> +		min_align = good_align >> 1;
> +		aligned_size = ALIGN(size, min_align);
> +
> +		/* need to make sure every offset work */
> +		for (start = min_align; start < max_align; start += min_align) {
> +			/* checked already with last align ? */
> +			if (!(start & (good_align - 1)))
> +				continue;
> +
> +			if (!is_align_size_good(head, min_align, aligned_size,
> +					       start))
> +				goto out;
> +		}
> +		good_align = min_align;
> +	} while (min_align > align_low);
> +
> +out:
> +	return good_align;
>  }
>  
>  /**
> @@ -1047,19 +1174,17 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  {
>  	struct pci_dev *dev;
>  	resource_size_t min_align, align, size, size0, size1;
> -	resource_size_t aligns[18];	/* Alignments from 1Mb to 128Gb */
> -	int order, max_order;
> +	resource_size_t max_align = 0;
>  	struct resource *b_res = find_free_bus_resource(bus,
>  					mask | IORESOURCE_PREFETCH, type);
>  	resource_size_t children_add_size = 0;
>  	resource_size_t children_add_align = 0;
>  	resource_size_t add_align = 0;
> +	LIST_HEAD(align_test_list);
>  
>  	if (!b_res)
>  		return -ENOSPC;
>  
> -	memset(aligns, 0, sizeof(aligns));
> -	max_order = 0;
>  	size = 0;
>  
>  	list_for_each_entry(dev, &bus->devices, bus_list) {
> @@ -1085,29 +1210,20 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  				continue;
>  			}
>  #endif
> -			/*
> -			 * aligns[0] is for 1MB (since bridge memory
> -			 * windows are always at least 1MB aligned), so
> -			 * keep "order" from being negative for smaller
> -			 * resources.
> -			 */
>  			align = pci_resource_alignment(dev, r);
> -			order = __ffs(align) - 20;
> -			if (order < 0)
> -				order = 0;
> -			if (order >= ARRAY_SIZE(aligns)) {
> +			if (align > (1ULL<<37)) { /*128 Gb*/
>  				dev_warn(&dev->dev, "disabling BAR %d: %pR (bad alignment %#llx)\n",
> -					 i, r, (unsigned long long) align);
> +					i, r, (unsigned long long) align);
>  				r->flags = 0;
>  				continue;
>  			}
> +
> +			if (r_size > 1)
> +				add_to_align_test_list(&align_test_list,
> +							align, r_size);
>  			size += r_size;
> -			/* Exclude ranges with size > align from
> -			   calculation of the alignment. */
> -			if (r_size == align)
> -				aligns[order] += align;
> -			if (order > max_order)
> -				max_order = order;
> +			if (align > max_align)
> +				max_align = align;
>  
>  			if (realloc_head) {
>  				children_add_size += get_res_add_size(realloc_head, r);
> @@ -1117,9 +1233,12 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
>  		}
>  	}
>  
> -	min_align = calculate_mem_align(aligns, max_order);
> -	min_align = max(min_align, window_alignment(bus, b_res->flags));
> -	size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align);
> +	max_align = max(max_align, window_alignment(bus, b_res->flags));
> +	min_align = calculate_mem_align(&align_test_list, max_align, size,
> +					window_alignment(bus, b_res->flags));
> +	size0 = calculate_memsize(size, min_size, 0,
> +				  resource_size(b_res), min_align);
> +	free_align_test_list(&align_test_list);
>  	add_align = max(min_align, add_align);
>  	if (children_add_size > add_size)
>  		add_size = children_add_size;
> -- 
> 1.8.4.5
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ