lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <4C51D908.2000007@goop.org>
Date:	Thu, 29 Jul 2010 12:39:52 -0700
From:	Jeremy Fitzhardinge <jeremy@...p.org>
To:	Daniel Kiper <dkiper@...-space.pl>
CC:	gregkh@...e.de, xen-devel@...ts.xensource.com,
	linux-kernel@...r.kernel.org, linux-mm@...ck.org
Subject: Re: [PATCH] GSoC 2010 - Memory hotplug support for Xen guests - fully
 working version

  On 07/26/2010 05:41 PM, Daniel Kiper wrote:
> Hi,
>
> Currently there is fully working version.
> It has been tested on Xen Ver. 4.0.0 in PV
> guest i386/x86_64 with Linux kernel Ver. 2.6.32.16
> and Ver. 2.6.34.1. This patch cleanly applys
> to Ver. 2.6.34.1

Thanks.  I've pushed this into xen.git as xen/memory-hotplug so people 
can play with it more easily (but I haven't merged it into any of the 
other branches yet).

Some quick notes:

> This patch enables two modes of operation:
>    - enabled by CONFIG_XEN_MEMORY_HOTPLUG config option:
>        - set memory limit for chosen domU from dom0:
>            xm mem-max<domU>  <new_memory_size_limit>
>        - add memory in chosen domU: echo<unused_address>  >  \
>            /sys/devices/system/memory/probe; memory is added
>          in sections which sizes differ from arch to arch
>          (i386: 512 MiB, x86_64: 128 MiB; it could by checked
>          by cat /sys/devices/system/memory/block_size_bytes;
>          this value is in HEX); it is preffered to choose
>          address at section boundary,

It seems odd that the user should choose the address.  Can't it work it 
out for itself?  What if the address is actually in use?

Is there any reason you would want to do this, rather than use the 
balloon driver method?

>        - online memory in chosen domU: echo online>  \
>            /sys/devices/system/memory/memory<section_number>/state;
>          <section_number>  could be established in following manner:
>          (int)(<unused_address>  /<section_size>)
>    - enabled by CONFIG_XEN_BALLOON_MEMORY_HOTPLUG config option:
>        - set memory limit for chosen domU from dom0:
>            xm mem-max<domU>  <new_memory_size_limit>
>        - add memory for chosen domU from dom0:
>            xm mem-set<domU>  <new_memory_size>
>
> If you have a questions please drop me a line.
>
> Daniel
>
> Signed-off-by: Daniel Kiper<dkiper@...-space.pl>
> ---
>   arch/x86/Kconfig               |    2 +-
>   drivers/base/memory.c          |   23 +++++
>   drivers/xen/Kconfig            |   10 ++
>   drivers/xen/balloon.c          |  196 +++++++++++++++++++++++++++++++++++++++-
>   include/linux/memory_hotplug.h |    8 ++
>   include/xen/balloon.h          |    6 ++
>   mm/Kconfig                     |    9 ++
>   mm/memory_hotplug.c            |  140 ++++++++++++++++++++++++++++
>   8 files changed, 390 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 9458685..38434da 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -1273,7 +1273,7 @@ config ARCH_SELECT_MEMORY_MODEL
>   	depends on ARCH_SPARSEMEM_ENABLE
>
>   config ARCH_MEMORY_PROBE
> -	def_bool X86_64
> +	def_bool y
>   	depends on MEMORY_HOTPLUG
>
>   config ILLEGAL_POINTER_VALUE
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 933442f..709457b 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -27,6 +27,14 @@
>   #include<asm/atomic.h>
>   #include<asm/uaccess.h>
>
> +#ifdef CONFIG_XEN_MEMORY_HOTPLUG
> +#include<xen/xen.h>
> +#endif
> +
> +#if defined(CONFIG_XEN_MEMORY_HOTPLUG)&&  defined(CONFIG_XEN_BALLOON)
> +#include<xen/balloon.h>
> +#endif

It's not necessary to #ifdef #includes.

> +
>   #define MEMORY_CLASS_NAME	"memory"
>
>   static struct sysdev_class memory_sysdev_class = {
> @@ -215,6 +223,10 @@ memory_block_action(struct memory_block *mem, unsigned long action)
>   		case MEM_ONLINE:
>   			start_pfn = page_to_pfn(first_page);
>   			ret = online_pages(start_pfn, PAGES_PER_SECTION);
> +#if defined(CONFIG_XEN_MEMORY_HOTPLUG)&&  defined(CONFIG_XEN_BALLOON)
> +			if (xen_domain()&&  !ret)
> +				balloon_update_stats(PAGES_PER_SECTION);
> +#endif

We need to work out a way to avoid having these kinds of Xen-specific 
calls in generic code.

Also, as a matter of style, I presume CONFIG_XEN_MEMORY_HOTPLUG already 
depends on CONFIG_XEN_BALLOON (and if not, it should), so just testing 
CONFIG_XEN_MEMORY_HOTPLUG should be enough.

>   			break;
>   		case MEM_OFFLINE:
>   			mem->state = MEM_GOING_OFFLINE;
> @@ -225,6 +237,10 @@ memory_block_action(struct memory_block *mem, unsigned long action)
>   				mem->state = old_state;
>   				break;
>   			}
> +#if defined(CONFIG_XEN_MEMORY_HOTPLUG)&&  defined(CONFIG_XEN_BALLOON)
> +			if (xen_domain())
> +				balloon_update_stats(-PAGES_PER_SECTION);
> +#endif
>   			break;
>   		default:
>   			WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
> @@ -341,6 +357,13 @@ memory_probe_store(struct class *class, struct class_attribute *attr,
>
>   	phys_addr = simple_strtoull(buf, NULL, 0);
>
> +#ifdef CONFIG_XEN_MEMORY_HOTPLUG
> +	if (xen_domain()) {
> +		ret = xen_memory_probe(phys_addr);
> +		return ret ? ret : count;
> +	}
> +#endif
> +
>   	nid = memory_add_physaddr_to_nid(phys_addr);
>   	ret = add_memory(nid, phys_addr, PAGES_PER_SECTION<<  PAGE_SHIFT);
>
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index fad3df2..9713048 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -9,6 +9,16 @@ config XEN_BALLOON
>   	  the system to expand the domain's memory allocation, or alternatively
>   	  return unneeded memory to the system.
>
> +config XEN_BALLOON_MEMORY_HOTPLUG
> +	bool "Xen memory balloon driver with memory hotplug support"
> +	depends on EXPERIMENTAL&&  XEN_BALLOON&&  MEMORY_HOTPLUG

(EXPERIMENTAL is pretty useless)

> +	default n
> +	help
> +	  Xen memory balloon driver with memory hotplug support allows expanding
> +	  memory available for the system above limit declared at system startup.
> +	  It is very useful on critical systems which require long run without
> +	  rebooting.
> +
>   config XEN_SCRUB_PAGES
>   	bool "Scrub pages before returning them to system"
>   	depends on XEN_BALLOON
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index 1a0d8c2..f80bba0 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -6,6 +6,7 @@
>    * Copyright (c) 2003, B Dragovic
>    * Copyright (c) 2003-2004, M Williamson, K Fraser
>    * Copyright (c) 2005 Dan M. Smith, IBM Corporation
> + * Copyright (c) 2010 Daniel Kiper
>    *
>    * This program is free software; you can redistribute it and/or
>    * modify it under the terms of the GNU General Public License version 2
> @@ -61,6 +62,10 @@
>   #include<xen/features.h>
>   #include<xen/page.h>
>
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +#include<linux/memory.h>
> +#endif

Again re #ifdefing #includes.

> +
>   #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
>
>   #define BALLOON_CLASS_NAME "xen_memory"
> @@ -77,6 +82,11 @@ struct balloon_stats {
>   	/* Number of pages in high- and low-memory balloons. */
>   	unsigned long balloon_low;
>   	unsigned long balloon_high;
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +	unsigned long boot_max_pfn;
> +	u64 hotplug_start_paddr;
> +	u64 hotplug_size;
> +#endif
>   };
>
>   static DEFINE_MUTEX(balloon_mutex);
> @@ -184,6 +194,12 @@ static void balloon_alarm(unsigned long unused)
>   	schedule_work(&balloon_worker);
>   }
>
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +static inline unsigned long current_target(void)
> +{
> +	return balloon_stats.target_pages;

Why does this need its own version?

> +}
> +#else
>   static unsigned long current_target(void)
>   {
>   	unsigned long target = balloon_stats.target_pages;
> @@ -195,11 +211,12 @@ static unsigned long current_target(void)
>
>   	return target;
>   }
> +#endif
>
>   static int increase_reservation(unsigned long nr_pages)
>   {
> -	unsigned long  pfn, i, flags;
> -	struct page   *page;
> +	unsigned long  uninitialized_var(pfn), i, flags;
> +	struct page    *uninitialized_var(page);
>   	long           rc;
>   	struct xen_memory_reservation reservation = {
>   		.address_bits = 0,
> @@ -207,11 +224,63 @@ static int increase_reservation(unsigned long nr_pages)
>   		.domid        = DOMID_SELF
>   	};
>
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +	resource_size_t r_min, r_size;
> +	struct resource *r;
> +#endif
> +
>   	if (nr_pages>  ARRAY_SIZE(frame_list))
>   		nr_pages = ARRAY_SIZE(frame_list);
>
>   	spin_lock_irqsave(&balloon_lock, flags);
>
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +	if (!balloon_stats.balloon_low&&  !balloon_stats.balloon_high) {
> +		if (!balloon_stats.hotplug_start_paddr) {

This should be in its own function.  Overall, I think you should 
refactor increase_reservation to avoid all the gotos and - especially - 
all the #ifdefs.

> +
> +			/*
> +			 * Look for first unused memory region starting
> +			 * at page boundary. Skip last memory section created
> +			 * at boot time becuase it may contains unused memory
> +			 * pages with PG_reserved bit not set (online_pages
> +			 * require PG_reserved bit set).
> +			 */
> +
> +			r = kzalloc(sizeof(struct resource), GFP_KERNEL);
> +
> +			if (!r) {
> +				rc = -ENOMEM;
> +				goto out;
> +			}
> +
> +			r->name = "System RAM";
> +			r->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> +			r_min = PFN_PHYS(section_nr_to_pfn(pfn_to_section_nr(balloon_stats.boot_max_pfn) + 1));
> +			r_size = (balloon_stats.target_pages - balloon_stats.current_pages)<<  PAGE_SHIFT;
> +
> +			rc = allocate_resource(&iomem_resource, r,
> +						r_size, r_min, ULONG_MAX,
> +						PAGE_SIZE, NULL, NULL);
> +
> +			if (rc<  0) {
> +				kfree(r);
> +				goto out;
> +			}
> +
> +			balloon_stats.hotplug_start_paddr = r->start;
> +		}
> +
> +		pfn = PFN_DOWN(balloon_stats.hotplug_start_paddr +
> +					balloon_stats.hotplug_size);
> +
> +		for (i = 0; i<  nr_pages; ++i, ++pfn)
> +			frame_list[i] = pfn;
> +
> +		pfn -= nr_pages + 1;
> +		goto populate_physmap;
> +	}
> +#endif
> +
>   	page = balloon_first_page();
>   	for (i = 0; i<  nr_pages; i++) {
>   		BUG_ON(page == NULL);
> @@ -219,6 +288,9 @@ static int increase_reservation(unsigned long nr_pages)
>   		page = balloon_next_page(page);
>   	}
>
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +populate_physmap:
> +#endif
>   	set_xen_guest_handle(reservation.extent_start, frame_list);
>   	reservation.nr_extents = nr_pages;
>   	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap,&reservation);
> @@ -226,17 +298,33 @@ static int increase_reservation(unsigned long nr_pages)
>   		goto out;
>
>   	for (i = 0; i<  rc; i++) {
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +		if (balloon_stats.hotplug_start_paddr) {
> +			++pfn;
> +			goto set_p2m;
> +		}

What does this do?

> +#endif
> +
>   		page = balloon_retrieve();
>   		BUG_ON(page == NULL);
>
>   		pfn = page_to_pfn(page);
> +
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +set_p2m:
> +#endif
>   		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap)&&
>   		phys_to_machine_mapping_valid(pfn));
>
>   		set_phys_to_machine(pfn, frame_list[i]);
>
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +		if (balloon_stats.hotplug_start_paddr)
> +			continue;
> +#endif
> +
>   		/* Link back into the page tables if not highmem. */
> -		if (pfn<  max_low_pfn) {
> +		if (!PageHighMem(page)) {
>   			int ret;
>   			ret = HYPERVISOR_update_va_mapping(
>   				(unsigned long)__va(pfn<<  PAGE_SHIFT),
> @@ -251,6 +339,11 @@ static int increase_reservation(unsigned long nr_pages)
>   		__free_page(page);
>   	}
>
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +	if (balloon_stats.hotplug_start_paddr)
> +		balloon_stats.hotplug_size += rc<<  PAGE_SHIFT;
> +#endif
> +
>   	balloon_stats.current_pages += rc;
>
>    out:
> @@ -331,6 +424,12 @@ static void balloon_process(struct work_struct *work)
>   	int need_sleep = 0;
>   	long credit;
>
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +	int nid, ret;
> +	struct memory_block *mem;
> +	unsigned long pfn, pfn_limit;
> +#endif
> +
>   	mutex_lock(&balloon_mutex);
>
>   	do {
> @@ -349,10 +448,93 @@ static void balloon_process(struct work_struct *work)
>   	/* Schedule more work if there is some still to be done. */
>   	if (current_target() != balloon_stats.current_pages)
>   		mod_timer(&balloon_timer, jiffies + HZ);
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +	else if (balloon_stats.hotplug_start_paddr) {

This should be its own function.

> +		nid = memory_add_physaddr_to_nid(balloon_stats.hotplug_start_paddr);
> +
> +		ret = xen_add_memory(nid, balloon_stats.hotplug_start_paddr,
> +						balloon_stats.hotplug_size);
> +
> +		if (ret) {
> +			printk(KERN_ERR "%s: xen_add_memory: "
> +					"Memory hotplug failed: %i\n",
> +					__func__, ret);
> +			goto error;
> +		}
> +
> +		pfn = PFN_DOWN(balloon_stats.hotplug_start_paddr);
> +		pfn_limit = pfn + (balloon_stats.hotplug_size>>  PAGE_SHIFT);
> +
> +		for (; pfn<  pfn_limit; ++pfn)
> +			if (!PageHighMem(pfn_to_page(pfn)))
> +				BUG_ON(HYPERVISOR_update_va_mapping(
> +					(unsigned long)__va(pfn<<  PAGE_SHIFT),
> +					mfn_pte(pfn_to_mfn(pfn), PAGE_KERNEL), 0));
> +
> +		ret = online_pages(PFN_DOWN(balloon_stats.hotplug_start_paddr),
> +					balloon_stats.hotplug_size>>  PAGE_SHIFT);
> +
> +		if (ret) {
> +			printk(KERN_ERR "%s: online_pages: Failed: %i\n",
> +					__func__, ret);
> +			goto error;
> +		}
> +
> +		pfn = PFN_DOWN(balloon_stats.hotplug_start_paddr);
> +		pfn_limit = pfn + (balloon_stats.hotplug_size>>  PAGE_SHIFT);
> +
> +		for (; pfn<  pfn_limit; pfn += PAGES_PER_SECTION) {
> +			mem = find_memory_block(__pfn_to_section(pfn));
> +			BUG_ON(!mem);
> +			BUG_ON(!present_section_nr(mem->phys_index));
> +			mutex_lock(&mem->state_mutex);
> +			mem->state = MEM_ONLINE;
> +			mutex_unlock(&mem->state_mutex);
> +		}
> +
> +		goto out;
> +
> +error:
> +		balloon_stats.current_pages -= balloon_stats.hotplug_size>>  PAGE_SHIFT;
> +		balloon_stats.target_pages -= balloon_stats.hotplug_size>>  PAGE_SHIFT;
> +
> +out:
> +		balloon_stats.hotplug_start_paddr = 0;
> +		balloon_stats.hotplug_size = 0;
> +	}
> +#endif
>
>   	mutex_unlock(&balloon_mutex);
>   }
>
> +#ifdef CONFIG_XEN_MEMORY_HOTPLUG
> +
> +/* Resets the Xen limit, sets new target, and kicks off processing. */
> +static void balloon_set_new_target(unsigned long target)
> +{
> +	mutex_lock(&balloon_mutex);
> +	balloon_stats.target_pages = target;
> +	mutex_unlock(&balloon_mutex);
> +

I don't think it should be necessary to define a new version of this 
function.

> +	schedule_work(&balloon_worker);
> +}
> +
> +void balloon_update_stats(long nr_pages)
> +{
> +	mutex_lock(&balloon_mutex);
> +
> +	balloon_stats.current_pages += nr_pages;
> +	balloon_stats.target_pages += nr_pages;
> +
> +	xenbus_printf(XBT_NIL, "memory", "target", "%llu",
> +			(unsigned long long)balloon_stats.target_pages<<  (PAGE_SHIFT - 10));

Hm, this looks odd.  In what situations does this get called?

> +
> +	mutex_unlock(&balloon_mutex);
> +}
> +EXPORT_SYMBOL_GPL(balloon_update_stats);
> +
> +#else
> +
>   /* Resets the Xen limit, sets new target, and kicks off processing. */
>   static void balloon_set_new_target(unsigned long target)
>   {
> @@ -361,6 +543,8 @@ static void balloon_set_new_target(unsigned long target)
>   	schedule_work(&balloon_worker);
>   }
>
> +#endif
> +
>   static struct xenbus_watch target_watch =
>   {
>   	.node = "memory/target"
> @@ -416,6 +600,12 @@ static int __init balloon_init(void)
>   	balloon_stats.balloon_high  = 0;
>   	balloon_stats.driver_pages  = 0UL;
>
> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> +	balloon_stats.boot_max_pfn = max_pfn;
> +	balloon_stats.hotplug_start_paddr = 0;
> +	balloon_stats.hotplug_size = 0;
> +#endif
> +
>   	init_timer(&balloon_timer);
>   	balloon_timer.data = 0;
>   	balloon_timer.function = balloon_alarm;
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 35b07b7..04e67b8 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -211,4 +211,12 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
>   extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
>   					  unsigned long pnum);
>
> +#if defined(CONFIG_XEN_MEMORY_HOTPLUG) || defined(CONFIG_XEN_BALLOON_MEMORY_HOTPLUG)
> +extern int xen_add_memory(int nid, u64 start, u64 size);
> +#endif
> +
> +#ifdef CONFIG_XEN_MEMORY_HOTPLUG
> +extern int xen_memory_probe(u64 phys_addr);
> +#endif

Not necessary to #ifdef prototypes.

> +
>   #endif /* __LINUX_MEMORY_HOTPLUG_H */
> diff --git a/include/xen/balloon.h b/include/xen/balloon.h
> new file mode 100644
> index 0000000..84b17b7
> --- /dev/null
> +++ b/include/xen/balloon.h
> @@ -0,0 +1,6 @@
> +#ifndef _XEN_BALLOON_H
> +#define _XEN_BALLOON_H
> +
> +extern void balloon_update_stats(long nr_pages);

If this is Xen-specific, it should have xen_ in the name.

> +
> +#endif	/* _XEN_BALLOON_H */
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 9c61158..b04f3a8 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -144,6 +144,15 @@ config MEMORY_HOTREMOVE
>   	depends on MEMORY_HOTPLUG&&  ARCH_ENABLE_MEMORY_HOTREMOVE
>   	depends on MIGRATION
>
> +config XEN_MEMORY_HOTPLUG
> +	bool "Allow for memory hot-add in Xen guests"
> +	depends on EXPERIMENTAL&&  ARCH_MEMORY_PROBE&&  XEN

Do we need both XEN_MEMORY_HOTPLUG and XEN_BALLOON_MEMORY_HOTPLUG?

> +	default n
> +	help
> +	  Memory hotplug allows expanding memory available for the system
> +	  above limit declared at system startup. It is very useful on critical
> +	  systems which require long run without rebooting.
> +
>   #
>   # If we have space for more page flags then we can enable additional
>   # optimizations and functionality.
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index be211a5..1c73703 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -32,6 +32,14 @@
>
>   #include<asm/tlbflush.h>
>
> +#ifdef CONFIG_XEN_MEMORY_HOTPLUG
> +#include<asm/xen/hypercall.h>
> +#include<xen/interface/xen.h>
> +#include<xen/interface/memory.h>
> +#include<xen/features.h>
> +#include<xen/page.h>
> +#endif

We need to think carefully about what the real requirements are here and 
try to abstract this away from being Xen-specific.

> +
>   #include "internal.h"
>
>   /* add this memory to iomem resource */
> @@ -542,6 +550,138 @@ out:
>   }
>   EXPORT_SYMBOL_GPL(add_memory);
>
> +#if defined(CONFIG_XEN_MEMORY_HOTPLUG) || defined(CONFIG_XEN_BALLOON_MEMORY_HOTPLUG)
> +/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
> +int __ref xen_add_memory(int nid, u64 start, u64 size)
> +{
> +	pg_data_t *pgdat = NULL;
> +	int new_pgdat = 0, ret;
> +
> +	lock_system_sleep();
> +
> +	if (!node_online(nid)) {
> +		pgdat = hotadd_new_pgdat(nid, start);
> +		ret = -ENOMEM;
> +		if (!pgdat)
> +			goto out;
> +		new_pgdat = 1;
> +	}
> +
> +	/* call arch's memory hotadd */
> +	ret = arch_add_memory(nid, start, size);
> +
> +	if (ret<  0)
> +		goto error;
> +
> +	/* we online node here. we can't roll back from here. */
> +	node_set_online(nid);
> +
> +	if (new_pgdat) {
> +		ret = register_one_node(nid);
> +		/*
> +		 * If sysfs file of new node can't create, cpu on the node
> +		 * can't be hot-added. There is no rollback way now.
> +		 * So, check by BUG_ON() to catch it reluctantly..
> +		 */
> +		BUG_ON(ret);
> +	}
> +

This doesn't seem to be doing anything particularly xen-specific.

> +	goto out;
> +
> +error:
> +	/* rollback pgdat allocation */
> +	if (new_pgdat)
> +		rollback_node_hotadd(nid, pgdat);
> +
> +out:
> +	unlock_system_sleep();
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(xen_add_memory);
> +#endif
> +
> +#ifdef CONFIG_XEN_MEMORY_HOTPLUG
> +int xen_memory_probe(u64 phys_addr)
> +{
> +	int nr_pages, ret;
> +	struct resource *r;
> +	struct xen_memory_reservation reservation = {
> +		.address_bits = 0,
> +		.extent_order = 0,
> +		.domid = DOMID_SELF,
> +		.nr_extents = PAGES_PER_SECTION
> +	};
> +	unsigned long *frame_list, i, pfn;
> +
> +	r = register_memory_resource(phys_addr, PAGES_PER_SECTION<<  PAGE_SHIFT);
> +
> +	if (!r)
> +		return -EEXIST;
> +
> +	frame_list = vmalloc(PAGES_PER_SECTION * sizeof(unsigned long));

Does this need to be done in PAGES_PER_SECTION chunks?  Couldn't you 
just allocate a single page and batch it in 1024/512 page pieces?

> +
> +	if (!frame_list) {
> +		printk(KERN_ERR "%s: vmalloc: Out of memory\n", __func__);
> +		ret = -ENOMEM;
> +		goto error;
> +	}
> +
> +	set_xen_guest_handle(reservation.extent_start, frame_list);
> +	for (i = 0, pfn = PFN_DOWN(phys_addr); i<  PAGES_PER_SECTION; ++i, ++pfn)
> +		frame_list[i] = pfn;
> +
> +	ret = HYPERVISOR_memory_op(XENMEM_populate_physmap,&reservation);
> +
> +	if (ret<  PAGES_PER_SECTION) {
> +		if (ret>  0) {
> +			printk(KERN_ERR "%s: PHYSMAP is not fully "
> +					"populated: %i/%lu\n", __func__,
> +					ret, PAGES_PER_SECTION);
> +			reservation.nr_extents = nr_pages = ret;
> +			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,&reservation);
> +			BUG_ON(ret != nr_pages);

BUG_ON is pretty severe; it should only be used if its impossible to 
recover from the failure, or if failure indicates that some severe bug 
has occurred.

> +			ret = -ENOMEM;
> +		} else {
> +			ret = (ret<  0) ? ret : -ENOMEM;
> +			printk(KERN_ERR "%s: Can't populate PHYSMAP: %i\n", __func__, ret);
> +		}
> +		goto error;
> +	}
> +
> +	for (i = 0, pfn = PFN_DOWN(phys_addr); i<  PAGES_PER_SECTION; ++i, ++pfn) {
> +		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap)&&
> +			phys_to_machine_mapping_valid(pfn));
> +		set_phys_to_machine(pfn, frame_list[i]);
> +	}
> +
> +	ret = xen_add_memory(memory_add_physaddr_to_nid(phys_addr), phys_addr,
> +				PAGES_PER_SECTION<<  PAGE_SHIFT);
> +
> +	if (ret) {
> +		printk(KERN_ERR "%s: xen_add_memory: Memory hotplug "
> +				"failed: %i\n", __func__, ret);
> +		goto out;
> +	}
> +
> +	for (i = 0, pfn = PFN_DOWN(phys_addr); i<  PAGES_PER_SECTION; ++i, ++pfn)
> +		if (!PageHighMem(pfn_to_page(pfn)))
> +			BUG_ON(HYPERVISOR_update_va_mapping(
> +				(unsigned long)__va(pfn<<  PAGE_SHIFT),
> +				mfn_pte(frame_list[i], PAGE_KERNEL), 0));
> +
> +	goto out;
> +
> +error:
> +	release_memory_resource(r);
> +
> +out:
> +	vfree(frame_list);
> +
> +	return (ret<  0) ? ret : 0;
> +}
> +EXPORT_SYMBOL_GPL(xen_memory_probe);
> +#endif
> +
>   #ifdef CONFIG_MEMORY_HOTREMOVE
>   /*
>    * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ