[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1231843097-18003-14-git-send-email-tj@kernel.org>
Date: Tue, 13 Jan 2009 19:38:17 +0900
From: Tejun Heo <tj@...nel.org>
To: ebiederm@...ssion.com, cl@...ux-foundation.org,
rusty@...tcorp.com.au, mingo@...e.hu, travis@....com,
linux-kernel@...r.kernel.org, hpa@...or.com,
akpm@...ux-foundation.org, steiner@....com, hugh@...itas.com
Cc: Tejun Heo <tj@...nel.org>
Subject: [PATCH 13/13] x86_32: make percpu symbols zerobased on SMP
This patch makes percpu symbols zerobased on x86_32 SMP by using
PERCPU_VADDR() with 0 vaddr in vmlinux_32.lds.S. A new PHDR is added
as existing ones cannot contain sections near address zero.
The following adjustments have been made to accommodate the address
change.
* code to locate percpu gdt_page in head_32.S is updated to add the
load address to the gdt_page offset.
* for the boot CPU, we can't use __KERNEL_DS for %fs. Instead,
gdt_page.gdt[GDT_ENTRY_PERCPU] is initialized with flags and limit, and
head_32.S sets its base address to __per_cpu_load and loads it. The
base address needs to be set manually because the linker can't shift
bits as required for segment descriptors.
* __per_cpu_offset[0] is now initialized to __per_cpu_load like on
x86_64.
Signed-off-by: Tejun Heo <tj@...nel.org>
Cc: Mike Travis <travis@....com>
---
arch/x86/kernel/cpu/common.c | 8 ++++++++
arch/x86/kernel/head_32.S | 37 ++++++++++++++++++++++++++++++++++---
arch/x86/kernel/setup_percpu.c | 4 ----
arch/x86/kernel/vmlinux_32.lds.S | 16 +++++++++++++++-
4 files changed, 57 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bd38a0f..376e142 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -90,7 +90,15 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
+#ifdef CONFIG_SMP
+ /*
+ * Linker can't handle the address bit shifting. Address will
+ * be set in head_32.S for boot CPU and init_gdt for others.
+ */
+ [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
+#else
[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
+#endif
} };
#endif
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index e835b4e..3284199 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,6 +19,7 @@
#include <asm/asm-offsets.h>
#include <asm/setup.h>
#include <asm/processor-flags.h>
+#include <asm/percpu.h>
/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)
@@ -424,12 +425,39 @@ is386: movl $2,%ecx # set MP
movl %eax,%cr0
call check_x87
+#ifdef CONFIG_SMP
+ /*
+ * early_gdt_descr_base should point to the gdt_page in static percpu init
+ * data area. Computing this requires two symbols - __per_cpu_load
+ * and per_cpu__gdt_page. As the linker can't do such a relocation, do
+ * it by hand. As early_gdt_descr is manipulated by C code for
+ * secondary CPUs, this should be done only once for the boot CPU
+ * when early_gdt_descr_base contains zero.
+ *
+ * Also, manually load __per_cpu_load into address fields of PERCPU
+ * segment descriptor for boot CPU. Linker can't do it.
+ */
+ movl early_gdt_descr_base, %eax
+ testl %eax, %eax
+ jnz 3f
+ movl $__per_cpu_load, %eax
+ movl %eax, %ecx
+ addl $per_cpu__gdt_page, %eax
+ movl %eax, early_gdt_descr_base
+
+ movw %cx, 8 * GDT_ENTRY_PERCPU + 2(%eax)
+ shrl $16, %ecx
+ movb %cl, 8 * GDT_ENTRY_PERCPU + 4(%eax)
+ movb %ch, 8 * GDT_ENTRY_PERCPU + 7(%eax)
+3:
+#endif /* CONFIG_SMP */
lgdt early_gdt_descr
lidt idt_descr
ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ss # after changing gdt.
- movl %eax,%fs # gets reset once there's real percpu
+ movl $(__KERNEL_PERCPU),%eax
+ movl %eax,%fs # will be reloaded for boot cpu
movl $(__USER_DS),%eax # DS/ES contains default USER segment
movl %eax,%ds
@@ -446,8 +474,6 @@ is386: movl $2,%ecx # set MP
movb $1, ready
cmpb $0,%cl # the first CPU calls start_kernel
je 1f
- movl $(__KERNEL_PERCPU), %eax
- movl %eax,%fs # set this cpu's percpu
movl (stack_start), %esp
1:
#endif /* CONFIG_SMP */
@@ -702,7 +728,12 @@ idt_descr:
.word 0 # 32 bit align gdt_desc.address
ENTRY(early_gdt_descr)
.word GDT_ENTRIES*8-1
+#ifdef CONFIG_SMP
+early_gdt_descr_base:
+ .long 0x00000000
+#else
.long per_cpu__gdt_page /* Overwritten for secondary CPUs */
+#endif
/*
* The boot_gdt must mirror the equivalent in setup.S and is
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9edc081..6f47227 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -119,13 +119,9 @@ static void __init setup_per_cpu_maps(void)
#endif
}
-#ifdef CONFIG_X86_64
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
[0] = (unsigned long)__per_cpu_load,
};
-#else
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-#endif
EXPORT_SYMBOL(__per_cpu_offset);
/*
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 3eba7f7..03abadf 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -24,6 +24,9 @@ jiffies = jiffies_64;
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(7); /* RWE */
+#ifdef CONFIG_SMP
+ percpu PT_LOAD FLAGS(7); /* RWE */
+#endif
note PT_NOTE FLAGS(0); /* ___ */
}
SECTIONS
@@ -178,7 +181,18 @@ SECTIONS
__initramfs_end = .;
}
#endif
+
+#ifdef CONFIG_SMP
+ /*
+ * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
+ * output PHDR, so the next output section - bss - should switch it
+ * back to data.
+ */
+ . = ALIGN(PAGE_SIZE);
+ PERCPU_VADDR(0, :percpu)
+#else
PERCPU(PAGE_SIZE)
+#endif
. = ALIGN(PAGE_SIZE);
/* freed after init ends here */
@@ -193,7 +207,7 @@ SECTIONS
/* This is where the kernel creates the early boot page tables */
. = ALIGN(PAGE_SIZE);
pg0 = . ;
- }
+ } :data /* switch back to data, see PERCPU_VADDR() above */
/* Sections to be discarded */
/DISCARD/ : {
--
1.5.6
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists