lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <200703020255.l222tltk009694@zach-dev.vmware.com>
Date:	Thu, 1 Mar 2007 18:55:47 -0800
From:	Zachary Amsden <zach@...are.com>
To:	Andi Kleen <ak@....de>, Linus Torvalds <torvalds@...l.org>,
	Rusty Russell <rusty@...tcorp.com.au>,
	Jeremy Fitzhardinge <jeremy@...p.org>,
	Chris Wright <chrisw@...s-sol.org>,
	Dan Hecht <dhecht@...are.com>, Dan Arai <arai@...are.com>,
	Andrew Morton <akpm@...l.org>,
	Virtualization Mailing List <virtualization@...ts.osdl.org>,
	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
	Zachary Amsden <zach@...are.com>
CC:	Anthony Liguori <anthony@...emonkey.ws>
Subject: [PATCH 8/9] Vmi apic ops.diff

Use para_fill instead of directly setting the APIC ops to the result of the
vmi_get_function call - this allows one to implement a VMI ROM without
implementing APIC functions; the native APIC functions are used instead.

While doing this, I realized that there is a lot more cleanup that should
have been done.  Basically, we should never assume that the ROM implements
a specific set of functions, and always allow fallback to the native
implementation.

This is critical for future compatibility.

Signed-off-by: Anthony Liguori <anthony@...emonkey.ws>
Signed-off-by: Zachary Amsden <zach@...are.com>

diff -r 0ba8434a5c7e arch/i386/kernel/vmi.c
--- a/arch/i386/kernel/vmi.c	Thu Mar 01 16:49:27 2007 -0800
+++ b/arch/i386/kernel/vmi.c	Thu Mar 01 16:49:33 2007 -0800
@@ -54,6 +54,7 @@ static int disable_tsc;
 static int disable_tsc;
 static int disable_mtrr;
 static int disable_noidle;
+static int disable_vmi_timer;
 
 /* Cached VMI operations */
 struct {
@@ -662,12 +663,12 @@ void vmi_bringup(void)
 void vmi_bringup(void)
 {
  	/* We must establish the lowmem mapping for MMU ops to work */
-	if (vmi_rom)
+	if (vmi_ops.set_linear_mapping)
 		vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0);
 }
 
 /*
- * Return a pointer to the VMI function or a NOP stub
+ * Return a pointer to a VMI function or NULL if unimplemented
  */
 static void *vmi_get_function(int vmicall)
 {
@@ -678,12 +679,13 @@ static void *vmi_get_function(int vmical
 	if (rel->type == VMI_RELOCATION_CALL_REL)
 		return (void *)rel->eip;
 	else
-		return (void *)vmi_nop;
+		return NULL;
 }
 
 /*
  * Helper macro for making the VMI paravirt-ops fill code readable.
- * For unimplemented operations, fall back to default.
+ * For unimplemented operations, fall back to default, unless nop
+ * is returned by the ROM.
  */
 #define para_fill(opname, vmicall)				\
 do {								\
@@ -692,8 +694,28 @@ do {								\
 	if (rel->type != VMI_RELOCATION_NONE) {			\
 		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);	\
 		paravirt_ops.opname = (void *)rel->eip;		\
+	} else if (rel->type == VMI_RELOCATION_NOP) 		\
+		paravirt_ops.opname = (void *)vmi_nop;		\
+} while (0)
+
+/*
+ * Helper macro for making the VMI paravirt-ops fill code readable.
+ * For cached operations which do not match the VMI ROM ABI and must
+ * go through a translation stub.  Ignore NOPs, since it is not clear
+ * a NOP VMI function corresponds to a NOP paravirt-op when the
+ * functions are not in 1-1 correspondence.
+ */
+#define para_wrap(opname, wrapper, cache, vmicall)		\
+do {								\
+	reloc = call_vrom_long_func(vmi_rom, get_reloc,		\
+				    VMI_CALL_##vmicall);	\
+	BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL);		\
+	if (rel->type == VMI_RELOCATION_CALL_REL) {		\
+		paravirt_ops.opname = wrapper;			\
+		vmi_ops.cache = (void *)rel->eip;		\
 	}							\
 } while (0)
+
 
 /*
  * Activate the VMI interface and switch into paravirtualized mode
@@ -731,13 +753,8 @@ static inline int __init activate_vmi(vo
 	 *  rdpmc is not yet used in Linux
 	 */
 
-	/* CPUID is special, so very special */
-	reloc = call_vrom_long_func(vmi_rom, get_reloc,	VMI_CALL_CPUID);
-	if (rel->type != VMI_RELOCATION_NONE) {
-		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
-		vmi_ops.cpuid = (void *)rel->eip;
-		paravirt_ops.cpuid = vmi_cpuid;
-	}
+	/* CPUID is special, so very special it gets wrapped like a present */
+	para_wrap(cpuid, vmi_cpuid, cpuid, CPUID);
 
 	para_fill(clts, CLTS);
 	para_fill(get_debugreg, GetDR);
@@ -754,6 +771,7 @@ static inline int __init activate_vmi(vo
 	para_fill(restore_fl, SetInterruptMask);
 	para_fill(irq_disable, DisableInterrupts);
 	para_fill(irq_enable, EnableInterrupts);
+
 	/* irq_save_disable !!! sheer pain */
 	patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK],
 		     (char *)paravirt_ops.save_fl);
@@ -761,26 +779,18 @@ static inline int __init activate_vmi(vo
 		     (char *)paravirt_ops.irq_disable);
 
 	para_fill(wbinvd, WBINVD);
+	para_fill(read_tsc, RDTSC);
+
+	/* The following are handled by trap-and-emulate for now */
 	/* paravirt_ops.read_msr = vmi_rdmsr */
 	/* paravirt_ops.write_msr = vmi_wrmsr */
-	para_fill(read_tsc, RDTSC);
 	/* paravirt_ops.rdpmc = vmi_rdpmc */
 
-	/* TR interface doesn't pass TR value */
-	reloc = call_vrom_long_func(vmi_rom, get_reloc,	VMI_CALL_SetTR);
-	if (rel->type != VMI_RELOCATION_NONE) {
-		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
-		vmi_ops.set_tr = (void *)rel->eip;
-		paravirt_ops.load_tr_desc = vmi_set_tr;
-	}
+	/* TR interface doesn't pass TR value, wrap */
+	para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR);
 
 	/* LDT is special, too */
-	reloc = call_vrom_long_func(vmi_rom, get_reloc,	VMI_CALL_SetLDT);
-	if (rel->type != VMI_RELOCATION_NONE) {
-		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
-		vmi_ops._set_ldt = (void *)rel->eip;
-		paravirt_ops.set_ldt = vmi_set_ldt;
-	}
+	para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
 
 	para_fill(load_gdt, SetGDT);
 	para_fill(load_idt, SetIDT);
@@ -791,25 +801,14 @@ static inline int __init activate_vmi(vo
 	para_fill(write_ldt_entry, WriteLDTEntry);
 	para_fill(write_gdt_entry, WriteGDTEntry);
 	para_fill(write_idt_entry, WriteIDTEntry);
-	reloc = call_vrom_long_func(vmi_rom, get_reloc,
-				    VMI_CALL_UpdateKernelStack);
-	if (rel->type != VMI_RELOCATION_NONE) {
-		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
-		vmi_ops.set_kernel_stack = (void *)rel->eip;
-		paravirt_ops.load_esp0 = vmi_load_esp0;
-	}
-
+	para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
 	para_fill(set_iopl_mask, SetIOPLMask);
-	paravirt_ops.io_delay = (void *)vmi_nop;
-
+	para_fill(io_delay, IODelay);
 	para_fill(set_lazy_mode, SetLazyMode);
 
-	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB);
-	if (rel->type != VMI_RELOCATION_NONE) {
-		vmi_ops.flush_tlb = (void *)rel->eip;
-		paravirt_ops.flush_tlb_user = vmi_flush_tlb_user;
-		paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel;
-	}
+	/* user and kernel flush are just handled with different flags to FlushTLB */
+	para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB);
+	para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB);
 	para_fill(flush_tlb_single, InvalPage);
 
 	/*
@@ -824,28 +823,40 @@ static inline int __init activate_vmi(vo
 	vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE);
 	vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE);
 #endif
-	vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
+
+	if (vmi_ops.set_pte) {
+		paravirt_ops.set_pte = vmi_set_pte;
+		paravirt_ops.set_pte_at = vmi_set_pte_at;
+		paravirt_ops.set_pmd = vmi_set_pmd;
+#ifdef CONFIG_X86_PAE
+		paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
+		paravirt_ops.set_pte_present = vmi_set_pte_present;
+		paravirt_ops.set_pud = vmi_set_pud;
+		paravirt_ops.pte_clear = vmi_pte_clear;
+		paravirt_ops.pmd_clear = vmi_pmd_clear;
+#endif
+	}
+
+	if (vmi_ops.update_pte) {
+		paravirt_ops.pte_update = vmi_update_pte;
+		paravirt_ops.pte_update_defer = vmi_update_pte_defer;
+	}
+
 	vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
+	if (vmi_ops.allocate_page) {
+		paravirt_ops.alloc_pt = vmi_allocate_pt;
+		paravirt_ops.alloc_pd = vmi_allocate_pd;
+		paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+	}
+
 	vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
-
-	paravirt_ops.map_pt_hook = vmi_map_pt_hook;
-	paravirt_ops.alloc_pt = vmi_allocate_pt;
-	paravirt_ops.alloc_pd = vmi_allocate_pd;
-	paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
-	paravirt_ops.release_pt = vmi_release_pt;
-	paravirt_ops.release_pd = vmi_release_pd;
-	paravirt_ops.set_pte = vmi_set_pte;
-	paravirt_ops.set_pte_at = vmi_set_pte_at;
-	paravirt_ops.set_pmd = vmi_set_pmd;
-	paravirt_ops.pte_update = vmi_update_pte;
-	paravirt_ops.pte_update_defer = vmi_update_pte_defer;
-#ifdef CONFIG_X86_PAE
-	paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
-	paravirt_ops.set_pte_present = vmi_set_pte_present;
-	paravirt_ops.set_pud = vmi_set_pud;
-	paravirt_ops.pte_clear = vmi_pte_clear;
-	paravirt_ops.pmd_clear = vmi_pmd_clear;
-#endif
+	if (vmi_ops.release_page) {
+		paravirt_ops.release_pt = vmi_release_pt;
+		paravirt_ops.release_pd = vmi_release_pd;
+	}
+	para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping,
+		  SetLinearMapping);
+
 	/*
 	 * These MUST always be patched.  Don't support indirect jumps
 	 * through these operations, as the VMI interface may use either
@@ -857,21 +868,20 @@ static inline int __init activate_vmi(vo
 	paravirt_ops.iret = (void *)0xbadbab0;
 
 #ifdef CONFIG_SMP
-	paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook;
-	vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState);
+	para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead);
-	paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite);
-	paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite);
+	para_fill(apic_read, APICRead);
+	para_fill(apic_write, APICWrite);
+	para_fill(apic_write_atomic, APICWrite);
 #endif
 
 	/*
 	 * Check for VMI timer functionality by probing for a cycle frequency method
 	 */
 	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
-	if (rel->type != VMI_RELOCATION_NONE) {
+	if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) {
 		vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
 		vmi_timer_ops.get_cycle_counter =
 			vmi_get_function(VMI_CALL_GetCycleCounter);
@@ -891,13 +901,19 @@ static inline int __init activate_vmi(vo
 #endif
 		paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
  		paravirt_ops.get_cpu_khz = vmi_cpu_khz;
-	}
-	if (!disable_noidle)
+
+		/* We have true wallclock functions; disable CMOS clock sync */
+		no_sync_cmos_clock = 1;
+	} else {
+		disable_noidle = 1;
+		disable_vmi_timer = 1;
+	}
+
+	/* No-idle-HZ mode only works if both the VMI timer and no-idle are enabled */
+	if (disable_noidle || disable_vmi_timer)
 		para_fill(safe_halt, Halt);
-	else {
-		vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
-		paravirt_ops.safe_halt = vmi_safe_halt;
-	}
+	else
+		para_wrap(safe_halt, vmi_safe_halt, halt, Halt);
 
 	/*
 	 * Alternative instruction rewriting doesn't happen soon enough
@@ -933,10 +949,9 @@ void __init vmi_init(void)
 	activate_vmi();
 
 #ifdef CONFIG_X86_IO_APIC
+	/* This is virtual hardware; timer routing is wired correctly */
 	no_timer_check = 1;
 #endif
-	no_sync_cmos_clock = 1;
-
 	local_irq_restore(flags & X86_EFLAGS_IF);
 }
 
@@ -960,6 +975,9 @@ static int __init parse_vmi(char *arg)
 	} else if (!strcmp(arg, "disable_mtrr")) {
 		clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
 		disable_mtrr = 1;
+	} else if (!strcmp(arg, "disable_timer")) {
+		disable_vmi_timer = 1;
+		disable_noidle = 1;
 	} else if (!strcmp(arg, "disable_noidle"))
 		disable_noidle = 1;
 	return 0;
diff -r 0ba8434a5c7e include/asm-i386/vmi.h
--- a/include/asm-i386/vmi.h	Thu Mar 01 16:49:27 2007 -0800
+++ b/include/asm-i386/vmi.h	Thu Mar 01 16:49:33 2007 -0800
@@ -97,6 +97,7 @@
 #define VMI_CALL_SetInitialAPState	62
 #define VMI_CALL_APICWrite		63
 #define VMI_CALL_APICRead		64
+#define VMI_CALL_IODelay		65
 #define VMI_CALL_SetLazyMode		73
 
 /*
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ