Index: cpu-exec.c =================================================================== --- cpu-exec.c (.../qemu-vendor-drops) (revision 3256) +++ cpu-exec.c (.../release/qemu) (revision 3256) @@ -35,6 +35,10 @@ #include #endif +#ifdef USE_KVM +#include "qemu-kvm.h" +#endif + int tb_invalidated_flag; //#define DEBUG_EXEC @@ -449,6 +453,10 @@ } #endif +#ifdef USE_KVM + kvm_cpu_exec(env); + longjmp(env->jmp_env, 1); +#endif T0 = 0; /* force lookup of first TB */ for(;;) { #if defined(__sparc__) && !defined(HOST_SOLARIS) Index: Makefile.target =================================================================== --- Makefile.target (.../qemu-vendor-drops) (revision 3256) +++ Makefile.target (.../release/qemu) (revision 3256) @@ -201,8 +201,8 @@ OBJS+= libqemu.a # cpu emulator library -LIBOBJS=exec.o kqemu.o translate-op.o translate-all.o cpu-exec.o\ - translate.o op.o +LIBOBJS=exec.o kqemu.o qemu-kvm.o translate-op.o translate-all.o cpu-exec.o\ + translate.o op.o ifdef CONFIG_SOFTFLOAT LIBOBJS+=fpu/softfloat.o else @@ -323,6 +323,10 @@ SOUND_HW += fmopl.o adlib.o endif AUDIODRV+= wavcapture.o +ifdef CONFIG_KVM_INC +DEFINES += -I $(CONFIG_KVM_INC) -I $(CONFIG_KVM_KERNEL_INC) +LIBS += -L $(CONFIG_KVM_LIB) -lkvm +endif # SCSI layer VL_OBJS+= scsi-disk.o cdrom.o lsi53c895a.o Index: exec.c =================================================================== --- exec.c (.../qemu-vendor-drops) (revision 3256) +++ exec.c (.../release/qemu) (revision 3256) @@ -81,6 +81,7 @@ int phys_ram_fd; uint8_t *phys_ram_base; uint8_t *phys_ram_dirty; +uint8_t *bios_mem; CPUState *first_cpu; /* current CPU in the current thread. 
It is only valid inside @@ -1042,6 +1043,10 @@ if (env->nb_breakpoints >= MAX_BREAKPOINTS) return -1; env->breakpoints[env->nb_breakpoints++] = pc; + +#ifdef USE_KVM + kvm_update_debugger(env); +#endif breakpoint_invalidate(env, pc); return 0; @@ -1065,6 +1070,10 @@ if (i < env->nb_breakpoints) env->breakpoints[i] = env->breakpoints[env->nb_breakpoints]; +#ifdef USE_KVM + kvm_update_debugger(env); +#endif + breakpoint_invalidate(env, pc); return 0; #else @@ -1083,7 +1092,10 @@ /* XXX: only flush what is necessary */ tb_flush(env); } +#ifdef USE_KVM + kvm_update_debugger(env); #endif +#endif } /* enable or disable low levels log */ Index: configure =================================================================== --- configure (.../qemu-vendor-drops) (revision 3256) +++ configure (.../release/qemu) (revision 3256) @@ -87,6 +87,7 @@ bsd="no" linux="no" kqemu="no" +kvm="no" profiler="no" kernel_path="" cocoa="no" @@ -221,6 +222,8 @@ ;; --disable-kqemu) kqemu="no" ;; + --enable-kvm) kvm="yes" + ;; --enable-profiler) profiler="yes" ;; --kernel-path=*) kernel_path="$optarg" @@ -268,6 +271,7 @@ echo "kqemu kernel acceleration support:" echo " --disable-kqemu disable kqemu support" echo " --kernel-path=PATH set the kernel path (configure probes it)" +echo " --enable-kvm enable kernel virtual machine support" echo "" echo "Advanced options (experts only):" echo " --source-path=PATH path of source code [$source_path]" @@ -559,6 +563,7 @@ fi echo "FMOD support $fmod $fmod_support" echo "kqemu support $kqemu" +echo "kvm support $kvm" echo "Documentation $build_docs" [ ! 
-z "$uname_release" ] && \ echo "uname -r $uname_release" @@ -784,6 +789,15 @@ interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_cpu/g"` echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h +configure_kvm() { + if test $kvm = "yes" -a "$target_softmmu" = "yes" -a $cpu = "$target_cpu" ; then + echo "#define USE_KVM 1" >> $config_h + echo "CONFIG_KVM_INC=$PWD/../user" >> $config_mak + echo "CONFIG_KVM_LIB=$PWD/../user" >> $config_mak + echo "CONFIG_KVM_KERNEL_INC=$PWD/../kernel/include" >> $config_mak + fi +} + if test "$target_cpu" = "i386" ; then echo "TARGET_ARCH=i386" >> $config_mak echo "#define TARGET_ARCH \"i386\"" >> $config_h @@ -791,6 +805,7 @@ if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "i386" ; then echo "#define USE_KQEMU 1" >> $config_h fi + configure_kvm elif test "$target_cpu" = "arm" -o "$target_cpu" = "armeb" ; then echo "TARGET_ARCH=arm" >> $config_mak echo "#define TARGET_ARCH \"arm\"" >> $config_h @@ -822,6 +837,7 @@ if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "x86_64" ; then echo "#define USE_KQEMU 1" >> $config_h fi + configure_kvm elif test "$target_cpu" = "mips" -o "$target_cpu" = "mipsel" ; then echo "TARGET_ARCH=mips" >> $config_mak echo "#define TARGET_ARCH \"mips\"" >> $config_h Index: target-i386/helper.c =================================================================== --- target-i386/helper.c (.../qemu-vendor-drops) (revision 3256) +++ target-i386/helper.c (.../release/qemu) (revision 3256) @@ -184,7 +184,15 @@ if (!(env->tr.flags & DESC_P_MASK)) cpu_abort(env, "invalid tss"); type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf; +#ifdef USE_KVM + /* + * Bit 1 is the Busy bit. 
We believe it is legal to interrupt into a busy + * segment + */ + if ((type & 5) != 1) +#else if ((type & 7) != 1) +#endif cpu_abort(env, "invalid tss type"); shift = type >> 3; index = (dpl * 4 + 2) << shift; @@ -497,7 +505,12 @@ /* TSS must be a valid 32 bit one */ if (!(env->tr.flags & DESC_P_MASK) || +#ifdef USE_KVM + /* Probable qemu bug: 11 is a valid segment type */ + ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xd) != 9 || +#else ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 9 || +#endif env->tr.limit < 103) goto fail; io_offset = lduw_kernel(env->tr.base + 0x66); @@ -824,6 +837,11 @@ uint32_t e1, e2, e3, ss; target_ulong old_eip, esp, offset; +#ifdef USE_KVM + printf("%s: unexpect\n", __FUNCTION__); + exit(-1); +#endif + has_error_code = 0; if (!is_int && !is_hw) { switch(intno) { @@ -1107,6 +1125,10 @@ int dpl, cpl; uint32_t e2; +#ifdef USE_KVM + printf("%s: unexpect\n", __FUNCTION__); + exit(-1); +#endif dt = &env->idt; ptr = dt->base + (intno * 8); e2 = ldl_kernel(ptr + 4); @@ -1132,6 +1154,10 @@ void do_interrupt(int intno, int is_int, int error_code, target_ulong next_eip, int is_hw) { +#ifdef USE_KVM + printf("%s: unexpect\n", __FUNCTION__); + exit(-1); +#endif if (loglevel & CPU_LOG_INT) { if ((env->cr[0] & CR0_PE_MASK)) { static int count; @@ -1660,6 +1686,12 @@ cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl, get_seg_base(e1, e2), limit, e2); EIP = new_eip; +#ifdef USE_KVM + if (e2 & DESC_L_MASK) { + env->exception_index = -1; + cpu_loop_exit(); + } +#endif } else { /* jump to call or task gate */ dpl = (e2 >> DESC_DPL_SHIFT) & 3; Index: target-i386/cpu.h =================================================================== --- target-i386/cpu.h (.../qemu-vendor-drops) (revision 3256) +++ target-i386/cpu.h (.../release/qemu) (revision 3256) @@ -154,13 +154,17 @@ #define HF_MP_MASK (1 << HF_MP_SHIFT) #define HF_EM_MASK (1 << HF_EM_SHIFT) #define HF_TS_MASK (1 << HF_TS_SHIFT) +#define HF_IOPL_MASK (3 << HF_IOPL_SHIFT) #define HF_LMA_MASK (1 << 
HF_LMA_SHIFT) #define HF_CS64_MASK (1 << HF_CS64_SHIFT) #define HF_OSFXSR_MASK (1 << HF_OSFXSR_SHIFT) +#define HF_VM_MASK (1 << HF_VM_SHIFT) #define HF_HALTED_MASK (1 << HF_HALTED_SHIFT) -#define CR0_PE_MASK (1 << 0) -#define CR0_MP_MASK (1 << 1) +#define CR0_PE_SHIFT 0 +#define CR0_PE_MASK (1 << CR0_PE_SHIFT) +#define CR0_MP_SHIFT 1 +#define CR0_MP_MASK (1 << CR0_MP_SHIFT) #define CR0_EM_MASK (1 << 2) #define CR0_TS_MASK (1 << 3) #define CR0_ET_MASK (1 << 4) @@ -177,7 +181,8 @@ #define CR4_PAE_MASK (1 << 5) #define CR4_PGE_MASK (1 << 7) #define CR4_PCE_MASK (1 << 8) -#define CR4_OSFXSR_MASK (1 << 9) +#define CR4_OSFXSR_SHIFT 9 +#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT) #define CR4_OSXMMEXCPT_MASK (1 << 10) #define PG_PRESENT_BIT 0 @@ -524,6 +529,11 @@ int kqemu_enabled; int last_io_time; #endif + +#ifdef USE_KVM + int kvm_pending_int; +#endif + /* in order to simplify APIC support, we leave this pointer to the user */ struct APICState *apic_state; Index: hw/cirrus_vga.c =================================================================== --- hw/cirrus_vga.c (.../qemu-vendor-drops) (revision 3256) +++ hw/cirrus_vga.c (.../release/qemu) (revision 3256) @@ -28,6 +28,9 @@ */ #include "vl.h" #include "vga_int.h" +#ifndef _WIN32 +#include <sys/mman.h> +#endif /* * TODO: @@ -231,6 +234,10 @@ int cirrus_linear_io_addr; int cirrus_linear_bitblt_io_addr; int cirrus_mmio_io_addr; +#ifdef USE_KVM + unsigned long cirrus_lfb_addr; + unsigned long cirrus_lfb_end; +#endif uint32_t cirrus_addr_mask; uint32_t linear_mmio_mask; uint8_t cirrus_shadow_gr0; @@ -267,6 +274,10 @@ int last_hw_cursor_y_end; int real_vram_size; /* XXX: suppress that */ CPUWriteMemoryFunc **cirrus_linear_write; +#ifdef USE_KVM + unsigned long map_addr; + unsigned long map_end; +#endif } CirrusVGAState; typedef struct PCICirrusVGAState { @@ -2520,6 +2531,52 @@ cirrus_linear_bitblt_writel, }; +#ifdef USE_KVM + +#include "qemu-kvm.h" + +extern kvm_context_t kvm_context; + +static void *set_vram_mapping(unsigned long
begin, unsigned long end) +{ + void *vram_pointer = NULL; + + printf("set_vram_mapping: memory: %lx - %lx\n", + begin, end); + + /* align begin and end address */ + begin = begin & TARGET_PAGE_MASK; + end = begin + VGA_RAM_SIZE; + end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK; + + vram_pointer = kvm_create_phys_mem(kvm_context, begin, end - begin, 1, + 1, 1); + + if (vram_pointer == NULL) { + printf("set_vram_mapping: cannot allocate memory: %m\n"); + return NULL; + } + + memset(vram_pointer, 0, end - begin); + + printf("set_vram_mapping: return %p\n", vram_pointer); + return vram_pointer; +} + +static int unset_vram_mapping(unsigned long begin, unsigned long end) +{ + /* align begin and end address (same order as set_vram_mapping so both cover the same range) */ + begin = begin & TARGET_PAGE_MASK; + end = begin + VGA_RAM_SIZE; + end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK; + + kvm_destroy_phys_mem(kvm_context, begin, end - begin); + + return 0; +} + +#endif + /* Compute the memory access functions */ static void cirrus_update_memory_access(CirrusVGAState *s) { @@ -2538,11 +2595,43 @@ mode = s->gr[0x05] & 0x7; if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) { +#ifdef USE_KVM + if (s->cirrus_lfb_addr && s->cirrus_lfb_end && !s->map_addr) { + void *vram_pointer, *old_vram; + + vram_pointer = set_vram_mapping(s->cirrus_lfb_addr, + s->cirrus_lfb_end); + if (!vram_pointer) + fprintf(stderr, "NULL vram_pointer\n"); + else { + old_vram = vga_update_vram((VGAState *)s, vram_pointer, + VGA_RAM_SIZE); + qemu_free(old_vram); + s->map_addr = s->cirrus_lfb_addr; /* record the mapping only when it was actually created */ + s->map_end = s->cirrus_lfb_end; + } + } +#endif s->cirrus_linear_write[0] = cirrus_linear_mem_writeb; s->cirrus_linear_write[1] = cirrus_linear_mem_writew; s->cirrus_linear_write[2] = cirrus_linear_mem_writel; } else { generic_io: +#ifdef USE_KVM + if (s->cirrus_lfb_addr && s->cirrus_lfb_end && s->map_addr) { + int error; + void *old_vram = NULL; + + error = unset_vram_mapping(s->cirrus_lfb_addr, + s->cirrus_lfb_end); + if (!error) + old_vram =
vga_update_vram((VGAState *)s, NULL, + VGA_RAM_SIZE); + if (old_vram) + munmap(old_vram, s->map_end - s->map_addr); /* length is end - start */ + s->map_addr = s->map_end = 0; + } +#endif s->cirrus_linear_write[0] = cirrus_linear_writeb; s->cirrus_linear_write[1] = cirrus_linear_writew; s->cirrus_linear_write[2] = cirrus_linear_writel; @@ -2938,6 +3027,11 @@ qemu_put_be32s(f, &s->hw_cursor_y); /* XXX: we do not save the bitblt state - we assume we do not save the state when the blitter is active */ + +#ifdef USE_KVM + qemu_put_be32s(f, &s->real_vram_size); + qemu_put_buffer(f, s->vram_ptr, s->real_vram_size); +#endif } static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id) @@ -2981,6 +3075,22 @@ qemu_get_be32s(f, &s->hw_cursor_x); qemu_get_be32s(f, &s->hw_cursor_y); +#ifdef USE_KVM + { + int real_vram_size; + qemu_get_be32s(f, &real_vram_size); + if (real_vram_size != s->real_vram_size) { + if (real_vram_size > s->real_vram_size) + real_vram_size = s->real_vram_size; + printf("%s: REAL_VRAM_SIZE MISMATCH !!!!!!
SAVED=%d CURRENT=%d", + __FUNCTION__, real_vram_size, s->real_vram_size); + } + qemu_get_buffer(f, s->vram_ptr, real_vram_size); + cirrus_update_memory_access(s); + } +#endif + + /* force refresh */ s->graphic_mode = -1; cirrus_update_bank_ptr(s, 0); @@ -3136,6 +3246,15 @@ /* XXX: add byte swapping apertures */ cpu_register_physical_memory(addr, s->vram_size, s->cirrus_linear_io_addr); +#ifdef USE_KVM + s->cirrus_lfb_addr = addr; + s->cirrus_lfb_end = addr + VGA_RAM_SIZE; + + if (s->map_addr && (s->cirrus_lfb_addr != s->map_addr) && + (s->cirrus_lfb_end != s->map_end)) + printf("cirrus vga map change while on lfb mode\n"); +#endif + cpu_register_physical_memory(addr + 0x1000000, 0x400000, s->cirrus_linear_bitblt_io_addr); } Index: hw/vga_int.h =================================================================== --- hw/vga_int.h (.../qemu-vendor-drops) (revision 3256) +++ hw/vga_int.h (.../release/qemu) (revision 3256) @@ -169,5 +169,6 @@ unsigned int color0, unsigned int color1, unsigned int color_xor); +void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size); extern const uint8_t sr_mask[8]; extern const uint8_t gr_mask[16]; Index: hw/pc.c =================================================================== --- hw/pc.c (.../qemu-vendor-drops) (revision 3256) +++ hw/pc.c (.../release/qemu) (revision 3256) @@ -22,6 +22,9 @@ * THE SOFTWARE. 
*/ #include "vl.h" +#ifdef USE_KVM +#include "qemu-kvm.h" +#endif /* output Bochs bios info messages */ //#define DEBUG_BIOS @@ -605,6 +608,10 @@ nb_ne2k++; } +#ifdef USE_KVM +extern kvm_context_t kvm_context; +#endif + /* PC hardware initialisation */ static void pc_init1(int ram_size, int vga_ram_size, int boot_device, DisplayState *ds, const char **fd_filename, int snapshot, @@ -674,6 +681,9 @@ /* setup basic memory access */ cpu_register_physical_memory(0xc0000, 0x10000, vga_bios_offset | IO_MEM_ROM); +#ifdef USE_KVM + memcpy(phys_ram_base + 0xc0000, phys_ram_base + vga_bios_offset, 0x10000); +#endif /* map the last 128KB of the BIOS in ISA space */ isa_bios_size = bios_size; @@ -684,10 +694,25 @@ cpu_register_physical_memory(0x100000 - isa_bios_size, isa_bios_size, (bios_offset + bios_size - isa_bios_size) | IO_MEM_ROM); +#ifdef USE_KVM + memcpy(phys_ram_base + 0x100000 - isa_bios_size, phys_ram_base + (bios_offset + bios_size - isa_bios_size), isa_bios_size); +#endif /* map all the bios at the top of memory */ cpu_register_physical_memory((uint32_t)(-bios_size), bios_size, bios_offset | IO_MEM_ROM); +#ifdef USE_KVM + bios_mem = kvm_create_phys_mem(kvm_context, (uint32_t)(-bios_size), + bios_size, 2, 0, 1); + if (!bios_mem) { + exit(1); + } + memcpy(bios_mem, phys_ram_base + bios_offset, bios_size); + + cpu_register_physical_memory(phys_ram_size - KVM_EXTRA_PAGES * 4096, KVM_EXTRA_PAGES * 4096, + (phys_ram_size - KVM_EXTRA_PAGES * 4096) | IO_MEM_ROM); +#endif + bochs_bios_init(); if (linux_boot) { Index: hw/vga.c =================================================================== --- hw/vga.c (.../qemu-vendor-drops) (revision 3256) +++ hw/vga.c (.../release/qemu) (revision 3256) @@ -1359,6 +1359,22 @@ } } +#ifdef USE_KVM + +#include "kvmctl.h" +extern kvm_context_t kvm_context; + +static int bitmap_get_dirty(unsigned long *bitmap, unsigned nr) +{ + unsigned word = nr / ((sizeof bitmap[0]) * 8); + unsigned bit = nr % ((sizeof bitmap[0]) * 8); + + //printf("%x 
-> %ld\n", nr, (bitmap[word] >> bit) & 1); + return (bitmap[word] >> bit) & 1; +} + +#endif + /* * graphic modes */ @@ -1371,6 +1387,19 @@ uint32_t v, addr1, addr; vga_draw_line_func *vga_draw_line; +#ifdef USE_KVM + + /* HACK ALERT */ +#define BITMAP_SIZE ((8*1024*1024) / 4096 / 8 / sizeof(long)) + unsigned long bitmap[BITMAP_SIZE]; + + kvm_get_dirty_pages(kvm_context, 1, &bitmap); + +#define cpu_physical_memory_get_dirty(addr, type) \ + (bitmap_get_dirty(bitmap, (addr - s->vram_offset) >> TARGET_PAGE_BITS) \ + | cpu_physical_memory_get_dirty(addr, type)) +#endif + full_update |= update_basic_params(s); s->get_resolution(s, &width, &height); @@ -1722,6 +1751,7 @@ } } +/* when used on xen/kvm environment, the vga_ram_base is not used */ void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base, unsigned long vga_ram_offset, int vga_ram_size) { @@ -1752,7 +1782,11 @@ vga_reset(s); +#ifndef USE_KVM s->vram_ptr = vga_ram_base; +#else + s->vram_ptr = qemu_malloc(vga_ram_size); +#endif s->vram_offset = vga_ram_offset; s->vram_size = vga_ram_size; s->ds = ds; @@ -1843,6 +1877,7 @@ /* XXX: vga_ram_size must be a power of two */ pci_register_io_region(d, 0, vga_ram_size, PCI_ADDRESS_SPACE_MEM_PREFETCH, vga_map); + printf("vga_bios_size %d\n", vga_bios_size); if (vga_bios_size != 0) { unsigned int bios_total_size; s->bios_offset = vga_bios_offset; @@ -1864,6 +1899,33 @@ return 0; } +void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size) +{ + uint8_t *old_pointer; + + printf("vga_update_vram: base %p ptr %p\n", vga_ram_base, s->vram_ptr); + if (s->vram_size != vga_ram_size) { + fprintf(stderr, "No support to change vga_ram_size\n"); + return NULL; + } + + if (!vga_ram_base) { + vga_ram_base = qemu_malloc(vga_ram_size); + if (!vga_ram_base) { + fprintf(stderr, "reallocate error\n"); + return NULL; + } + } + + /* XXX lock needed? 
*/ + memcpy(vga_ram_base, s->vram_ptr, vga_ram_size); + old_pointer = s->vram_ptr; + s->vram_ptr = vga_ram_base; + + printf("vga_update_vram: done\n"); + return old_pointer; +} + /********************************************************/ /* vga screen dump */ Index: cpu-all.h =================================================================== --- cpu-all.h (.../qemu-vendor-drops) (revision 3256) +++ cpu-all.h (.../release/qemu) (revision 3256) @@ -818,6 +818,7 @@ extern int phys_ram_fd; extern uint8_t *phys_ram_base; extern uint8_t *phys_ram_dirty; +extern uint8_t *bios_mem; /* physical memory access */ #define TLB_INVALID_MASK (1 << 3) Index: qemu-kvm.c =================================================================== --- qemu-kvm.c (.../qemu-vendor-drops) (revision 0) +++ qemu-kvm.c (.../release/qemu) (revision 3256) @@ -0,0 +1,476 @@ + +#include "config.h" +#include "config-host.h" + +#ifdef USE_KVM + +#include "exec.h" + +#include "qemu-kvm.h" +#include +#include + +kvm_context_t kvm_context; + +#define NR_CPU 16 +static CPUState *saved_env[NR_CPU]; + +static void load_regs(CPUState *env) +{ + struct kvm_regs regs; + struct kvm_sregs sregs; + + /* hack: save env */ + if (!saved_env[0]) + saved_env[0] = env; + + regs.rax = env->regs[R_EAX]; + regs.rbx = env->regs[R_EBX]; + regs.rcx = env->regs[R_ECX]; + regs.rdx = env->regs[R_EDX]; + regs.rsi = env->regs[R_ESI]; + regs.rdi = env->regs[R_EDI]; + regs.rsp = env->regs[R_ESP]; + regs.rbp = env->regs[R_EBP]; +#ifdef TARGET_X86_64 + regs.r8 = env->regs[8]; + regs.r9 = env->regs[9]; + regs.r10 = env->regs[10]; + regs.r11 = env->regs[11]; + regs.r12 = env->regs[12]; + regs.r13 = env->regs[13]; + regs.r14 = env->regs[14]; + regs.r15 = env->regs[15]; +#endif + + regs.rflags = env->eflags; + regs.rip = env->eip; + + kvm_set_regs(kvm_context, 0, &regs); + +#define set_seg(var, seg, default_s, default_type) \ + do { \ + unsigned flags = env->seg.flags; \ + unsigned valid = flags & ~DESC_P_MASK; \ + sregs.var.selector =
env->seg.selector; \ + sregs.var.base = env->seg.base; \ + sregs.var.limit = env->seg.limit; \ + sregs.var.type = valid ? (flags >> DESC_TYPE_SHIFT) & 15 : default_type; \ + sregs.var.present = valid ? (flags & DESC_P_MASK) != 0 : 1; \ + sregs.var.dpl = env->seg.selector & 3; \ + sregs.var.db = valid ? (flags >> DESC_B_SHIFT) & 1 : 0; \ + sregs.var.s = valid ? (flags & DESC_S_MASK) != 0 : default_s; \ + sregs.var.l = valid ? (flags >> DESC_L_SHIFT) & 1 : 0; \ + sregs.var.g = valid ? (flags & DESC_G_MASK) != 0 : 0; \ + sregs.var.avl = (flags & DESC_AVL_MASK) != 0; \ + sregs.var.unusable = 0; \ + } while (0) + + +#define set_v8086_seg(var, seg) \ + do { \ + sregs.var.selector = env->seg.selector; \ + sregs.var.base = env->seg.base; \ + sregs.var.limit = env->seg.limit; \ + sregs.var.type = 3; \ + sregs.var.present = 1; \ + sregs.var.dpl = 3; \ + sregs.var.db = 0; \ + sregs.var.s = 1; \ + sregs.var.l = 0; \ + sregs.var.g = 0; \ + sregs.var.avl = 0; \ + sregs.var.unusable = 0; \ + } while (0) + + + if ((env->eflags & VM_MASK)) { + set_v8086_seg(cs, segs[R_CS]); + set_v8086_seg(ds, segs[R_DS]); + set_v8086_seg(es, segs[R_ES]); + set_v8086_seg(fs, segs[R_FS]); + set_v8086_seg(gs, segs[R_GS]); + set_v8086_seg(ss, segs[R_SS]); + } else { + set_seg(cs, segs[R_CS], 1, 11); + set_seg(ds, segs[R_DS], 1, 3); + set_seg(es, segs[R_ES], 1, 3); + set_seg(fs, segs[R_FS], 1, 3); + set_seg(gs, segs[R_GS], 1, 3); + set_seg(ss, segs[R_SS], 1, 3); + + if (env->cr[0] & CR0_PE_MASK) { + /* force ss cpl to cs cpl */ + sregs.ss.selector = (sregs.ss.selector & ~3) | + (sregs.cs.selector & 3); + sregs.ss.dpl = sregs.ss.selector & 3; + } + } + + set_seg(tr, tr, 0, 3); + set_seg(ldt, ldt, 0, 2); + + sregs.idt.limit = env->idt.limit; + sregs.idt.base = env->idt.base; + sregs.gdt.limit = env->gdt.limit; + sregs.gdt.base = env->gdt.base; + + sregs.cr0 = env->cr[0]; + sregs.cr2 = env->cr[2]; + sregs.cr3 = env->cr[3]; + sregs.cr4 = env->cr[4]; + sregs.cr8 = cpu_get_apic_tpr(env); + sregs.apic_base = 
cpu_get_apic_base(env); + sregs.efer = env->efer; + + kvm_set_sregs(kvm_context, 0, &sregs); +} + +static void save_regs(CPUState *env) +{ + struct kvm_regs regs; + struct kvm_sregs sregs; + uint32_t hflags; + + kvm_get_regs(kvm_context, 0, &regs); + + env->regs[R_EAX] = regs.rax; + env->regs[R_EBX] = regs.rbx; + env->regs[R_ECX] = regs.rcx; + env->regs[R_EDX] = regs.rdx; + env->regs[R_ESI] = regs.rsi; + env->regs[R_EDI] = regs.rdi; + env->regs[R_ESP] = regs.rsp; + env->regs[R_EBP] = regs.rbp; +#ifdef TARGET_X86_64 + env->regs[8] = regs.r8; + env->regs[9] = regs.r9; + env->regs[10] = regs.r10; + env->regs[11] = regs.r11; + env->regs[12] = regs.r12; + env->regs[13] = regs.r13; + env->regs[14] = regs.r14; + env->regs[15] = regs.r15; +#endif + + env->eflags = regs.rflags; + env->eip = regs.rip; + + kvm_get_sregs(kvm_context, 0, &sregs); + +#define get_seg(var, seg) \ + env->seg.selector = sregs.var.selector; \ + env->seg.base = sregs.var.base; \ + env->seg.limit = sregs.var.limit ; \ + env->seg.flags = \ + (sregs.var.type << DESC_TYPE_SHIFT) \ + | (sregs.var.present * DESC_P_MASK) \ + | (sregs.var.dpl << DESC_DPL_SHIFT) \ + | (sregs.var.db << DESC_B_SHIFT) \ + | (sregs.var.s * DESC_S_MASK) \ + | (sregs.var.l << DESC_L_SHIFT) \ + | (sregs.var.g * DESC_G_MASK) \ + | (sregs.var.avl * DESC_AVL_MASK) + + get_seg(cs, segs[R_CS]); + get_seg(ds, segs[R_DS]); + get_seg(es, segs[R_ES]); + get_seg(fs, segs[R_FS]); + get_seg(gs, segs[R_GS]); + get_seg(ss, segs[R_SS]); + + get_seg(tr, tr); + get_seg(ldt, ldt); + + env->idt.limit = sregs.idt.limit; + env->idt.base = sregs.idt.base; + env->gdt.limit = sregs.gdt.limit; + env->gdt.base = sregs.gdt.base; + + env->cr[0] = sregs.cr0; + env->cr[2] = sregs.cr2; + env->cr[3] = sregs.cr3; + env->cr[4] = sregs.cr4; + + cpu_set_apic_tpr(env, sregs.cr8); + cpu_set_apic_base(env, sregs.apic_base); + + env->efer = sregs.efer; + +#define HFLAG_COPY_MASK ~( \ + HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \ + HF_TS_MASK | HF_TF_MASK |
HF_VM_MASK | HF_IOPL_MASK | \ + HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ + HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) + + + + hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; + hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT); + hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) & + (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); + hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK)); + hflags |= (env->cr[4] & CR4_OSFXSR_MASK) << + (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); + + if (env->efer & MSR_EFER_LMA) { + hflags |= HF_LMA_MASK; + } + + if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) { + hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK; + } else { + hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >> + (DESC_B_SHIFT - HF_CS32_SHIFT); + hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >> + (DESC_B_SHIFT - HF_SS32_SHIFT); + if (!(env->cr[0] & CR0_PE_MASK) || + (env->eflags & VM_MASK) || + !(hflags & HF_CS32_MASK)) { + hflags |= HF_ADDSEG_MASK; + } else { + hflags |= ((env->segs[R_DS].base | + env->segs[R_ES].base | + env->segs[R_SS].base) != 0) << + HF_ADDSEG_SHIFT; + } + } + env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags; + CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + DF = 1 - (2 * ((env->eflags >> 10) & 1)); + CC_OP = CC_OP_EFLAGS; + env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + + tlb_flush(env, 1); + + env->kvm_pending_int = sregs.pending_int; +} + + +#include + +static inline void push_interrupts(CPUState *env) +{ + if (!(env->interrupt_request & CPU_INTERRUPT_HARD) || + !(env->eflags & IF_MASK) || env->kvm_pending_int) { + if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) { + env->interrupt_request &= ~CPU_INTERRUPT_EXIT; + env->exception_index = EXCP_INTERRUPT; + cpu_loop_exit(); + } + return; + } + + do { + env->interrupt_request &= ~CPU_INTERRUPT_HARD; + + // for now using cpu 0 + kvm_inject_irq(kvm_context, 0, 
cpu_get_pic_interrupt(env)); + } while ( (env->interrupt_request & CPU_INTERRUPT_HARD) && (env->cr[0] & CR0_PG_MASK) ); /* CR0_PG_MASK is a CR0 bit; CR2 holds a fault address */ +} + +void kvm_load_registers(CPUState *env) +{ + load_regs(env); +} + +int kvm_cpu_exec(CPUState *env) +{ + + push_interrupts(env); + + if (!saved_env[0]) + saved_env[0] = env; + + kvm_run(kvm_context, 0); + + save_regs(env); + + return 0; +} + + +static int kvm_cpuid(void *opaque, uint64_t *rax, uint64_t *rbx, + uint64_t *rcx, uint64_t *rdx) +{ + CPUState **envs = opaque; + CPUState *saved_env; + + saved_env = env; + env = envs[0]; + + env->regs[R_EAX] = *rax; + env->regs[R_EBX] = *rbx; + env->regs[R_ECX] = *rcx; + env->regs[R_EDX] = *rdx; + helper_cpuid(); + *rdx = env->regs[R_EDX]; + *rcx = env->regs[R_ECX]; + *rbx = env->regs[R_EBX]; + *rax = env->regs[R_EAX]; + env = saved_env; + return 0; +} + +static int kvm_debug(void *opaque, int vcpu) +{ + CPUState **envs = opaque; + + env = envs[0]; + save_regs(env); + env->exception_index = EXCP_DEBUG; + return 1; +} + +static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data) +{ + *data = cpu_inb(0, addr); + return 0; +} + +static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data) +{ + *data = cpu_inw(0, addr); + return 0; +} + +static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data) +{ + *data = cpu_inl(0, addr); + return 0; +} + +static int kvm_outb(void *opaque, uint16_t addr, uint8_t data) +{ + cpu_outb(0, addr, data); + return 0; +} + +static int kvm_outw(void *opaque, uint16_t addr, uint16_t data) +{ + cpu_outw(0, addr, data); + return 0; +} + +static int kvm_outl(void *opaque, uint16_t addr, uint32_t data) +{ + cpu_outl(0, addr, data); + return 0; +} + +static int kvm_readb(void *opaque, uint64_t addr, uint8_t *data) +{ + *data = ldub_phys(addr); + return 0; +} + +static int kvm_readw(void *opaque, uint64_t addr, uint16_t *data) +{ + *data = lduw_phys(addr); + return 0; +} + +static int kvm_readl(void *opaque, uint64_t addr, uint32_t *data) +{ + *data = ldl_phys(addr); +
return 0; +} + +static int kvm_readq(void *opaque, uint64_t addr, uint64_t *data) +{ + *data = ldq_phys(addr); + return 0; +} + +static int kvm_writeb(void *opaque, uint64_t addr, uint8_t data) +{ + stb_phys(addr, data); + return 0; +} + +static int kvm_writew(void *opaque, uint64_t addr, uint16_t data) +{ + stw_phys(addr, data); + return 0; +} + +static int kvm_writel(void *opaque, uint64_t addr, uint32_t data) +{ + stl_phys(addr, data); + return 0; +} + +static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data) +{ + stq_phys(addr, data); + return 0; +} + +static int kvm_io_window(void *opaque) +{ + return 1; +} + + +static int kvm_halt(void *opaque, int vcpu) +{ + CPUState **envs = opaque, *env; + + env = envs[0]; + save_regs(env); + + if (!((env->kvm_pending_int || + (env->interrupt_request & CPU_INTERRUPT_HARD)) && + (env->eflags & IF_MASK))) { + env->hflags |= HF_HALTED_MASK; + env->exception_index = EXCP_HLT; + } + return 1; +} + +static struct kvm_callbacks qemu_kvm_ops = { + .cpuid = kvm_cpuid, + .debug = kvm_debug, + .inb = kvm_inb, + .inw = kvm_inw, + .inl = kvm_inl, + .outb = kvm_outb, + .outw = kvm_outw, + .outl = kvm_outl, + .readb = kvm_readb, + .readw = kvm_readw, + .readl = kvm_readl, + .readq = kvm_readq, + .writeb = kvm_writeb, + .writew = kvm_writew, + .writel = kvm_writel, + .writeq = kvm_writeq, + .halt = kvm_halt, + .io_window = kvm_io_window, +}; + +void kvm_qemu_init(void) +{ + kvm_context = kvm_init(&qemu_kvm_ops, saved_env); + kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base); +} + +int kvm_update_debugger(CPUState *env) +{ + struct kvm_debug_guest dbg = { 0 }; /* unused breakpoint slots and singlestep must not carry stack garbage */ + int i; + + dbg.enabled = 0; + if (env->nb_breakpoints || env->singlestep_enabled) { + dbg.enabled = 1; + for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) { + dbg.breakpoints[i].enabled = 1; + dbg.breakpoints[i].address = env->breakpoints[i]; + } + dbg.singlestep = env->singlestep_enabled; + } + return kvm_guest_debug(kvm_context, 0, &dbg); +} + + +#endif Index:
qemu-kvm.h =================================================================== --- qemu-kvm.h (.../qemu-vendor-drops) (revision 0) +++ qemu-kvm.h (.../release/qemu) (revision 3256) @@ -0,0 +1,11 @@ +#ifndef QEMU_KVM_H +#define QEMU_KVM_H + +#include "kvmctl.h" + +void kvm_qemu_init(void); +void kvm_load_registers(CPUState *env); +int kvm_cpu_exec(CPUState *env); +int kvm_update_debugger(CPUState *env); + +#endif Index: vl.c =================================================================== --- vl.c (.../qemu-vendor-drops) (revision 3256) +++ vl.c (.../release/qemu) (revision 3256) @@ -87,6 +87,10 @@ #include "exec-all.h" +#if USE_KVM +#include "qemu-kvm.h" +#endif + #define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup" //#define DEBUG_UNUSED_IOPORT @@ -4587,6 +4593,9 @@ /* XXX: compute hflags from scratch, except for CPL and IIF */ env->hflags = hflags; tlb_flush(env, 1); +#ifdef USE_KVM + kvm_load_registers(env); +#endif return 0; } @@ -4751,6 +4760,10 @@ int i; qemu_put_be32(f, phys_ram_size); for(i = 0; i < phys_ram_size; i+= TARGET_PAGE_SIZE) { +#ifdef USE_KVM + if ((i>=0xa0000) && (i<0xc0000)) /* do not access video-addresses */ + continue; +#endif ram_put_page(f, phys_ram_base + i, TARGET_PAGE_SIZE); } } @@ -4764,6 +4777,10 @@ if (qemu_get_be32(f) != phys_ram_size) return -EINVAL; for(i = 0; i < phys_ram_size; i+= TARGET_PAGE_SIZE) { +#ifdef USE_KVM + if ((i>=0xa0000) && (i<0xc0000)) /* do not access video-addresses */ + continue; +#endif ret = ram_get_page(f, phys_ram_base + i, TARGET_PAGE_SIZE); if (ret) return ret; @@ -6070,13 +6087,17 @@ } /* init the memory */ +#if USE_KVM + phys_ram_size = ram_size + vga_ram_size + bios_size + KVM_EXTRA_PAGES * 4096; + kvm_qemu_init(); +#else phys_ram_size = ram_size + vga_ram_size + bios_size; - phys_ram_base = qemu_vmalloc(phys_ram_size); if (!phys_ram_base) { fprintf(stderr, "Could not allocate physical memory\n"); exit(1); } +#endif /* we always create the cdrom drive, even if no disk is there */ bdrv_init(); Index: vl.h 
=================================================================== --- vl.h (.../qemu-vendor-drops) (revision 3256) +++ vl.h (.../release/qemu) (revision 3256) @@ -161,6 +161,10 @@ #define BIOS_SIZE ((256 + 64) * 1024) #endif +#if USE_KVM +#define KVM_EXTRA_PAGES 3 +#endif + /* keyboard/mouse support */ #define MOUSE_EVENT_LBUTTON 0x01