/* * x86 instruction analysis * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Copyright (C) IBM Corporation, 2002, 2004, 2009 */ #ifdef __KERNEL__ #include #include #include #include #else #include #include "insn.h" #endif #define get_next(t, insn) \ ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) #define peek_next(t, insn) \ ({t r; r = *(t*)insn->next_byte; r; }) /** * insn_init() - initialize struct insn * @insn: &struct insn to be initialized * @kaddr: address (in kernel memory) of instruction (or copy thereof) * @x86_64: true for 64-bit kernel or 64-bit app */ void insn_init(struct insn *insn, const u8 *kaddr, bool x86_64) { memset(insn, 0, sizeof(*insn)); insn->kaddr = kaddr; insn->next_byte = kaddr; insn->x86_64 = x86_64; insn->opnd_bytes = 4; if (x86_64) insn->addr_bytes = 8; else insn->addr_bytes = 4; } EXPORT_SYMBOL_GPL(insn_init); /** * insn_get_prefixes - scan x86 instruction prefix bytes * @insn: &struct insn containing instruction * * Populates the @insn->prefixes bitmap, and updates @insn->next_byte * to point to the (first) opcode. No effect if @insn->prefixes.got * is already true. 
*/
void insn_get_prefixes(struct insn *insn)
{
	struct insn_field *prefixes = &insn->prefixes;
	insn_attr_t attr;
	bool repeated;
	int i;
	u8 b;

	if (prefixes->got)
		return;

	/* At most four prefix bytes are collected. */
	prefixes->nbytes = 0;
	while (prefixes->nbytes < 4) {
		b = peek_next(u8, insn);
		attr = inat_get_opcode_attribute(b);
		if (!INAT_IS_PREFIX(attr))
			break;

		/*
		 * The size overrides below are applied with XOR, so applying
		 * the same prefix byte a second time would flip the size back
		 * to its default.  A repeated prefix byte is still recorded
		 * and consumed, but it must not toggle the size again.
		 */
		repeated = false;
		for (i = 0; i < prefixes->nbytes; i++) {
			if (prefixes->bytes[i] == b) {
				repeated = true;
				break;
			}
		}

		prefixes->bytes[prefixes->nbytes] = b;
		prefixes->nbytes++;
		insn->next_byte++;

		if (repeated)
			continue;

		if (INAT_IS_ADDRSZ(attr)) {
			/* address size switches 2/4 or 4/8 */
			if (insn->x86_64)
				insn->addr_bytes ^= 12;	/* 8 <-> 4 */
			else
				insn->addr_bytes ^= 6;	/* 4 <-> 2 */
		} else if (INAT_IS_OPNDSZ(attr)) {
			/* operand size switches 2/4 */
			insn->opnd_bytes ^= 6;
		}
	}

	/* A REX prefix is only looked for when decoding 64-bit code. */
	if (insn->x86_64) {
		b = peek_next(u8, insn);
		attr = inat_get_opcode_attribute(b);
		if (INAT_IS_REX_PREFIX(attr)) {
			insn->rex_prefix.value = b;
			insn->rex_prefix.nbytes = 1;
			insn->rex_prefix.got = true;
			insn->next_byte++;
			if (REX_W(insn))
				/* REX.W overrides opnd_size */
				insn->opnd_bytes = 8;
		}
	}

	prefixes->got = true;
}
EXPORT_SYMBOL_GPL(insn_get_prefixes);

/**
 * insn_get_opcode - collect opcode(s)
 * @insn:	&struct insn containing instruction
 *
 * Populates @insn->opcode, updates @insn->next_byte to point past the
 * opcode byte(s), and sets @insn->attr (except for groups).
 * If necessary, first collects any preceding (prefix) bytes.
 * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
 * is already true.
* */ void insn_get_opcode(struct insn *insn) { struct insn_field *opcode = &insn->opcode; u8 op, pfx; if (opcode->got) return; if (!insn->prefixes.got) insn_get_prefixes(insn); /* Get first opcode */ op = get_next(u8, insn); OPCODE1(insn) = op; opcode->nbytes = 1; insn->attr = inat_get_opcode_attribute(op); while (INAT_IS_ESCAPE(insn->attr)) { /* Get escaped opcode */ op = get_next(u8, insn); opcode->bytes[opcode->nbytes++] = op; pfx = insn_last_prefix(insn); insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); } opcode->got = true; } EXPORT_SYMBOL_GPL(insn_get_opcode); /** * insn_get_modrm - collect ModRM byte, if any * @insn: &struct insn containing instruction * * Populates @insn->modrm and updates @insn->next_byte to point past the * ModRM byte, if any. If necessary, first collects the preceding bytes * (prefixes and opcode(s)). No effect if @insn->modrm.got is already true. */ void insn_get_modrm(struct insn *insn) { struct insn_field *modrm = &insn->modrm; u8 pfx, mod; if (modrm->got) return; if (!insn->opcode.got) insn_get_opcode(insn); if (INAT_HAS_MODRM(insn->attr)) { mod = get_next(u8, insn); modrm->value = mod; modrm->nbytes = 1; if (INAT_IS_GROUP(insn->attr)) { pfx = insn_last_prefix(insn); insn->attr = inat_get_group_attribute(mod, pfx, insn->attr); } } if (insn->x86_64 && INAT_IS_FORCE64(insn->attr)) insn->opnd_bytes = 8; modrm->got = true; } EXPORT_SYMBOL_GPL(insn_get_modrm); /** * insn_rip_relative() - Does instruction use RIP-relative addressing mode? * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * ModRM byte. No effect if @insn->x86_64 is false. */ bool insn_rip_relative(struct insn *insn) { struct insn_field *modrm = &insn->modrm; if (!insn->x86_64) return false; if (!modrm->got) insn_get_modrm(insn); /* * For rip-relative instructions, the mod field (top 2 bits) * is zero and the r/m field (bottom 3 bits) is 0x5. 
*/ return (insn_field_exists(modrm) && (modrm->value & 0xc7) == 0x5); } EXPORT_SYMBOL_GPL(insn_rip_relative); /** * * insn_get_sib() - Get the SIB byte of instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * ModRM byte. */ void insn_get_sib(struct insn *insn) { if (insn->sib.got) return; if (!insn->modrm.got) insn_get_modrm(insn); if (insn->modrm.nbytes) if (insn->addr_bytes != 2 && MODRM_MOD(insn) != 3 && MODRM_RM(insn) == 4) { insn->sib.value = get_next(u8, insn); insn->sib.nbytes = 1; } insn->sib.got = true; } EXPORT_SYMBOL_GPL(insn_get_sib); /** * * insn_get_displacement() - Get the displacement of instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * SIB byte. * Displacement value is sign-expanded. */ void insn_get_displacement(struct insn *insn) { u8 mod; if (insn->displacement.got) return; if (!insn->sib.got) insn_get_sib(insn); if (insn->modrm.nbytes) { /* * Interpreting the modrm byte: * mod = 00 - no displacement fields (exceptions below) * mod = 01 - 1-byte displacement field * mod = 10 - displacement field is 4 bytes, or 2 bytes if * address size = 2 (0x67 prefix in 32-bit mode) * mod = 11 - no memory operand * * If address size = 2... * mod = 00, r/m = 110 - displacement field is 2 bytes * * If address size != 2... 
* mod != 11, r/m = 100 - SIB byte exists * mod = 00, SIB base = 101 - displacement field is 4 bytes * mod = 00, r/m = 101 - rip-relative addressing, displacement * field is 4 bytes */ mod = MODRM_MOD(insn); if (mod == 3) goto out; if (mod == 1) { insn->displacement.value = get_next(s8, insn); insn->displacement.nbytes = 1; } else if (insn->addr_bytes == 2) { if ((mod == 0 && MODRM_RM(insn) == 6) || mod == 2) { insn->displacement.value = get_next(s16, insn); insn->displacement.nbytes = 2; } } else { if ((mod == 0 && MODRM_RM(insn) == 5) || mod == 2 || (mod == 0 && SIB_BASE(insn) == 5)) { insn->displacement.value = get_next(s32, insn); insn->displacement.nbytes = 4; } } } out: insn->displacement.got = true; } EXPORT_SYMBOL_GPL(insn_get_displacement); /* Decode moffset16/32/64 */ static void __get_moffset(struct insn *insn) { switch (insn->addr_bytes) { case 2: insn->moffset1.value = get_next(s16, insn); insn->moffset1.nbytes = 2; break; case 4: insn->moffset1.value = get_next(s32, insn); insn->moffset1.nbytes = 4; break; case 8: insn->moffset1.value = get_next(s32, insn); insn->moffset1.nbytes = 4; insn->moffset2.value = get_next(s32, insn); insn->moffset2.nbytes = 4; break; } insn->moffset1.got = insn->moffset2.got = true; } /* Decode imm v32(Iz) */ static void __get_immv32(struct insn *insn) { switch (insn->opnd_bytes) { case 2: insn->immediate.value = get_next(s16, insn); insn->immediate.nbytes = 2; break; case 4: case 8: insn->immediate.value = get_next(s32, insn); insn->immediate.nbytes = 4; break; } } /* Decode imm v64(Iv/Ov) */ static void __get_immv(struct insn *insn) { switch (insn->opnd_bytes) { case 2: insn->immediate1.value = get_next(s16, insn); insn->immediate1.nbytes = 2; break; case 4: insn->immediate1.value = get_next(s32, insn); insn->immediate1.nbytes = 4; break; case 8: insn->immediate1.value = get_next(s32, insn); insn->immediate1.nbytes = 4; insn->immediate2.value = get_next(s32, insn); insn->immediate2.nbytes = 4; break; } insn->immediate1.got 
= insn->immediate2.got = true; } /* Decode ptr16:16/32(Ap) */ static void __get_immptr(struct insn *insn) { switch (insn->opnd_bytes) { case 2: insn->immediate1.value = get_next(s16, insn); insn->immediate1.nbytes = 2; break; case 4: insn->immediate1.value = get_next(s32, insn); insn->immediate1.nbytes = 4; break; case 8: /* ptr16:64 is not supported (no segment) */ WARN_ON(1); return; } insn->immediate2.value = get_next(u16, insn); insn->immediate2.nbytes = 2; insn->immediate1.got = insn->immediate2.got = true; } /** * * insn_get_immediate() - Get the immediates of instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * displacement bytes. * Basically, most of immediates are sign-expanded. Unsigned-value can be * get by bit masking with ((1 << (nbytes * 8)) - 1) */ void insn_get_immediate(struct insn *insn) { if (insn->immediate.got) return; if (!insn->displacement.got) insn_get_displacement(insn); if (INAT_HAS_MOFFSET(insn->attr)) { __get_moffset(insn); goto done; } if (!INAT_HAS_IMM(insn->attr)) /* no immediates */ goto done; switch (INAT_IMM_SIZE(insn->attr)) { case INAT_IMM_BYTE: insn->immediate.value = get_next(s8, insn); insn->immediate.nbytes = 1; break; case INAT_IMM_WORD: insn->immediate.value = get_next(s16, insn); insn->immediate.nbytes = 2; break; case INAT_IMM_DWORD: insn->immediate.value = get_next(s32, insn); insn->immediate.nbytes = 4; break; case INAT_IMM_QWORD: insn->immediate1.value = get_next(s32, insn); insn->immediate1.nbytes = 4; insn->immediate2.value = get_next(s32, insn); insn->immediate2.nbytes = 4; break; case INAT_IMM_PTR: __get_immptr(insn); break; case INAT_IMM_VWORD32: __get_immv32(insn); break; case INAT_IMM_VWORD: __get_immv(insn); break; default: break; } if (INAT_HAS_ADDIMM(insn->attr)) { insn->immediate2.value = get_next(s8, insn); insn->immediate2.nbytes = 1; } done: insn->immediate.got = true; } EXPORT_SYMBOL_GPL(insn_get_immediate); /** * * 
insn_get_length() - Get the length of instruction
 * @insn:	&struct insn containing instruction
 *
 * Decodes the instruction up to and including the immediate bytes first,
 * if necessary, then stores in @insn->length the distance from @insn->kaddr
 * to @insn->next_byte, truncated to u8.  No effect if @insn->length is
 * already non-zero.
 */
void insn_get_length(struct insn *insn)
{
	unsigned long first, past_end;

	if (insn->length)
		return;
	if (!insn->immediate.got)
		insn_get_immediate(insn);

	first = (unsigned long)insn->kaddr;
	past_end = (unsigned long)insn->next_byte;
	insn->length = (u8)(past_end - first);
}
EXPORT_SYMBOL_GPL(insn_get_length);