/*
 * Userspace Probes (UProbes)
 * arch/i386/kernel/uprobes_i386.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) IBM Corporation, 2006
 */

/*
 * In versions of uprobes built in the SystemTap runtime, this file
 * is #included at the end of uprobes.c.
 */
#include

/* Adapted from arch/x86_64/kprobes.c */
#undef W
#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf)	      \
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
	 << (row % 32))

static const volatile unsigned long good_insns[256 / 32] = {
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f                */
	/*      -------------------------------                */
	W(0x00, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0)| /* 00 */
	W(0x10, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0), /* 10 */
	W(0x20, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1)| /* 20 */
	W(0x30, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1), /* 30 */
	W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
	W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */
	W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
	W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
	W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
	W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
	W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
	W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
	W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
	W(0xd0, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1), /* d0 */
	W(0xe0, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* e0 */
	W(0xf0, 0,0,1,1,0,1,1,1,1,1,0,0,1,1,1,1)  /* f0 */
	/*      -------------------------------                */
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f                */
};

static const volatile unsigned long good_2byte_insns[256 / 32] = {
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f                */
	/*      -------------------------------                */
	W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1)| /* 00 */
	W(0x10, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* 10 */
	W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* 20 */
	W(0x30, 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
	W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
	W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */
	W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 60 */
	W(0x70, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
	W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
	W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
	W(0xa0, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1)| /* a0 */
	W(0xb0, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* b0 */
	W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* c0 */
	W(0xd0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* d0 */
	W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* e0 */
	W(0xf0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0)  /* f0 */
	/*      -------------------------------                */
	/*      0 1 2 3 4 5 6 7 8 9 a b c d e f                */
};
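/*
 * Layout note (illustrative, assuming BITS_PER_LONG == 32 on i386):
 * each array element packs two 16-entry rows of the opcode map, so
 * test_bit(opcode, good_insns) reads word (opcode / 32), bit
 * (opcode % 32).  For example, opcode 0x63 (arpl) lands in word 3,
 * bit 3, which W(0x60, 1,1,1,0,...) leaves clear, so probing it is
 * rejected; opcode 0x90 (nop) lands in word 4, bit 16, which the
 * first entry of W(0x90, ...) sets, so it is accepted.
 */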
/*
 * TODO:
 * - Where necessary, examine the modrm byte and allow only valid
 *   instructions in the different Groups and fpu instructions.
 * - Note: If we go past the first byte, do we need to verify that
 *   subsequent bytes were actually there, rather than off the last page?
 *   Probably overkill.  We don't verify that they specified the first
 *   byte of the instruction, either.
 * - Be clearer about which instructions we'll never probe.
 */

/*
 * opcodes we'll probably never support:
 * 63 - arpl
 * 6c-6d, e4-e5, ec-ed - in
 * 6e-6f, e6-e7, ee-ef - out
 * cc, cd - int3, int
 * cf - iret
 * d6 - illegal instruction
 * f1 - int1/icebp
 * f4 - hlt
 * fa, fb - cli, sti
 *
 * opcodes we may need to refine support for:
 * 66 - data16 prefix
 * 8f - Group 1 - only reg = 0 is OK
 * c6-c7 - Group 11 - only reg = 0 is OK
 * d9-df - fpu insns with some illegal encodings
 * f2, f3 - repnz, repz prefixes.  These are also the first byte for
 *	certain floating-point instructions, such as addsd.
 * fe - Group 4 - only reg = 0 or 1 is OK
 * ff - Group 5 - only reg = 0-6 is OK
 *
 * others -- Do we need to support these?
 * 07, 17, 1f - pop es, pop ss, pop ds
 * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
 *	but 64 and 65 (fs: and gs:) seem to be used, so we support them.
 * 67 - addr16 prefix
 * ce - into
 * f0 - lock prefix
 */

/*
 * Return 1 if this is a legacy instruction prefix we support, -1 if
 * it's one we don't support, or 0 if it's not a prefix at all.
 */
static inline int check_legacy_prefix(u8 byte)
{
	switch (byte) {
	case 0x26:
	case 0x2e:
	case 0x36:
	case 0x3e:
	case 0xf0:
		return -1;
	case 0x64:
	case 0x65:
	case 0x66:
	case 0x67:
	case 0xf2:
	case 0xf3:
		return 1;
	default:
		return 0;
	}
}

static void report_bad_1byte_opcode(uprobe_opcode_t op)
{
	printk(KERN_ERR "uprobes does not currently support probing "
	       "instructions whose first byte is 0x%2.2x\n", op);
}

static void report_bad_2byte_opcode(uprobe_opcode_t op)
{
	printk(KERN_ERR "uprobes does not currently support probing "
	       "instructions with the 2-byte opcode 0x0f 0x%2.2x\n", op);
}

static void report_bad_opcode_prefix(uprobe_opcode_t op,
				     uprobe_opcode_t prefix)
{
	printk(KERN_ERR "uprobes does not currently support probing "
	       "instructions whose first byte is 0x%2.2x "
	       "with a prefix 0x%2.2x\n", op, prefix);
}
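/*
 * Illustrative sketch only (hypothetical helper, not called anywhere):
 * one possible shape for the modrm-based refinement noted in the TODO
 * above, where group opcodes are probeable only for certain values of
 * the modrm "reg" field (bits 5:3), per the "opcodes we may need to
 * refine support for" list.
 */
static inline int check_group_reg_field(uprobe_opcode_t opcode,
					uprobe_opcode_t modrm)
{
	u8 reg = (modrm >> 3) & 0x7;

	switch (opcode) {
	case 0x8f:		/* Group 1 - only reg = 0 is OK */
	case 0xc6:		/* Group 11 - only reg = 0 is OK */
	case 0xc7:
		return (reg == 0) ? 0 : -EPERM;
	case 0xfe:		/* Group 4 - only reg = 0 or 1 is OK */
		return (reg <= 1) ? 0 : -EPERM;
	case 0xff:		/* Group 5 - only reg = 0-6 is OK */
		return (reg <= 6) ? 0 : -EPERM;
	default:
		return 0;
	}
}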
/* Figure out how uprobe_post_ssout should perform ip fixup. */
static int setup_uprobe_post_ssout(struct uprobe_probept *ppt,
				   uprobe_opcode_t *insn)
{
	/*
	 * Some of these require special treatment, but we don't know what to
	 * do with arbitrary prefixes, so we refuse to probe them.
	 */
	int prefix_ok = 0;

	switch (*insn) {
	case 0xc3:		/* ret */
		if ((insn - ppt->insn == 1) &&
		    (*ppt->insn == 0xf3 || *ppt->insn == 0xf2))
			/*
			 * "rep ret" is an AMD kludge that's used by GCC,
			 * so we need to treat it like a normal ret.
			 */
			prefix_ok = 1;
	case 0xcb:		/* more ret/lret */
	case 0xc2:
	case 0xca:
		/* eip is correct */
		ppt->arch_info.flags |= UPFIX_ABS_IP;
		break;
	case 0xe8:		/* call relative - Fix return addr */
		ppt->arch_info.flags |= UPFIX_RETURN;
		break;
	case 0x9a:		/* call absolute - Fix return addr */
		ppt->arch_info.flags |= UPFIX_RETURN | UPFIX_ABS_IP;
		break;
	case 0xff:
		if ((insn[1] & 0x30) == 0x10) {
			/* call absolute, indirect */
			/* Fix return addr; eip is correct. */
			ppt->arch_info.flags |= UPFIX_ABS_IP | UPFIX_RETURN;
		} else if ((insn[1] & 0x31) == 0x20 ||	/* jmp near, absolute indirect */
			   (insn[1] & 0x31) == 0x21) {	/* jmp far, absolute indirect */
			/* eip is correct. */
			ppt->arch_info.flags |= UPFIX_ABS_IP;
		}
		break;
	case 0xea:		/* jmp absolute -- eip is correct */
		ppt->arch_info.flags |= UPFIX_ABS_IP;
		break;
	default:
		/*
		 * Assuming that normal ip-fixup is ok for other prefixed
		 * opcodes.
		 */
		prefix_ok = 1;
		break;
	}
	if (!prefix_ok && insn != ppt->insn) {
		report_bad_opcode_prefix(*insn, *ppt->insn);
		return -EPERM;
	}
	return 0;
}

static int arch_validate_probed_insn(struct uprobe_probept *ppt,
				     struct task_struct *tsk)
{
	uprobe_opcode_t *insn = ppt->insn;
	int pfx, ret;

	ppt->arch_info.flags = 0x0;

	/* Skip good instruction prefixes; reject "bad" ones. */
	while ((pfx = check_legacy_prefix(insn[0])) == 1)
		insn++;
	if (pfx < 0) {
		report_bad_1byte_opcode(insn[0]);
		return -EPERM;
	}

	if ((ret = setup_uprobe_post_ssout(ppt, insn)) != 0)
		return ret;

	if (test_bit(insn[0], good_insns))
		return 0;

	if (insn[0] == 0x0f) {
		if (test_bit(insn[1], good_2byte_insns))
			return 0;
		report_bad_2byte_opcode(insn[1]);
	} else
		report_bad_1byte_opcode(insn[0]);
	return -EPERM;
}

/*
 * Get an instruction slot from the process's SSOL area, containing the
 * instruction at ppt's probepoint.  Point the eip at that slot, in
 * preparation for single-stepping out of line.
 */
static void uprobe_pre_ssout(struct uprobe_task *utask,
			     struct uprobe_probept *ppt, struct pt_regs *regs)
{
	struct uprobe_ssol_slot *slot;

	slot = uprobe_get_insn_slot(ppt);
	if (!slot) {
		utask->doomed = 1;
		return;
	}
	regs->eip = (long)slot->insn;
	utask->singlestep_addr = regs->eip;
}

/*
 * Called by uprobe_post_ssout() to adjust the return address
 * pushed by a call instruction executed out-of-line.
 */
static void adjust_ret_addr(long esp, long correction,
			    struct uprobe_task *utask)
{
	int nleft;
	long ra;

	nleft = copy_from_user(&ra, (const void __user *) esp, 4);
	if (unlikely(nleft != 0))
		goto fail;
	ra += correction;
	nleft = copy_to_user((void __user *) esp, &ra, 4);
	if (unlikely(nleft != 0))
		goto fail;
	return;

fail:
	printk(KERN_ERR
	       "uprobes: Failed to adjust return address after"
	       " single-stepping call instruction;"
	       " pid=%d, esp=%#lx\n", current->pid, esp);
	utask->doomed = 1;
}

/*
 * Called after single-stepping.  ppt->vaddr is the address of the
 * instruction whose first byte has been replaced by the "int3"
 * instruction.  To avoid the SMP problems that can occur when we
 * temporarily put back the original opcode to single-step, we
 * single-stepped a copy of the instruction.  The address of this
 * copy is utask->singlestep_addr.
 *
 * This function prepares to return from the post-single-step
 * interrupt.  We have to fix up the stack as follows:
 *
 * 0) Typically, the new eip is relative to the copied instruction.  We
 * need to make it relative to the original instruction.  Exceptions are
 * return instructions and absolute or indirect jump or call instructions.
 *
 * 1) If the single-stepped instruction was a call, the return address
 * that is atop the stack is the address following the copied instruction.
 * We need to make it the address following the original instruction.
 */
static void uprobe_post_ssout(struct uprobe_task *utask,
			      struct uprobe_probept *ppt, struct pt_regs *regs)
{
	long copy_eip = utask->singlestep_addr;
	long orig_eip = ppt->vaddr;
	unsigned long flags = ppt->arch_info.flags;

	up_read(&ppt->slot->rwsem);

	if (flags & UPFIX_RETURN)
		adjust_ret_addr(regs->esp, (orig_eip - copy_eip), utask);
	if (!(flags & UPFIX_ABS_IP))
		regs->eip = orig_eip + (regs->eip - copy_eip);
}
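/*
 * Worked example (hypothetical addresses): a 5-byte "call rel32"
 * (opcode 0xe8) probed at 0x08048100 is single-stepped from an SSOL
 * slot at 0xb7f00000.  The stepped copy pushes 0xb7f00005 as the
 * return address and leaves regs->eip at the target computed relative
 * to the slot.  UPFIX_RETURN adds (orig_eip - copy_eip) to the saved
 * return address, making it 0x08048105, and the default relative
 * fixup rebases regs->eip by the same delta, so the net effect is as
 * if the original call had executed in place.
 */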
/*
 * Replace the return address with the trampoline address.  Returns
 * the original return address.
 */
static unsigned long arch_hijack_uret_addr(unsigned long trampoline_address,
					   struct pt_regs *regs,
					   struct uprobe_task *utask)
{
	int nleft;
	unsigned long orig_ret_addr;
#define RASIZE (sizeof(unsigned long))

	nleft = copy_from_user(&orig_ret_addr,
			       (const void __user *)regs->esp, RASIZE);
	if (unlikely(nleft != 0))
		return 0;

	if (orig_ret_addr == trampoline_address)
		/*
		 * There's another uretprobe on this function, and it was
		 * processed first, so the return address has already
		 * been hijacked.
		 */
		return orig_ret_addr;

	nleft = copy_to_user((void __user *)regs->esp, &trampoline_address,
			     RASIZE);
	if (unlikely(nleft != 0)) {
		if (nleft != RASIZE) {
			printk(KERN_ERR "uretprobe_entry_handler: "
			       "return address partially clobbered -- "
			       "pid=%d, %%esp=%#lx, %%eip=%#lx\n",
			       current->pid, regs->esp, regs->eip);
			utask->doomed = 1;
		} /* else nothing written, so no harm */
		return 0;
	}
	return orig_ret_addr;
}

/*
 * On x86_32, if a function returns a struct or union, the return
 * value is copied into an area created by the caller.  The address
 * of this area is passed on the stack as a "hidden" first argument.
 * When such a function returns, it uses a "ret $4" instruction to pop
 * not only the return address but also the hidden arg.  To accommodate
 * such functions, we add 4 bytes of slop when predicting the return
 * address.  See PR #10078.
 */
#define STRUCT_RETURN_SLOP 4

static unsigned long arch_predict_sp_at_ret(struct pt_regs *regs,
					    struct task_struct *tsk)
{
	return (unsigned long) (regs->esp + 4 + STRUCT_RETURN_SLOP);
}

/* Check if instruction is nop and return true. */
static int uprobe_emulate_insn(struct pt_regs *regs,
			       struct uprobe_probept *ppt)
{
	uprobe_opcode_t *insn = ppt->insn;

	if (insn[0] == 0x90)
		/* regs->ip already points to the insn after the nop/int3. */
		return 1;
	/*
	 * TODO: add multibyte nop instructions.
	 * For multibyte nop instructions, we need to set ip accordingly.
	 */
	return 0;
}
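/*
 * Illustrative sketch only (hypothetical helper, not called anywhere):
 * emulating the common two-byte "66 90" nop, per the TODO above, would
 * require fixing up eip, since the int3 trap leaves regs->eip at
 * ppt->vaddr + 1 but the original instruction is two bytes long.
 */
static inline int uprobe_emulate_2byte_nop(struct pt_regs *regs,
					   struct uprobe_probept *ppt)
{
	uprobe_opcode_t *insn = ppt->insn;

	if (insn[0] == 0x66 && insn[1] == 0x90) {
		/* Resume at the instruction after the original nop. */
		regs->eip = ppt->vaddr + 2;
		return 1;
	}
	return 0;
}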