/*
* Userspace Probes (UProbes)
* arch/i386/kernel/uprobes_i386.c
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Copyright (C) IBM Corporation, 2006
*/
/*
* In versions of uprobes built in the SystemTap runtime, this file
* is #included at the end of uprobes.c.
*/
#include
/* Adapted from arch/x86_64/kprobes.c */
#undef W
#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
(b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
(b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
(bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
<< (row % 32))
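/*
 * Each 32-bit word of the tables below thus packs two 16-opcode rows:
 * W(row, ...) builds a 16-bit mask and shifts it left by (row % 32), so
 * rows 0x00 and 0x10 share word 0, rows 0x20 and 0x30 share word 1, and
 * so on -- hence the alternating '|' and ',' between W() invocations.
 * A worked illustration (not extra logic): opcode 0x63 (arpl) lands in
 * word 0x63 / 32 == 3 at bit 0x63 % 32 == 3; W(0x60, 1,1,1,0,...) leaves
 * that bit clear, so test_bit(0x63, good_insns) correctly reports it as
 * unsupported.
 */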
static const volatile unsigned long good_insns[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ------------------------------- */
W(0x00, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0)| /* 00 */
W(0x10, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0), /* 10 */
W(0x20, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1)| /* 20 */
W(0x30, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1), /* 30 */
W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */
W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */
W(0xd0, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1), /* d0 */
W(0xe0, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* e0 */
W(0xf0, 0,0,1,1,0,1,1,1,1,1,0,0,1,1,1,1) /* f0 */
/* ------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
static const volatile unsigned long good_2byte_insns[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ------------------------------- */
W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1)| /* 00 */
W(0x10, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* 10 */
W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* 20 */
W(0x30, 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */
W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 60 */
W(0x70, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
W(0xa0, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1)| /* a0 */
W(0xb0, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* b0 */
W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* c0 */
W(0xd0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* d0 */
W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* e0 */
W(0xf0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* f0 */
/* ------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
/*
* TODO:
* - Where necessary, examine the modrm byte and allow only valid instructions
* in the different Groups and fpu instructions.
* - Note: If we go past the first byte, do we need to verify that
* subsequent bytes were actually there, rather than off the last page?
* Probably overkill. We don't verify that they specified the first byte
* of the instruction, either.
* - Be clearer about which instructions we'll never probe.
*/
/*
* opcodes we'll probably never support:
* 63 - arpl
* 6c-6d, e4-e5, ec-ed - in
* 6e-6f, e6-e7, ee-ef - out
* cc, cd - int3, int
* cf - iret
* d6 - illegal instruction
* f1 - int1/icebp
* f4 - hlt
* fa, fb - cli, sti
*
 * opcodes we may need to refine support for (see the sketch following
 * this comment block):
* 66 - data16 prefix
* 8f - Group 1 - only reg = 0 is OK
* c6-c7 - Group 11 - only reg = 0 is OK
* d9-df - fpu insns with some illegal encodings
* f2, f3 - repnz, repz prefixes. These are also the first byte for
* certain floating-point instructions, such as addsd.
* fe - Group 4 - only reg = 0 or 1 is OK
* ff - Group 5 - only reg = 0-6 is OK
*
* others -- Do we need to support these?
* 07, 17, 1f - pop es, pop ss, pop ds
* 26, 2e, 36, 3e, - es:, cs:, ss:, ds: segment prefixes --
 * but 64 and 65 (fs: and gs:) seem to be used, so we support them.
* 67 - addr16 prefix
* ce - into
* f0 - lock prefix
*/
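/*
 * A sketch of the Group refinement mentioned in the TODO above -- not
 * implemented in this file, and the helper name is hypothetical.  The
 * reg field of the modrm byte (bits 5:3) selects the instruction within
 * a Group, so a check along these lines would restrict 8f and c6-c7 to
 * reg 0, fe to reg 0-1, and ff to reg 0-6:
 *
 *	static int uprobe_check_group_insn(uprobe_opcode_t opcode, u8 modrm)
 *	{
 *		u8 reg = (modrm >> 3) & 0x7;
 *
 *		switch (opcode) {
 *		case 0x8f:
 *		case 0xc6:
 *		case 0xc7:
 *			return reg == 0;
 *		case 0xfe:
 *			return reg <= 1;
 *		case 0xff:
 *			return reg <= 6;
 *		default:
 *			return 1;
 *		}
 *	}
 */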
/*
* Return 1 if this is a legacy instruction prefix we support, -1 if
* it's one we don't support, or 0 if it's not a prefix at all.
*/
static inline int check_legacy_prefix(u8 byte)
{
switch (byte) {
case 0x26:
case 0x2e:
case 0x36:
case 0x3e:
case 0xf0:
return -1;
case 0x64:
case 0x65:
case 0x66:
case 0x67:
case 0xf2:
case 0xf3:
return 1;
default:
return 0;
}
}
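/*
 * For illustration (not extra logic): to probe "66 89 d8" (mov %bx,%ax),
 * the prefix-skipping loop in arch_validate_probed_insn() below steps
 * past the 0x66 data16 prefix and then validates 0x89 against
 * good_insns; a leading 0xf0 (lock) instead makes check_legacy_prefix()
 * return -1 and the probe is rejected with -EPERM.
 */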
static void report_bad_1byte_opcode(uprobe_opcode_t op)
{
printk(KERN_ERR "uprobes does not currently support probing "
"instructions whose first byte is 0x%2.2x\n", op);
}
static void report_bad_2byte_opcode(uprobe_opcode_t op)
{
printk(KERN_ERR "uprobes does not currently support probing "
"instructions with the 2-byte opcode 0x0f 0x%2.2x\n", op);
}
static void report_bad_opcode_prefix(uprobe_opcode_t op, uprobe_opcode_t prefix)
{
printk(KERN_ERR "uprobes does not currently support probing "
"instructions whose first byte is 0x%2.2x "
"with a prefix 0x%2.2x\n", op, prefix);
}
/* Figure out how uprobe_post_ssout() should perform the eip fixup. */
static int setup_uprobe_post_ssout(struct uprobe_probept *ppt,
uprobe_opcode_t *insn)
{
	/*
	 * The opcodes handled below need special eip fixup.  We don't know
	 * how that fixup interacts with arbitrary prefixes, so if one of
	 * these opcodes is prefixed (except for the "rep ret" case below),
	 * we refuse to probe it.
	 */
int prefix_ok = 0;
switch (*insn) {
case 0xc3: /* ret */
if ((insn - ppt->insn == 1) &&
(*ppt->insn == 0xf3 || *ppt->insn == 0xf2))
/*
* "rep ret" is an AMD kludge that's used by GCC,
* so we need to treat it like a normal ret.
*/
			prefix_ok = 1;
		/* Fall through: eip fixup is the same as for a plain ret. */
	case 0xcb:		/* more ret/lret */
case 0xc2:
case 0xca:
/* eip is correct */
ppt->arch_info.flags |= UPFIX_ABS_IP;
break;
case 0xe8: /* call relative - Fix return addr */
ppt->arch_info.flags |= UPFIX_RETURN;
break;
case 0x9a: /* call absolute - Fix return addr */
ppt->arch_info.flags |= UPFIX_RETURN | UPFIX_ABS_IP;
break;
case 0xff:
if ((insn[1] & 0x30) == 0x10) {
/* call absolute, indirect */
/* Fix return addr; eip is correct. */
ppt->arch_info.flags |= UPFIX_ABS_IP | UPFIX_RETURN;
		} else if ((insn[1] & 0x31) == 0x20 ||
			   (insn[1] & 0x31) == 0x21) {
			/* jmp near or jmp far, absolute indirect -- eip is correct. */
			ppt->arch_info.flags |= UPFIX_ABS_IP;
		}
break;
case 0xea: /* jmp absolute -- eip is correct */
ppt->arch_info.flags |= UPFIX_ABS_IP;
break;
default:
		/* For any other opcode, normal eip fixup is fine, prefixed or not. */
prefix_ok = 1;
break;
}
if (!prefix_ok && insn != ppt->insn) {
report_bad_opcode_prefix(*insn, *ppt->insn);
return -EPERM;
}
return 0;
}
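/*
 * Illustrative summary of the flag choices above: a relative call (e8)
 * gets UPFIX_RETURN only, so the pushed return address is corrected and
 * eip gets the normal relocation to its target; an indirect call
 * (ff /2, ff /3) gets UPFIX_RETURN | UPFIX_ABS_IP because the eip the
 * CPU computed is already correct; and a plain ret (c3) gets
 * UPFIX_ABS_IP alone.
 */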
static
int arch_validate_probed_insn(struct uprobe_probept *ppt,
struct task_struct *tsk)
{
uprobe_opcode_t *insn = ppt->insn;
int pfx, ret;
ppt->arch_info.flags = 0x0;
/* Skip good instruction prefixes; reject "bad" ones. */
while ((pfx = check_legacy_prefix(insn[0])) == 1)
insn++;
if (pfx < 0) {
report_bad_1byte_opcode(insn[0]);
return -EPERM;
}
if ((ret = setup_uprobe_post_ssout(ppt, insn)) != 0)
return ret;
if (test_bit(insn[0], good_insns))
return 0;
if (insn[0] == 0x0f) {
if (test_bit(insn[1], good_2byte_insns))
return 0;
report_bad_2byte_opcode(insn[1]);
} else
report_bad_1byte_opcode(insn[0]);
return -EPERM;
}
/*
* Get an instruction slot from the process's SSOL area, containing the
* instruction at ppt's probepoint. Point the eip at that slot, in
* preparation for single-stepping out of line.
*/
static
void uprobe_pre_ssout(struct uprobe_task *utask, struct uprobe_probept *ppt,
struct pt_regs *regs)
{
struct uprobe_ssol_slot *slot;
slot = uprobe_get_insn_slot(ppt);
if (!slot) {
utask->doomed = 1;
return;
}
regs->eip = (long)slot->insn;
utask->singlestep_addr = regs->eip;
}
/*
* Called by uprobe_post_ssout() to adjust the return address
* pushed by a call instruction executed out-of-line.
*/
static void adjust_ret_addr(long esp, long correction,
struct uprobe_task *utask)
{
int nleft;
long ra;
nleft = copy_from_user(&ra, (const void __user *) esp, 4);
if (unlikely(nleft != 0))
goto fail;
ra += correction;
nleft = copy_to_user((void __user *) esp, &ra, 4);
if (unlikely(nleft != 0))
goto fail;
return;
fail:
printk(KERN_ERR
"uprobes: Failed to adjust return address after"
" single-stepping call instruction;"
" pid=%d, esp=%#lx\n", current->pid, esp);
utask->doomed = 1;
}
/*
* Called after single-stepping. ppt->vaddr is the address of the
* instruction whose first byte has been replaced by the "int3"
* instruction. To avoid the SMP problems that can occur when we
* temporarily put back the original opcode to single-step, we
* single-stepped a copy of the instruction. The address of this
* copy is utask->singlestep_addr.
*
* This function prepares to return from the post-single-step
* interrupt. We have to fix up the stack as follows:
*
* 0) Typically, the new eip is relative to the copied instruction. We
* need to make it relative to the original instruction. Exceptions are
* return instructions and absolute or indirect jump or call instructions.
*
* 1) If the single-stepped instruction was a call, the return address
* that is atop the stack is the address following the copied instruction.
* We need to make it the address following the original instruction.
*/
static
void uprobe_post_ssout(struct uprobe_task *utask, struct uprobe_probept *ppt,
struct pt_regs *regs)
{
long copy_eip = utask->singlestep_addr;
long orig_eip = ppt->vaddr;
unsigned long flags = ppt->arch_info.flags;
up_read(&ppt->slot->rwsem);
if (flags & UPFIX_RETURN)
adjust_ret_addr(regs->esp, (orig_eip - copy_eip), utask);
if (!(flags & UPFIX_ABS_IP))
regs->eip = orig_eip + (regs->eip - copy_eip);
}
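/*
 * Worked example with hypothetical addresses: a 5-byte call with a
 * rel32 of 0x100, probed at vaddr 0x08048440, is copied to an SSOL slot
 * at 0xb7fff000.  Stepping the copy leaves regs->eip at 0xb7fff105
 * (copy + 5 + 0x100) and pushes 0xb7fff005 as the return address.
 * UPFIX_RETURN makes adjust_ret_addr() add the correction
 * (0x08048440 - 0xb7fff000), yielding a return address of 0x08048445;
 * and since UPFIX_ABS_IP is clear, eip is rebased the same way, to
 * 0x08048545 -- the intended call target.
 */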
/*
* Replace the return address with the trampoline address. Returns
* the original return address.
*/
static
unsigned long arch_hijack_uret_addr(unsigned long trampoline_address,
struct pt_regs *regs, struct uprobe_task *utask)
{
int nleft;
unsigned long orig_ret_addr;
#define RASIZE (sizeof(unsigned long))
nleft = copy_from_user(&orig_ret_addr,
(const void __user *)regs->esp, RASIZE);
if (unlikely(nleft != 0))
return 0;
if (orig_ret_addr == trampoline_address)
/*
* There's another uretprobe on this function, and it was
* processed first, so the return address has already
* been hijacked.
*/
return orig_ret_addr;
nleft = copy_to_user((void __user *)regs->esp,
&trampoline_address, RASIZE);
if (unlikely(nleft != 0)) {
if (nleft != RASIZE) {
printk(KERN_ERR "uretprobe_entry_handler: "
"return address partially clobbered -- "
"pid=%d, %%esp=%#lx, %%eip=%#lx\n",
current->pid, regs->esp, regs->eip);
utask->doomed = 1;
} /* else nothing written, so no harm */
return 0;
}
return orig_ret_addr;
}
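/*
 * Illustration with hypothetical values: at entry to the probed
 * function, *(unsigned long *)regs->esp holds the caller's return
 * address, say 0x080484f2.  After arch_hijack_uret_addr() the stack
 * slot holds trampoline_address instead, and 0x080484f2 is returned so
 * the uretprobe machinery can restore it when the trampoline is hit.
 * A second uretprobe on the same function finds the trampoline address
 * already in place and simply returns it.
 */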
/*
* On x86_32, if a function returns a struct or union, the return
* value is copied into an area created by the caller. The address
* of this area is passed on the stack as a "hidden" first argument.
* When such a function returns, it uses a "ret $4" instruction to pop
* not only the return address but also the hidden arg. To accommodate
* such functions, we add 4 bytes of slop when predicting the return
* address. See PR #10078.
*/
#define STRUCT_RETURN_SLOP 4
static
unsigned long arch_predict_sp_at_ret(struct pt_regs *regs,
struct task_struct *tsk)
{
return (unsigned long) (regs->esp + 4 + STRUCT_RETURN_SLOP);
}
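/*
 * For example: at function entry regs->esp points at the return
 * address, so a plain "ret" leaves the stack pointer at esp + 4, while
 * "ret $4" (struct/union return) leaves it at esp + 8.  The
 * STRUCT_RETURN_SLOP term in the prediction accounts for the second
 * case.
 */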
/*
 * Return 1 if the probed instruction is a nop that needs no further
 * handling (it has effectively been emulated); 0 otherwise.
 */
static int uprobe_emulate_insn(struct pt_regs *regs,
struct uprobe_probept *ppt)
{
uprobe_opcode_t *insn = ppt->insn;
if (insn[0] == 0x90)
		/* regs->eip already points to the insn after the nop/int3. */
return 1;
	/*
	 * TODO: handle multibyte nop instructions as well; for those we
	 * would also need to advance eip accordingly (see the sketch after
	 * this function).
	 */
return 0;
}
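/*
 * A sketch of the multibyte-nop handling mentioned in the TODO above --
 * not implemented here, and the eip adjustment is an assumption about
 * how it could be done.  For example, recognizing the 2-byte nop
 * "66 90" would also require advancing eip past its second byte, since
 * the int3 trap only accounts for the first:
 *
 *	if (insn[0] == 0x66 && insn[1] == 0x90) {
 *		regs->eip += 1;
 *		return 1;
 *	}
 */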