#include <linux/utrace.h>
#ifndef UTRACE_ACTION_RESUME
/*
 * Assume the kernel is running the 2008 version of utrace.
 * Skip the code in this file and instead use uprobes 2.
 */
#include "../uprobes2/uprobes.c"
#else	/* uprobes 1 (based on original utrace) */
/*
 * Userspace Probes (UProbes)
 * kernel/uprobes_core.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) IBM Corporation, 2006
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define UPROBES_IMPLEMENTATION 1
#include "uprobes.h"
#include
#include
#include
#include
#include
#include
#include

#define SET_ENGINE_FLAGS	1
#define CLEAR_ENGINE_FLAGS	0

#define MAX_SSOL_SLOTS	1024

#ifdef NO_ACCESS_PROCESS_VM_EXPORT
static int __access_process_vm(struct task_struct *tsk, unsigned long addr,
			void *buf, int len, int write);
#define access_process_vm __access_process_vm
#else
extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
			void *buf, int len, int write);
#endif

static int utask_fake_quiesce(struct uprobe_task *utask);
static void uprobe_release_ssol_vma(struct uprobe_process *uproc);

static void uretprobe_handle_entry(struct uprobe *u, struct pt_regs *regs,
			struct uprobe_task *utask);
static void uretprobe_handle_return(struct pt_regs *regs,
			struct uprobe_task *utask);
static void uretprobe_set_trampoline(struct uprobe_process *uproc,
			struct task_struct *tsk);
static void zap_uretprobe_instances(struct uprobe *u,
			struct uprobe_process *uproc);

typedef void (*uprobe_handler_t)(struct uprobe*, struct pt_regs*);
#define URETPROBE_HANDLE_ENTRY ((uprobe_handler_t)-1L)
#define is_uretprobe(u) (u->handler == URETPROBE_HANDLE_ENTRY)

/* Point utask->active_probe at this while running uretprobe handler. */
static struct uprobe_probept uretprobe_trampoline_dummy_probe;

/* Table of currently probed processes, hashed by tgid. */
static struct hlist_head uproc_table[UPROBE_TABLE_SIZE];

/* Protects uproc_table during uprobe (un)registration */
static DEFINE_MUTEX(uproc_mutex);

/* Table of uprobe_tasks, hashed by task_struct pointer. */
static struct hlist_head utask_table[UPROBE_TABLE_SIZE];
static DEFINE_SPINLOCK(utask_table_lock);

#define lock_uproc_table() mutex_lock(&uproc_mutex)
#define unlock_uproc_table() mutex_unlock(&uproc_mutex)

#define lock_utask_table(flags) spin_lock_irqsave(&utask_table_lock, (flags))
#define unlock_utask_table(flags) \
	spin_unlock_irqrestore(&utask_table_lock, (flags))

/* p_uprobe_utrace_ops = &uprobe_utrace_ops. Fwd refs are a pain w/o this. */
static const struct utrace_engine_ops *p_uprobe_utrace_ops;

struct deferred_registration {
	struct list_head list;
	struct uprobe *uprobe;
	int regflag;	/* 0 - unregister, 1 - register */
	enum uprobe_type type;
};

/*
 * Calling a signal handler cancels single-stepping, so uprobes delays
 * calling the handler, as necessary, until after single-stepping is
 * completed.
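 * (Such signals are queued on utask->delayed_signals by uprobe_delay_signal()
 * and re-injected by uprobe_inject_delayed_signals() once the single-step
 * has completed.)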
*/ struct delayed_signal { struct list_head list; siginfo_t info; }; static struct uprobe_task *uprobe_find_utask_locked(struct task_struct *tsk) { struct hlist_head *head; struct hlist_node *node; struct uprobe_task *utask; head = &utask_table[hash_ptr(tsk, UPROBE_HASH_BITS)]; hlist_for_each_entry(utask, node, head, hlist) { if (utask->tsk == tsk) return utask; } return NULL; } static struct uprobe_task *uprobe_find_utask(struct task_struct *tsk) { struct uprobe_task *utask; unsigned long flags; lock_utask_table(flags); utask = uprobe_find_utask_locked(tsk); unlock_utask_table(flags); return utask; } static void uprobe_hash_utask(struct uprobe_task *utask) { struct hlist_head *head; unsigned long flags; INIT_HLIST_NODE(&utask->hlist); lock_utask_table(flags); head = &utask_table[hash_ptr(utask->tsk, UPROBE_HASH_BITS)]; hlist_add_head(&utask->hlist, head); unlock_utask_table(flags); } static void uprobe_unhash_utask(struct uprobe_task *utask) { unsigned long flags; lock_utask_table(flags); hlist_del(&utask->hlist); unlock_utask_table(flags); } static struct uprobe_process * uprobe_get_process(struct uprobe_process *uproc) { if (atomic_inc_not_zero(&uproc->refcount)) return uproc; return NULL; } /* * Decrement uproc's refcount in a situation where we "know" it can't * reach zero. It's OK to call this with uproc locked. Compare with * uprobe_put_process(). */ static inline void uprobe_decref_process(struct uprobe_process *uproc) { if (atomic_dec_and_test(&uproc->refcount)) BUG(); } /* * Runs with the uproc_mutex held. Returns with uproc ref-counted and * write-locked. * * Around exec time, briefly, it's possible to have one (finished) uproc * for the old image and one for the new image. We find the latter. */ static struct uprobe_process *uprobe_find_process(pid_t tgid) { struct hlist_head *head; struct hlist_node *node; struct uprobe_process *uproc; head = &uproc_table[hash_long(tgid, UPROBE_HASH_BITS)]; hlist_for_each_entry(uproc, node, head, hlist) { if (uproc->tgid == tgid && !uproc->finished) { uproc = uprobe_get_process(uproc); if (uproc) down_write(&uproc->rwsem); return uproc; } } return NULL; } /* * In the given uproc's hash table of probepoints, find the one with the * specified virtual address. Runs with uproc->rwsem locked. */ static struct uprobe_probept *uprobe_find_probept(struct uprobe_process *uproc, unsigned long vaddr) { struct uprobe_probept *ppt; struct hlist_node *node; struct hlist_head *head = &uproc->uprobe_table[hash_long(vaddr, UPROBE_HASH_BITS)]; hlist_for_each_entry(ppt, node, head, ut_node) { if (ppt->vaddr == vaddr && ppt->state != UPROBE_DISABLED) return ppt; } return NULL; } /* * set_bp: Store a breakpoint instruction at ppt->vaddr. * Returns BP_INSN_SIZE on success. * * NOTE: BREAKPOINT_INSTRUCTION on all archs is the same size as * uprobe_opcode_t. */ static int set_bp(struct uprobe_probept *ppt, struct task_struct *tsk) { uprobe_opcode_t bp_insn = BREAKPOINT_INSTRUCTION; return access_process_vm(tsk, ppt->vaddr, &bp_insn, BP_INSN_SIZE, 1); } /* * set_orig_insn: For probepoint ppt, replace the breakpoint instruction * with the original opcode. Returns BP_INSN_SIZE on success. 
*/ static int set_orig_insn(struct uprobe_probept *ppt, struct task_struct *tsk) { return access_process_vm(tsk, ppt->vaddr, &ppt->opcode, BP_INSN_SIZE, 1); } static void bkpt_insertion_failed(struct uprobe_probept *ppt, const char *why) { printk(KERN_ERR "Can't place uprobe at pid %d vaddr %#lx: %s\n", ppt->uproc->tgid, ppt->vaddr, why); } /* * Save a copy of the original instruction (so it can be single-stepped * out of line), insert the breakpoint instruction, and awake * register_uprobe(). */ static void insert_bkpt(struct uprobe_probept *ppt, struct task_struct *tsk) { struct uprobe_kimg *uk; long result = 0; int len; if (!tsk) { /* No surviving tasks associated with ppt->uproc */ result = -ESRCH; goto out; } /* * If access_process_vm() transfers fewer bytes than the maximum * instruction size, assume that the probed instruction is smaller * than the max and near the end of the last page of instructions. * But there must be room at least for a breakpoint-size instruction. */ len = access_process_vm(tsk, ppt->vaddr, ppt->insn, MAX_UINSN_BYTES, 0); if (len < BP_INSN_SIZE) { bkpt_insertion_failed(ppt, "error reading original instruction"); result = -EIO; goto out; } memcpy(&ppt->opcode, ppt->insn, BP_INSN_SIZE); if (ppt->opcode == BREAKPOINT_INSTRUCTION) { /* * To avoid filling up the log file with complaints * about breakpoints already existing, don't log this * error. */ //bkpt_insertion_failed(ppt, "bkpt already exists at that addr"); result = -EEXIST; goto out; } if ((result = arch_validate_probed_insn(ppt, tsk)) < 0) { bkpt_insertion_failed(ppt, "instruction type cannot be probed"); goto out; } len = set_bp(ppt, tsk); if (len < BP_INSN_SIZE) { bkpt_insertion_failed(ppt, "failed to insert bkpt instruction"); result = -EIO; goto out; } out: ppt->state = (result ? UPROBE_DISABLED : UPROBE_BP_SET); list_for_each_entry(uk, &ppt->uprobe_list, list) uk->status = result; wake_up_all(&ppt->waitq); } static void remove_bkpt(struct uprobe_probept *ppt, struct task_struct *tsk) { int len; if (tsk) { len = set_orig_insn(ppt, tsk); if (len < BP_INSN_SIZE) { printk(KERN_ERR "Error removing uprobe at pid %d vaddr %#lx:" " can't restore original instruction\n", tsk->tgid, ppt->vaddr); /* * This shouldn't happen, since we were previously * able to write the breakpoint at that address. * There's not much we can do besides let the * process die with a SIGTRAP the next time the * breakpoint is hit. */ } } /* Wake up unregister_uprobe(). */ ppt->state = UPROBE_DISABLED; wake_up_all(&ppt->waitq); } /* * Runs with all of uproc's threads quiesced and uproc->rwsem write-locked. * As specified, insert or remove the breakpoint instruction for each * uprobe_probept on uproc's pending list. * tsk = one of the tasks associated with uproc -- NULL if there are * no surviving threads. * It's OK for uproc->pending_uprobes to be empty here. It can happen * if a register and an unregister are requested (by different probers) * simultaneously for the same pid/vaddr. * Note that the current task may be a thread in uproc, or it may be * a task running [un]register_uprobe() (or both). 
*/ static void handle_pending_uprobes(struct uprobe_process *uproc, struct task_struct *tsk) { struct uprobe_probept *ppt, *tmp; list_for_each_entry_safe(ppt, tmp, &uproc->pending_uprobes, pd_node) { switch (ppt->state) { case UPROBE_INSERTING: insert_bkpt(ppt, tsk); break; case UPROBE_REMOVING: remove_bkpt(ppt, tsk); break; default: BUG(); } list_del(&ppt->pd_node); } } static void utask_adjust_flags(struct uprobe_task *utask, int set, unsigned long flags) { unsigned long newflags, oldflags; newflags = oldflags = utask->engine->flags; if (set) newflags |= flags; else newflags &= ~flags; if (newflags != oldflags) utrace_set_flags(utask->tsk, utask->engine, newflags); } static inline void clear_utrace_quiesce(struct uprobe_task *utask) { utask_adjust_flags(utask, CLEAR_ENGINE_FLAGS, UTRACE_ACTION_QUIESCE | UTRACE_EVENT(QUIESCE)); } /* Opposite of quiesce_all_threads(). Same locking applies. */ static void rouse_all_threads(struct uprobe_process *uproc) { struct uprobe_task *utask; list_for_each_entry(utask, &uproc->thread_list, list) { if (utask->quiescing) { utask->quiescing = 0; if (utask->state == UPTASK_QUIESCENT) { utask->state = UPTASK_RUNNING; uproc->n_quiescent_threads--; clear_utrace_quiesce(utask); } } } /* Wake any threads that decided to sleep rather than quiesce. */ wake_up_all(&uproc->waitq); } /* * If all of uproc's surviving threads have quiesced, do the necessary * breakpoint insertions or removals and then un-quiesce everybody. * tsk is a surviving thread, or NULL if there is none. Runs with * uproc->rwsem write-locked. */ static void check_uproc_quiesced(struct uprobe_process *uproc, struct task_struct *tsk) { if (uproc->n_quiescent_threads >= uproc->nthreads) { handle_pending_uprobes(uproc, tsk); rouse_all_threads(uproc); } } /* * Quiesce all threads in the specified process -- e.g., prior to * breakpoint insertion. Runs with uproc->rwsem write-locked. * Returns the number of threads that haven't died yet. */ static int quiesce_all_threads(struct uprobe_process *uproc, struct uprobe_task **cur_utask_quiescing) { struct uprobe_task *utask; struct task_struct *survivor = NULL; // any survivor int survivors = 0; *cur_utask_quiescing = NULL; list_for_each_entry(utask, &uproc->thread_list, list) { survivor = utask->tsk; survivors++; if (!utask->quiescing) { /* * If utask is currently handling a probepoint, it'll * check utask->quiescing and quiesce when it's done. */ utask->quiescing = 1; if (utask->tsk == current) *cur_utask_quiescing = utask; else if (utask->state == UPTASK_RUNNING) { utask->quiesce_master = current; utask_adjust_flags(utask, SET_ENGINE_FLAGS, UTRACE_ACTION_QUIESCE | UTRACE_EVENT(QUIESCE)); utask->quiesce_master = NULL; } } } /* * If any task was already quiesced (in utrace's opinion) when we * called utask_adjust_flags() on it, uprobe_report_quiesce() was * called, but wasn't in a position to call check_uproc_quiesced(). */ check_uproc_quiesced(uproc, survivor); return survivors; } static void utask_free_uretprobe_instances(struct uprobe_task *utask) { struct uretprobe_instance *ri; struct hlist_node *r1, *r2; hlist_for_each_entry_safe(ri, r1, r2, &utask->uretprobe_instances, hlist) { hlist_del(&ri->hlist); kfree(ri); uprobe_decref_process(utask->uproc); } } /* Called with utask->uproc write-locked. 
 */
static void uprobe_free_task(struct uprobe_task *utask)
{
	struct deferred_registration *dr, *d;
	struct delayed_signal *ds, *ds2;

	/* printk(KERN_INFO "uprobe_free_task %p (tid %ld), caller %pS, ctid %ld\n", utask, utask->tsk->pid, _RET_IP_, current->pid); */
	/*
	 * Do this first, since a utask that's still in the utask_table
	 * is assumed (e.g., by uprobe_report_exit) to be valid.
	 */
	uprobe_unhash_utask(utask);
	list_del(&utask->list);
	list_for_each_entry_safe(dr, d, &utask->deferred_registrations, list) {
		list_del(&dr->list);
		kfree(dr);
	}

	list_for_each_entry_safe(ds, ds2, &utask->delayed_signals, list) {
		list_del(&ds->list);
		kfree(ds);
	}

	utask_free_uretprobe_instances(utask);
	kfree(utask);
}

/* Runs with uproc_mutex held and uproc->rwsem write-locked. */
static void uprobe_free_process(struct uprobe_process *uproc)
{
	struct uprobe_task *utask, *tmp;
	struct uprobe_ssol_area *area = &uproc->ssol_area;

	/* printk(KERN_INFO "uprobe_free_process %p (pid %ld), caller %pS, ctid %ld\n", uproc, uproc->tgid, _RET_IP_, current->pid); */
	if (!uproc->finished)
		uprobe_release_ssol_vma(uproc);
	if (area->slots)
		kfree(area->slots);
	if (!hlist_unhashed(&uproc->hlist))
		hlist_del(&uproc->hlist);
	list_for_each_entry_safe(utask, tmp, &uproc->thread_list, list) {
		/*
		 * utrace_detach() is OK here (required, it seems) even if
		 * utask->tsk == current and we're in a utrace callback.
		 */
		if (utask->engine)
			utrace_detach(utask->tsk, utask->engine);
		uprobe_free_task(utask);
	}
	up_write(&uproc->rwsem);	// So kfree doesn't complain
	/* printk(KERN_INFO "uprobe_free_process zap %p\n", uproc);*/
	kfree(uproc);
}

/*
 * Decrement uproc's ref count. If it's zero, free uproc and return 1.
 * Else return 0. If uproc is locked, don't call this; use
 * uprobe_decref_process().
 *
 * If we free uproc, we also decrement the ref-count on the uprobes
 * module, if any. If somebody is doing "rmmod --wait uprobes", this
 * function could schedule removal of the module. Therefore, don't call
 * this function and then sleep in uprobes code, unless you know you'll
 * return with the module ref-count > 0.
 */
static int uprobe_put_process(struct uprobe_process *uproc)
{
	int freed = 0;

	if (atomic_dec_and_test(&uproc->refcount)) {
		lock_uproc_table();
		down_write(&uproc->rwsem);
		if (unlikely(atomic_read(&uproc->refcount) != 0)) {
			/*
			 * This works because uproc_mutex is held any
			 * time the ref count can go from 0 to 1 -- e.g.,
			 * register_uprobe() snuck in with a new probe,
			 * or a callback such as uprobe_report_exit()
			 * just started.
			 */
			up_write(&uproc->rwsem);
		} else {
			uprobe_free_process(uproc);
			freed = 1;
		}
		unlock_uproc_table();
	}
	if (freed)
		module_put(THIS_MODULE);
	return freed;
}

static struct uprobe_kimg *uprobe_mk_kimg(struct uprobe *u)
{
	struct uprobe_kimg *uk = (struct uprobe_kimg*)kzalloc(sizeof *uk,
		GFP_USER);

	if (unlikely(!uk))
		return ERR_PTR(-ENOMEM);
	u->kdata = uk;
	uk->uprobe = u;
	uk->ppt = NULL;
	INIT_LIST_HEAD(&uk->list);
	uk->status = -EBUSY;
	return uk;
}

/*
 * Allocate a uprobe_task object for t and add it to uproc's list.
 * Called with t "got" and uproc->rwsem write-locked.
Called in one of * the following cases: * - before setting the first uprobe in t's process * - we're in uprobe_report_clone() and t is the newly added thread * Returns: * - pointer to new uprobe_task on success * - NULL if t dies before we can utrace_attach it * - negative errno otherwise */ static struct uprobe_task *uprobe_add_task(struct task_struct *t, struct uprobe_process *uproc) { struct uprobe_task *utask; struct utrace_attached_engine *engine; utask = (struct uprobe_task *)kzalloc(sizeof *utask, GFP_USER); if (unlikely(utask == NULL)) return ERR_PTR(-ENOMEM); utask->tsk = t; utask->state = UPTASK_RUNNING; utask->quiescing = 0; utask->uproc = uproc; utask->active_probe = NULL; utask->doomed = 0; INIT_HLIST_HEAD(&utask->uretprobe_instances); INIT_LIST_HEAD(&utask->deferred_registrations); INIT_LIST_HEAD(&utask->delayed_signals); INIT_LIST_HEAD(&utask->list); list_add_tail(&utask->list, &uproc->thread_list); uprobe_hash_utask(utask); engine = utrace_attach(t, UTRACE_ATTACH_CREATE, p_uprobe_utrace_ops, utask); if (IS_ERR(engine)) { long err = PTR_ERR(engine); printk("uprobes: utrace_attach failed, returned %ld\n", err); uprobe_free_task(utask); if (err == -ESRCH) return NULL; return ERR_PTR(err); } utask->engine = engine; /* * Always watch for traps, clones, execs and exits. Caller must * set any other engine flags. */ utask_adjust_flags(utask, SET_ENGINE_FLAGS, UTRACE_EVENT(SIGNAL) | UTRACE_EVENT(SIGNAL_IGN) | UTRACE_EVENT(SIGNAL_CORE) | UTRACE_EVENT(EXEC) | UTRACE_EVENT(CLONE) | UTRACE_EVENT(EXIT)); /* * Note that it's OK if t dies just after utrace_attach, because * with the engine in place, the appropriate report_* callback * should handle it after we release uproc->rwsem. */ return utask; } /* See comment in uprobe_mk_process(). */ static struct task_struct *find_next_thread_to_add(struct uprobe_process *uproc, struct task_struct *start) { struct task_struct *t; struct uprobe_task *utask; read_lock(&tasklist_lock); t = start; do { if (unlikely(t->flags & PF_EXITING)) goto dont_add; list_for_each_entry(utask, &uproc->thread_list, list) { if (utask->tsk == t) /* Already added */ goto dont_add; } /* Found thread/task to add. */ get_task_struct(t); read_unlock(&tasklist_lock); return t; dont_add: t = next_thread(t); } while (t != start); read_unlock(&tasklist_lock); return NULL; } /* * Create a per process uproc struct. * at_fork: indicates uprobe_mk_process is called from * a fork context of a probe process. refer uprobe_fork_uproc * for more details. * * Runs with uproc_mutex held; * Returns with uproc->rwsem write-locked when not called * from fork context. */ static struct uprobe_process *uprobe_mk_process(struct task_struct *p, bool at_fork) { struct uprobe_process *uproc; struct uprobe_task *utask; struct task_struct *add_me; int i; long err; uproc = (struct uprobe_process *)kzalloc(sizeof *uproc, GFP_USER); if (unlikely(uproc == NULL)) return ERR_PTR(-ENOMEM); /* Initialize fields */ atomic_set(&uproc->refcount, 1); init_rwsem(&uproc->rwsem); if (!at_fork) /* not called from fork context. 
*/ down_write(&uproc->rwsem); init_waitqueue_head(&uproc->waitq); for (i = 0; i < UPROBE_TABLE_SIZE; i++) INIT_HLIST_HEAD(&uproc->uprobe_table[i]); uproc->nppt = 0; INIT_LIST_HEAD(&uproc->pending_uprobes); INIT_LIST_HEAD(&uproc->thread_list); uproc->nthreads = 0; uproc->n_quiescent_threads = 0; INIT_HLIST_NODE(&uproc->hlist); uproc->tgid = p->tgid; uproc->finished = 0; uproc->uretprobe_trampoline_addr = NULL; uproc->ssol_area.insn_area = NULL; uproc->ssol_area.initialized = 0; mutex_init(&uproc->ssol_area.setup_mutex); /* Initialize rest of area in uprobe_init_ssol(). */ #ifdef CONFIG_UPROBES_SSOL uproc->sstep_out_of_line = 1; #else uproc->sstep_out_of_line = 0; #endif /* * Create and populate one utask per thread in this process. We * can't call uprobe_add_task() while holding tasklist_lock, so we: * 1. Lock task list. * 2. Find the next task, add_me, in this process that's not * already on uproc's thread_list. (Start search at previous * one found.) * 3. Unlock task list. * 4. uprobe_add_task(add_me, uproc) * Repeat 1-4 'til we have utasks for all tasks. */ add_me = p; while ((add_me = find_next_thread_to_add(uproc, add_me)) != NULL) { utask = uprobe_add_task(add_me, uproc); put_task_struct(add_me); if (IS_ERR(utask)) { err = PTR_ERR(utask); goto fail; } if (utask) uproc->nthreads++; } if (uproc->nthreads == 0) { /* All threads -- even p -- are dead. */ err = -ESRCH; goto fail; } return uproc; fail: uprobe_free_process(uproc); return ERR_PTR(err); } /* * Creates a uprobe_probept and connects it to uk and uproc. Runs with * uproc->rwsem write-locked. */ static struct uprobe_probept *uprobe_add_probept(struct uprobe_kimg *uk, struct uprobe_process *uproc) { struct uprobe_probept *ppt; ppt = (struct uprobe_probept *)kzalloc(sizeof *ppt, GFP_USER); if (unlikely(ppt == NULL)) return ERR_PTR(-ENOMEM); init_waitqueue_head(&ppt->waitq); mutex_init(&ppt->ssil_mutex); mutex_init(&ppt->slot_mutex); ppt->slot = NULL; /* Connect to uk. */ INIT_LIST_HEAD(&ppt->uprobe_list); list_add_tail(&uk->list, &ppt->uprobe_list); uk->ppt = ppt; uk->status = -EBUSY; ppt->vaddr = uk->uprobe->vaddr; /* Connect to uproc. */ ppt->state = UPROBE_INSERTING; ppt->uproc = uproc; INIT_LIST_HEAD(&ppt->pd_node); list_add_tail(&ppt->pd_node, &uproc->pending_uprobes); INIT_HLIST_NODE(&ppt->ut_node); hlist_add_head(&ppt->ut_node, &uproc->uprobe_table[hash_long(ppt->vaddr, UPROBE_HASH_BITS)]); uproc->nppt++; uprobe_get_process(uproc); return ppt; } /* ppt is going away. Free its slot (if it owns one) in the SSOL area. */ static void uprobe_free_slot(struct uprobe_probept *ppt) { struct uprobe_ssol_slot *slot = ppt->slot; if (slot) { down_write(&slot->rwsem); if (slot->owner == ppt) { unsigned long flags; struct uprobe_ssol_area *area = &ppt->uproc->ssol_area; spin_lock_irqsave(&area->lock, flags); slot->state = SSOL_FREE; slot->owner = NULL; area->nfree++; spin_unlock_irqrestore(&area->lock, flags); } up_write(&slot->rwsem); } } /* * Runs with ppt->uproc write-locked. Frees ppt and decrements the ref count * on ppt->uproc (but ref count shouldn't hit 0). */ static void uprobe_free_probept(struct uprobe_probept *ppt) { struct uprobe_process *uproc = ppt->uproc; uprobe_free_slot(ppt); hlist_del(&ppt->ut_node); uproc->nppt--; kfree(ppt); uprobe_decref_process(uproc); } static void uprobe_free_kimg(struct uprobe_kimg *uk) { uk->uprobe->kdata = NULL; kfree(uk); } /* * Runs with uprobe_process write-locked. * Note that we never free u, because the user owns that. 
*/ static void purge_uprobe(struct uprobe_kimg *uk) { struct uprobe_probept *ppt = uk->ppt; list_del(&uk->list); uprobe_free_kimg(uk); if (list_empty(&ppt->uprobe_list)) uprobe_free_probept(ppt); } /* TODO: Avoid code duplication with uprobe_validate_vaddr(). */ static int uprobe_validate_vma(struct task_struct *t, unsigned long vaddr) { struct vm_area_struct *vma; struct mm_struct *mm; int ret = 0; mm = get_task_mm(t); if (!mm) return -EINVAL; down_read(&mm->mmap_sem); vma = find_vma(mm, vaddr); if (!vma || vaddr < vma->vm_start) ret = -ENOENT; else if (!(vma->vm_flags & VM_EXEC)) ret = -EFAULT; up_read(&mm->mmap_sem); mmput(mm); return ret; } /* Probed address must be in an executable VM area, outside the SSOL area. */ static int uprobe_validate_vaddr(struct task_struct *p, unsigned long vaddr, struct uprobe_process *uproc) { struct vm_area_struct *vma; struct mm_struct *mm = p->mm; if (!mm) return -EINVAL; down_read(&mm->mmap_sem); vma = find_vma(mm, vaddr); if (!vma || vaddr < vma->vm_start || !(vma->vm_flags & VM_EXEC) || vma->vm_start == (unsigned long) uproc->ssol_area.insn_area) { up_read(&mm->mmap_sem); return -EINVAL; } up_read(&mm->mmap_sem); return 0; } static struct task_struct *uprobe_get_task(pid_t pid) { struct task_struct *p; rcu_read_lock(); p = find_task_by_pid(pid); if (p) get_task_struct(p); rcu_read_unlock(); return p; } /* Runs with utask->uproc read-locked. Returns -EINPROGRESS on success. */ static int defer_registration(struct uprobe *u, int regflag, struct uprobe_task *utask) { struct deferred_registration *dr = kmalloc(sizeof(struct deferred_registration), GFP_USER); if (!dr) return -ENOMEM; dr->type = (is_uretprobe(u) ? UPTY_URETPROBE : UPTY_UPROBE); dr->uprobe = u; dr->regflag = regflag; INIT_LIST_HEAD(&dr->list); list_add_tail(&dr->list, &utask->deferred_registrations); return -EINPROGRESS; } /* See Documentation/uprobes.txt. */ int register_uprobe(struct uprobe *u) { struct task_struct *p; struct uprobe_process *uproc; struct uprobe_kimg *uk; struct uprobe_probept *ppt; struct uprobe_task *cur_utask, *cur_utask_quiescing = NULL; int survivors, ret = 0, uproc_is_new = 0; if (!u || !u->handler) return -EINVAL; p = uprobe_get_task(u->pid); if (!p) return -ESRCH; cur_utask = uprobe_find_utask(current); if (cur_utask && cur_utask->active_probe) { /* * Called from handler; cur_utask->uproc is read-locked. * Do this registration later. */ put_task_struct(p); return defer_registration(u, 1, cur_utask); } /* Get the uprobe_process for this pid, or make a new one. */ lock_uproc_table(); uproc = uprobe_find_process(p->tgid); if (uproc) unlock_uproc_table(); else { /* Creating a new uprobe_process. Ref-count the module. */ if (!try_module_get(THIS_MODULE)) { /* uprobes.ko is being removed. */ ret = -ENOSYS; unlock_uproc_table(); goto fail_tsk; } uproc = uprobe_mk_process(p, 0); if (IS_ERR(uproc)) { ret = (int) PTR_ERR(uproc); unlock_uproc_table(); module_put(THIS_MODULE); goto fail_tsk; } /* Hold uproc_mutex until we've added uproc to uproc_table. */ uproc_is_new = 1; } if (is_uretprobe(u) && IS_ERR(uproc->uretprobe_trampoline_addr)) { /* Previously failed to set up trampoline. */ ret = -ENOMEM; goto fail_uproc; } if ((ret = uprobe_validate_vaddr(p, u->vaddr, uproc)) < 0) goto fail_uproc; if (u->kdata) { /* * Probe is already/still registered. This is the only * place we return -EBUSY to the user. */ ret = -EBUSY; goto fail_uproc; } uk = uprobe_mk_kimg(u); if (IS_ERR(uk)) { ret = (int) PTR_ERR(uk); goto fail_uproc; } /* See if we already have a probepoint at the vaddr. 
*/ ppt = (uproc_is_new ? NULL : uprobe_find_probept(uproc, u->vaddr)); if (ppt) { /* Breakpoint is already in place, or soon will be. */ uk->ppt = ppt; list_add_tail(&uk->list, &ppt->uprobe_list); switch (ppt->state) { case UPROBE_INSERTING: uk->status = -EBUSY; // in progress if (uproc->tgid == current->tgid) { cur_utask_quiescing = cur_utask; BUG_ON(!cur_utask_quiescing); } break; case UPROBE_REMOVING: /* Wait! Don't remove that bkpt after all! */ ppt->state = UPROBE_BP_SET; list_del(&ppt->pd_node); // Remove from pending list. wake_up_all(&ppt->waitq); // Wake unregister_uprobe(). /*FALLTHROUGH*/ case UPROBE_BP_SET: uk->status = 0; break; default: BUG(); } up_write(&uproc->rwsem); put_task_struct(p); if (uk->status == 0) { uprobe_put_process(uproc); return 0; } goto await_bkpt_insertion; } else { ppt = uprobe_add_probept(uk, uproc); if (IS_ERR(ppt)) { ret = (int) PTR_ERR(ppt); goto fail_uk; } } if (uproc_is_new) { hlist_add_head(&uproc->hlist, &uproc_table[hash_long(uproc->tgid, UPROBE_HASH_BITS)]); unlock_uproc_table(); } put_task_struct(p); survivors = quiesce_all_threads(uproc, &cur_utask_quiescing); if (survivors == 0) { purge_uprobe(uk); up_write(&uproc->rwsem); uprobe_put_process(uproc); return -ESRCH; } up_write(&uproc->rwsem); await_bkpt_insertion: if (cur_utask_quiescing) /* Current task is probing its own process. */ (void) utask_fake_quiesce(cur_utask_quiescing); else wait_event(ppt->waitq, ppt->state != UPROBE_INSERTING); ret = uk->status; if (ret != 0) { down_write(&uproc->rwsem); purge_uprobe(uk); up_write(&uproc->rwsem); } uprobe_put_process(uproc); return ret; fail_uk: uprobe_free_kimg(uk); fail_uproc: if (uproc_is_new) { uprobe_free_process(uproc); unlock_uproc_table(); module_put(THIS_MODULE); } else { up_write(&uproc->rwsem); uprobe_put_process(uproc); } fail_tsk: put_task_struct(p); return ret; } EXPORT_SYMBOL_GPL(register_uprobe); void __unregister_uprobe(struct uprobe *u, bool remove_bkpt) { struct task_struct *p; struct uprobe_process *uproc; struct uprobe_kimg *uk; struct uprobe_probept *ppt; struct uprobe_task *cur_utask, *cur_utask_quiescing = NULL; if (!u) return; p = uprobe_get_task(u->pid); if (!p) return; cur_utask = uprobe_find_utask(current); if (cur_utask && cur_utask->active_probe) { /* Called from handler; uproc is read-locked; do this later */ put_task_struct(p); (void) defer_registration(u, 0, cur_utask); return; } /* * Lock uproc before walking the graph, in case the process we're * probing is exiting. */ lock_uproc_table(); uproc = uprobe_find_process(p->tgid); unlock_uproc_table(); put_task_struct(p); if (!uproc) return; uk = (struct uprobe_kimg *)u->kdata; if (!uk) /* * This probe was never successfully registered, or * has already been unregistered. */ goto done; if (uk->status == -EBUSY) /* Looks like register or unregister is already in progress. */ goto done; ppt = uk->ppt; list_del(&uk->list); uprobe_free_kimg(uk); if (is_uretprobe(u)) zap_uretprobe_instances(u, uproc); if (!list_empty(&ppt->uprobe_list)) goto done; /* The last uprobe at ppt's probepoint is being unregistered. */ if (!remove_bkpt) { uprobe_free_probept(ppt); goto done; } /* Queue the breakpoint for removal. */ ppt->state = UPROBE_REMOVING; list_add_tail(&ppt->pd_node, &uproc->pending_uprobes); (void) quiesce_all_threads(uproc, &cur_utask_quiescing); up_write(&uproc->rwsem); if (cur_utask_quiescing) /* Current task is probing its own process. 
*/ (void) utask_fake_quiesce(cur_utask_quiescing); else wait_event(ppt->waitq, ppt->state != UPROBE_REMOVING); if (likely(ppt->state == UPROBE_DISABLED)) { down_write(&uproc->rwsem); uprobe_free_probept(ppt); /* else somebody else's register_uprobe() resurrected ppt. */ up_write(&uproc->rwsem); } uprobe_put_process(uproc); return; done: up_write(&uproc->rwsem); uprobe_put_process(uproc); } /* See Documentation/uprobes.txt. */ void unregister_uprobe(struct uprobe *u) { __unregister_uprobe(u, true); } EXPORT_SYMBOL_GPL(unregister_uprobe); void unmap_uprobe(struct uprobe *u) { __unregister_uprobe(u, false); } EXPORT_SYMBOL_GPL(unmap_uprobe); /* Find a surviving thread in uproc. Runs with uproc->rwsem locked. */ static struct task_struct *find_surviving_thread(struct uprobe_process *uproc) { struct uprobe_task *utask; list_for_each_entry(utask, &uproc->thread_list, list) return utask->tsk; return NULL; } /* * Run all the deferred_registrations previously queued by the current utask. * Runs with no locks or mutexes held. The current utask's uprobe_process * is ref-counted, so they won't disappear as the result of * unregister_u*probe() called here. */ static void uprobe_run_def_regs(struct list_head *drlist) { struct deferred_registration *dr, *d; list_for_each_entry_safe(dr, d, drlist, list) { int result = 0; struct uprobe *u = dr->uprobe; if (dr->type == UPTY_URETPROBE) { struct uretprobe *rp = container_of(u, struct uretprobe, u); if (dr->regflag) result = register_uretprobe(rp); else unregister_uretprobe(rp); } else { if (dr->regflag) result = register_uprobe(u); else unregister_uprobe(u); } if (u && u->registration_callback) u->registration_callback(u, dr->regflag, dr->type, result); list_del(&dr->list); kfree(dr); } } /* * Functions for allocation of the SSOL area, and the instruction slots * therein */ /* * We leave the SSOL vma in place even after all the probes are gone. * We used to remember its address in current->mm->context.uprobes_ssol_area, * but adding that field to mm_context broke KAPI compatibility. * Instead, when we shut down the uproc for lack of probes, we "tag" the vma * for later identification. This is not particularly robust, but it's * no more vulnerable to ptrace or mprotect mischief than any other part * of the address space. */ #define UPROBES_SSOL_VMA_TAG \ "This is the SSOL area for uprobes. Mess with it at your own risk." #define UPROBES_SSOL_TAGSZ ((int)sizeof(UPROBES_SSOL_VMA_TAG)) /* * Searching downward from ceiling address (0 signifies top of memory), * find the next vma whose flags indicate it could be an SSOL area. * Return its address, or 0 for no match. 
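 * A candidate vma must be executable (VM_EXEC) and VM_DONTEXPAND, and must
 * not be writable or a growable stack segment -- see good_flags/bad_flags
 * below.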
*/ static unsigned long find_next_possible_ssol_vma(unsigned long ceiling) { struct mm_struct *mm = current->mm; struct rb_node *rb_node; struct vm_area_struct *vma; unsigned long good_flags = VM_EXEC | VM_DONTEXPAND; unsigned long bad_flags = VM_WRITE | VM_GROWSDOWN | VM_GROWSUP; unsigned long addr = 0; down_read(&mm->mmap_sem); for (rb_node=rb_last(&mm->mm_rb); rb_node; rb_node=rb_prev(rb_node)) { vma = rb_entry(rb_node, struct vm_area_struct, vm_rb); if (ceiling && vma->vm_start >= ceiling) continue; if ((vma->vm_flags & good_flags) != good_flags) continue; if ((vma->vm_flags & bad_flags) != 0) continue; addr = vma->vm_start; break; } up_read(&mm->mmap_sem); return addr; } static noinline unsigned long find_old_ssol_vma(void) { unsigned long addr; unsigned long ceiling = 0; // top of memory char buf[UPROBES_SSOL_TAGSZ]; while ((addr = find_next_possible_ssol_vma(ceiling)) != 0) { ceiling = addr; if (copy_from_user(buf, (const void __user*)addr, UPROBES_SSOL_TAGSZ)) continue; if (!strcmp(buf, UPROBES_SSOL_VMA_TAG)) return addr; } return 0; } /* * Mmap nbytes bytes for the uprobes SSOL area for the current process. * Returns the address of the page, or a negative errno. * This approach was suggested by Roland McGrath. */ static noinline unsigned long uprobe_setup_ssol_vma(unsigned long nbytes) { unsigned long addr; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; BUG_ON(nbytes & ~PAGE_MASK); if ((addr = find_old_ssol_vma()) != 0) return addr; down_write(&mm->mmap_sem); /* * Find the end of the top mapping and skip a page. * If there is no space for PAGE_SIZE above * that, mmap will ignore our address hint. */ vma = rb_entry(rb_last(&mm->mm_rb), struct vm_area_struct, vm_rb); addr = vma->vm_end + PAGE_SIZE; addr = do_mmap_pgoff(NULL, addr, nbytes, PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, 0); if (addr & ~PAGE_MASK) { up_write(&mm->mmap_sem); printk(KERN_ERR "Uprobes failed to allocate a vma for" " pid/tgid %d/%d for single-stepping out of line.\n", current->pid, current->tgid); return addr; } vma = find_vma(mm, addr); BUG_ON(!vma); /* * Don't expand vma on mremap(). Allow vma to be copied on * fork() -- see uprobe_fork_uproc(). */ vma->vm_flags |= VM_DONTEXPAND; up_write(&mm->mmap_sem); return addr; } /** * uprobe_init_ssol -- initialize per-process area for single stepping * out-of-line. * @uproc: probed process * @tsk: probed task: must be current if @insn_area is %NULL * @insn_area: virtual address of the already-established SSOL vma -- * see uprobe_fork_uproc(). * * Returns with @uproc->ssol_area.insn_area pointing to the initialized * area, or set to a negative errno. */ static void uprobe_init_ssol(struct uprobe_process *uproc, struct task_struct *tsk, __user uprobe_opcode_t *insn_area) { struct uprobe_ssol_area *area = &uproc->ssol_area; struct uprobe_ssol_slot *slot; int i; char *slot_addr; // Simplify pointer arithmetic /* Trampoline setup will either fail or succeed here. 
 */
	uproc->uretprobe_trampoline_addr = ERR_PTR(-ENOMEM);
	if (insn_area) {
		BUG_ON(IS_ERR(insn_area));
		area->insn_area = insn_area;
	} else {
		BUG_ON(tsk != current);
		area->insn_area = (uprobe_opcode_t *)
			uprobe_setup_ssol_vma(PAGE_SIZE);
		if (IS_ERR(area->insn_area))
			return;
	}

	area->nfree = area->nslots = PAGE_SIZE / MAX_UINSN_BYTES;
	if (area->nslots > MAX_SSOL_SLOTS)
		area->nfree = area->nslots = MAX_SSOL_SLOTS;
	area->slots = (struct uprobe_ssol_slot *)
		kzalloc(sizeof(struct uprobe_ssol_slot) * area->nslots,
			GFP_USER);
	if (!area->slots) {
		area->insn_area = ERR_PTR(-ENOMEM);
		return;
	}

	mutex_init(&area->populate_mutex);
	spin_lock_init(&area->lock);
	area->next_slot = 0;
	slot_addr = (char*) area->insn_area;
	for (i = 0; i < area->nslots; i++) {
		slot = &area->slots[i];
		init_rwsem(&slot->rwsem);
		slot->state = SSOL_FREE;
		slot->owner = NULL;
		slot->last_used = 0;
		slot->insn = (__user uprobe_opcode_t *) slot_addr;
		slot_addr += MAX_UINSN_BYTES;
	}
	uretprobe_set_trampoline(uproc, tsk);
}

/*
 * Verify that the SSOL area has been set up for uproc. Returns a
 * pointer to the SSOL area, or a negative errno if we couldn't set it up.
 */
static __user uprobe_opcode_t *uprobe_verify_ssol(struct uprobe_process *uproc)
{
	struct uprobe_ssol_area *area = &uproc->ssol_area;

	if (unlikely(!area->initialized)) {
		/* First time through for this probed process */
		mutex_lock(&uproc->ssol_area.setup_mutex);
		if (likely(!area->initialized)) {
			/* Nobody snuck in and set things up ahead of us. */
			uprobe_init_ssol(uproc, current, NULL);
			area->initialized = 1;
		}
		mutex_unlock(&uproc->ssol_area.setup_mutex);
	}
	return area->insn_area;
}

/*
 * uproc is going away, but the process lives on. Tag the SSOL vma so a new
 * uproc can reuse it if more probes are requested.
 */
static void uprobe_release_ssol_vma(struct uprobe_process *uproc)
{
	unsigned long addr;
	struct task_struct *tsk;
	static const char *buf = UPROBES_SSOL_VMA_TAG;
	int nb;

	/* No need to muck with dying image's mm_struct. */
	BUG_ON(uproc->finished);
	addr = (unsigned long) uproc->ssol_area.insn_area;
	if (!addr || IS_ERR_VALUE(addr))
		return;
	tsk = find_surviving_thread(uproc);
	if (!tsk)
		return;
	nb = access_process_vm(tsk, addr, (void*)buf, UPROBES_SSOL_TAGSZ, 1);
	if (nb != UPROBES_SSOL_TAGSZ)
		printk(KERN_ERR "Failed to tag uprobes SSOL vma: "
			"pid/tgid=%d/%d, vaddr=%#lx\n", tsk->pid,
			tsk->tgid, addr);
}

static inline int advance_slot(int slot, struct uprobe_ssol_area *area)
{
	/* Slot 0 is reserved for uretprobe trampoline. */
	slot++;
	if (unlikely(slot >= area->nslots))
		slot = 1;
	return slot;
}

/*
 * Return the slot number of the least-recently-used slot in the
 * neighborhood of area->next_slot. Limit the number of slots we test
 * to keep it fast. Nobody dies if this isn't the best choice.
 */
static int uprobe_lru_insn_slot(struct uprobe_ssol_area *area)
{
#define MAX_LRU_TESTS 10
	struct uprobe_ssol_slot *s;
	int lru_slot = -1;
	unsigned long lru_time = ULONG_MAX;
	int nr_lru_tests = 0;
	int slot = area->next_slot;

	do {
		s = &area->slots[slot];
		if (likely(s->state == SSOL_ASSIGNED)) {
			if (lru_time > s->last_used) {
				lru_time = s->last_used;
				lru_slot = slot;
			}
			if (++nr_lru_tests >= MAX_LRU_TESTS)
				break;
		}
		slot = advance_slot(slot, area);
	} while (slot != area->next_slot);

	if (unlikely(lru_slot < 0))
		/* All slots are in the act of being stolen. Join the melee. */
		return area->next_slot;
	else
		return lru_slot;
}

/*
 * Choose an instruction slot and take it. Choose a free slot if there is one.
 * Otherwise choose the least-recently-used slot.
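 * (Slot 0 is reserved for the uretprobe trampoline, so advance_slot() wraps
 * from the last slot back to slot 1, never to slot 0.)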
Returns with slot * read-locked and containing the desired instruction. Runs with * ppt->slot_mutex locked. */ static struct uprobe_ssol_slot *uprobe_take_insn_slot(struct uprobe_probept *ppt) { struct uprobe_process *uproc = ppt->uproc; struct uprobe_ssol_area *area = &uproc->ssol_area; struct uprobe_ssol_slot *s; int len, slot; unsigned long flags; spin_lock_irqsave(&area->lock, flags); if (area->nfree) { for (slot = 0; slot < area->nslots; slot++) { if (area->slots[slot].state == SSOL_FREE) { area->nfree--; goto found_slot; } } /* Shouldn't get here. Fix nfree and get on with life. */ area->nfree = 0; } slot = uprobe_lru_insn_slot(area); found_slot: area->next_slot = advance_slot(slot, area); s = &area->slots[slot]; s->state = SSOL_BEING_STOLEN; spin_unlock_irqrestore(&area->lock, flags); /* Wait for current users of slot to finish. */ down_write(&s->rwsem); ppt->slot = s; s->owner = ppt; s->last_used = jiffies; s->state = SSOL_ASSIGNED; /* Copy the original instruction to the chosen slot. */ mutex_lock(&area->populate_mutex); len = access_process_vm(current, (unsigned long)s->insn, ppt->insn, MAX_UINSN_BYTES, 1); mutex_unlock(&area->populate_mutex); if (unlikely(len < MAX_UINSN_BYTES)) { up_write(&s->rwsem); printk(KERN_ERR "Failed to copy instruction at %#lx" " to SSOL area (%#lx)\n", ppt->vaddr, (unsigned long) area->slots); return NULL; } /* Let other threads single-step in this slot. */ downgrade_write(&s->rwsem); return s; } /* ppt doesn't own a slot. Get one for ppt, and return it read-locked. */ static struct uprobe_ssol_slot *uprobe_find_insn_slot(struct uprobe_probept *ppt) { struct uprobe_ssol_slot *slot; mutex_lock(&ppt->slot_mutex); slot = ppt->slot; if (unlikely(slot && slot->owner == ppt)) { /* Looks like another thread snuck in and got a slot for us. */ down_read(&slot->rwsem); if (likely(slot->owner == ppt)) { slot->last_used = jiffies; mutex_unlock(&ppt->slot_mutex); return slot; } /* ... but then somebody stole it. */ up_read(&slot->rwsem); } slot = uprobe_take_insn_slot(ppt); mutex_unlock(&ppt->slot_mutex); return slot; } /* * Ensure that ppt owns an instruction slot for single-stepping. * Returns with the slot read-locked and ppt->slot pointing at it. */ static struct uprobe_ssol_slot *uprobe_get_insn_slot(struct uprobe_probept *ppt) { struct uprobe_ssol_slot *slot; retry: slot = ppt->slot; if (unlikely(!slot)) return uprobe_find_insn_slot(ppt); down_read(&slot->rwsem); if (unlikely(slot != ppt->slot)) { up_read(&slot->rwsem); goto retry; } if (unlikely(slot->owner != ppt)) { up_read(&slot->rwsem); return uprobe_find_insn_slot(ppt); } slot->last_used = jiffies; return slot; } /* * utrace engine report callbacks */ /* * We've been asked to quiesce, but aren't in a position to do so. * This could happen in either of the following cases: * * 1) Our own thread is doing a register or unregister operation -- * e.g., as called from a u[ret]probe handler or a non-uprobes utrace * callback. We can't wait_event() for ourselves in [un]register_uprobe(). * * 2) We've been asked to quiesce, but we hit a probepoint first. Now * we're in the report_signal callback, having handled the probepoint. * We'd like to just set the UTRACE_ACTION_QUIESCE and * UTRACE_EVENT(QUIESCE) flags and coast into quiescence. Unfortunately, * it's possible to hit a probepoint again before we quiesce. When * processing the SIGTRAP, utrace would call uprobe_report_quiesce(), * which must decline to take any action so as to avoid removing the * uprobe just hit. 
As a result, we could keep hitting breakpoints * and never quiescing. * * So here we do essentially what we'd prefer to do in uprobe_report_quiesce(). * If we're the last thread to quiesce, handle_pending_uprobes() and * rouse_all_threads(). Otherwise, pretend we're quiescent and sleep until * the last quiescent thread handles that stuff and then wakes us. * * Called and returns with no mutexes held. Returns 1 if we free utask->uproc, * else 0. */ static int utask_fake_quiesce(struct uprobe_task *utask) { struct uprobe_process *uproc = utask->uproc; enum uprobe_task_state prev_state = utask->state; down_write(&uproc->rwsem); /* In case we're somehow set to quiesce for real... */ clear_utrace_quiesce(utask); if (uproc->n_quiescent_threads == uproc->nthreads-1) { /* We're the last thread to "quiesce." */ handle_pending_uprobes(uproc, utask->tsk); rouse_all_threads(uproc); up_write(&uproc->rwsem); return 0; } else { utask->state = UPTASK_SLEEPING; uproc->n_quiescent_threads++; up_write(&uproc->rwsem); /* We ref-count sleepers. */ uprobe_get_process(uproc); wait_event(uproc->waitq, !utask->quiescing); down_write(&uproc->rwsem); utask->state = prev_state; uproc->n_quiescent_threads--; up_write(&uproc->rwsem); /* * If uproc's last uprobe has been unregistered, and * unregister_uprobe() woke up before we did, it's up * to us to free uproc. */ return uprobe_put_process(uproc); } } /* Prepare to single-step ppt's probed instruction inline. */ static inline void uprobe_pre_ssin(struct uprobe_task *utask, struct uprobe_probept *ppt, struct pt_regs *regs) { int len; arch_reset_ip_for_sstep(regs); mutex_lock(&ppt->ssil_mutex); len = set_orig_insn(ppt, utask->tsk); if (unlikely(len != BP_INSN_SIZE)) { printk("Failed to temporarily restore original " "instruction for single-stepping: " "pid/tgid=%d/%d, vaddr=%#lx\n", utask->tsk->pid, utask->tsk->tgid, ppt->vaddr); utask->doomed = 1; } } /* Prepare to continue execution after single-stepping inline. */ static inline void uprobe_post_ssin(struct uprobe_task *utask, struct uprobe_probept *ppt) { int len = set_bp(ppt, utask->tsk); if (unlikely(len != BP_INSN_SIZE)) { printk("Couldn't restore bp: pid/tgid=%d/%d, addr=%#lx\n", utask->tsk->pid, utask->tsk->tgid, ppt->vaddr); ppt->state = UPROBE_DISABLED; } mutex_unlock(&ppt->ssil_mutex); } /* uprobe_pre_ssout() and uprobe_post_ssout() are architecture-specific. */ /* * Delay delivery of the indicated signal until after single-step. * Otherwise single-stepping will be cancelled as part of calling * the signal handler. 
*/ static u32 uprobe_delay_signal(struct uprobe_task *utask, siginfo_t *info) { struct delayed_signal *ds = kmalloc(sizeof(*ds), GFP_USER); if (ds) { ds->info = *info; INIT_LIST_HEAD(&ds->list); list_add_tail(&ds->list, &utask->delayed_signals); } return UTRACE_ACTION_HIDE | UTRACE_SIGNAL_IGN | UTRACE_ACTION_SINGLESTEP | UTRACE_ACTION_NEWSTATE; } static void uprobe_inject_delayed_signals(struct list_head *delayed_signals) { struct delayed_signal *ds, *tmp; list_for_each_entry_safe(ds, tmp, delayed_signals, list) { send_sig_info(ds->info.si_signo, &ds->info, current); list_del(&ds->list); kfree(ds); } } /* * Signal callback: * * We get called here with: * state = UPTASK_RUNNING => we are here due to a breakpoint hit * - Read-lock the process * - Figure out which probepoint, based on regs->IP * - Set state = UPTASK_BP_HIT * - Reset regs->IP to beginning of the insn, if necessary * - Invoke handler for each uprobe at this probepoint * - Set singlestep in motion (UTRACE_ACTION_SINGLESTEP), * with state = UPTASK_SSTEP * * state = UPTASK_SSTEP => here after single-stepping * - Validate we are here per the state machine * - Clean up after single-stepping * - Set state = UPTASK_RUNNING * - Read-unlock the process * - If it's time to quiesce, take appropriate action. * - If the handler(s) we ran called [un]register_uprobe(), * complete those via uprobe_run_def_regs(). * * state = ANY OTHER STATE * - Not our signal, pass it on (UTRACE_ACTION_RESUME) * Note: Intermediate states such as UPTASK_POST_SSTEP help * uprobe_report_exit() decide what to unlock if we die. */ static u32 uprobe_report_signal(struct utrace_attached_engine *engine, struct task_struct *tsk, struct pt_regs *regs, u32 action, siginfo_t *info, const struct k_sigaction *orig_ka, struct k_sigaction *return_ka) { struct uprobe_task *utask; struct uprobe_probept *ppt; struct uprobe_process *uproc; struct uprobe_kimg *uk; u32 ret; unsigned long probept; int hit_uretprobe_trampoline = 0; int registrations_deferred = 0; int uproc_freed = 0; struct list_head delayed_signals; utask = (struct uprobe_task *)rcu_dereference(engine->data); BUG_ON(!utask); /* * info will be null if we're called with action=UTRACE_SIGNAL_HANDLER, * which means that single-stepping has been disabled so a signal * handler can be called in the probed process. That should never * happen because we intercept and delay handled signals (action = * UTRACE_ACTION_RESUME) until after we're done single-stepping. */ BUG_ON(!info); if (action == UTRACE_ACTION_RESUME && utask->active_probe && info->si_signo != SSTEP_SIGNAL) return uprobe_delay_signal(utask, info); if (info->si_signo != BREAKPOINT_SIGNAL && info->si_signo != SSTEP_SIGNAL) goto no_interest; /* * Set up the SSOL area if it's not already there. We do this * here because we have to do it before handling the first * probepoint hit, the probed process has to do it, and this may * be the first time our probed process runs uprobes code. * * We need the SSOL area for the uretprobe trampoline even if * this architectures doesn't single-step out of line. 
*/ uproc = utask->uproc; #ifdef CONFIG_UPROBES_SSOL if (uproc->sstep_out_of_line && unlikely(IS_ERR(uprobe_verify_ssol(uproc)))) uproc->sstep_out_of_line = 0; #elif defined(CONFIG_URETPROBES) (void) uprobe_verify_ssol(uproc); #endif switch (utask->state) { case UPTASK_RUNNING: if (info->si_signo != BREAKPOINT_SIGNAL) goto no_interest; down_read(&uproc->rwsem); clear_utrace_quiesce(utask); probept = arch_get_probept(regs); hit_uretprobe_trampoline = (probept == (unsigned long) uproc->uretprobe_trampoline_addr); if (hit_uretprobe_trampoline) { uretprobe_handle_return(regs, utask); goto bkpt_done; } ppt = uprobe_find_probept(uproc, probept); if (!ppt) { up_read(&uproc->rwsem); goto no_interest; } utask->active_probe = ppt; utask->state = UPTASK_BP_HIT; if (likely(ppt->state == UPROBE_BP_SET)) { list_for_each_entry(uk, &ppt->uprobe_list, list) { struct uprobe *u = uk->uprobe; if (is_uretprobe(u)) uretprobe_handle_entry(u, regs, utask); else if (u->handler) u->handler(u, regs); } } if (uprobe_emulate_insn(regs, ppt)) goto bkpt_done; utask->state = UPTASK_PRE_SSTEP; #ifdef CONFIG_UPROBES_SSOL if (uproc->sstep_out_of_line) uprobe_pre_ssout(utask, ppt, regs); else #endif uprobe_pre_ssin(utask, ppt, regs); if (unlikely(utask->doomed)) do_exit(SIGSEGV); utask->state = UPTASK_SSTEP; /* * No other engines must see this signal, and the * signal shouldn't be passed on either. */ ret = UTRACE_ACTION_HIDE | UTRACE_SIGNAL_IGN | UTRACE_ACTION_SINGLESTEP | UTRACE_ACTION_NEWSTATE; break; case UPTASK_SSTEP: if (info->si_signo != SSTEP_SIGNAL) goto no_interest; ppt = utask->active_probe; BUG_ON(!ppt); utask->state = UPTASK_POST_SSTEP; #ifdef CONFIG_UPROBES_SSOL if (uproc->sstep_out_of_line) uprobe_post_ssout(utask, ppt, regs); else #endif uprobe_post_ssin(utask, ppt); bkpt_done: /* Note: Can come here after running uretprobe handlers */ if (unlikely(utask->doomed)) do_exit(SIGSEGV); utask->active_probe = NULL; if (!list_empty(&utask->deferred_registrations)) { /* * Make sure utask doesn't go away before we run * the deferred registrations. This also keeps * the module from getting unloaded before we're * ready. */ registrations_deferred = 1; uprobe_get_process(uproc); } /* * Delayed signals are a little different. We want * them delivered even if all the probes get unregistered * and uproc and utask go away. So disconnect the list * from utask and make it a local list. */ INIT_LIST_HEAD(&delayed_signals); list_splice_init(&utask->delayed_signals, &delayed_signals); ret = UTRACE_ACTION_HIDE | UTRACE_SIGNAL_IGN | UTRACE_ACTION_NEWSTATE; utask->state = UPTASK_RUNNING; if (utask->quiescing) { up_read(&uproc->rwsem); uproc_freed |= utask_fake_quiesce(utask); } else up_read(&uproc->rwsem); if (hit_uretprobe_trampoline) /* * It's possible that the uretprobe_instance * we just recycled was the last reason for * keeping uproc around. */ uproc_freed |= uprobe_put_process(uproc); if (registrations_deferred) { uprobe_run_def_regs(&utask->deferred_registrations); uproc_freed |= uprobe_put_process(uproc); } uprobe_inject_delayed_signals(&delayed_signals); if (uproc_freed) ret |= UTRACE_ACTION_DETACH; break; default: goto no_interest; } return ret; no_interest: return UTRACE_ACTION_RESUME; } /* * utask_quiesce_pending_sigtrap: The utask entered the quiesce callback * through the signal delivery path, apparently. Check if the associated * signal happened due to a uprobe hit. * * Called with utask->uproc write-locked. Returns 1 if quiesce was * entered with SIGTRAP pending due to a uprobe hit. 
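 * In that case, uprobe_report_quiesce() declines to quiesce, so that
 * uprobe_report_signal() can handle the pending probepoint hit first.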
*/ static int utask_quiesce_pending_sigtrap(struct uprobe_task *utask) { const struct utrace_regset_view *view; const struct utrace_regset *regset; struct uprobe_probept *ppt; unsigned long insn_ptr; view = utrace_native_view(utask->tsk); regset = utrace_regset(utask->tsk, utask->engine, view, 0); if (unlikely(regset == NULL)) return -EIO; if ((*regset->get)(utask->tsk, regset, SLOT_IP(utask->tsk) * regset->size, regset->size, &insn_ptr, NULL) != 0) return -EIO; if (regset->size != sizeof(insn_ptr)) { /* Assume 32-bit app and 64-bit kernel. */ u32 *insn_ptr32 = (u32*) &insn_ptr; BUG_ON(regset->size != sizeof(u32)); insn_ptr = *insn_ptr32; } ppt = uprobe_find_probept(utask->uproc, ARCH_BP_INST_PTR(insn_ptr)); return (ppt != NULL); } /* * Quiesce callback: The associated process has one or more breakpoint * insertions or removals pending. If we're the last thread in this * process to quiesce, do the insertion(s) and/or removal(s). */ static u32 uprobe_report_quiesce(struct utrace_attached_engine *engine, struct task_struct *tsk) { struct uprobe_task *utask; struct uprobe_process *uproc; rcu_read_lock(); utask = (struct uprobe_task *)rcu_dereference(engine->data); BUG_ON(!utask); uproc = uprobe_get_process(utask->uproc); rcu_read_unlock(); if (!uproc) return UTRACE_ACTION_DETACH|UTRACE_ACTION_RESUME; if (current == utask->quiesce_master) { /* * tsk was already quiescent when quiesce_all_threads() * called utrace_set_flags(), which in turned called * here. uproc is already locked. Do as little as possible * and get out. */ utask->state = UPTASK_QUIESCENT; uproc->n_quiescent_threads++; return UTRACE_ACTION_RESUME; } BUG_ON(utask->active_probe); down_write(&uproc->rwsem); /* printk(KERN_INFO "uprobe_report_quiesce2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */ /* * When a thread hits a breakpoint or single-steps, utrace calls * this quiesce callback before our signal callback. We must * let uprobe_report_signal() handle the uprobe hit and THEN * quiesce, because (a) there's a chance that we're quiescing * in order to remove that very uprobe, and (b) there's a tiny * chance that even though that uprobe isn't marked for removal * now, it may be before all threads manage to quiesce. */ if (!utask->quiescing || utask_quiesce_pending_sigtrap(utask) == 1) { clear_utrace_quiesce(utask); goto done; } utask->state = UPTASK_QUIESCENT; uproc->n_quiescent_threads++; check_uproc_quiesced(uproc, tsk); done: up_write(&uproc->rwsem); uprobe_put_process(uproc); /* printk(KERN_INFO "uprobe_report_quiesce3 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */ return UTRACE_ACTION_RESUME; } /* * uproc's process is exiting or exec-ing, so zap all the (now irrelevant) * probepoints and uretprobe_instances. Runs with uproc->rwsem write-locked. * Caller must ref-count uproc before calling this function, to ensure that * uproc doesn't get freed in the middle of this. */ static void uprobe_cleanup_process(struct uprobe_process *uproc) { int i; struct uprobe_probept *ppt; struct hlist_node *pnode1, *pnode2; struct hlist_head *head; struct uprobe_kimg *uk, *unode; struct uprobe_task *utask; uproc->finished = 1; for (i = 0; i < UPROBE_TABLE_SIZE; i++) { head = &uproc->uprobe_table[i]; hlist_for_each_entry_safe(ppt, pnode1, pnode2, head, ut_node) { if (ppt->state == UPROBE_INSERTING || ppt->state == UPROBE_REMOVING) { /* * This task is (exec/exit)ing with * a [un]register_uprobe pending. * [un]register_uprobe will free ppt. 
*/ ppt->state = UPROBE_DISABLED; list_del(&ppt->pd_node); list_for_each_entry_safe(uk, unode, &ppt->uprobe_list, list) uk->status = -ESRCH; wake_up_all(&ppt->waitq); } else if (ppt->state == UPROBE_BP_SET) { list_for_each_entry_safe(uk, unode, &ppt->uprobe_list, list) { list_del(&uk->list); uprobe_free_kimg(uk); } uprobe_free_probept(ppt); /* else */ /* * If ppt is UPROBE_DISABLED, assume that * [un]register_uprobe() has been notified * and will free it soon. */ } } } /* * Free uretprobe_instances. This is a nop on exit, since all * the uprobe_tasks are already gone. We do this here on exec * (as opposed to letting uprobe_free_process() take care of it) * because uprobe_free_process() never gets called if we don't * tick down the ref count here (PR #7082). */ list_for_each_entry(utask, &uproc->thread_list, list) utask_free_uretprobe_instances(utask); } /* * Exit callback: The associated task/thread is exiting. */ static u32 uprobe_report_exit(struct utrace_attached_engine *engine, struct task_struct *tsk, long orig_code, long *code) { struct uprobe_task *utask; struct uprobe_process *uproc = NULL; struct uprobe_probept *ppt; int utask_quiescing; utask = (struct uprobe_task *)rcu_dereference(engine->data); if (utask) uproc = uprobe_get_process(utask->uproc); if (!utask || !uproc) /* uprobe_free_process() has probably clobbered utask->proc. */ return UTRACE_ACTION_DETACH; /* printk(KERN_INFO "uprobe_report_exit %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */ ppt = utask->active_probe; if (ppt) { if (utask->state == UPTASK_TRAMPOLINE_HIT) printk(KERN_WARNING "Task died during uretprobe return:" " pid/tgid = %d/%d\n", tsk->pid, tsk->tgid); else printk(KERN_WARNING "Task died at uprobe probepoint:" " pid/tgid = %d/%d, probepoint = %#lx\n", tsk->pid, tsk->tgid, ppt->vaddr); /* Mutex cleanup depends on where we died and SSOL vs. SSIL. */ if (uproc->sstep_out_of_line) { if (utask->state == UPTASK_SSTEP && ppt->slot && ppt->slot->owner == ppt) up_read(&ppt->slot->rwsem); } else { switch (utask->state) { case UPTASK_PRE_SSTEP: case UPTASK_SSTEP: case UPTASK_POST_SSTEP: mutex_unlock(&ppt->ssil_mutex); break; default: break; } } up_read(&uproc->rwsem); if (utask->state == UPTASK_TRAMPOLINE_HIT) uprobe_decref_process(uproc); } down_write(&uproc->rwsem); /* printk(KERN_INFO "uprobe_report_exit2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */ utask_quiescing = utask->quiescing; uprobe_free_task(utask); uproc->nthreads--; if (uproc->nthreads) { if (utask_quiescing) /* * In case other threads are waiting for * us to quiesce... */ check_uproc_quiesced(uproc, find_surviving_thread(uproc)); } else { /* * We were the last remaining thread - clean up the uprobe * remnants a la unregister_uprobe(). We don't have to * remove the breakpoints, though. */ uprobe_cleanup_process(uproc); } up_write(&uproc->rwsem); /* printk(KERN_INFO "uprobe_report_exit3 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */ uprobe_put_process(uproc); return UTRACE_ACTION_DETACH; } /* * Duplicate the FIFO of uretprobe_instances from parent_utask into * child_utask. Zap the uretprobe pointer, since all we care about is * vectoring to the proper return address. Where there are multiple * uretprobe_instances for the same function instance, copy only the * one that contains the real return address. 
 */
static int uprobe_fork_uretprobe_instances(struct uprobe_task *parent_utask,
		struct uprobe_task *child_utask)
{
	struct uprobe_process *parent_uproc = parent_utask->uproc;
	struct uprobe_process *child_uproc = child_utask->uproc;
	__user uprobe_opcode_t *trampoline_addr =
		child_uproc->uretprobe_trampoline_addr;
	struct hlist_node *tmp, *tail;
	struct uretprobe_instance *pri, *cri;

	BUG_ON(trampoline_addr != parent_uproc->uretprobe_trampoline_addr);

	/* Since there's no hlist_add_tail()... */
	tail = NULL;
	hlist_for_each_entry(pri, tmp, &parent_utask->uretprobe_instances,
			hlist) {
		if (pri->ret_addr == (unsigned long) trampoline_addr)
			continue;
		cri = kmalloc(sizeof(*cri), GFP_USER);
		if (!cri)
			return -ENOMEM;
		cri->rp = NULL;
		cri->ret_addr = pri->ret_addr;
		cri->sp = pri->sp;
		INIT_HLIST_NODE(&cri->hlist);
		if (tail)
			hlist_add_after(tail, &cri->hlist);
		else
			hlist_add_head(&cri->hlist,
				&child_utask->uretprobe_instances);
		tail = &cri->hlist;

		/* Ref-count uretprobe_instances. */
		uprobe_get_process(child_uproc);
	}
	BUG_ON(hlist_empty(&child_utask->uretprobe_instances));
	return 0;
}

/*
 * A probed process is forking, and at least one function in the
 * call stack has a uretprobe on it. Since the child inherits the
 * call stack, it's possible that the child could attempt to return
 * through the uretprobe trampoline. Create a uprobe_process for
 * the child, initialize its SSOL vma (which has been cloned from
 * the parent), and clone the parent's list of uretprobe_instances.
 *
 * Called with uproc_table locked and parent_uproc->rwsem write-locked.
 *
 * (On architectures where it's easy to keep track of where in the
 * stack the return addresses are stored, we could just poke the real
 * return addresses back into the child's stack. We use this more
 * general solution.)
 */
static int uprobe_fork_uproc(struct uprobe_process *parent_uproc,
		struct uprobe_task *parent_utask,
		struct task_struct *child_tsk)
{
	int ret = 0;
	struct uprobe_process *child_uproc;
	struct uprobe_task *child_utask;

	BUG_ON(!parent_uproc->uretprobe_trampoline_addr ||
			IS_ERR(parent_uproc->uretprobe_trampoline_addr));
	ret = uprobe_validate_vma(child_tsk,
			(unsigned long) parent_uproc->ssol_area.insn_area);
	if (ret) {
		int ret2;
		printk(KERN_ERR "uprobes: Child %d failed to inherit"
			" parent %d's SSOL vma at %p. Error = %d\n",
			child_tsk->pid, parent_utask->tsk->pid,
			parent_uproc->ssol_area.insn_area, ret);
		ret2 = uprobe_validate_vma(parent_utask->tsk,
			(unsigned long) parent_uproc->ssol_area.insn_area);
		if (ret2 != 0)
			printk(KERN_ERR "uprobes: Parent %d's SSOL vma"
				" is no longer valid. Error = %d\n",
				parent_utask->tsk->pid, ret2);
		return ret;
	}

	if (!try_module_get(THIS_MODULE))
		return -ENOSYS;
	child_uproc = uprobe_mk_process(child_tsk, 1);
	if (IS_ERR(child_uproc)) {
		ret = (int) PTR_ERR(child_uproc);
		module_put(THIS_MODULE);
		return ret;
	}

	mutex_lock(&child_uproc->ssol_area.setup_mutex);
	uprobe_init_ssol(child_uproc, child_tsk,
			parent_uproc->ssol_area.insn_area);
	child_uproc->ssol_area.initialized = 1;
	mutex_unlock(&child_uproc->ssol_area.setup_mutex);

	child_utask = uprobe_find_utask(child_tsk);
	BUG_ON(!child_utask);
	ret = uprobe_fork_uretprobe_instances(parent_utask, child_utask);

	hlist_add_head(&child_uproc->hlist,
			&uproc_table[hash_long(child_uproc->tgid,
			UPROBE_HASH_BITS)]);

	uprobe_decref_process(child_uproc);
	return ret;
}

/*
 * Clone callback: The current task has spawned a thread/process.
 *
 * NOTE: For now, we don't pass on uprobes from the parent to the
 * child. We now do the necessary clearing of breakpoints in the
 * child's address space.
 *
 * TODO:
 *	- Provide option for child to inherit uprobes.
 */
static u32 uprobe_report_clone(struct utrace_attached_engine *engine,
		struct task_struct *parent, unsigned long clone_flags,
		struct task_struct *child)
{
	int len;
	struct uprobe_process *uproc;
	struct uprobe_task *ptask, *ctask;

	ptask = (struct uprobe_task *)rcu_dereference(engine->data);
	uproc = ptask->uproc;
	/* printk(KERN_INFO "uprobe_report_clone %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */

	/*
	 * Lock uproc so no new uprobes can be installed 'til all
	 * report_clone activities are completed. Lock uproc_table
	 * in case we have to run uprobe_fork_uproc().
	 */
	lock_uproc_table();
	down_write(&uproc->rwsem);
	get_task_struct(child);
	/* printk(KERN_INFO "uprobe_report_clone2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */

	if (clone_flags & (CLONE_THREAD|CLONE_VM)) {
		/* New thread in the same process (CLONE_THREAD) or
		 * processes sharing the same memory space (CLONE_VM). */
		ctask = uprobe_add_task(child, uproc);
		BUG_ON(!ctask);
		if (IS_ERR(ctask))
			goto done;
		uproc->nthreads++;
		/*
		 * FIXME: Handle the case where uproc is quiescing
		 * (assuming it's possible to clone while quiescing).
		 */
	} else {
		/*
		 * New process spawned by parent. Remove the probepoints
		 * in the child's text.
		 *
		 * It's not necessary to quiesce the child as we are assured
		 * by utrace that this callback happens *before* the child
		 * gets to run userspace.
		 *
		 * We also hold the uproc->rwsem for the parent - so no
		 * new uprobes will be registered 'til we return.
		 */
		int i;
		struct uprobe_probept *ppt;
		struct hlist_node *node;
		struct hlist_head *head;

		for (i = 0; i < UPROBE_TABLE_SIZE; i++) {
			head = &uproc->uprobe_table[i];
			hlist_for_each_entry(ppt, node, head, ut_node) {
				len = set_orig_insn(ppt, child);
				if (len != BP_INSN_SIZE) {
					/* Ratelimit this? */
					printk(KERN_ERR "Pid %d forked %d;"
						" failed to remove probepoint"
						" at %#lx in child\n",
						parent->pid, child->pid,
						ppt->vaddr);
				}
			}
		}

		if (!hlist_empty(&ptask->uretprobe_instances))
			(void) uprobe_fork_uproc(uproc, ptask, child);
	}

done:
	put_task_struct(child);
	up_write(&uproc->rwsem);
	unlock_uproc_table();
	return UTRACE_ACTION_RESUME;
}

/*
 * Exec callback: The associated process called execve() or friends.
 *
 * The new program is about to start running and so there is no
 * possibility of a uprobe from the previous user address space
 * to be hit.
 *
 * NOTE:
 *	Typically, this process would have passed through the clone
 *	callback, where the necessary action *should* have been
 *	taken. However, if we still end up at this callback:
 *		- We don't have to clear the uprobes - memory image
 *		  will be overlaid.
 *		- We have to free up uprobe resources associated with
 *		  this process.
 */
static u32 uprobe_report_exec(struct utrace_attached_engine *engine,
		struct task_struct *tsk,
		const struct linux_binprm *bprm,
		struct pt_regs *regs)
{
	struct uprobe_process *uproc = NULL;
	struct uprobe_task *utask;
	u32 ret = UTRACE_ACTION_RESUME;

	utask = (struct uprobe_task *)rcu_dereference(engine->data);
	if (utask)
		uproc = uprobe_get_process(utask->uproc);
	if (!utask || !uproc)
		/* uprobe_free_process() has probably clobbered utask->uproc. */
		return UTRACE_ACTION_DETACH;

	/* printk(KERN_INFO "uprobe_report_exec %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */

	/*
	 * Only cleanup if we're the last thread. If we aren't,
	 * uprobe_report_exit() will handle cleanup.
	 *
	 * One instance of this can happen if vfork() was called,
	 * creating 2 tasks that share the same memory space
	 * (CLONE_VFORK|CLONE_VM).
	 * In this case we don't want to
	 * remove the probepoints from the child, since that would
	 * also remove them from the parent. Instead, just detach
	 * as if this were a simple thread exit.
	 */
	down_write(&uproc->rwsem);
	if (uproc->nthreads == 1) {
		uprobe_cleanup_process(uproc);
		/*
		 * If [un]register_uprobe() is in progress, cancel the
		 * quiesce. Otherwise, utrace_report_exec() might
		 * call uprobe_report_exec() while the
		 * [un]register_uprobe thread is freeing the uproc.
		 */
		clear_utrace_quiesce(utask);
	} else {
		uprobe_free_task(utask);
		uproc->nthreads--;
		ret = UTRACE_ACTION_DETACH;
	}
	up_write(&uproc->rwsem);
	/* printk(KERN_INFO "uprobe_report_exec2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */

	/* If any [un]register_uprobe is pending, it'll clean up. */
	if (uprobe_put_process(uproc))
		ret = UTRACE_ACTION_DETACH;
	/* printk(KERN_INFO "uprobe_report_exec4 %p %ld=%ld ret=%lu\n", uproc, uproc->tgid, current->pid, (unsigned long)ret); */

	return ret;
}

static const struct utrace_engine_ops uprobe_utrace_ops = {
	.report_quiesce = uprobe_report_quiesce,
	.report_signal = uprobe_report_signal,
	.report_exit = uprobe_report_exit,
	.report_clone = uprobe_report_clone,
	.report_exec = uprobe_report_exec
};

static int __init init_uprobes(void)
{
	int i;

	for (i = 0; i < UPROBE_TABLE_SIZE; i++) {
		INIT_HLIST_HEAD(&uproc_table[i]);
		INIT_HLIST_HEAD(&utask_table[i]);
	}
	p_uprobe_utrace_ops = &uprobe_utrace_ops;
	return 0;
}

static void __exit exit_uprobes(void)
{
}

module_init(init_uprobes);
module_exit(exit_uprobes);

#ifdef CONFIG_URETPROBES

/* Returns true if ri_sp lies outside the stack (beyond cursp). */
static inline bool compare_stack_ptrs(unsigned long cursp,
		unsigned long ri_sp)
{
#ifdef CONFIG_STACK_GROWSUP
	if (cursp < ri_sp)
		return true;
#else
	if (cursp > ri_sp)
		return true;
#endif
	return false;
}

/*
 * A longjmp may cause one or more uretprobed functions to terminate without
 * returning. Those functions' uretprobe_instances need to be recycled.
 * We detect this when any uretprobed function is subsequently called
 * or returns. A bypassed uretprobe_instance's stack_ptr is beyond the
 * current stack.
 */
static inline void uretprobe_bypass_instances(unsigned long cursp,
		struct uprobe_task *utask)
{
	struct hlist_node *r1, *r2;
	struct uretprobe_instance *ri;
	struct hlist_head *head = &utask->uretprobe_instances;

	hlist_for_each_entry_safe(ri, r1, r2, head, hlist) {
		if (compare_stack_ptrs(cursp, ri->sp)) {
			hlist_del(&ri->hlist);
			kfree(ri);
			uprobe_decref_process(utask->uproc);
		} else
			return;
	}
}

/* Called when the entry-point probe u is hit. */
static void uretprobe_handle_entry(struct uprobe *u, struct pt_regs *regs,
		struct uprobe_task *utask)
{
	struct uretprobe_instance *ri;
	unsigned long trampoline_addr;

	if (IS_ERR(utask->uproc->uretprobe_trampoline_addr))
		return;
	trampoline_addr =
		(unsigned long) utask->uproc->uretprobe_trampoline_addr;
	ri = (struct uretprobe_instance *)
		kmalloc(sizeof(struct uretprobe_instance), GFP_USER);
	if (!ri)
		return;
	ri->ret_addr = arch_hijack_uret_addr(trampoline_addr, regs, utask);
	if (likely(ri->ret_addr)) {
		ri->sp = arch_predict_sp_at_ret(regs, utask->tsk);
		uretprobe_bypass_instances(ri->sp, utask);
		ri->rp = container_of(u, struct uretprobe, u);
		INIT_HLIST_NODE(&ri->hlist);
		hlist_add_head(&ri->hlist, &utask->uretprobe_instances);
		/* We ref-count outstanding uretprobe_instances.
		 */
		uprobe_get_process(utask->uproc);
	} else
		kfree(ri);
}

/*
 * For each uretprobe_instance pushed onto the LIFO for the function
 * instance that's now returning, call the handler, free the ri, and
 * decrement the uproc's ref count. Caller ref-counts uproc, so we
 * should never hit zero in this function.
 *
 * Returns the original return address.
 *
 * TODO: Handle longjmp out of uretprobed function.
 */
static unsigned long uretprobe_run_handlers(struct uprobe_task *utask,
		struct pt_regs *regs, unsigned long trampoline_addr)
{
	unsigned long ret_addr, cur_sp;
	struct hlist_head *head = &utask->uretprobe_instances;
	struct uretprobe_instance *ri;
	struct hlist_node *r1, *r2;

	cur_sp = arch_get_cur_sp(regs);
	uretprobe_bypass_instances(cur_sp, utask);
	hlist_for_each_entry_safe(ri, r1, r2, head, hlist) {
		if (ri->rp && ri->rp->handler)
			ri->rp->handler(ri, regs);
		ret_addr = ri->ret_addr;
		hlist_del(&ri->hlist);
		kfree(ri);
		uprobe_decref_process(utask->uproc);
		if (ret_addr != trampoline_addr)
			/*
			 * This is the first ri (chronologically) pushed for
			 * this particular instance of the probed function.
			 */
			return ret_addr;
	}
	printk(KERN_ERR "No uretprobe instance with original return address!"
		" pid/tgid=%d/%d", utask->tsk->pid, utask->tsk->tgid);
	utask->doomed = 1;
	return 0;
}

/* Called when the uretprobe trampoline is hit. */
static void uretprobe_handle_return(struct pt_regs *regs,
		struct uprobe_task *utask)
{
	unsigned long orig_ret_addr;
	/* Delay recycling of uproc until end of uprobe_report_signal() */
	uprobe_get_process(utask->uproc);
	utask->state = UPTASK_TRAMPOLINE_HIT;
	utask->active_probe = &uretprobe_trampoline_dummy_probe;
	orig_ret_addr = uretprobe_run_handlers(utask, regs,
		(unsigned long) utask->uproc->uretprobe_trampoline_addr);
	arch_restore_uret_addr(orig_ret_addr, regs);
}

int register_uretprobe(struct uretprobe *rp)
{
	if (!rp || !rp->handler)
		return -EINVAL;
	rp->u.handler = URETPROBE_HANDLE_ENTRY;
	return register_uprobe(&rp->u);
}
EXPORT_SYMBOL_GPL(register_uretprobe);

/*
 * The uretprobe containing u is being unregistered. Its uretprobe_instances
 * have to hang around 'til their associated instances return (but we can't
 * run rp's handler). Zap ri->rp for each one to indicate unregistration.
 *
 * Runs with uproc write-locked.
 */
static void zap_uretprobe_instances(struct uprobe *u,
		struct uprobe_process *uproc)
{
	struct uprobe_task *utask;
	struct uretprobe *rp = container_of(u, struct uretprobe, u);

	if (!uproc)
		return;

	list_for_each_entry(utask, &uproc->thread_list, list) {
		struct hlist_node *r;
		struct uretprobe_instance *ri;

		hlist_for_each_entry(ri, r, &utask->uretprobe_instances, hlist)
			if (ri->rp == rp)
				ri->rp = NULL;
	}
}

void unregister_uretprobe(struct uretprobe *rp)
{
	if (!rp)
		return;
	unregister_uprobe(&rp->u);
}
EXPORT_SYMBOL_GPL(unregister_uretprobe);

void unmap_uretprobe(struct uretprobe *rp)
{
	if (!rp)
		return;
	unmap_uprobe(&rp->u);
}
EXPORT_SYMBOL_GPL(unmap_uretprobe);

/*
 * uproc->ssol_area has been successfully set up. Establish the
 * uretprobe trampoline in slot 0.
 */
static void uretprobe_set_trampoline(struct uprobe_process *uproc,
		struct task_struct *tsk)
{
	uprobe_opcode_t bp_insn = BREAKPOINT_INSTRUCTION;
	struct uprobe_ssol_area *area = &uproc->ssol_area;
	struct uprobe_ssol_slot *slot = &area->slots[0];

	if (access_process_vm(tsk, (unsigned long) slot->insn,
			&bp_insn, BP_INSN_SIZE, 1) == BP_INSN_SIZE) {
		uproc->uretprobe_trampoline_addr = slot->insn;
		slot->state = SSOL_RESERVED;
		area->next_slot = 1;
		area->nfree--;
	} else {
		printk(KERN_ERR "uretprobes disabled for pid %d:"
			" cannot set uretprobe trampoline at %p\n",
			uproc->tgid, slot->insn);
	}
}

static inline unsigned long lookup_uretprobe(struct hlist_node *r,
		struct uprobe_process *uproc,
		unsigned long pc, unsigned long sp)
{
	struct uretprobe_instance *ret_inst;
	unsigned long trampoline_addr;

	if (IS_ERR(uproc->uretprobe_trampoline_addr))
		return pc;
	trampoline_addr = (unsigned long)uproc->uretprobe_trampoline_addr;
	if (pc != trampoline_addr)
		return pc;

	hlist_for_each_entry_from(ret_inst, r, hlist) {
		if (ret_inst->ret_addr == trampoline_addr)
			continue;
		/* First handler with a stack pointer lower than the
		   address (or equal) must be the one. */
		if (ret_inst->sp == sp || compare_stack_ptrs(ret_inst->sp, sp))
			return ret_inst->ret_addr;
	}
	printk(KERN_ERR "Original return address for trampoline not found at "
		"0x%lx pid/tgid=%d/%d\n", sp, current->pid, current->tgid);
	return 0;
}

unsigned long uprobe_get_pc(struct uretprobe_instance *ri, unsigned long pc,
		unsigned long sp)
{
	struct uretprobe *rp;
	struct uprobe_kimg *uk;
	struct uprobe_task *utask;
	struct uprobe_process *uproc;
	struct hlist_node *r;

	if (ri == GET_PC_URETPROBE_NONE) {
		utask = uprobe_find_utask(current);
		if (!utask)
			return 0;
		uproc = utask->uproc;
		r = utask->uretprobe_instances.first;
	} else {
		rp = ri->rp;
		uk = (struct uprobe_kimg *)rp->u.kdata;
		if (!uk)
			return 0;
		uproc = uk->ppt->uproc;
		r = &ri->hlist;
	}

	return lookup_uretprobe(r, uproc, pc, sp);
}
EXPORT_SYMBOL_GPL(uprobe_get_pc);

unsigned long uprobe_get_pc_task(struct task_struct *task, unsigned long pc,
		unsigned long sp)
{
	struct uprobe_task *utask;
	struct uprobe_process *uproc;
	unsigned long result;

	utask = uprobe_find_utask(task);
	if (!utask) {
		return pc;
	} else if (current == task && utask->active_probe) {
		/* everything's locked. */
		return uprobe_get_pc(GET_PC_URETPROBE_NONE, pc, sp);
	}

	uproc = utask->uproc;
	down_read(&uproc->rwsem);
	result = lookup_uretprobe(utask->uretprobe_instances.first, uproc,
			pc, sp);
	up_read(&uproc->rwsem);
	return result;
}
EXPORT_SYMBOL_GPL(uprobe_get_pc_task);

#else	/* ! CONFIG_URETPROBES */

static void uretprobe_handle_entry(struct uprobe *u, struct pt_regs *regs,
		struct uprobe_task *utask)
{
}
static void uretprobe_handle_return(struct pt_regs *regs,
		struct uprobe_task *utask)
{
}
static void uretprobe_set_trampoline(struct uprobe_process *uproc,
		struct task_struct *tsk)
{
}
static void zap_uretprobe_instances(struct uprobe *u,
		struct uprobe_process *uproc)
{
}
#endif /* CONFIG_URETPROBES */

#ifdef NO_ACCESS_PROCESS_VM_EXPORT
/*
 * Some kernel versions export everything that uprobes.ko needs except
 * access_process_vm, so we copied and pasted it here. Fortunately,
 * everything it calls is exported.
 */
#include
#include
static int __access_process_vm(struct task_struct *tsk, unsigned long addr,
		void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	struct page *page;
	void *old_buf = buf;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	down_read(&mm->mmap_sem);
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;

		ret = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma);
		if (ret <= 0)
			break;

		bytes = len;
		offset = addr & (PAGE_SIZE-1);
		if (bytes > PAGE_SIZE-offset)
			bytes = PAGE_SIZE-offset;

		maddr = kmap(page);
		if (write) {
			copy_to_user_page(vma, page, addr,
					maddr + offset, buf, bytes);
			set_page_dirty_lock(page);
		} else {
			copy_from_user_page(vma, page, addr,
					buf, maddr + offset, bytes);
		}
		kunmap(page);
		page_cache_release(page);
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	up_read(&mm->mmap_sem);
	mmput(mm);

	return buf - old_buf;
}
#endif

#include "uprobes_arch.c"

MODULE_LICENSE("GPL");

#endif	/* uprobes 1 (based on original utrace) */
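
/*
 * Illustrative sketch (disabled with "#if 0", so it is never built):
 * roughly how a client module might use the uretprobe API exported
 * above.  The target pid and probe address below are hypothetical,
 * and the field names assumed here (u.pid, u.vaddr, handler) are
 * taken to match uprobes.h.  Treat this as a usage sketch under those
 * assumptions, not as part of the uprobes implementation.
 */
#if 0
#include <linux/module.h>
#include <linux/kernel.h>
#include "uprobes.h"

/* Runs in the probed task's context when the probed function returns. */
static void example_ret_handler(struct uretprobe_instance *ri,
				struct pt_regs *regs)
{
	printk(KERN_INFO "probed function returned to %#lx\n", ri->ret_addr);
}

static struct uretprobe example_rp;

static int __init example_init(void)
{
	/* Hypothetical target: pid 1234, function entry at 0x08048500. */
	example_rp.u.pid = 1234;
	example_rp.u.vaddr = 0x08048500;
	example_rp.handler = example_ret_handler;
	return register_uretprobe(&example_rp);
}

static void __exit example_exit(void)
{
	/* Outstanding instances have their ri->rp zapped, as described above. */
	unregister_uretprobe(&example_rp);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
#endif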