// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2014 Red Hat Inc, Steven Rostedt * */ /** FIXME: Convert numbers based on machine and file */ #define _LARGEFILE64_SOURCE #include #include #include #ifndef NO_AUDIT #include #endif #include "trace-local.h" #include "trace-hash.h" #include "trace-hash-local.h" #include "list.h" #include #ifdef WARN_NO_AUDIT # warning "lib audit not found, using raw syscalls " \ "(install libaudit-devel(for fedora) or libaudit-dev(for debian/ubuntu) and try again)" #endif #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP" #define TASK_STATE_MAX 1024 #define task_from_item(item) container_of(item, struct task_data, hash) #define start_from_item(item) container_of(item, struct start_data, hash) #define event_from_item(item) container_of(item, struct event_hash, hash) #define stack_from_item(item) container_of(item, struct stack_data, hash) #define group_from_item(item) container_of(item, struct group_data, hash) #define event_data_from_item(item) container_of(item, struct event_data, hash) static unsigned long long nsecs_per_sec(unsigned long long ts) { return ts / NSEC_PER_SEC; } static unsigned long long mod_to_usec(unsigned long long ts) { return ((ts % NSEC_PER_SEC) + NSEC_PER_USEC / 2) / NSEC_PER_USEC; } struct handle_data; struct event_hash; struct event_data; typedef void (*event_data_print)(struct trace_seq *s, struct event_hash *hash); typedef int (*handle_event_func)(struct handle_data *h, unsigned long long pid, struct event_data *data, struct tep_record *record, int cpu); enum event_data_type { EVENT_TYPE_UNDEFINED, EVENT_TYPE_STACK, EVENT_TYPE_SCHED_SWITCH, EVENT_TYPE_WAKEUP, EVENT_TYPE_FUNC, EVENT_TYPE_SYSCALL, EVENT_TYPE_IRQ, EVENT_TYPE_SOFTIRQ, EVENT_TYPE_SOFTIRQ_RAISE, EVENT_TYPE_PROCESS_EXEC, EVENT_TYPE_USER_MATE, }; struct event_data { struct trace_hash_item hash; int id; int trace; struct tep_event *event; struct event_data *end; struct event_data *start; struct tep_format_field *pid_field; struct tep_format_field *start_match_field; /* match with start */ struct tep_format_field *end_match_field; /* match with end */ struct tep_format_field *data_field; /* optional */ event_data_print print_func; handle_event_func handle_event; void *private; int migrate; /* start/end pairs can migrate cpus */ int global; /* use global tasks */ enum event_data_type type; }; struct stack_data { struct trace_hash_item hash; unsigned long long count; unsigned long long time; unsigned long long time_min; unsigned long long ts_min; unsigned long long time_max; unsigned long long ts_max; unsigned long long time_avg; unsigned long size; char caller[]; }; struct stack_holder { unsigned long size; void *caller; struct tep_record *record; }; struct start_data { struct trace_hash_item hash; struct event_data *event_data; struct list_head list; struct task_data *task; unsigned long long timestamp; unsigned long long search_val; unsigned long long val; int cpu; struct stack_holder stack; }; struct event_hash { struct trace_hash_item hash; struct event_data *event_data; unsigned long long search_val; unsigned long long val; unsigned long long count; unsigned long long time_total; unsigned long long time_avg; unsigned long long time_max; unsigned long long ts_max; unsigned long long time_min; unsigned long long ts_min; unsigned long long time_std; unsigned long long last_time; struct trace_hash stacks; }; struct group_data { struct trace_hash_item hash; char *comm; struct trace_hash event_hash; }; struct task_data { struct trace_hash_item hash; int pid; int sleeping; char *comm; struct trace_hash start_hash; struct trace_hash event_hash; struct task_data *proxy; struct start_data *last_start; struct event_hash *last_event; struct tep_record *last_stack; struct handle_data *handle; struct group_data *group; }; struct cpu_info { int current; }; struct sched_switch_data { struct tep_format_field *prev_state; int match_state; }; struct handle_data { struct handle_data *next; struct tracecmd_input *handle; struct tep_handle *pevent; struct trace_hash events; struct trace_hash group_hash; struct cpu_info **cpu_data; struct tep_format_field *common_pid; struct tep_format_field *wakeup_comm; struct tep_format_field *switch_prev_comm; struct tep_format_field *switch_next_comm; struct sched_switch_data sched_switch_blocked; struct sched_switch_data sched_switch_preempt; struct trace_hash task_hash; struct list_head *cpu_starts; struct list_head migrate_starts; struct task_data *global_task; struct task_data *global_percpu_tasks; int cpus; }; static struct handle_data *handles; static struct event_data *stacktrace_event; static bool merge_like_comms = false; void trace_profile_set_merge_like_comms(void) { merge_like_comms = true; } static struct start_data * add_start(struct task_data *task, struct event_data *event_data, struct tep_record *record, unsigned long long search_val, unsigned long long val) { struct start_data *start; start = malloc(sizeof(*start)); if (!start) return NULL; memset(start, 0, sizeof(*start)); start->hash.key = trace_hash(search_val); start->search_val = search_val; start->val = val; start->timestamp = record->ts; start->event_data = event_data; start->cpu = record->cpu; start->task = task; trace_hash_add(&task->start_hash, &start->hash); if (event_data->migrate) list_add(&start->list, &task->handle->migrate_starts); else list_add(&start->list, &task->handle->cpu_starts[record->cpu]); return start; } struct event_data_match { struct event_data *event_data; unsigned long long search_val; unsigned long long val; }; static int match_start(struct trace_hash_item *item, void *data) { struct start_data *start = start_from_item(item); struct event_data_match *edata = data; return start->event_data == edata->event_data && start->search_val == edata->search_val; } static int match_event(struct trace_hash_item *item, void *data) { struct event_data_match *edata = data; struct event_hash *event = event_from_item(item); return event->event_data == edata->event_data && event->search_val == edata->search_val && event->val == edata->val; } static struct event_hash * find_event_hash(struct task_data *task, struct event_data_match *edata) { struct event_hash *event_hash; struct trace_hash_item *item; unsigned long long key; key = (unsigned long)edata->event_data + (unsigned long)edata->search_val + (unsigned long)edata->val; key = trace_hash(key); item = trace_hash_find(&task->event_hash, key, match_event, edata); if (item) return event_from_item(item); event_hash = malloc(sizeof(*event_hash)); if (!event_hash) return NULL; memset(event_hash, 0, sizeof(*event_hash)); event_hash->event_data = edata->event_data; event_hash->search_val = edata->search_val; event_hash->val = edata->val; event_hash->hash.key = key; trace_hash_init(&event_hash->stacks, 32); trace_hash_add(&task->event_hash, &event_hash->hash); return event_hash; } static struct event_hash * find_start_event_hash(struct task_data *task, struct event_data *event_data, struct start_data *start) { struct event_data_match edata; edata.event_data = event_data; edata.search_val = start->search_val; edata.val = start->val; return find_event_hash(task, &edata); } static struct start_data * find_start(struct task_data *task, struct event_data *event_data, unsigned long long search_val) { unsigned long long key = trace_hash(search_val); struct event_data_match edata; void *data = &edata; struct trace_hash_item *item; struct start_data *start; edata.event_data = event_data; edata.search_val = search_val; item = trace_hash_find(&task->start_hash, key, match_start, data); if (!item) return NULL; start = start_from_item(item); return start; } struct stack_match { void *caller; unsigned long size; }; static int match_stack(struct trace_hash_item *item, void *data) { struct stack_data *stack = stack_from_item(item); struct stack_match *match = data; if (match->size != stack->size) return 0; return memcmp(stack->caller, match->caller, stack->size) == 0; } static void add_event_stack(struct event_hash *event_hash, void *caller, unsigned long size, unsigned long long time, unsigned long long ts) { unsigned long long key; struct stack_data *stack; struct stack_match match; struct trace_hash_item *item; int i; match.caller = caller; match.size = size; if (size < sizeof(int)) die("Stack size of less than sizeof(int)??"); for (key = 0, i = 0; i <= size - sizeof(int); i += sizeof(int)) key += trace_hash(*(int *)(caller + i)); item = trace_hash_find(&event_hash->stacks, key, match_stack, &match); if (!item) { stack = malloc(sizeof(*stack) + size); if (!stack) { warning("Could not allocate stack"); return; } memset(stack, 0, sizeof(*stack)); memcpy(&stack->caller, caller, size); stack->size = size; stack->hash.key = key; trace_hash_add(&event_hash->stacks, &stack->hash); } else stack = stack_from_item(item); stack->count++; stack->time += time; if (stack->count == 1 || time < stack->time_min) { stack->time_min = time; stack->ts_min = ts; } if (time > stack->time_max) { stack->time_max = time; stack->ts_max = ts; } } static void free_start(struct start_data *start) { if (start->task->last_start == start) start->task->last_start = NULL; if (start->stack.record) tracecmd_free_record(start->stack.record); trace_hash_del(&start->hash); list_del(&start->list); free(start); } static struct event_hash * add_and_free_start(struct task_data *task, struct start_data *start, struct event_data *event_data, unsigned long long ts) { struct event_hash *event_hash; long long delta; delta = ts - start->timestamp; /* * It's possible on a live trace, because of timestamps being * different on different CPUs, we can go back in time. When * that happens, just zero out the delta. */ if (delta < 0) delta = 0; event_hash = find_start_event_hash(task, event_data, start); if (!event_hash) return NULL; event_hash->count++; event_hash->time_total += delta; event_hash->last_time = delta; if (delta > event_hash->time_max) { event_hash->time_max = delta; event_hash->ts_max = ts; } if (event_hash->count == 1 || delta < event_hash->time_min) { event_hash->time_min = delta; event_hash->ts_min = ts; } if (start->stack.record) { unsigned long size; void *caller; size = start->stack.size; caller = start->stack.caller; add_event_stack(event_hash, caller, size, delta, start->stack.record->ts); tracecmd_free_record(start->stack.record); start->stack.record = NULL; } free_start(start); return event_hash; } static struct event_hash * find_and_update_start(struct task_data *task, struct event_data *event_data, unsigned long long ts, unsigned long long search_val) { struct start_data *start; start = find_start(task, event_data, search_val); if (!start) return NULL; return add_and_free_start(task, start, event_data, ts); } static int match_task(struct trace_hash_item *item, void *data) { struct task_data *task = task_from_item(item); int pid = *(unsigned long *)data; return task->pid == pid; } static void init_task(struct handle_data *h, struct task_data *task) { task->handle = h; trace_hash_init(&task->start_hash, 16); trace_hash_init(&task->event_hash, 32); } static struct task_data * add_task(struct handle_data *h, int pid) { unsigned long long key = trace_hash(pid); struct task_data *task; task = malloc(sizeof(*task)); if (!task) { warning("Could not allocate task"); return NULL; } memset(task, 0, sizeof(*task)); task->pid = pid; task->hash.key = key; trace_hash_add(&h->task_hash, &task->hash); init_task(h, task); return task; } static struct task_data * find_task(struct handle_data *h, int pid) { unsigned long long key = trace_hash(pid); struct trace_hash_item *item; static struct task_data *last_task; void *data = (unsigned long *)&pid; if (last_task && last_task->pid == pid) return last_task; item = trace_hash_find(&h->task_hash, key, match_task, data); if (item) last_task = task_from_item(item); else last_task = add_task(h, pid); return last_task; } static int match_group(struct trace_hash_item *item, void *data) { struct group_data *group = group_from_item(item); return strcmp(group->comm, (char *)data) == 0; } static void add_task_comm(struct task_data *task, struct tep_format_field *field, struct tep_record *record) { const char *comm; task->comm = malloc(field->size + 1); if (!task->comm) { warning("Could not allocate task comm"); return; } comm = record->data + field->offset; memcpy(task->comm, comm, field->size); task->comm[field->size] = 0; } /* Account for tasks that don't have starts */ static void account_task(struct task_data *task, struct event_data *event_data, struct tep_record *record) { struct event_data_match edata; struct event_hash *event_hash; struct task_data *proxy = NULL; unsigned long long search_val = 0; unsigned long long val = 0; unsigned long long pid; /* * If an event has the pid_field set, then find that task for * this event instead. Let this task proxy for it to handle * stack traces on this event. */ if (event_data->pid_field) { tep_read_number_field(event_data->pid_field, record->data, &pid); proxy = task; task = find_task(task->handle, pid); if (!task) return; proxy->proxy = task; } /* * If data_field is defined, use that for val, * if the start_field is defined, use that for search_val. */ if (event_data->data_field) { tep_read_number_field(event_data->data_field, record->data, &val); } if (event_data->start_match_field) { tep_read_number_field(event_data->start_match_field, record->data, &search_val); } edata.event_data = event_data; edata.search_val = val; edata.val = val; event_hash = find_event_hash(task, &edata); if (!event_hash) { warning("failed to allocate event_hash"); return; } event_hash->count++; task->last_event = event_hash; } static struct task_data * find_event_task(struct handle_data *h, struct event_data *event_data, struct tep_record *record, unsigned long long pid) { if (event_data->global) { if (event_data->migrate) return h->global_task; else return &h->global_percpu_tasks[record->cpu]; } /* If pid_field is defined, use that to find the task */ if (event_data->pid_field) tep_read_number_field(event_data->pid_field, record->data, &pid); return find_task(h, pid); } static struct task_data * handle_end_event(struct handle_data *h, struct event_data *event_data, struct tep_record *record, int pid) { struct event_hash *event_hash; struct task_data *task; unsigned long long val; task = find_event_task(h, event_data, record, pid); if (!task) return NULL; tep_read_number_field(event_data->start_match_field, record->data, &val); event_hash = find_and_update_start(task, event_data->start, record->ts, val); task->last_start = NULL; task->last_event = event_hash; return task; } static struct task_data * handle_start_event(struct handle_data *h, struct event_data *event_data, struct tep_record *record, unsigned long long pid) { struct start_data *start; struct task_data *task; unsigned long long val; task = find_event_task(h, event_data, record, pid); if (!task) return NULL; tep_read_number_field(event_data->end_match_field, record->data, &val); start = add_start(task, event_data, record, val, val); if (!start) { warning("Failed to allocate start of task"); return NULL; } task->last_start = start; task->last_event = NULL; return task; } static int handle_event_data(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *task = NULL; /* If this is the end of a event pair (start is set) */ if (event_data->start) task = handle_end_event(h, event_data, record, pid); /* If this is the start of a event pair (end is set) */ if (event_data->end) { task = handle_start_event(h, event_data, record, pid); /* handle_start_event only returns NULL on error */ if (!task) return -1; } if (!task) { task = find_task(h, pid); if (!task) return -1; task->proxy = NULL; task->last_start = NULL; task->last_event = NULL; account_task(task, event_data, record); } return 0; } static void handle_missed_events(struct handle_data *h, int cpu) { struct start_data *start; struct start_data *n; /* Clear all starts on this CPU */ list_for_each_entry_safe(start, n, &h->cpu_starts[cpu], list) { free_start(start); } /* Now clear all starts whose events can migrate */ list_for_each_entry_safe(start, n, &h->migrate_starts, list) { free_start(start); } } static int match_event_data(struct trace_hash_item *item, void *data) { struct event_data *event_data = event_data_from_item(item); int id = (int)(unsigned long)data; return event_data->id == id; } static struct event_data * find_event_data(struct handle_data *h, int id) { struct trace_hash_item *item; unsigned long long key = trace_hash(id); void *data = (void *)(unsigned long)id; item = trace_hash_find(&h->events, key, match_event_data, data); if (item) return event_data_from_item(item); return NULL; } static void trace_profile_record(struct tracecmd_input *handle, struct tep_record *record) { static struct handle_data *last_handle; struct tep_record *stack_record; struct event_data *event_data; struct task_data *task; struct handle_data *h; struct tep_handle *pevent; unsigned long long pid; int cpu = record->cpu; int id; if (last_handle && last_handle->handle == handle) h = last_handle; else { for (h = handles; h; h = h->next) { if (h->handle == handle) break; } if (!h) die("Handle not found?"); last_handle = h; } if (record->missed_events) handle_missed_events(h, cpu); pevent = h->pevent; id = tep_data_type(pevent, record); event_data = find_event_data(h, id); if (!event_data) return; /* Get this current PID */ tep_read_number_field(h->common_pid, record->data, &pid); task = find_task(h, pid); if (!task) return; stack_record = task->last_stack; if (event_data->handle_event) event_data->handle_event(h, pid, event_data, record, cpu); else handle_event_data(h, pid, event_data, record, cpu); /* If the last stack hasn't changed, free it */ if (stack_record && task->last_stack == stack_record) { tracecmd_free_record(stack_record); task->last_stack = NULL; } } static struct event_data * add_event(struct handle_data *h, const char *system, const char *event_name, enum event_data_type type) { struct event_data *event_data; struct tep_event *event; event = tep_find_event_by_name(h->pevent, system, event_name); if (!event) return NULL; if (!h->common_pid) { h->common_pid = tep_find_common_field(event, "common_pid"); if (!h->common_pid) die("No 'common_pid' found in event"); } event_data = malloc(sizeof(*event_data)); if (!event_data) { warning("Could not allocate event_data"); return NULL; } memset(event_data, 0, sizeof(*event_data)); event_data->id = event->id; event_data->event = event; event_data->type = type; event_data->hash.key = trace_hash(event_data->event->id); trace_hash_add(&h->events, &event_data->hash); return event_data; } static void mate_events(struct handle_data *h, struct event_data *start, const char *pid_field, const char *end_match_field, struct event_data *end, const char *start_match_field, int migrate, int global) { start->end = end; end->start = start; if (pid_field) { start->pid_field = tep_find_field(start->event, pid_field); if (!start->pid_field) die("Event: %s does not have field %s", start->event->name, pid_field); } /* Field to match with end */ start->end_match_field = tep_find_field(start->event, end_match_field); if (!start->end_match_field) die("Event: %s does not have field %s", start->event->name, end_match_field); /* Field to match with start */ end->start_match_field = tep_find_field(end->event, start_match_field); if (!end->start_match_field) die("Event: %s does not have field %s", end->event->name, start_match_field); start->migrate = migrate; start->global = global; end->migrate = migrate; end->global = global; } /** * tracecmd_mate_events - match events to profile against * @handle: The input handle where the events exist. * @start_event: The event that starts the transaction * @pid_field: Use this over common_pid (may be NULL to use common_pid) * @end_match_field: The field that matches the end events @start_match_field * @end_event: The event that ends the transaction * @start_match_field: The end event field that matches start's @end_match_field * @migrate: Can the transaction switch CPUs? 1 for yes, 0 for no * @global: The events are global and not per task */ void tracecmd_mate_events(struct tracecmd_input *handle, struct tep_event *start_event, const char *pid_field, const char *end_match_field, struct tep_event *end_event, const char *start_match_field, int migrate, int global) { struct handle_data *h; struct event_data *start; struct event_data *end; for (h = handles; h; h = h->next) { if (h->handle == handle) break; } if (!h) die("Handle not found for trace profile"); start = add_event(h, start_event->system, start_event->name, EVENT_TYPE_USER_MATE); end = add_event(h, end_event->system, end_event->name, EVENT_TYPE_USER_MATE); if (!start || !end) return; mate_events(h, start, pid_field, end_match_field, end, start_match_field, migrate, global); } static void func_print(struct trace_seq *s, struct event_hash *event_hash) { const char *func; func = tep_find_function(event_hash->event_data->event->tep, event_hash->val); if (func) trace_seq_printf(s, "func: %s()", func); else trace_seq_printf(s, "func: 0x%llx", event_hash->val); } static void syscall_print(struct trace_seq *s, struct event_hash *event_hash) { #ifndef NO_AUDIT const char *name = NULL; int machine; machine = audit_detect_machine(); if (machine < 0) goto fail; name = audit_syscall_to_name(event_hash->val, machine); if (!name) goto fail; trace_seq_printf(s, "syscall:%s", name); return; fail: #endif trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name, (int)event_hash->val); } /* From Linux include/linux/interrupt.h */ #define SOFTIRQS \ C(HI), \ C(TIMER), \ C(NET_TX), \ C(NET_RX), \ C(BLOCK), \ C(BLOCK_IOPOLL), \ C(TASKLET), \ C(SCHED), \ C(HRTIMER), \ C(RCU), \ C(NR), #undef C #define C(a) a##_SOFTIRQ enum { SOFTIRQS }; #undef C #define C(a) #a static const char *softirq_map[] = { SOFTIRQS }; static void softirq_print(struct trace_seq *s, struct event_hash *event_hash) { int softirq = (int)event_hash->val; if (softirq < NR_SOFTIRQ) trace_seq_printf(s, "%s:%s", event_hash->event_data->event->name, softirq_map[softirq]); else trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name, softirq); } static void sched_switch_print(struct trace_seq *s, struct event_hash *event_hash) { const char states[] = TASK_STATE_TO_CHAR_STR; int i; trace_seq_printf(s, "%s:", event_hash->event_data->event->name); if (event_hash->val) { int val = event_hash->val; for (i = 0; val && i < sizeof(states) - 1; i++, val >>= 1) { if (val & 1) trace_seq_putc(s, states[i+1]); } } else trace_seq_putc(s, 'R'); } static int handle_sched_switch_event(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *task; unsigned long long prev_pid; unsigned long long prev_state; unsigned long long next_pid; struct start_data *start; /* pid_field holds prev_pid, data_field holds prev_state */ tep_read_number_field(event_data->pid_field, record->data, &prev_pid); tep_read_number_field(event_data->data_field, record->data, &prev_state); /* only care about real states */ prev_state &= TASK_STATE_MAX - 1; /* end_match_field holds next_pid */ tep_read_number_field(event_data->end_match_field, record->data, &next_pid); task = find_task(h, prev_pid); if (!task) return -1; if (!task->comm) add_task_comm(task, h->switch_prev_comm, record); if (prev_state) task->sleeping = 1; else task->sleeping = 0; /* task is being scheduled out. prev_state tells why */ start = add_start(task, event_data, record, prev_pid, prev_state); task->last_start = start; task->last_event = NULL; task = find_task(h, next_pid); if (!task) return -1; if (!task->comm) add_task_comm(task, h->switch_next_comm, record); /* * If the next task was blocked, it required a wakeup to * restart, and there should be one. * But if it was preempted, we look for the previous sched switch. * Unfortunately, we have to look for both types of events as * we do not know why next_pid scheduled out. * * event_data->start holds the sched_wakeup event data. */ find_and_update_start(task, event_data->start, record->ts, next_pid); /* Look for this task if it was preempted (no wakeup found). */ find_and_update_start(task, event_data, record->ts, next_pid); return 0; } static int handle_stacktrace_event(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *orig_task; struct task_data *proxy; struct task_data *task; unsigned long long size; struct event_hash *event_hash; struct start_data *start; void *caller; task = find_task(h, pid); if (!task) return -1; if (task->last_stack) { tracecmd_free_record(task->last_stack); task->last_stack = NULL; } if ((proxy = task->proxy)) { task->proxy = NULL; orig_task = task; task = proxy; } if (!task->last_start && !task->last_event) { /* * Save this stack in case function graph needs it. * Need the original task, not a proxy. */ if (proxy) task = orig_task; tracecmd_record_ref(record); task->last_stack = record; return 0; } /* * start_match_field holds the size. * data_field holds the caller location. */ size = record->size - event_data->data_field->offset; caller = record->data + event_data->data_field->offset; /* * If there's a "start" then don't add the stack until * it finds a matching "end". */ if ((start = task->last_start)) { tracecmd_record_ref(record); start->stack.record = record; start->stack.size = size; start->stack.caller = caller; task->last_start = NULL; task->last_event = NULL; return 0; } event_hash = task->last_event; task->last_event = NULL; add_event_stack(event_hash, caller, size, event_hash->last_time, record->ts); return 0; } static int handle_fgraph_entry_event(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { unsigned long long size; struct start_data *start; struct task_data *task; void *caller; task = handle_start_event(h, event_data, record, pid); if (!task) return -1; /* * If a stack trace hasn't been used for a previous task, * then it could be a function trace that we can use for * the function graph. But stack traces come before the function * graph events (unfortunately). So we need to attach the previous * stack trace (if there is one) to this start event. */ if (task->last_stack) { start = task->last_start; record = task->last_stack; size = record->size - stacktrace_event->data_field->offset; caller = record->data + stacktrace_event->data_field->offset; start->stack.record = record; start->stack.size = size; start->stack.caller = caller; task->last_stack = NULL; task->last_event = NULL; } /* Do not map stacks after this event to this event */ task->last_start = NULL; return 0; } static int handle_fgraph_exit_event(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *task; task = handle_end_event(h, event_data, record, pid); if (!task) return -1; /* Do not match stacks with function graph exit events */ task->last_event = NULL; return 0; } static int handle_process_exec(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *task; unsigned long long val; /* Task has execed, remove the comm for it */ if (event_data->data_field) { tep_read_number_field(event_data->data_field, record->data, &val); pid = val; } task = find_task(h, pid); if (!task) return -1; free(task->comm); task->comm = NULL; return 0; } static int handle_sched_wakeup_event(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *proxy; struct task_data *task = NULL; struct start_data *start; unsigned long long success; proxy = find_task(h, pid); if (!proxy) return -1; /* If present, data_field holds "success" */ if (event_data->data_field) { tep_read_number_field(event_data->data_field, record->data, &success); /* If not a successful wakeup, ignore this */ if (!success) return 0; } tep_read_number_field(event_data->pid_field, record->data, &pid); task = find_task(h, pid); if (!task) return -1; if (!task->comm) add_task_comm(task, h->wakeup_comm, record); /* if the task isn't sleeping, then ignore the wake up */ if (!task->sleeping) { /* Ignore any following stack traces */ proxy->proxy = NULL; proxy->last_start = NULL; proxy->last_event = NULL; return 0; } /* It's being woken up */ task->sleeping = 0; /* * We need the stack trace to be hooked to the woken up * task, not the waker. */ proxy->proxy = task; /* There should be a blocked schedule out of this task */ find_and_update_start(task, event_data->start, record->ts, pid); /* Set this up for timing how long the wakeup takes */ start = add_start(task, event_data, record, pid, pid); task->last_event = NULL; task->last_start = start; return 0; } void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hook, int global) { struct tep_handle *pevent = tracecmd_get_tep(handle); struct tep_format_field **fields; struct handle_data *h; struct event_data *event_data; struct event_data *sched_switch; struct event_data *sched_wakeup; struct event_data *irq_entry; struct event_data *irq_exit; struct event_data *softirq_entry; struct event_data *softirq_exit; struct event_data *softirq_raise; struct event_data *fgraph_entry; struct event_data *fgraph_exit; struct event_data *syscall_enter; struct event_data *syscall_exit; struct event_data *process_exec; struct event_data *start_event; struct event_data *end_event; struct tep_event **events; int ret; int i; tracecmd_set_show_data_func(handle, trace_profile_record); h = malloc(sizeof(*h)); if (!h) { warning("Could not allocate handle"); return; }; memset(h, 0, sizeof(*h)); h->next = handles; handles = h; trace_hash_init(&h->task_hash, 1024); trace_hash_init(&h->events, 1024); trace_hash_init(&h->group_hash, 512); h->handle = handle; h->pevent = pevent; h->cpus = tracecmd_cpus(handle); /* * For streaming profiling, cpus will not be set up yet. * In this case, we simply use the number of cpus on the * system. */ if (!h->cpus) h->cpus = tracecmd_count_cpus(); list_head_init(&h->migrate_starts); h->cpu_starts = malloc(sizeof(*h->cpu_starts) * h->cpus); if (!h->cpu_starts) goto free_handle; for (i = 0; i < h->cpus; i++) list_head_init(&h->cpu_starts[i]); h->cpu_data = malloc(h->cpus * sizeof(*h->cpu_data)); if (!h->cpu_data) goto free_starts; memset(h->cpu_data, 0, h->cpus * sizeof(h->cpu_data)); h->global_task = malloc(sizeof(struct task_data)); if (!h->global_task) goto free_data; memset(h->global_task, 0, sizeof(struct task_data)); init_task(h, h->global_task); h->global_task->comm = strdup("Global Events"); if (!h->global_task->comm) die("malloc"); h->global_task->pid = -1; h->global_percpu_tasks = calloc(h->cpus, sizeof(struct task_data)); if (!h->global_percpu_tasks) die("malloc"); for (i = 0; i < h->cpus; i++) { init_task(h, &h->global_percpu_tasks[i]); ret = asprintf(&h->global_percpu_tasks[i].comm, "Global CPU[%d] Events", i); if (ret < 0) die("malloc"); h->global_percpu_tasks[i].pid = -1 - i; } irq_entry = add_event(h, "irq", "irq_handler_entry", EVENT_TYPE_IRQ); irq_exit = add_event(h, "irq", "irq_handler_exit", EVENT_TYPE_IRQ); softirq_entry = add_event(h, "irq", "softirq_entry", EVENT_TYPE_SOFTIRQ); softirq_exit = add_event(h, "irq", "softirq_exit", EVENT_TYPE_SOFTIRQ); softirq_raise = add_event(h, "irq", "softirq_raise", EVENT_TYPE_SOFTIRQ_RAISE); sched_wakeup = add_event(h, "sched", "sched_wakeup", EVENT_TYPE_WAKEUP); sched_switch = add_event(h, "sched", "sched_switch", EVENT_TYPE_SCHED_SWITCH); fgraph_entry = add_event(h, "ftrace", "funcgraph_entry", EVENT_TYPE_FUNC); fgraph_exit = add_event(h, "ftrace", "funcgraph_exit", EVENT_TYPE_FUNC); syscall_enter = add_event(h, "raw_syscalls", "sys_enter", EVENT_TYPE_SYSCALL); syscall_exit = add_event(h, "raw_syscalls", "sys_exit", EVENT_TYPE_SYSCALL); process_exec = add_event(h, "sched", "sched_process_exec", EVENT_TYPE_PROCESS_EXEC); stacktrace_event = add_event(h, "ftrace", "kernel_stack", EVENT_TYPE_STACK); if (stacktrace_event) { stacktrace_event->handle_event = handle_stacktrace_event; stacktrace_event->data_field = tep_find_field(stacktrace_event->event, "caller"); if (!stacktrace_event->data_field) die("Event: %s does not have field caller", stacktrace_event->event->name); } if (process_exec) { process_exec->handle_event = handle_process_exec; process_exec->data_field = tep_find_field(process_exec->event, "old_pid"); } if (sched_switch) { sched_switch->handle_event = handle_sched_switch_event; sched_switch->data_field = tep_find_field(sched_switch->event, "prev_state"); if (!sched_switch->data_field) die("Event: %s does not have field prev_state", sched_switch->event->name); h->switch_prev_comm = tep_find_field(sched_switch->event, "prev_comm"); if (!h->switch_prev_comm) die("Event: %s does not have field prev_comm", sched_switch->event->name); h->switch_next_comm = tep_find_field(sched_switch->event, "next_comm"); if (!h->switch_next_comm) die("Event: %s does not have field next_comm", sched_switch->event->name); sched_switch->print_func = sched_switch_print; } if (sched_switch && sched_wakeup) { mate_events(h, sched_switch, "prev_pid", "next_pid", sched_wakeup, "pid", 1, 0); mate_events(h, sched_wakeup, "pid", "pid", sched_switch, "prev_pid", 1, 0); sched_wakeup->handle_event = handle_sched_wakeup_event; /* The 'success' field may or may not be present */ sched_wakeup->data_field = tep_find_field(sched_wakeup->event, "success"); h->wakeup_comm = tep_find_field(sched_wakeup->event, "comm"); if (!h->wakeup_comm) die("Event: %s does not have field comm", sched_wakeup->event->name); } if (irq_entry && irq_exit) mate_events(h, irq_entry, NULL, "irq", irq_exit, "irq", 0, global); if (softirq_entry) softirq_entry->print_func = softirq_print; if (softirq_exit) softirq_exit->print_func = softirq_print; if (softirq_raise) softirq_raise->print_func = softirq_print; if (softirq_entry && softirq_exit) mate_events(h, softirq_entry, NULL, "vec", softirq_exit, "vec", 0, global); if (softirq_entry && softirq_raise) mate_events(h, softirq_raise, NULL, "vec", softirq_entry, "vec", 0, global); if (fgraph_entry && fgraph_exit) { mate_events(h, fgraph_entry, NULL, "func", fgraph_exit, "func", 1, 0); fgraph_entry->handle_event = handle_fgraph_entry_event; fgraph_exit->handle_event = handle_fgraph_exit_event; fgraph_entry->print_func = func_print; } if (syscall_enter && syscall_exit) { mate_events(h, syscall_enter, NULL, "id", syscall_exit, "id", 1, 0); syscall_enter->print_func = syscall_print; syscall_exit->print_func = syscall_print; } events = tep_list_events(pevent, TEP_EVENT_SORT_ID); if (!events) die("malloc"); /* Add some other events */ event_data = add_event(h, "ftrace", "function", EVENT_TYPE_FUNC); if (event_data) { event_data->data_field = tep_find_field(event_data->event, "ip"); } /* Add any user defined hooks */ for (; hook; hook = hook->next) { start_event = add_event(h, hook->start_system, hook->start_event, EVENT_TYPE_USER_MATE); end_event = add_event(h, hook->end_system, hook->end_event, EVENT_TYPE_USER_MATE); if (!start_event) { warning("Event %s not found", hook->start_event); continue; } if (!end_event) { warning("Event %s not found", hook->end_event); continue; } mate_events(h, start_event, hook->pid, hook->start_match, end_event, hook->end_match, hook->migrate, hook->global); } /* Now add any defined event that we haven't processed */ for (i = 0; events[i]; i++) { event_data = find_event_data(h, events[i]->id); if (event_data) continue; event_data = add_event(h, events[i]->system, events[i]->name, EVENT_TYPE_UNDEFINED); fields = tep_event_fields(events[i]); if (!fields) die("malloc"); if (fields[0]) event_data->data_field = fields[0]; free(fields); } return; free_data: free(h->cpu_data); free_starts: free(h->cpu_starts); free_handle: handles = h->next; free(h); warning("Failed handle allocations"); } static void output_event_stack(struct tep_handle *pevent, struct stack_data *stack) { int longsize = tep_get_long_size(pevent); unsigned long long val; const char *func; unsigned long long stop = -1ULL; void *ptr; int i; if (longsize < 8) stop &= (1ULL << (longsize * 8)) - 1; if (stack->count) stack->time_avg = stack->time / stack->count; printf(" %lld total:%lld min:%lld(ts:%lld.%06lld) max:%lld(ts:%lld.%06lld) avg=%lld\n", stack->count, stack->time, stack->time_min, nsecs_per_sec(stack->ts_min), mod_to_usec(stack->ts_min), stack->time_max, nsecs_per_sec(stack->ts_max), mod_to_usec(stack->ts_max), stack->time_avg); for (i = 0; i < stack->size; i += longsize) { ptr = stack->caller + i; switch (longsize) { case 4: /* todo, read value from pevent */ val = *(unsigned int *)ptr; break; case 8: val = *(unsigned long long *)ptr; break; default: die("Strange long size %d", longsize); } if (val == stop) break; func = tep_find_function(pevent, val); if (func) printf(" => %s (0x%llx)\n", func, val); else printf(" => 0x%llx\n", val); } } struct stack_chain { struct stack_chain *children; unsigned long long val; unsigned long long time; unsigned long long time_min; unsigned long long ts_min; unsigned long long time_max; unsigned long long ts_max; unsigned long long time_avg; unsigned long long count; int percent; int nr_children; }; static int compare_chains(const void *a, const void *b) { const struct stack_chain * A = a; const struct stack_chain * B = b; if (A->time > B->time) return -1; if (A->time < B->time) return 1; /* If stacks don't use time, then use count */ if (A->count > B->count) return -1; if (A->count < B->count) return 1; return 0; } static int calc_percent(unsigned long long val, unsigned long long total) { return (val * 100 + total / 2) / total; } static int stack_overflows(struct stack_data *stack, int longsize, int level) { return longsize * level > stack->size - longsize; } static unsigned long long stack_value(struct stack_data *stack, int longsize, int level) { void *ptr; ptr = &stack->caller[longsize * level]; return longsize == 8 ? *(u64 *)ptr : *(unsigned *)ptr; } static struct stack_chain * make_stack_chain(struct stack_data **stacks, int cnt, int longsize, int level, int *nr_children) { struct stack_chain *chain; unsigned long long total_time = 0; unsigned long long total_count = 0; unsigned long long time; unsigned long long time_min; unsigned long long ts_min; unsigned long long time_max; unsigned long long ts_max; unsigned long long count; unsigned long long stop = -1ULL; int nr_chains = 0; u64 last = 0; u64 val; int start; int i; int x; if (longsize < 8) stop &= (1ULL << (longsize * 8)) - 1; /* First find out how many diffs there are */ for (i = 0; i < cnt; i++) { if (stack_overflows(stacks[i], longsize, level)) continue; val = stack_value(stacks[i], longsize, level); if (val == stop) continue; if (!nr_chains || val != last) nr_chains++; last = val; } if (!nr_chains) { *nr_children = 0; return NULL; } chain = malloc(sizeof(*chain) * nr_chains); if (!chain) { warning("Could not allocate chain"); return NULL; } memset(chain, 0, sizeof(*chain) * nr_chains); x = 0; count = 0; start = 0; time = 0; time_min = 0; time_max = 0; for (i = 0; i < cnt; i++) { if (stack_overflows(stacks[i], longsize, level)) { start = i+1; continue; } val = stack_value(stacks[i], longsize, level); if (val == stop) { start = i+1; continue; } count += stacks[i]->count; time += stacks[i]->time; if (stacks[i]->time_max > time_max) { time_max = stacks[i]->time_max; ts_max = stacks[i]->ts_max; } if (i == start || stacks[i]->time_min < time_min) { time_min = stacks[i]->time_min; ts_min = stacks[i]->ts_min; } if (i == cnt - 1 || stack_overflows(stacks[i+1], longsize, level) || val != stack_value(stacks[i+1], longsize, level)) { total_time += time; total_count += count; chain[x].val = val; chain[x].time_avg = time / count; chain[x].count = count; chain[x].time = time; chain[x].time_min = time_min; chain[x].ts_min = ts_min; chain[x].time_max = time_max; chain[x].ts_max = ts_max; chain[x].children = make_stack_chain(&stacks[start], (i - start) + 1, longsize, level+1, &chain[x].nr_children); x++; start = i + 1; count = 0; time = 0; time_min = 0; time_max = 0; } } qsort(chain, nr_chains, sizeof(*chain), compare_chains); *nr_children = nr_chains; /* Should never happen */ if (!total_time && !total_count) return chain; /* Now calculate percentage */ time = 0; for (i = 0; i < nr_chains; i++) { if (total_time) chain[i].percent = calc_percent(chain[i].time, total_time); /* In case stacks don't have time */ else if (total_count) chain[i].percent = calc_percent(chain[i].count, total_count); } return chain; } static void free_chain(struct stack_chain *chain, int nr_chains) { int i; if (!chain) return; for (i = 0; i < nr_chains; i++) free_chain(chain[i].children, chain[i].nr_children); free(chain); } #define INDENT 5 static void print_indent(int level, unsigned long long mask) { char line; int p; for (p = 0; p < level + 1; p++) { if (mask & (1ULL << p)) line = '|'; else line = ' '; printf("%*c ", INDENT, line); } } static void print_chain_func(struct tep_handle *pevent, struct stack_chain *chain) { unsigned long long val = chain->val; const char *func; func = tep_find_function(pevent, val); if (func) printf("%s (0x%llx)\n", func, val); else printf("0x%llx\n", val); } static void output_chain(struct tep_handle *pevent, struct stack_chain *chain, int level, int nr_chains, unsigned long long *mask) { struct stack_chain *child; int nr_children; int i; char line = '|'; if (!nr_chains) return; *mask |= (1ULL << (level + 1)); print_indent(level + 1, *mask); printf("\n"); for (i = 0; i < nr_chains; i++) { print_indent(level, *mask); printf("%*c ", INDENT, '+'); if (i == nr_chains - 1) { *mask &= ~(1ULL << (level + 1)); line = ' '; } print_chain_func(pevent, &chain[i]); print_indent(level, *mask); printf("%*c ", INDENT, line); printf(" %d%% (%lld)", chain[i].percent, chain[i].count); if (chain[i].time) printf(" time:%lld max:%lld(ts:%lld.%06lld) min:%lld(ts:%lld.%06lld) avg:%lld", chain[i].time, chain[i].time_max, nsecs_per_sec(chain[i].ts_max), mod_to_usec(chain[i].ts_max), chain[i].time_min, nsecs_per_sec(chain[i].ts_min), mod_to_usec(chain[i].ts_min), chain[i].time_avg); printf("\n"); for (child = chain[i].children, nr_children = chain[i].nr_children; child && nr_children == 1; nr_children = child->nr_children, child = child->children) { print_indent(level, *mask); printf("%*c ", INDENT, line); printf(" "); print_chain_func(pevent, child); } if (child) output_chain(pevent, child, level+1, nr_children, mask); print_indent(level + 1, *mask); printf("\n"); } *mask &= ~(1ULL << (level + 1)); print_indent(level, *mask); printf("\n"); } static int compare_stacks(const void *a, const void *b) { struct stack_data * const *A = a; struct stack_data * const *B = b; unsigned int sa, sb; int size; int i; /* only compare up to the smaller size of the two */ if ((*A)->size > (*B)->size) size = (*B)->size; else size = (*A)->size; for (i = 0; i < size; i += sizeof(sa)) { sa = *(unsigned *)&(*A)->caller[i]; sb = *(unsigned *)&(*B)->caller[i]; if (sa > sb) return 1; if (sa < sb) return -1; } /* They are the same up to size. Then bigger size wins */ if ((*A)->size > (*B)->size) return 1; if ((*A)->size < (*B)->size) return -1; return 0; } static void output_stacks(struct tep_handle *pevent, struct trace_hash *stack_hash) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct stack_data **stacks; struct stack_chain *chain; unsigned long long mask = 0; int nr_chains; int longsize = tep_get_long_size(pevent); int nr_stacks; int i; nr_stacks = 0; trace_hash_for_each_bucket(bucket, stack_hash) { trace_hash_for_each_item(item, bucket) { nr_stacks++; } } stacks = malloc(sizeof(*stacks) * nr_stacks); if (!stacks) { warning("Could not allocate stacks"); return; } nr_stacks = 0; trace_hash_for_each_bucket(bucket, stack_hash) { trace_hash_for_each_item(item, bucket) { stacks[nr_stacks++] = stack_from_item(item); } } qsort(stacks, nr_stacks, sizeof(*stacks), compare_stacks); chain = make_stack_chain(stacks, nr_stacks, longsize, 0, &nr_chains); output_chain(pevent, chain, 0, nr_chains, &mask); if (0) for (i = 0; i < nr_stacks; i++) output_event_stack(pevent, stacks[i]); free(stacks); free_chain(chain, nr_chains); } static void output_event(struct event_hash *event_hash) { struct event_data *event_data = event_hash->event_data; struct tep_handle *pevent = event_data->event->tep; struct trace_seq s; trace_seq_init(&s); if (event_data->print_func) event_data->print_func(&s, event_hash); else if (event_data->type == EVENT_TYPE_FUNC) func_print(&s, event_hash); else trace_seq_printf(&s, "%s:0x%llx", event_data->event->name, event_hash->val); trace_seq_terminate(&s); printf(" Event: %s (%lld)", s.buffer, event_hash->count); trace_seq_destroy(&s); if (event_hash->time_total) { event_hash->time_avg = event_hash->time_total / event_hash->count; printf(" Total: %lld Avg: %lld Max: %lld(ts:%lld.%06lld) Min:%lld(ts:%lld.%06lld)", event_hash->time_total, event_hash->time_avg, event_hash->time_max, nsecs_per_sec(event_hash->ts_max), mod_to_usec(event_hash->ts_max), event_hash->time_min, nsecs_per_sec(event_hash->ts_min), mod_to_usec(event_hash->ts_min)); } printf("\n"); output_stacks(pevent, &event_hash->stacks); } static int compare_events(const void *a, const void *b) { struct event_hash * const *A = a; struct event_hash * const *B = b; const struct event_data *event_data_a = (*A)->event_data; const struct event_data *event_data_b = (*B)->event_data; /* Schedule switch goes first */ if (event_data_a->type == EVENT_TYPE_SCHED_SWITCH) { if (event_data_b->type != EVENT_TYPE_SCHED_SWITCH) return -1; /* lower the state the better */ if ((*A)->val > (*B)->val) return 1; if ((*A)->val < (*B)->val) return -1; return 0; } else if (event_data_b->type == EVENT_TYPE_SCHED_SWITCH) return 1; /* Wakeups are next */ if (event_data_a->type == EVENT_TYPE_WAKEUP) { if (event_data_b->type != EVENT_TYPE_WAKEUP) return -1; return 0; } else if (event_data_b->type == EVENT_TYPE_WAKEUP) return 1; if (event_data_a->id > event_data_b->id) return 1; if (event_data_a->id < event_data_b->id) return -1; if ((*A)->time_total > (*B)->time_total) return -1; if ((*A)->time_total < (*B)->time_total) return 1; return 0; } static void output_task(struct handle_data *h, struct task_data *task) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct event_hash **events; const char *comm; int nr_events = 0; int i; if (task->group) return; if (task->comm) comm = task->comm; else comm = tep_data_comm_from_pid(h->pevent, task->pid); if (task->pid < 0) printf("%s\n", task->comm); else printf("\ntask: %s-%d\n", comm, task->pid); trace_hash_for_each_bucket(bucket, &task->event_hash) { trace_hash_for_each_item(item, bucket) { nr_events++; } } events = malloc(sizeof(*events) * nr_events); if (!events) { warning("Could not allocate events"); return; } i = 0; trace_hash_for_each_bucket(bucket, &task->event_hash) { trace_hash_for_each_item(item, bucket) { events[i++] = event_from_item(item); } } qsort(events, nr_events, sizeof(*events), compare_events); for (i = 0; i < nr_events; i++) output_event(events[i]); free(events); } static void output_group(struct handle_data *h, struct group_data *group) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct event_hash **events; int nr_events = 0; int i; printf("\ngroup: %s\n", group->comm); trace_hash_for_each_bucket(bucket, &group->event_hash) { trace_hash_for_each_item(item, bucket) { nr_events++; } } events = malloc(sizeof(*events) * nr_events); if (!events) { warning("Could not allocate events"); return; } i = 0; trace_hash_for_each_bucket(bucket, &group->event_hash) { trace_hash_for_each_item(item, bucket) { events[i++] = event_from_item(item); } } qsort(events, nr_events, sizeof(*events), compare_events); for (i = 0; i < nr_events; i++) output_event(events[i]); free(events); } static int compare_tasks(const void *a, const void *b) { struct task_data * const *A = a; struct task_data * const *B = b; if ((*A)->pid > (*B)->pid) return 1; else if ((*A)->pid < (*B)->pid) return -1; return 0; } static int compare_groups(const void *a, const void *b) { const char *A = a; const char *B = b; return strcmp(A, B); } static void free_event_hash(struct event_hash *event_hash) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct stack_data *stack; trace_hash_for_each_bucket(bucket, &event_hash->stacks) { trace_hash_while_item(item, bucket) { stack = stack_from_item(item); trace_hash_del(&stack->hash); free(stack); } } trace_hash_free(&event_hash->stacks); free(event_hash); } static void __free_task(struct task_data *task) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct start_data *start; struct event_hash *event_hash; free(task->comm); trace_hash_for_each_bucket(bucket, &task->start_hash) { trace_hash_while_item(item, bucket) { start = start_from_item(item); if (start->stack.record) tracecmd_free_record(start->stack.record); list_del(&start->list); trace_hash_del(item); free(start); } } trace_hash_free(&task->start_hash); trace_hash_for_each_bucket(bucket, &task->event_hash) { trace_hash_while_item(item, bucket) { event_hash = event_from_item(item); trace_hash_del(item); free_event_hash(event_hash); } } trace_hash_free(&task->event_hash); if (task->last_stack) tracecmd_free_record(task->last_stack); } static void free_task(struct task_data *task) { __free_task(task); free(task); } static void free_group(struct group_data *group) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct event_hash *event_hash; free(group->comm); trace_hash_for_each_bucket(bucket, &group->event_hash) { trace_hash_while_item(item, bucket) { event_hash = event_from_item(item); trace_hash_del(item); free_event_hash(event_hash); } } trace_hash_free(&group->event_hash); free(group); } static void show_global_task(struct handle_data *h, struct task_data *task) { if (trace_hash_empty(&task->event_hash)) return; output_task(h, task); } static void output_tasks(struct handle_data *h) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct task_data **tasks; int nr_tasks = 0; int i; trace_hash_for_each_bucket(bucket, &h->task_hash) { trace_hash_for_each_item(item, bucket) { nr_tasks++; } } tasks = malloc(sizeof(*tasks) * nr_tasks); if (!tasks) { warning("Could not allocate tasks"); return; } nr_tasks = 0; trace_hash_for_each_bucket(bucket, &h->task_hash) { trace_hash_while_item(item, bucket) { tasks[nr_tasks++] = task_from_item(item); trace_hash_del(item); } } qsort(tasks, nr_tasks, sizeof(*tasks), compare_tasks); for (i = 0; i < nr_tasks; i++) { output_task(h, tasks[i]); free_task(tasks[i]); } free(tasks); } static void output_groups(struct handle_data *h) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct group_data **groups; int nr_groups = 0; int i; trace_hash_for_each_bucket(bucket, &h->group_hash) { trace_hash_for_each_item(item, bucket) { nr_groups++; } } if (nr_groups == 0) return; groups = malloc(sizeof(*groups) * nr_groups); if (!groups) { warning("Could not allocate groups"); return; } nr_groups = 0; trace_hash_for_each_bucket(bucket, &h->group_hash) { trace_hash_while_item(item, bucket) { groups[nr_groups++] = group_from_item(item); trace_hash_del(item); } } qsort(groups, nr_groups, sizeof(*groups), compare_groups); for (i = 0; i < nr_groups; i++) { output_group(h, groups[i]); free_group(groups[i]); } free(groups); } static void output_handle(struct handle_data *h) { int i; show_global_task(h, h->global_task); for (i = 0; i < h->cpus; i++) show_global_task(h, &h->global_percpu_tasks[i]); output_groups(h); output_tasks(h); } static void merge_event_stack(struct event_hash *event, struct stack_data *stack) { struct stack_data *exist; struct trace_hash_item *item; struct stack_match match; match.caller = stack->caller; match.size = stack->size; item = trace_hash_find(&event->stacks, stack->hash.key, match_stack, &match); if (!item) { trace_hash_add(&event->stacks, &stack->hash); return; } exist = stack_from_item(item); exist->count += stack->count; exist->time += stack->time; if (exist->time_max < stack->time_max) { exist->time_max = stack->time_max; exist->ts_max = stack->ts_max; } if (exist->time_min > stack->time_min) { exist->time_min = stack->time_min; exist->ts_min = stack->ts_min; } free(stack); } static void merge_stacks(struct event_hash *exist, struct event_hash *event) { struct stack_data *stack; struct trace_hash_item *item; struct trace_hash_item **bucket; trace_hash_for_each_bucket(bucket, &event->stacks) { trace_hash_while_item(item, bucket) { stack = stack_from_item(item); trace_hash_del(&stack->hash); merge_event_stack(exist, stack); } } } static void merge_event_into_group(struct group_data *group, struct event_hash *event) { struct event_hash *exist; struct trace_hash_item *item; struct event_data_match edata; unsigned long long key; if (event->event_data->type == EVENT_TYPE_WAKEUP) { edata.event_data = event->event_data; event->search_val = 0; event->val = 0; key = trace_hash((unsigned long)event->event_data); } else if (event->event_data->type == EVENT_TYPE_SCHED_SWITCH) { edata.event_data = event->event_data; event->search_val = event->val; key = (unsigned long)event->event_data + ((unsigned long)event->val * 2); key = trace_hash(key); } else { key = event->hash.key; } edata.event_data = event->event_data; edata.search_val = event->search_val; edata.val = event->val; item = trace_hash_find(&group->event_hash, key, match_event, &edata); if (!item) { event->hash.key = key; trace_hash_add(&group->event_hash, &event->hash); return; } exist = event_from_item(item); exist->count += event->count; exist->time_total += event->time_total; if (exist->time_max < event->time_max) { exist->time_max = event->time_max; exist->ts_max = event->ts_max; } if (exist->time_min > event->time_min) { exist->time_min = event->time_min; exist->ts_min = event->ts_min; } merge_stacks(exist, event); free_event_hash(event); } static void add_group(struct handle_data *h, struct task_data *task) { unsigned long long key; struct trace_hash_item *item; struct group_data *grp; struct trace_hash_item **bucket; void *data = task->comm; if (!task->comm) return; key = trace_hash_str(task->comm); item = trace_hash_find(&h->group_hash, key, match_group, data); if (item) { grp = group_from_item(item); } else { grp = malloc(sizeof(*grp)); if (!grp) { warning("Could not allocate group"); return; } memset(grp, 0, sizeof(*grp)); grp->comm = strdup(task->comm); if (!grp->comm) die("strdup"); grp->hash.key = key; trace_hash_add(&h->group_hash, &grp->hash); trace_hash_init(&grp->event_hash, 32); } task->group = grp; trace_hash_for_each_bucket(bucket, &task->event_hash) { trace_hash_while_item(item, bucket) { struct event_hash *event_hash; event_hash = event_from_item(item); trace_hash_del(&event_hash->hash); merge_event_into_group(grp, event_hash); } } } static void merge_tasks(struct handle_data *h) { struct trace_hash_item **bucket; struct trace_hash_item *item; if (!merge_like_comms) return; trace_hash_for_each_bucket(bucket, &h->task_hash) { trace_hash_for_each_item(item, bucket) add_group(h, task_from_item(item)); } } int do_trace_profile(void) { struct handle_data *h; for (h = handles; h; h = h->next) { if (merge_like_comms) merge_tasks(h); output_handle(h); trace_hash_free(&h->task_hash); } return 0; }