Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for updated cpu hotplug API in 4.10 kernel #744

Merged
merged 7 commits into from
Feb 23, 2017
Merged
199 changes: 128 additions & 71 deletions driver/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ static void record_event_all_consumers(enum ppm_event_type event_type,
struct event_data_t *event_datap);
static int init_ring_buffer(struct ppm_ring_buffer_context *ring);
static void free_ring_buffer(struct ppm_ring_buffer_context *ring);
static void reset_ring_buffer(struct ppm_ring_buffer_context *ring);
void ppm_task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);

#ifndef CONFIG_HAVE_SYSCALL_TRACEPOINTS
Expand Down Expand Up @@ -183,6 +184,10 @@ static bool verbose = 0;

static unsigned int max_consumers = 5;

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
/*
 * Value returned by cpuhp_setup_state_nocalls() in sysdig_init().
 * sysdig_exit() passes it to cpuhp_remove_state_nocalls() only when it
 * is > 0, so the initial 0 means "no hotplug state registered".
 */
static enum cpuhp_state hp_state = 0;
#endif

#define vpr_info(fmt, ...) \
do { \
if (verbose) \
Expand Down Expand Up @@ -246,9 +251,7 @@ static void check_remove_consumer(struct ppm_consumer_t *consumer, int remove_fr

for_each_possible_cpu(cpu) {
struct ppm_ring_buffer_context *ring = per_cpu_ptr(consumer->ring_buffers, cpu);

if (ring->cpu_online)
free_ring_buffer(ring);
free_ring_buffer(ring);
}

free_percpu(consumer->ring_buffers);
Expand Down Expand Up @@ -339,6 +342,14 @@ static int ppm_open(struct inode *inode, struct file *filp)
ring->info = NULL;
}

/*
* If a cpu is offline when the consumer is first created, we
* will never get events for that cpu even if it later comes
* online via hotplug. We could allocate these rings on-demand
* later in this function if needed for hotplug, but that
* requires the consumer to know to call open again, and sysdig
* doesn't support that.
*/
for_each_online_cpu(cpu) {
ring = per_cpu_ptr(consumer->ring_buffers, cpu);

Expand All @@ -363,9 +374,11 @@ static int ppm_open(struct inode *inode, struct file *filp)

/*
* Check if the CPU pointed by this device is online. If it isn't stop here and
* return ENODEV.
* return ENODEV. The cpu could be online while buffer is NULL if there's a cpu
* online hotplug callback between the first open on this consumer and the open
* for this particular device.
*/
if (ring->cpu_online == false) {
if (ring->cpu_online == false || ring->buffer == NULL) {
ret = -ENODEV;
goto cleanup_open;
}
Expand Down Expand Up @@ -394,16 +407,7 @@ static int ppm_open(struct inode *inode, struct file *filp)
consumer->need_to_insert_drop_e = 0;
consumer->need_to_insert_drop_x = 0;
bitmap_fill(g_events_mask, PPM_EVENT_MAX); /* Enable all syscall to be passed to userspace */
ring->info->head = 0;
ring->info->tail = 0;
ring->nevents = 0;
ring->info->n_evts = 0;
ring->info->n_drops_buffer = 0;
ring->info->n_drops_pf = 0;
ring->info->n_preemptions = 0;
ring->info->n_context_switches = 0;
ring->capture_enabled = false;
getnstimeofday(&ring->last_print_time);
reset_ring_buffer(ring);
ring->open = true;

if (!g_tracepoint_registered) {
Expand Down Expand Up @@ -1811,7 +1815,7 @@ static int init_ring_buffer(struct ppm_ring_buffer_context *ring)
ring->str_storage = (char *)__get_free_page(GFP_USER);
if (!ring->str_storage) {
pr_err("Error allocating the string storage\n");
goto err_str_storage;
goto init_ring_err;
}

/*
Expand All @@ -1822,7 +1826,7 @@ static int init_ring_buffer(struct ppm_ring_buffer_context *ring)
ring->buffer = vmalloc(RING_BUF_SIZE + 2 * PAGE_SIZE);
if (ring->buffer == NULL) {
pr_err("Error allocating ring memory\n");
goto err_buffer;
goto init_ring_err;
}

for (j = 0; j < RING_BUF_SIZE + 2 * PAGE_SIZE; j++)
Expand All @@ -1834,49 +1838,59 @@ static int init_ring_buffer(struct ppm_ring_buffer_context *ring)
ring->info = vmalloc(sizeof(struct ppm_ring_buffer_info));
if (ring->info == NULL) {
pr_err("Error allocating ring memory\n");
goto err_ring_info;
goto init_ring_err;
}

/*
* Initialize the buffer info structure
*/
ring->open = false;
ring->capture_enabled = false;
ring->info->head = 0;
ring->info->tail = 0;
ring->nevents = 0;
ring->info->n_evts = 0;
ring->info->n_drops_buffer = 0;
ring->info->n_drops_pf = 0;
ring->info->n_preemptions = 0;
ring->info->n_context_switches = 0;
reset_ring_buffer(ring);
atomic_set(&ring->preempt_count, 0);
getnstimeofday(&ring->last_print_time);

pr_info("CPU buffer initialized, size=%d\n", RING_BUF_SIZE);

return 1;

err_ring_info:
vfree((void *)ring->buffer);
ring->buffer = NULL;
err_buffer:
free_page((unsigned long)ring->str_storage);
ring->str_storage = NULL;
err_str_storage:
init_ring_err:
free_ring_buffer(ring);
return 0;
}

/*
 * Release every allocation owned by a per-cpu ring buffer context.
 *
 * Each member is freed only if it is set and is then cleared to NULL, so
 * the function can be called on a partially initialized ring (as the
 * init_ring_buffer error path does) and can be called more than once.
 *
 * @ring: ring buffer context whose info, buffer and str_storage members
 *        are to be released.
 */
static void free_ring_buffer(struct ppm_ring_buffer_context *ring)
{
	if (ring->info) {
		vfree(ring->info);
		ring->info = NULL;
	}

	if (ring->buffer) {
		vfree((void *)ring->buffer);
		ring->buffer = NULL;
	}

	if (ring->str_storage) {
		free_page((unsigned long)ring->str_storage);
		ring->str_storage = NULL;
	}
}

static void reset_ring_buffer(struct ppm_ring_buffer_context *ring)
{
/*
* ring->preempt_count is not reset to 0 on purpose, to prevent a race condition
* see ppm_open
*/
ring->open = false;
ring->capture_enabled = false;
ring->info->head = 0;
ring->info->tail = 0;
ring->nevents = 0;
ring->info->n_evts = 0;
ring->info->n_drops_buffer = 0;
ring->info->n_drops_pf = 0;
ring->info->n_preemptions = 0;
ring->info->n_context_switches = 0;
getnstimeofday(&ring->last_print_time);
}

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0))
Expand Down Expand Up @@ -1955,18 +1969,61 @@ static char *ppm_devnode(struct device *dev, mode_t *mode)
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 20) */

static int do_cpu_callback(unsigned long cpu, long sd_action)
{
struct ppm_ring_buffer_context *ring;
struct ppm_consumer_t *consumer;
struct event_data_t event_data;

if (sd_action != 0) {
rcu_read_lock();

list_for_each_entry_rcu(consumer, &g_consumer_list, node) {
ring = per_cpu_ptr(consumer->ring_buffers, cpu);
if (sd_action == 1) {
/*
* If the cpu was offline when the consumer was created,
* this won't do anything because we never created a ring
* buffer. We can't safely create one here because we're
* in atomic context, and the consumer needs to call open
* on this device anyways, so do it in ppm_open.
*/
ring->cpu_online = true;
} else if (sd_action == 2) {
ring->cpu_online = false;
}
}

rcu_read_unlock();

event_data.category = PPMC_CONTEXT_SWITCH;
event_data.event_info.context_data.sched_prev = (void *)cpu;
event_data.event_info.context_data.sched_next = (void *)sd_action;
record_event_all_consumers(PPME_CPU_HOTPLUG_E, UF_NEVER_DROP, &event_data);
}
return 0;
}

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
/*
 * cpu hotplug "online" callback (kernels >= 4.10), registered through
 * cpuhp_setup_state_nocalls() in sysdig_init().
 *
 * @cpu: the cpu that came online.
 *
 * Returns the result of do_cpu_callback() for the online action (1).
 */
static int sysdig_cpu_online(unsigned int cpu)
{
	/* cpu is unsigned, so print it with %u rather than %d. */
	vpr_info("sysdig_cpu_online on cpu %u\n", cpu);
	return do_cpu_callback(cpu, 1);
}

/*
 * cpu hotplug "offline" callback (kernels >= 4.10), registered through
 * cpuhp_setup_state_nocalls() in sysdig_init().
 *
 * @cpu: the cpu that is going offline.
 *
 * Returns the result of do_cpu_callback() for the offline action (2).
 */
static int sysdig_cpu_offline(unsigned int cpu)
{
	/* cpu is unsigned, so print it with %u rather than %d. */
	vpr_info("sysdig_cpu_offline on cpu %u\n", cpu);
	return do_cpu_callback(cpu, 2);
}
#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) */
/*
* This gets called every time a CPU is added or removed
*/
static int cpu_callback(struct notifier_block *self, unsigned long action,
void *hcpu)
{
long cpu = (long)hcpu;
struct ppm_ring_buffer_context *ring;
struct ppm_consumer_t *consumer;
bool event_recorded = false;
struct timespec ts;
struct event_data_t event_data;
unsigned long cpu = (unsigned long)hcpu;
long sd_action = 0;

switch (action) {
Expand All @@ -1986,38 +2043,17 @@ static int cpu_callback(struct notifier_block *self, unsigned long action,
break;
}

/*
* Based on the action, spit an event in the first available ring
*/
if (sd_action != 0) {
rcu_read_lock();

list_for_each_entry_rcu(consumer, &g_consumer_list, node) {
ring = per_cpu_ptr(consumer->ring_buffers, cpu);
ring->capture_enabled = false;

getnstimeofday(&ts);

event_data.category = PPMC_CONTEXT_SWITCH;
event_data.event_info.context_data.sched_prev = (void *)cpu;
event_data.event_info.context_data.sched_next = (void *)sd_action;

if (!event_recorded) {
record_event_consumer(consumer, PPME_CPU_HOTPLUG_E, UF_NEVER_DROP, &ts, &event_data);
event_recorded = true;
}
}

rcu_read_unlock();
}

return NOTIFY_DONE;
if (do_cpu_callback(cpu, sd_action) < 0)
return NOTIFY_BAD;
else
return NOTIFY_OK;
}

/*
 * Notifier block (kernels < 4.10) that routes cpu hotplug notifications
 * to cpu_callback().
 */
static struct notifier_block cpu_notifier = {
	.next = NULL,
	.notifier_call = cpu_callback,
};
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) */

int sysdig_init(void)
{
Expand All @@ -2026,6 +2062,9 @@ int sysdig_init(void)
unsigned int num_cpus;
int ret;
int acrret = 0;
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
int hp_ret;
#endif
int j;
int n_created_devices = 0;
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 20)
Expand Down Expand Up @@ -2129,7 +2168,20 @@ int sysdig_init(void)
* Set up our callback in case we get a hotplug event while we are
* initializing the cpu structures
*/
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
hp_ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
"sysdig/probe:online",
sysdig_cpu_online,
sysdig_cpu_offline);
if (hp_ret <= 0) {
pr_err("error registering cpu hotplug callback\n");
ret = hp_ret;
goto init_module_err;
}
hp_state = hp_ret;
#else
register_cpu_notifier(&cpu_notifier);
#endif

/*
* All ok. Final initializations.
Expand Down Expand Up @@ -2178,7 +2230,12 @@ void sysdig_exit(void)
tracepoint_synchronize_unregister();
#endif

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
if (hp_state > 0)
cpuhp_remove_state_nocalls(hp_state);
#else
unregister_cpu_notifier(&cpu_notifier);
#endif
}

module_init(sysdig_init);
Expand Down