/*
 * Monitoring code for network dropped packet alerts
 *
 * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/netlink.h>
#include <linux/net_dropmon.h>
#include <linux/percpu.h>
#include <linux/timer.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <net/genetlink.h>
#include <net/netevent.h>

#include <trace/events/skb.h>
#include <trace/events/napi.h>

#include <asm/unaligned.h>

#define TRACE_ON 1
#define TRACE_OFF 0

static void send_dm_alert(struct work_struct *unused);

/*
 * Globals: the current trace state, and the mutex
 * that protects it
 */
static int trace_state = TRACE_OFF;
static DEFINE_MUTEX(trace_state_mutex);

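/*
 * Per-CPU state: each CPU owns a pre-built alert skb, a countdown of
 * how many more drop hits it will record (dm_hit_count), and a
 * timer/work pair used to defer and then ship the alert.  Keeping this
 * state per CPU lets trace_drop_common() run without cross-CPU locks.
 */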
struct per_cpu_dm_data {
	struct work_struct dm_alert_work;
	struct sk_buff __rcu *skb;
	atomic_t dm_hit_count;
	struct timer_list send_timer;
	int cpu;
};

struct dm_hw_stat_delta {
	struct net_device *dev;
	unsigned long last_rx;
	struct list_head list;
	struct rcu_head rcu;
	unsigned long last_drop_val;
};

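/*
 * Generic netlink family "NET_DM".  Userspace monitors (the dropwatch
 * utility, for example) resolve this family by name and listen on the
 * NET_DM_GRP_ALERT multicast group for NET_DM_CMD_ALERT messages.
 */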
static struct genl_family net_drop_monitor_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = 0,
	.name = "NET_DM",
	.version = 2,
	.maxattr = NET_DM_CMD_MAX,
};

static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);

static int dm_hit_limit = 64;	/* max drop points per alert message */
static int dm_delay = 1;	/* hysteresis delay, in seconds */
static unsigned long dm_hw_check_delta = 2*HZ;
static LIST_HEAD(hw_stats_list);

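/*
 * Allocate a fresh, pre-sized alert skb and publish it via RCU as this
 * CPU's current buffer.  On allocation failure the alert work is
 * rescheduled so the swap can be retried the next time the work runs.
 */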
static void reset_per_cpu_data(struct per_cpu_dm_data *data)
{
	size_t al;
	struct net_dm_alert_msg *msg;
	struct nlattr *nla;
	struct sk_buff *skb;
	struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1);

	al = sizeof(struct net_dm_alert_msg);
	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
	al += sizeof(struct nlattr);

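	/*
	 * Roughly, the payload just sized above is laid out as:
	 *
	 *   struct nlattr             (NLA_UNSPEC header)
	 *   struct net_dm_alert_msg   (entries counter)
	 *   struct net_dm_drop_point[dm_hit_limit]
	 *
	 * genlmsg_new() adds room for the netlink and genetlink headers
	 * on top of this.
	 */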
	skb = genlmsg_new(al, GFP_KERNEL);

	if (skb) {
		genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
				0, NET_DM_CMD_ALERT);
		nla = nla_reserve(skb, NLA_UNSPEC,
				  sizeof(struct net_dm_alert_msg));
		msg = nla_data(nla);
		memset(msg, 0, al);
	} else
		schedule_work_on(data->cpu, &data->dm_alert_work);

	/*
	 * Don't need to lock this, since we are guaranteed to only
	 * run this on a single cpu at a time.
	 * Note also that we only update data->skb if the old and new skb
	 * pointers don't match. This ensures that we don't continually call
	 * synchronize_rcu if we repeatedly fail to alloc a new netlink message.
	 */
	if (skb != oskb) {
		rcu_assign_pointer(data->skb, skb);

		synchronize_rcu();

		atomic_set(&data->dm_hit_count, dm_hit_limit);
	}
}

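/*
 * Work handler: detach this CPU's current alert skb, install a fresh
 * one via reset_per_cpu_data(), and multicast the old buffer to any
 * listeners on the alert group.
 */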
static void send_dm_alert(struct work_struct *unused)
{
	struct sk_buff *skb;
	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);

	WARN_ON_ONCE(data->cpu != smp_processor_id());

	/*
	 * Grab the skb we're about to send
	 */
	skb = rcu_dereference_protected(data->skb, 1);

	/*
	 * Replace it with a new one
	 */
	reset_per_cpu_data(data);

	/*
	 * Ship it!
	 */
	if (skb)
		genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);

	put_cpu_var(dm_cpu_data);
}

/*
 * This is the timer function to delay the sending of an alert
 * in the event that more drops will arrive during the
 * hysteresis period. Note that it operates under the timer interrupt,
 * so we don't need to disable preemption here.
 */
static void sched_send_work(unsigned long unused)
{
	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);

	schedule_work_on(smp_processor_id(), &data->dm_alert_work);

	put_cpu_var(dm_cpu_data);
}

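/*
 * Record one drop: decrement this CPU's hit budget, then either bump
 * the count on an existing drop point (matched by program counter) or
 * append a new one, and arm the hysteresis timer if it isn't already
 * pending.
 */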
static void trace_drop_common(struct sk_buff *skb, void *location)
{
	struct net_dm_alert_msg *msg;
	struct nlmsghdr *nlh;
	struct nlattr *nla;
	int i;
	struct sk_buff *dskb;
	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);

	rcu_read_lock();
	dskb = rcu_dereference(data->skb);

	if (!dskb)
		goto out;

	if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
		/*
		 * we're already at zero, discard this hit
		 */
		goto out;
	}

	nlh = (struct nlmsghdr *)dskb->data;
	nla = genlmsg_data(nlmsg_data(nlh));
	msg = nla_data(nla);
	for (i = 0; i < msg->entries; i++) {
		if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
			msg->points[i].count++;
			atomic_inc(&data->dm_hit_count);
			goto out;
		}
	}

	/*
	 * We need to create a new entry
	 */
	__nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
	nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
	memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
	msg->points[msg->entries].count = 1;
	msg->entries++;

	if (!timer_pending(&data->send_timer)) {
		data->send_timer.expires = jiffies + dm_delay * HZ;
		add_timer_on(&data->send_timer, smp_processor_id());
	}

out:
	rcu_read_unlock();
	put_cpu_var(dm_cpu_data);
}

static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
{
	trace_drop_common(skb, location);
}

static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
{
	struct dm_hw_stat_delta *new_stat;

	/*
	 * Don't check napi structures with no associated device
	 */
	if (!napi->dev)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
		/*
		 * only add a note to our monitor buffer if:
		 * 1) this is the dev we received on
		 * 2) it's after the last_rx delta
		 * 3) our rx_dropped count has gone up
		 */
		if ((new_stat->dev == napi->dev) &&
		    (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
			trace_drop_common(NULL, NULL);
			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
			new_stat->last_rx = jiffies;
			break;
		}
	}
	rcu_read_unlock();
}

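/*
 * Flip the global trace state: (un)register the kfree_skb and
 * napi_poll tracepoint probes, and on TRACE_OFF reap any stat-delta
 * entries whose device has already gone away.
 */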
static int set_all_monitor_traces(int state)
{
	int rc = 0;
	struct dm_hw_stat_delta *new_stat = NULL;
	struct dm_hw_stat_delta *temp;

	mutex_lock(&trace_state_mutex);

	if (state == trace_state) {
		rc = -EAGAIN;
		goto out_unlock;
	}

	switch (state) {
	case TRACE_ON:
		rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
		rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
		break;
	case TRACE_OFF:
		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
		rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);

		tracepoint_synchronize_unregister();

		/*
		 * Clean the device list
		 */
		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
			if (new_stat->dev == NULL) {
				list_del_rcu(&new_stat->list);
				kfree_rcu(new_stat, rcu);
			}
		}
		break;
	default:
		rc = 1;
		break;
	}

	if (!rc)
		trace_state = state;
	else
		rc = -EINPROGRESS;

out_unlock:
	mutex_unlock(&trace_state_mutex);

	return rc;
}

static int net_dm_cmd_config(struct sk_buff *skb,
			struct genl_info *info)
{
	return -ENOTSUPP;
}

static int net_dm_cmd_trace(struct sk_buff *skb,
			struct genl_info *info)
{
	switch (info->genlhdr->cmd) {
	case NET_DM_CMD_START:
		return set_all_monitor_traces(TRACE_ON);
	case NET_DM_CMD_STOP:
		return set_all_monitor_traces(TRACE_OFF);
	}

	return -ENOTSUPP;
}

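/*
 * Netdevice notifier: keep one stat-delta entry per registered device
 * so trace_napi_poll_hit() can watch rx_dropped, and detach (or free)
 * the entry when the device unregisters.
 */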
static int dropmon_net_event(struct notifier_block *ev_block,
			unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct dm_hw_stat_delta *new_stat = NULL;
	struct dm_hw_stat_delta *tmp;

	switch (event) {
	case NETDEV_REGISTER:
		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);

		if (!new_stat)
			goto out;

		new_stat->dev = dev;
		new_stat->last_rx = jiffies;
		mutex_lock(&trace_state_mutex);
		list_add_rcu(&new_stat->list, &hw_stats_list);
		mutex_unlock(&trace_state_mutex);
		break;
	case NETDEV_UNREGISTER:
		mutex_lock(&trace_state_mutex);
		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
			if (new_stat->dev == dev) {
				new_stat->dev = NULL;
				if (trace_state == TRACE_OFF) {
					list_del_rcu(&new_stat->list);
					kfree_rcu(new_stat, rcu);
					break;
				}
			}
		}
		mutex_unlock(&trace_state_mutex);
		break;
	}
out:
	return NOTIFY_DONE;
}

static struct genl_ops dropmon_ops[] = {
	{
		.cmd = NET_DM_CMD_CONFIG,
		.doit = net_dm_cmd_config,
	},
	{
		.cmd = NET_DM_CMD_START,
		.doit = net_dm_cmd_trace,
	},
	{
		.cmd = NET_DM_CMD_STOP,
		.doit = net_dm_cmd_trace,
	},
};

static struct notifier_block dropmon_net_notifier = {
	.notifier_call = dropmon_net_event
};

static int __init init_net_drop_monitor(void)
{
	struct per_cpu_dm_data *data;
	int cpu, rc;

	pr_info("Initializing network drop monitor service\n");

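	/*
	 * Drop points store the program counter in an 8-byte pc[] field
	 * (struct net_dm_drop_point), so bail out if pointers on this
	 * arch would not fit.
	 */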
	if (sizeof(void *) > 8) {
		pr_err("Unable to store program counters on this arch, Drop monitor failed\n");
		return -ENOSPC;
	}

	rc = genl_register_family_with_ops(&net_drop_monitor_family,
					   dropmon_ops,
					   ARRAY_SIZE(dropmon_ops));
	if (rc) {
		pr_err("Could not create drop monitor netlink family\n");
		return rc;
	}

	rc = register_netdevice_notifier(&dropmon_net_notifier);
	if (rc < 0) {
		pr_crit("Failed to register netdevice notifier\n");
		goto out_unreg;
	}

	rc = 0;

	for_each_present_cpu(cpu) {
		data = &per_cpu(dm_cpu_data, cpu);
		data->cpu = cpu;
		INIT_WORK(&data->dm_alert_work, send_dm_alert);
		init_timer(&data->send_timer);
		data->send_timer.data = cpu;
		data->send_timer.function = sched_send_work;
		reset_per_cpu_data(data);
	}

	goto out;

out_unreg:
	genl_unregister_family(&net_drop_monitor_family);
out:
	return rc;
}

late_initcall(init_net_drop_monitor);
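
/*
 * For reference: a userspace monitor resolves the "NET_DM" family by
 * name and toggles tracing with NET_DM_CMD_START/NET_DM_CMD_STOP.  A
 * minimal sketch using libnl-genl (assumed available; the dropwatch
 * tool implements the full protocol) might look like:
 *
 *	struct nl_sock *sk = nl_socket_alloc();
 *	genl_connect(sk);
 *	int family = genl_ctrl_resolve(sk, "NET_DM");
 *	genl_send_simple(sk, family, NET_DM_CMD_START, 2, NLM_F_REQUEST);
 *
 * Alerts then arrive as NET_DM_CMD_ALERT multicasts on NET_DM_GRP_ALERT.
 */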