]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * net/core/netprio_cgroup.c Priority Control Group | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version | |
7 | * 2 of the License, or (at your option) any later version. | |
8 | * | |
9 | * Authors: Neil Horman <nhorman@tuxdriver.com> | |
10 | */ | |
11 | ||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
13 | ||
14 | #include <linux/module.h> | |
15 | #include <linux/slab.h> | |
16 | #include <linux/types.h> | |
17 | #include <linux/string.h> | |
18 | #include <linux/errno.h> | |
19 | #include <linux/skbuff.h> | |
20 | #include <linux/cgroup.h> | |
21 | #include <linux/rcupdate.h> | |
22 | #include <linux/atomic.h> | |
23 | #include <net/rtnetlink.h> | |
24 | #include <net/pkt_cls.h> | |
25 | #include <net/sock.h> | |
26 | #include <net/netprio_cgroup.h> | |
27 | ||
28 | #include <linux/fdtable.h> | |
29 | ||
/*
 * Starting allocation size, in bytes, used when sizing a device's priomap.
 * extend_netdev_table() doubles from this value until the map is big enough.
 */
#define PRIOMAP_MIN_SZ		128
31 | ||
32 | /* | |
33 | * Extend @dev->priomap so that it's large enough to accommodate | |
34 | * @target_idx. @dev->priomap.priomap_len > @target_idx after successful | |
35 | * return. Must be called under rtnl lock. | |
36 | */ | |
37 | static int extend_netdev_table(struct net_device *dev, u32 target_idx) | |
38 | { | |
39 | struct netprio_map *old, *new; | |
40 | size_t new_sz, new_len; | |
41 | ||
42 | /* is the existing priomap large enough? */ | |
43 | old = rtnl_dereference(dev->priomap); | |
44 | if (old && old->priomap_len > target_idx) | |
45 | return 0; | |
46 | ||
47 | /* | |
48 | * Determine the new size. Let's keep it power-of-two. We start | |
49 | * from PRIOMAP_MIN_SZ and double it until it's large enough to | |
50 | * accommodate @target_idx. | |
51 | */ | |
52 | new_sz = PRIOMAP_MIN_SZ; | |
53 | while (true) { | |
54 | new_len = (new_sz - offsetof(struct netprio_map, priomap)) / | |
55 | sizeof(new->priomap[0]); | |
56 | if (new_len > target_idx) | |
57 | break; | |
58 | new_sz *= 2; | |
59 | /* overflowed? */ | |
60 | if (WARN_ON(new_sz < PRIOMAP_MIN_SZ)) | |
61 | return -ENOSPC; | |
62 | } | |
63 | ||
64 | /* allocate & copy */ | |
65 | new = kzalloc(new_sz, GFP_KERNEL); | |
66 | if (!new) | |
67 | return -ENOMEM; | |
68 | ||
69 | if (old) | |
70 | memcpy(new->priomap, old->priomap, | |
71 | old->priomap_len * sizeof(old->priomap[0])); | |
72 | ||
73 | new->priomap_len = new_len; | |
74 | ||
75 | /* install the new priomap */ | |
76 | rcu_assign_pointer(dev->priomap, new); | |
77 | if (old) | |
78 | kfree_rcu(old, rcu); | |
79 | return 0; | |
80 | } | |
81 | ||
82 | /** | |
83 | * netprio_prio - return the effective netprio of a cgroup-net_device pair | |
84 | * @css: css part of the target pair | |
85 | * @dev: net_device part of the target pair | |
86 | * | |
87 | * Should be called under RCU read or rtnl lock. | |
88 | */ | |
89 | static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev) | |
90 | { | |
91 | struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); | |
92 | int id = css->cgroup->id; | |
93 | ||
94 | if (map && id < map->priomap_len) | |
95 | return map->priomap[id]; | |
96 | return 0; | |
97 | } | |
98 | ||
99 | /** | |
100 | * netprio_set_prio - set netprio on a cgroup-net_device pair | |
101 | * @css: css part of the target pair | |
102 | * @dev: net_device part of the target pair | |
103 | * @prio: prio to set | |
104 | * | |
105 | * Set netprio to @prio on @css-@dev pair. Should be called under rtnl | |
106 | * lock and may fail under memory pressure for non-zero @prio. | |
107 | */ | |
108 | static int netprio_set_prio(struct cgroup_subsys_state *css, | |
109 | struct net_device *dev, u32 prio) | |
110 | { | |
111 | struct netprio_map *map; | |
112 | int id = css->cgroup->id; | |
113 | int ret; | |
114 | ||
115 | /* avoid extending priomap for zero writes */ | |
116 | map = rtnl_dereference(dev->priomap); | |
117 | if (!prio && (!map || map->priomap_len <= id)) | |
118 | return 0; | |
119 | ||
120 | ret = extend_netdev_table(dev, id); | |
121 | if (ret) | |
122 | return ret; | |
123 | ||
124 | map = rtnl_dereference(dev->priomap); | |
125 | map->priomap[id] = prio; | |
126 | return 0; | |
127 | } | |
128 | ||
129 | static struct cgroup_subsys_state * | |
130 | cgrp_css_alloc(struct cgroup_subsys_state *parent_css) | |
131 | { | |
132 | struct cgroup_subsys_state *css; | |
133 | ||
134 | css = kzalloc(sizeof(*css), GFP_KERNEL); | |
135 | if (!css) | |
136 | return ERR_PTR(-ENOMEM); | |
137 | ||
138 | return css; | |
139 | } | |
140 | ||
141 | static int cgrp_css_online(struct cgroup_subsys_state *css) | |
142 | { | |
143 | struct cgroup_subsys_state *parent_css = css->parent; | |
144 | struct net_device *dev; | |
145 | int ret = 0; | |
146 | ||
147 | if (!parent_css) | |
148 | return 0; | |
149 | ||
150 | rtnl_lock(); | |
151 | /* | |
152 | * Inherit prios from the parent. As all prios are set during | |
153 | * onlining, there is no need to clear them on offline. | |
154 | */ | |
155 | for_each_netdev(&init_net, dev) { | |
156 | u32 prio = netprio_prio(parent_css, dev); | |
157 | ||
158 | ret = netprio_set_prio(css, dev, prio); | |
159 | if (ret) | |
160 | break; | |
161 | } | |
162 | rtnl_unlock(); | |
163 | return ret; | |
164 | } | |
165 | ||
/* Free the state that cgrp_css_alloc() obtained with kzalloc(). */
static void cgrp_css_free(struct cgroup_subsys_state *css)
{
	kfree(css);
}
170 | ||
171 | static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft) | |
172 | { | |
173 | return css->cgroup->id; | |
174 | } | |
175 | ||
176 | static int read_priomap(struct seq_file *sf, void *v) | |
177 | { | |
178 | struct net_device *dev; | |
179 | ||
180 | rcu_read_lock(); | |
181 | for_each_netdev_rcu(&init_net, dev) | |
182 | seq_printf(sf, "%s %u\n", dev->name, | |
183 | netprio_prio(seq_css(sf), dev)); | |
184 | rcu_read_unlock(); | |
185 | return 0; | |
186 | } | |
187 | ||
188 | static ssize_t write_priomap(struct kernfs_open_file *of, | |
189 | char *buf, size_t nbytes, loff_t off) | |
190 | { | |
191 | char devname[IFNAMSIZ + 1]; | |
192 | struct net_device *dev; | |
193 | u32 prio; | |
194 | int ret; | |
195 | ||
196 | if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) | |
197 | return -EINVAL; | |
198 | ||
199 | dev = dev_get_by_name(&init_net, devname); | |
200 | if (!dev) | |
201 | return -ENODEV; | |
202 | ||
203 | rtnl_lock(); | |
204 | ||
205 | ret = netprio_set_prio(of_css(of), dev, prio); | |
206 | ||
207 | rtnl_unlock(); | |
208 | dev_put(dev); | |
209 | return ret ?: nbytes; | |
210 | } | |
211 | ||
212 | static int update_netprio(const void *v, struct file *file, unsigned n) | |
213 | { | |
214 | int err; | |
215 | struct socket *sock = sock_from_file(file, &err); | |
216 | if (sock) | |
217 | sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; | |
218 | return 0; | |
219 | } | |
220 | ||
221 | static void net_prio_attach(struct cgroup_subsys_state *css, | |
222 | struct cgroup_taskset *tset) | |
223 | { | |
224 | struct task_struct *p; | |
225 | void *v = (void *)(unsigned long)css->cgroup->id; | |
226 | ||
227 | cgroup_taskset_for_each(p, tset) { | |
228 | task_lock(p); | |
229 | iterate_fd(p->files, 0, update_netprio, v); | |
230 | task_unlock(p); | |
231 | } | |
232 | } | |
233 | ||
234 | static struct cftype ss_files[] = { | |
235 | { | |
236 | .name = "prioidx", | |
237 | .read_u64 = read_prioidx, | |
238 | }, | |
239 | { | |
240 | .name = "ifpriomap", | |
241 | .seq_show = read_priomap, | |
242 | .write = write_priomap, | |
243 | }, | |
244 | { } /* terminate */ | |
245 | }; | |
246 | ||
247 | struct cgroup_subsys net_prio_cgrp_subsys = { | |
248 | .css_alloc = cgrp_css_alloc, | |
249 | .css_online = cgrp_css_online, | |
250 | .css_free = cgrp_css_free, | |
251 | .attach = net_prio_attach, | |
252 | .legacy_cftypes = ss_files, | |
253 | }; | |
254 | ||
255 | static int netprio_device_event(struct notifier_block *unused, | |
256 | unsigned long event, void *ptr) | |
257 | { | |
258 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); | |
259 | struct netprio_map *old; | |
260 | ||
261 | /* | |
262 | * Note this is called with rtnl_lock held so we have update side | |
263 | * protection on our rcu assignments | |
264 | */ | |
265 | ||
266 | switch (event) { | |
267 | case NETDEV_UNREGISTER: | |
268 | old = rtnl_dereference(dev->priomap); | |
269 | RCU_INIT_POINTER(dev->priomap, NULL); | |
270 | if (old) | |
271 | kfree_rcu(old, rcu); | |
272 | break; | |
273 | } | |
274 | return NOTIFY_DONE; | |
275 | } | |
276 | ||
277 | static struct notifier_block netprio_device_notifier = { | |
278 | .notifier_call = netprio_device_event | |
279 | }; | |
280 | ||
281 | static int __init init_cgroup_netprio(void) | |
282 | { | |
283 | register_netdevice_notifier(&netprio_device_notifier); | |
284 | return 0; | |
285 | } | |
286 | ||
287 | subsys_initcall(init_cgroup_netprio); | |
288 | MODULE_LICENSE("GPL v2"); |