]>
Commit | Line | Data |
---|---|---|
0189197f EB |
1 | #include <linux/types.h> |
2 | #include <linux/skbuff.h> | |
3 | #include <linux/socket.h> | |
4 | #include <linux/net.h> | |
5 | #include <linux/module.h> | |
6 | #include <linux/if_arp.h> | |
7 | #include <linux/ipv6.h> | |
8 | #include <linux/mpls.h> | |
9 | #include <net/ip.h> | |
10 | #include <net/dst.h> | |
11 | #include <net/sock.h> | |
12 | #include <net/arp.h> | |
13 | #include <net/ip_fib.h> | |
14 | #include <net/netevent.h> | |
15 | #include <net/netns/generic.h> | |
16 | #include "internal.h" | |
17 | ||
18 | #define MAX_NEW_LABELS 2 | |
19 | ||
20 | /* This maximum ha length copied from the definition of struct neighbour */ | |
21 | #define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))) | |
22 | ||
23 | struct mpls_route { /* next hop label forwarding entry */ | |
24 | struct net_device *rt_dev; | |
25 | struct rcu_head rt_rcu; | |
26 | u32 rt_label[MAX_NEW_LABELS]; | |
27 | u8 rt_protocol; /* routing protocol that set this entry */ | |
28 | u8 rt_labels:2, | |
29 | rt_via_alen:6; | |
30 | unsigned short rt_via_family; | |
31 | u8 rt_via[0]; | |
32 | }; | |
33 | ||
34 | static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) | |
35 | { | |
36 | struct mpls_route *rt = NULL; | |
37 | ||
38 | if (index < net->mpls.platform_labels) { | |
39 | struct mpls_route __rcu **platform_label = | |
40 | rcu_dereference(net->mpls.platform_label); | |
41 | rt = rcu_dereference(platform_label[index]); | |
42 | } | |
43 | return rt; | |
44 | } | |
45 | ||
46 | static bool mpls_output_possible(const struct net_device *dev) | |
47 | { | |
48 | return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); | |
49 | } | |
50 | ||
51 | static unsigned int mpls_rt_header_size(const struct mpls_route *rt) | |
52 | { | |
53 | /* The size of the layer 2.5 labels to be added for this route */ | |
54 | return rt->rt_labels * sizeof(struct mpls_shim_hdr); | |
55 | } | |
56 | ||
57 | static unsigned int mpls_dev_mtu(const struct net_device *dev) | |
58 | { | |
59 | /* The amount of data the layer 2 frame can hold */ | |
60 | return dev->mtu; | |
61 | } | |
62 | ||
63 | static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) | |
64 | { | |
65 | if (skb->len <= mtu) | |
66 | return false; | |
67 | ||
68 | if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) | |
69 | return false; | |
70 | ||
71 | return true; | |
72 | } | |
73 | ||
74 | static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, | |
75 | struct mpls_entry_decoded dec) | |
76 | { | |
77 | /* RFC4385 and RFC5586 encode other packets in mpls such that | |
78 | * they don't conflict with the ip version number, making | |
79 | * decoding by examining the ip version correct in everything | |
80 | * except for the strangest cases. | |
81 | * | |
82 | * The strange cases if we choose to support them will require | |
83 | * manual configuration. | |
84 | */ | |
85 | struct iphdr *hdr4 = ip_hdr(skb); | |
86 | bool success = true; | |
87 | ||
88 | if (hdr4->version == 4) { | |
89 | skb->protocol = htons(ETH_P_IP); | |
90 | csum_replace2(&hdr4->check, | |
91 | htons(hdr4->ttl << 8), | |
92 | htons(dec.ttl << 8)); | |
93 | hdr4->ttl = dec.ttl; | |
94 | } | |
95 | else if (hdr4->version == 6) { | |
96 | struct ipv6hdr *hdr6 = ipv6_hdr(skb); | |
97 | skb->protocol = htons(ETH_P_IPV6); | |
98 | hdr6->hop_limit = dec.ttl; | |
99 | } | |
100 | else | |
101 | /* version 0 and version 1 are used by pseudo wires */ | |
102 | success = false; | |
103 | return success; | |
104 | } | |
105 | ||
106 | static int mpls_forward(struct sk_buff *skb, struct net_device *dev, | |
107 | struct packet_type *pt, struct net_device *orig_dev) | |
108 | { | |
109 | struct net *net = dev_net(dev); | |
110 | struct mpls_shim_hdr *hdr; | |
111 | struct mpls_route *rt; | |
112 | struct mpls_entry_decoded dec; | |
113 | struct net_device *out_dev; | |
114 | unsigned int hh_len; | |
115 | unsigned int new_header_size; | |
116 | unsigned int mtu; | |
117 | int err; | |
118 | ||
119 | /* Careful this entire function runs inside of an rcu critical section */ | |
120 | ||
121 | if (skb->pkt_type != PACKET_HOST) | |
122 | goto drop; | |
123 | ||
124 | if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) | |
125 | goto drop; | |
126 | ||
127 | if (!pskb_may_pull(skb, sizeof(*hdr))) | |
128 | goto drop; | |
129 | ||
130 | /* Read and decode the label */ | |
131 | hdr = mpls_hdr(skb); | |
132 | dec = mpls_entry_decode(hdr); | |
133 | ||
134 | /* Pop the label */ | |
135 | skb_pull(skb, sizeof(*hdr)); | |
136 | skb_reset_network_header(skb); | |
137 | ||
138 | skb_orphan(skb); | |
139 | ||
140 | rt = mpls_route_input_rcu(net, dec.label); | |
141 | if (!rt) | |
142 | goto drop; | |
143 | ||
144 | /* Find the output device */ | |
145 | out_dev = rt->rt_dev; | |
146 | if (!mpls_output_possible(out_dev)) | |
147 | goto drop; | |
148 | ||
149 | if (skb_warn_if_lro(skb)) | |
150 | goto drop; | |
151 | ||
152 | skb_forward_csum(skb); | |
153 | ||
154 | /* Verify ttl is valid */ | |
155 | if (dec.ttl <= 2) | |
156 | goto drop; | |
157 | dec.ttl -= 1; | |
158 | ||
159 | /* Verify the destination can hold the packet */ | |
160 | new_header_size = mpls_rt_header_size(rt); | |
161 | mtu = mpls_dev_mtu(out_dev); | |
162 | if (mpls_pkt_too_big(skb, mtu - new_header_size)) | |
163 | goto drop; | |
164 | ||
165 | hh_len = LL_RESERVED_SPACE(out_dev); | |
166 | if (!out_dev->header_ops) | |
167 | hh_len = 0; | |
168 | ||
169 | /* Ensure there is enough space for the headers in the skb */ | |
170 | if (skb_cow(skb, hh_len + new_header_size)) | |
171 | goto drop; | |
172 | ||
173 | skb->dev = out_dev; | |
174 | skb->protocol = htons(ETH_P_MPLS_UC); | |
175 | ||
176 | if (unlikely(!new_header_size && dec.bos)) { | |
177 | /* Penultimate hop popping */ | |
178 | if (!mpls_egress(rt, skb, dec)) | |
179 | goto drop; | |
180 | } else { | |
181 | bool bos; | |
182 | int i; | |
183 | skb_push(skb, new_header_size); | |
184 | skb_reset_network_header(skb); | |
185 | /* Push the new labels */ | |
186 | hdr = mpls_hdr(skb); | |
187 | bos = dec.bos; | |
188 | for (i = rt->rt_labels - 1; i >= 0; i--) { | |
189 | hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos); | |
190 | bos = false; | |
191 | } | |
192 | } | |
193 | ||
194 | err = neigh_xmit(rt->rt_via_family, out_dev, rt->rt_via, skb); | |
195 | if (err) | |
196 | net_dbg_ratelimited("%s: packet transmission failed: %d\n", | |
197 | __func__, err); | |
198 | return 0; | |
199 | ||
200 | drop: | |
201 | kfree_skb(skb); | |
202 | return NET_RX_DROP; | |
203 | } | |
204 | ||
205 | static struct packet_type mpls_packet_type __read_mostly = { | |
206 | .type = cpu_to_be16(ETH_P_MPLS_UC), | |
207 | .func = mpls_forward, | |
208 | }; | |
209 | ||
210 | static struct mpls_route *mpls_rt_alloc(size_t alen) | |
211 | { | |
212 | struct mpls_route *rt; | |
213 | ||
214 | rt = kzalloc(GFP_KERNEL, sizeof(*rt) + alen); | |
215 | if (rt) | |
216 | rt->rt_via_alen = alen; | |
217 | return rt; | |
218 | } | |
219 | ||
220 | static void mpls_rt_free(struct mpls_route *rt) | |
221 | { | |
222 | if (rt) | |
223 | kfree_rcu(rt, rt_rcu); | |
224 | } | |
225 | ||
226 | static void mpls_route_update(struct net *net, unsigned index, | |
227 | struct net_device *dev, struct mpls_route *new, | |
228 | const struct nl_info *info) | |
229 | { | |
230 | struct mpls_route *rt, *old = NULL; | |
231 | ||
232 | ASSERT_RTNL(); | |
233 | ||
234 | rt = net->mpls.platform_label[index]; | |
235 | if (!dev || (rt && (rt->rt_dev == dev))) { | |
236 | rcu_assign_pointer(net->mpls.platform_label[index], new); | |
237 | old = rt; | |
238 | } | |
239 | ||
240 | /* If we removed a route free it now */ | |
241 | mpls_rt_free(old); | |
242 | } | |
243 | ||
244 | static void mpls_ifdown(struct net_device *dev) | |
245 | { | |
246 | struct net *net = dev_net(dev); | |
247 | unsigned index; | |
248 | ||
249 | for (index = 0; index < net->mpls.platform_labels; index++) { | |
250 | struct mpls_route *rt = net->mpls.platform_label[index]; | |
251 | if (!rt) | |
252 | continue; | |
253 | if (rt->rt_dev != dev) | |
254 | continue; | |
255 | rt->rt_dev = NULL; | |
256 | } | |
257 | } | |
258 | ||
259 | static int mpls_dev_notify(struct notifier_block *this, unsigned long event, | |
260 | void *ptr) | |
261 | { | |
262 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); | |
263 | ||
264 | switch(event) { | |
265 | case NETDEV_UNREGISTER: | |
266 | mpls_ifdown(dev); | |
267 | break; | |
268 | } | |
269 | return NOTIFY_OK; | |
270 | } | |
271 | ||
272 | static struct notifier_block mpls_dev_notifier = { | |
273 | .notifier_call = mpls_dev_notify, | |
274 | }; | |
275 | ||
276 | static int mpls_net_init(struct net *net) | |
277 | { | |
278 | net->mpls.platform_labels = 0; | |
279 | net->mpls.platform_label = NULL; | |
280 | ||
281 | return 0; | |
282 | } | |
283 | ||
284 | static void mpls_net_exit(struct net *net) | |
285 | { | |
286 | unsigned int index; | |
287 | ||
288 | /* An rcu grace period haselapsed since there was a device in | |
289 | * the network namespace (and thus the last in fqlight packet) | |
290 | * left this network namespace. This is because | |
291 | * unregister_netdevice_many and netdev_run_todo has completed | |
292 | * for each network device that was in this network namespace. | |
293 | * | |
294 | * As such no additional rcu synchronization is necessary when | |
295 | * freeing the platform_label table. | |
296 | */ | |
297 | rtnl_lock(); | |
298 | for (index = 0; index < net->mpls.platform_labels; index++) { | |
299 | struct mpls_route *rt = net->mpls.platform_label[index]; | |
300 | rcu_assign_pointer(net->mpls.platform_label[index], NULL); | |
301 | mpls_rt_free(rt); | |
302 | } | |
303 | rtnl_unlock(); | |
304 | ||
305 | kvfree(net->mpls.platform_label); | |
306 | } | |
307 | ||
308 | static struct pernet_operations mpls_net_ops = { | |
309 | .init = mpls_net_init, | |
310 | .exit = mpls_net_exit, | |
311 | }; | |
312 | ||
313 | static int __init mpls_init(void) | |
314 | { | |
315 | int err; | |
316 | ||
317 | BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4); | |
318 | ||
319 | err = register_pernet_subsys(&mpls_net_ops); | |
320 | if (err) | |
321 | goto out; | |
322 | ||
323 | err = register_netdevice_notifier(&mpls_dev_notifier); | |
324 | if (err) | |
325 | goto out_unregister_pernet; | |
326 | ||
327 | dev_add_pack(&mpls_packet_type); | |
328 | ||
329 | err = 0; | |
330 | out: | |
331 | return err; | |
332 | ||
333 | out_unregister_pernet: | |
334 | unregister_pernet_subsys(&mpls_net_ops); | |
335 | goto out; | |
336 | } | |
337 | module_init(mpls_init); | |
338 | ||
339 | static void __exit mpls_exit(void) | |
340 | { | |
341 | dev_remove_pack(&mpls_packet_type); | |
342 | unregister_netdevice_notifier(&mpls_dev_notifier); | |
343 | unregister_pernet_subsys(&mpls_net_ops); | |
344 | } | |
345 | module_exit(mpls_exit); | |
346 | ||
347 | MODULE_DESCRIPTION("MultiProtocol Label Switching"); | |
348 | MODULE_LICENSE("GPL v2"); | |
349 | MODULE_ALIAS_NETPROTO(PF_MPLS); |