]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/mpls/af_mpls.c
mpls: Basic routing support
[mirror_ubuntu-bionic-kernel.git] / net / mpls / af_mpls.c
CommitLineData
0189197f
EB
1#include <linux/types.h>
2#include <linux/skbuff.h>
3#include <linux/socket.h>
4#include <linux/net.h>
5#include <linux/module.h>
6#include <linux/if_arp.h>
7#include <linux/ipv6.h>
8#include <linux/mpls.h>
9#include <net/ip.h>
10#include <net/dst.h>
11#include <net/sock.h>
12#include <net/arp.h>
13#include <net/ip_fib.h>
14#include <net/netevent.h>
15#include <net/netns/generic.h>
16#include "internal.h"
17
18#define MAX_NEW_LABELS 2
19
/* This maximum hardware address length is copied from the definition of struct neighbour */
21#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
22
23struct mpls_route { /* next hop label forwarding entry */
24 struct net_device *rt_dev;
25 struct rcu_head rt_rcu;
26 u32 rt_label[MAX_NEW_LABELS];
27 u8 rt_protocol; /* routing protocol that set this entry */
28 u8 rt_labels:2,
29 rt_via_alen:6;
30 unsigned short rt_via_family;
31 u8 rt_via[0];
32};
33
34static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
35{
36 struct mpls_route *rt = NULL;
37
38 if (index < net->mpls.platform_labels) {
39 struct mpls_route __rcu **platform_label =
40 rcu_dereference(net->mpls.platform_label);
41 rt = rcu_dereference(platform_label[index]);
42 }
43 return rt;
44}
45
46static bool mpls_output_possible(const struct net_device *dev)
47{
48 return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
49}
50
51static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
52{
53 /* The size of the layer 2.5 labels to be added for this route */
54 return rt->rt_labels * sizeof(struct mpls_shim_hdr);
55}
56
57static unsigned int mpls_dev_mtu(const struct net_device *dev)
58{
59 /* The amount of data the layer 2 frame can hold */
60 return dev->mtu;
61}
62
63static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
64{
65 if (skb->len <= mtu)
66 return false;
67
68 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
69 return false;
70
71 return true;
72}
73
74static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
75 struct mpls_entry_decoded dec)
76{
77 /* RFC4385 and RFC5586 encode other packets in mpls such that
78 * they don't conflict with the ip version number, making
79 * decoding by examining the ip version correct in everything
80 * except for the strangest cases.
81 *
82 * The strange cases if we choose to support them will require
83 * manual configuration.
84 */
85 struct iphdr *hdr4 = ip_hdr(skb);
86 bool success = true;
87
88 if (hdr4->version == 4) {
89 skb->protocol = htons(ETH_P_IP);
90 csum_replace2(&hdr4->check,
91 htons(hdr4->ttl << 8),
92 htons(dec.ttl << 8));
93 hdr4->ttl = dec.ttl;
94 }
95 else if (hdr4->version == 6) {
96 struct ipv6hdr *hdr6 = ipv6_hdr(skb);
97 skb->protocol = htons(ETH_P_IPV6);
98 hdr6->hop_limit = dec.ttl;
99 }
100 else
101 /* version 0 and version 1 are used by pseudo wires */
102 success = false;
103 return success;
104}
105
106static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
107 struct packet_type *pt, struct net_device *orig_dev)
108{
109 struct net *net = dev_net(dev);
110 struct mpls_shim_hdr *hdr;
111 struct mpls_route *rt;
112 struct mpls_entry_decoded dec;
113 struct net_device *out_dev;
114 unsigned int hh_len;
115 unsigned int new_header_size;
116 unsigned int mtu;
117 int err;
118
119 /* Careful this entire function runs inside of an rcu critical section */
120
121 if (skb->pkt_type != PACKET_HOST)
122 goto drop;
123
124 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
125 goto drop;
126
127 if (!pskb_may_pull(skb, sizeof(*hdr)))
128 goto drop;
129
130 /* Read and decode the label */
131 hdr = mpls_hdr(skb);
132 dec = mpls_entry_decode(hdr);
133
134 /* Pop the label */
135 skb_pull(skb, sizeof(*hdr));
136 skb_reset_network_header(skb);
137
138 skb_orphan(skb);
139
140 rt = mpls_route_input_rcu(net, dec.label);
141 if (!rt)
142 goto drop;
143
144 /* Find the output device */
145 out_dev = rt->rt_dev;
146 if (!mpls_output_possible(out_dev))
147 goto drop;
148
149 if (skb_warn_if_lro(skb))
150 goto drop;
151
152 skb_forward_csum(skb);
153
154 /* Verify ttl is valid */
155 if (dec.ttl <= 2)
156 goto drop;
157 dec.ttl -= 1;
158
159 /* Verify the destination can hold the packet */
160 new_header_size = mpls_rt_header_size(rt);
161 mtu = mpls_dev_mtu(out_dev);
162 if (mpls_pkt_too_big(skb, mtu - new_header_size))
163 goto drop;
164
165 hh_len = LL_RESERVED_SPACE(out_dev);
166 if (!out_dev->header_ops)
167 hh_len = 0;
168
169 /* Ensure there is enough space for the headers in the skb */
170 if (skb_cow(skb, hh_len + new_header_size))
171 goto drop;
172
173 skb->dev = out_dev;
174 skb->protocol = htons(ETH_P_MPLS_UC);
175
176 if (unlikely(!new_header_size && dec.bos)) {
177 /* Penultimate hop popping */
178 if (!mpls_egress(rt, skb, dec))
179 goto drop;
180 } else {
181 bool bos;
182 int i;
183 skb_push(skb, new_header_size);
184 skb_reset_network_header(skb);
185 /* Push the new labels */
186 hdr = mpls_hdr(skb);
187 bos = dec.bos;
188 for (i = rt->rt_labels - 1; i >= 0; i--) {
189 hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos);
190 bos = false;
191 }
192 }
193
194 err = neigh_xmit(rt->rt_via_family, out_dev, rt->rt_via, skb);
195 if (err)
196 net_dbg_ratelimited("%s: packet transmission failed: %d\n",
197 __func__, err);
198 return 0;
199
200drop:
201 kfree_skb(skb);
202 return NET_RX_DROP;
203}
204
205static struct packet_type mpls_packet_type __read_mostly = {
206 .type = cpu_to_be16(ETH_P_MPLS_UC),
207 .func = mpls_forward,
208};
209
210static struct mpls_route *mpls_rt_alloc(size_t alen)
211{
212 struct mpls_route *rt;
213
214 rt = kzalloc(GFP_KERNEL, sizeof(*rt) + alen);
215 if (rt)
216 rt->rt_via_alen = alen;
217 return rt;
218}
219
220static void mpls_rt_free(struct mpls_route *rt)
221{
222 if (rt)
223 kfree_rcu(rt, rt_rcu);
224}
225
226static void mpls_route_update(struct net *net, unsigned index,
227 struct net_device *dev, struct mpls_route *new,
228 const struct nl_info *info)
229{
230 struct mpls_route *rt, *old = NULL;
231
232 ASSERT_RTNL();
233
234 rt = net->mpls.platform_label[index];
235 if (!dev || (rt && (rt->rt_dev == dev))) {
236 rcu_assign_pointer(net->mpls.platform_label[index], new);
237 old = rt;
238 }
239
240 /* If we removed a route free it now */
241 mpls_rt_free(old);
242}
243
244static void mpls_ifdown(struct net_device *dev)
245{
246 struct net *net = dev_net(dev);
247 unsigned index;
248
249 for (index = 0; index < net->mpls.platform_labels; index++) {
250 struct mpls_route *rt = net->mpls.platform_label[index];
251 if (!rt)
252 continue;
253 if (rt->rt_dev != dev)
254 continue;
255 rt->rt_dev = NULL;
256 }
257}
258
259static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
260 void *ptr)
261{
262 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
263
264 switch(event) {
265 case NETDEV_UNREGISTER:
266 mpls_ifdown(dev);
267 break;
268 }
269 return NOTIFY_OK;
270}
271
272static struct notifier_block mpls_dev_notifier = {
273 .notifier_call = mpls_dev_notify,
274};
275
276static int mpls_net_init(struct net *net)
277{
278 net->mpls.platform_labels = 0;
279 net->mpls.platform_label = NULL;
280
281 return 0;
282}
283
284static void mpls_net_exit(struct net *net)
285{
286 unsigned int index;
287
288 /* An rcu grace period haselapsed since there was a device in
289 * the network namespace (and thus the last in fqlight packet)
290 * left this network namespace. This is because
291 * unregister_netdevice_many and netdev_run_todo has completed
292 * for each network device that was in this network namespace.
293 *
294 * As such no additional rcu synchronization is necessary when
295 * freeing the platform_label table.
296 */
297 rtnl_lock();
298 for (index = 0; index < net->mpls.platform_labels; index++) {
299 struct mpls_route *rt = net->mpls.platform_label[index];
300 rcu_assign_pointer(net->mpls.platform_label[index], NULL);
301 mpls_rt_free(rt);
302 }
303 rtnl_unlock();
304
305 kvfree(net->mpls.platform_label);
306}
307
308static struct pernet_operations mpls_net_ops = {
309 .init = mpls_net_init,
310 .exit = mpls_net_exit,
311};
312
313static int __init mpls_init(void)
314{
315 int err;
316
317 BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);
318
319 err = register_pernet_subsys(&mpls_net_ops);
320 if (err)
321 goto out;
322
323 err = register_netdevice_notifier(&mpls_dev_notifier);
324 if (err)
325 goto out_unregister_pernet;
326
327 dev_add_pack(&mpls_packet_type);
328
329 err = 0;
330out:
331 return err;
332
333out_unregister_pernet:
334 unregister_pernet_subsys(&mpls_net_ops);
335 goto out;
336}
337module_init(mpls_init);
338
339static void __exit mpls_exit(void)
340{
341 dev_remove_pack(&mpls_packet_type);
342 unregister_netdevice_notifier(&mpls_dev_notifier);
343 unregister_pernet_subsys(&mpls_net_ops);
344}
345module_exit(mpls_exit);
346
347MODULE_DESCRIPTION("MultiProtocol Label Switching");
348MODULE_LICENSE("GPL v2");
349MODULE_ALIAS_NETPROTO(PF_MPLS);