1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Management Component Transport Protocol (MCTP) - routing
4 * implementation.
5 *
6 * This is currently based on a simple routing table, with no dst cache. The
7 * number of routes should stay fairly small, so the lookup cost is small.
8 *
9 * Copyright (c) 2021 Code Construct
10 * Copyright (c) 2021 Google
11 */
12
13#include <linux/idr.h>
14#include <linux/mctp.h>
15#include <linux/netdevice.h>
16#include <linux/rtnetlink.h>
17#include <linux/skbuff.h>
18
19#include <uapi/linux/if_arp.h>
20
21#include <net/mctp.h>
22#include <net/mctpdevice.h>
23#include <net/netlink.h>
24#include <net/sock.h>
25
26static const unsigned int mctp_message_maxlen = 64 * 1024;
27
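/* MCTP packet headers (struct mctp_hdr) carry a version, destination and
 * source EID, and a single flags_seq_tag byte: SOM (bit 7), EOM (bit 6),
 * a two-bit packet sequence number (bits 5:4), the tag-owner flag TO
 * (bit 3) and a three-bit message tag (bits 2:0). The MCTP_HDR_* masks
 * used throughout this file extract those fields.
 */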
28/* route output callbacks */
29static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
30{
31 kfree_skb(skb);
32 return 0;
33}
34
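/* Find a socket bound to this packet's (network, message type, local EID)
 * tuple; used when no request/response key matches an incoming message.
 */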
35static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
36{
37 struct mctp_skb_cb *cb = mctp_cb(skb);
38 struct mctp_hdr *mh;
39 struct sock *sk;
40 u8 type;
41
42 WARN_ON(!rcu_read_lock_held());
43
44 /* TODO: look up in skb->cb? */
45 mh = mctp_hdr(skb);
46
47 if (!skb_headlen(skb))
48 return NULL;
49
50 type = (*(u8 *)skb->data) & 0x7f;
51
52 sk_for_each_rcu(sk, &net->mctp.binds) {
53 struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
54
55 if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
56 continue;
57
58 if (msk->bind_type != type)
59 continue;
60
61 if (msk->bind_addr != MCTP_ADDR_ANY &&
62 msk->bind_addr != mh->dest)
63 continue;
64
65 return msk;
66 }
67
68 return NULL;
69}
70
71static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
72 mctp_eid_t peer, u8 tag)
73{
74 if (key->local_addr != local)
75 return false;
76
77 if (key->peer_addr != peer)
78 return false;
79
80 if (key->tag != tag)
81 return false;
82
83 return true;
84}
85
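/* Look up an active key matching this packet's destination EID, the given
 * peer EID and its tag (including the tag-owner bit).
 */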
86static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
87 mctp_eid_t peer)
88{
89 struct mctp_sk_key *key, *ret;
90 struct mctp_hdr *mh;
91 u8 tag;
92
93 WARN_ON(!rcu_read_lock_held());
94
95 mh = mctp_hdr(skb);
96 tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
97
98 ret = NULL;
99
100 hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) {
101 if (mctp_key_match(key, mh->dest, peer, tag)) {
102 ret = key;
103 break;
104 }
105 }
106
107 return ret;
108}
109
110static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
111 mctp_eid_t local, mctp_eid_t peer,
112 u8 tag, gfp_t gfp)
113{
114 struct mctp_sk_key *key;
115
116 key = kzalloc(sizeof(*key), gfp);
117 if (!key)
118 return NULL;
119
120 key->peer_addr = peer;
121 key->local_addr = local;
122 key->tag = tag;
123 key->sk = &msk->sk;
124 spin_lock_init(&key->reasm_lock);
125
126 return key;
127}
128
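/* Publish a key on the net and socket key lists; fails with -EEXIST if an
 * equivalent (local, peer, tag) key is already present.
 */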
129static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
130{
131 struct net *net = sock_net(&msk->sk);
132 struct mctp_sk_key *tmp;
133 unsigned long flags;
134 int rc = 0;
135
136 spin_lock_irqsave(&net->mctp.keys_lock, flags);
137
138 hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
139 if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
140 key->tag)) {
141 rc = -EEXIST;
142 break;
143 }
144 }
145
146 if (!rc) {
147 hlist_add_head(&key->hlist, &net->mctp.keys);
148 hlist_add_head(&key->sklist, &msk->keys);
149 }
150
151 spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
152
153 return rc;
154}
155
156/* Must be called with key->reasm_lock, which it will release. Will schedule
157 * the key for an RCU free.
158 */
159static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
160 unsigned long flags)
161 __releases(&key->reasm_lock)
162{
163 struct sk_buff *skb;
164
165 skb = key->reasm_head;
166 key->reasm_head = NULL;
167 key->reasm_dead = true;
168 spin_unlock_irqrestore(&key->reasm_lock, flags);
169
170 spin_lock_irqsave(&net->mctp.keys_lock, flags);
171 hlist_del_rcu(&key->hlist);
172 hlist_del_rcu(&key->sklist);
173 spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
174 kfree_rcu(key, rcu);
175
176 if (skb)
177 kfree_skb(skb);
178}
179
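/* Queue one fragment for reassembly under a key. The first fragment
 * becomes reasm_head; subsequent fragments must carry consecutive
 * two-bit sequence numbers and are chained onto the head's frag_list.
 * The reassembled message is limited to mctp_message_maxlen.
 */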
180static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
181{
182 struct mctp_hdr *hdr = mctp_hdr(skb);
183 u8 exp_seq, this_seq;
184
185 this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
186 & MCTP_HDR_SEQ_MASK;
187
188 if (!key->reasm_head) {
189 key->reasm_head = skb;
190 key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
191 key->last_seq = this_seq;
192 return 0;
193 }
194
195 exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;
196
197 if (this_seq != exp_seq)
198 return -EINVAL;
199
200 if (key->reasm_head->len + skb->len > mctp_message_maxlen)
201 return -EINVAL;
202
203 skb->next = NULL;
204 skb->sk = NULL;
205 *key->reasm_tailp = skb;
206 key->reasm_tailp = &skb->next;
207
208 key->last_seq = this_seq;
209
210 key->reasm_head->data_len += skb->len;
211 key->reasm_head->len += skb->len;
212 key->reasm_head->truesize += skb->truesize;
213
214 return 0;
215}
216
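/* Input path for locally-destined packets: match the packet to a socket
 * via an existing (local, peer, tag) key, or via a bound socket when the
 * sender owns the tag (TO set) on a SOM packet. Single-packet messages
 * are delivered immediately; multi-packet messages are reassembled under
 * the key.
 */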
217static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
218{
219 struct net *net = dev_net(skb->dev);
220 struct mctp_sk_key *key;
221 struct mctp_sock *msk;
222 struct mctp_hdr *mh;
223 unsigned long f;
224 u8 tag, flags;
225 int rc;
226
227 msk = NULL;
228 rc = -EINVAL;
229
230 /* we may be receiving a locally-routed packet; drop source sk
231 * accounting
232 */
233 skb_orphan(skb);
234
235 /* ensure we have enough data for a header and a type */
236 if (skb->len < sizeof(struct mctp_hdr) + 1)
237 goto out;
238
239 /* grab header, advance data ptr */
240 mh = mctp_hdr(skb);
241 skb_pull(skb, sizeof(struct mctp_hdr));
242
243 if (mh->ver != 1)
244 goto out;
245
246 flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
247 tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
248
249 rcu_read_lock();
250
251 /* lookup socket / reasm context, exactly matching (src,dest,tag) */
252 key = mctp_lookup_key(net, skb, mh->src);
253
254 if (flags & MCTP_HDR_FLAG_SOM) {
255 if (key) {
256 msk = container_of(key->sk, struct mctp_sock, sk);
257 } else {
258 /* first response to a broadcast? do a more general
259 * key lookup to find the socket, but don't use this
260 * key for reassembly - we'll create a more specific
261 * one for future packets if required (ie, !EOM).
262 */
263 key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
264 if (key) {
265 msk = container_of(key->sk,
266 struct mctp_sock, sk);
267 key = NULL;
268 }
269 }
270
271 if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
272 msk = mctp_lookup_bind(net, skb);
273
274 if (!msk) {
275 rc = -ENOENT;
276 goto out_unlock;
277 }
278
279 /* single-packet message? deliver to socket, clean up any
280 * pending key.
281 */
282 if (flags & MCTP_HDR_FLAG_EOM) {
283 sock_queue_rcv_skb(&msk->sk, skb);
284 if (key) {
285 spin_lock_irqsave(&key->reasm_lock, f);
286 /* we've hit a pending reassembly; not much we
287 * can do but drop it
288 */
289 __mctp_key_unlock_drop(key, net, f);
290 }
291 rc = 0;
292 goto out_unlock;
293 }
294
295 /* broadcast response or a bind() - create a key for further
296 * packets for this message
297 */
298 if (!key) {
299 key = mctp_key_alloc(msk, mh->dest, mh->src,
300 tag, GFP_ATOMIC);
301 if (!key) {
302 rc = -ENOMEM;
303 goto out_unlock;
304 }
305
306 /* we can queue without the reasm lock here, as the
307 * key isn't observable yet
308 */
309 mctp_frag_queue(key, skb);
310
311 /* if the key_add fails, we've raced with another
312 * SOM packet with the same src, dest and tag. There's
313 * no way to distinguish future packets, so all we
314 * can do is drop; we'll free the skb on exit from
315 * this function.
316 */
317 rc = mctp_key_add(key, msk);
318 if (rc)
319 kfree(key);
320
321 } else {
322 /* existing key: start reassembly */
323 spin_lock_irqsave(&key->reasm_lock, f);
324
325 if (key->reasm_head || key->reasm_dead) {
326 /* duplicate start? drop everything */
327 __mctp_key_unlock_drop(key, net, f);
328 rc = -EEXIST;
329 } else {
330 rc = mctp_frag_queue(key, skb);
331 spin_unlock_irqrestore(&key->reasm_lock, f);
332 }
333 }
334
335 } else if (key) {
336 /* this packet continues a previous message; reassemble
337 * using the message-specific key
338 */
339
340 spin_lock_irqsave(&key->reasm_lock, f);
341
342 /* we need to be continuing an existing reassembly... */
343 if (!key->reasm_head)
344 rc = -EINVAL;
345 else
346 rc = mctp_frag_queue(key, skb);
347
348 /* end of message? deliver to socket, and we're done with
349 * the reassembly/response key
350 */
351 if (!rc && flags & MCTP_HDR_FLAG_EOM) {
352 sock_queue_rcv_skb(key->sk, key->reasm_head);
353 key->reasm_head = NULL;
354 __mctp_key_unlock_drop(key, net, f);
355 } else {
356 spin_unlock_irqrestore(&key->reasm_lock, f);
357 }
358
359 } else {
360 /* not a start, no matching key */
361 rc = -ENOENT;
362 }
363
364out_unlock:
365 rcu_read_unlock();
366out:
367 if (rc)
368 kfree_skb(skb);
369 return rc;
370}
371
372static unsigned int mctp_route_mtu(struct mctp_route *rt)
373{
374 return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
375}
376
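/* Transmit callback for unicast routes: resolve the neighbour hardware
 * address for the destination EID, build the link-layer header and hand
 * the packet to the device queue.
 */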
377static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
378{
379 struct mctp_hdr *hdr = mctp_hdr(skb);
380 char daddr_buf[MAX_ADDR_LEN];
381 char *daddr = NULL;
382 unsigned int mtu;
383 int rc;
384
385 skb->protocol = htons(ETH_P_MCTP);
386
387 mtu = READ_ONCE(skb->dev->mtu);
388 if (skb->len > mtu) {
389 kfree_skb(skb);
390 return -EMSGSIZE;
391 }
392
393 /* If lookup fails let the device handle daddr==NULL */
394 if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
395 daddr = daddr_buf;
396
397 rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
398 daddr, skb->dev->dev_addr, skb->len);
399 if (rc) {
400 kfree_skb(skb);
401 return -EHOSTUNREACH;
402 }
403
404 rc = dev_queue_xmit(skb);
405 if (rc)
406 rc = net_xmit_errno(rc);
407
408 return rc;
409}
410
411/* route alloc/release */
412static void mctp_route_release(struct mctp_route *rt)
413{
414 if (refcount_dec_and_test(&rt->refs)) {
415 dev_put(rt->dev->dev);
416 kfree_rcu(rt, rcu);
417 }
418}
419
420/* returns a route with the refcount at 1 */
421static struct mctp_route *mctp_route_alloc(void)
422{
423 struct mctp_route *rt;
424
425 rt = kzalloc(sizeof(*rt), GFP_KERNEL);
426 if (!rt)
427 return NULL;
428
429 INIT_LIST_HEAD(&rt->list);
430 refcount_set(&rt->refs, 1);
431 rt->output = mctp_route_discard;
432
433 return rt;
434}
435
436unsigned int mctp_default_net(struct net *net)
437{
438 return READ_ONCE(net->mctp.default_net);
439}
440
441int mctp_default_net_set(struct net *net, unsigned int index)
442{
443 if (index == 0)
444 return -EINVAL;
445 WRITE_ONCE(net->mctp.default_net, index);
446 return 0;
447}
448
449/* tag management */
450static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
451 struct mctp_sock *msk)
452{
453 struct netns_mctp *mns = &net->mctp;
454
455 lockdep_assert_held(&mns->keys_lock);
456
457 /* we hold the net->keys_lock here, allowing updates to both
458 * the net and sk
459 */
460 hlist_add_head_rcu(&key->hlist, &mns->keys);
461 hlist_add_head_rcu(&key->sklist, &msk->keys);
462}
463
464/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
465 * it for the socket msk
466 */
467static int mctp_alloc_local_tag(struct mctp_sock *msk,
468 mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
469{
470 struct net *net = sock_net(&msk->sk);
471 struct netns_mctp *mns = &net->mctp;
472 struct mctp_sk_key *key, *tmp;
473 unsigned long flags;
474 int rc = -EAGAIN;
475 u8 tagbits;
476
477 /* be optimistic, alloc now */
478 key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
479 if (!key)
480 return -ENOMEM;
481
482 /* 8 possible tag values */
483 tagbits = 0xff;
484
485 spin_lock_irqsave(&mns->keys_lock, flags);
486
487 /* Walk through the existing keys, looking for potential conflicting
488 * tags. If we find a conflict, clear that bit from tagbits
489 */
490 hlist_for_each_entry(tmp, &mns->keys, hlist) {
491 /* if we don't own the tag, it can't conflict */
492 if (tmp->tag & MCTP_HDR_FLAG_TO)
493 continue;
494
495 if ((tmp->peer_addr == daddr ||
496 tmp->peer_addr == MCTP_ADDR_ANY) &&
497 tmp->local_addr == saddr)
498 tagbits &= ~(1 << tmp->tag);
499
500 if (!tagbits)
501 break;
502 }
503
504 if (tagbits) {
505 key->tag = __ffs(tagbits);
506 mctp_reserve_tag(net, key, msk);
507 *tagp = key->tag;
508 rc = 0;
509 }
510
511 spin_unlock_irqrestore(&mns->keys_lock, flags);
512
513 if (!tagbits)
514 kfree(key);
515
516 return rc;
517}
518
519/* routing lookups */
520static bool mctp_rt_match_eid(struct mctp_route *rt,
521 unsigned int net, mctp_eid_t eid)
522{
523 return READ_ONCE(rt->dev->net) == net &&
524 rt->min <= eid && rt->max >= eid;
525}
526
527/* compares match, used for duplicate prevention */
528static bool mctp_rt_compare_exact(struct mctp_route *rt1,
529 struct mctp_route *rt2)
530{
531 ASSERT_RTNL();
532 return rt1->dev->net == rt2->dev->net &&
533 rt1->min == rt2->min &&
534 rt1->max == rt2->max;
535}
536
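/* Find a route for (dnet, daddr), taking a reference that the caller must
 * drop via mctp_do_route() or mctp_route_release().
 */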
537struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
538 mctp_eid_t daddr)
539{
540 struct mctp_route *tmp, *rt = NULL;
541
542 list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
543 /* TODO: add metrics */
544 if (mctp_rt_match_eid(tmp, dnet, daddr)) {
545 if (refcount_inc_not_zero(&tmp->refs)) {
546 rt = tmp;
547 break;
548 }
549 }
550 }
551
552 return rt;
553}
554
555/* sends a skb to rt and releases the route. */
556int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
557{
558 int rc;
559
560 rc = rt->output(rt, skb);
561 mctp_route_release(rt);
562 return rc;
563}
564
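/* Split a message into MTU-sized packets. Each fragment carries a copy of
 * the MCTP header with the same tag: SOM on the first fragment, EOM on
 * the last, and a rolling two-bit sequence number. Consumes the original
 * skb and drops the route reference.
 */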
565static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
566 unsigned int mtu, u8 tag)
567{
568 const unsigned int hlen = sizeof(struct mctp_hdr);
569 struct mctp_hdr *hdr, *hdr2;
570 unsigned int pos, size;
571 struct sk_buff *skb2;
572 int rc;
573 u8 seq;
574
575 hdr = mctp_hdr(skb);
576 seq = 0;
577 rc = 0;
578
579 if (mtu < hlen + 1) {
580 kfree_skb(skb);
581 return -EMSGSIZE;
582 }
583
584 /* we've got the header */
585 skb_pull(skb, hlen);
586
587 for (pos = 0; pos < skb->len;) {
588 /* size of message payload */
589 size = min(mtu - hlen, skb->len - pos);
590
591 skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
592 if (!skb2) {
593 rc = -ENOMEM;
594 break;
595 }
596
597 /* generic skb copy */
598 skb2->protocol = skb->protocol;
599 skb2->priority = skb->priority;
600 skb2->dev = skb->dev;
601 memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));
602
603 if (skb->sk)
604 skb_set_owner_w(skb2, skb->sk);
605
606 /* establish packet */
607 skb_reserve(skb2, MCTP_HEADER_MAXLEN);
608 skb_reset_network_header(skb2);
609 skb_put(skb2, hlen + size);
610 skb2->transport_header = skb2->network_header + hlen;
611
612 /* copy header fields, calculate SOM/EOM flags & seq */
613 hdr2 = mctp_hdr(skb2);
614 hdr2->ver = hdr->ver;
615 hdr2->dest = hdr->dest;
616 hdr2->src = hdr->src;
617 hdr2->flags_seq_tag = tag &
618 (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
619
620 if (pos == 0)
621 hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;
622
623 if (pos + size == skb->len)
624 hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;
625
626 hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;
627
628 /* copy message payload */
629 skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
630
631 /* do route, but don't drop the rt reference */
632 rc = rt->output(rt, skb2);
633 if (rc)
634 break;
635
636 seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
637 pos += size;
638 }
639
640 mctp_route_release(rt);
641 consume_skb(skb);
642 return rc;
643}
644
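/* Socket transmit path: select a source EID from the outbound interface,
 * allocate a locally-owned tag if the caller requested one (TO set),
 * build the MCTP header, then send directly or fragment to the route MTU.
 */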
645int mctp_local_output(struct sock *sk, struct mctp_route *rt,
646 struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
647{
648 struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
649 struct mctp_skb_cb *cb = mctp_cb(skb);
650 struct mctp_hdr *hdr;
651 unsigned long flags;
652 unsigned int mtu;
653 mctp_eid_t saddr;
654 int rc;
655 u8 tag;
656
657 if (WARN_ON(!rt->dev))
658 return -EINVAL;
659
660 spin_lock_irqsave(&rt->dev->addrs_lock, flags);
661 if (rt->dev->num_addrs == 0) {
662 rc = -EHOSTUNREACH;
663 } else {
664 /* use the outbound interface's first address as our source */
665 saddr = rt->dev->addrs[0];
666 rc = 0;
667 }
668 spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
669
670 if (rc)
671 return rc;
672
673 if (req_tag & MCTP_HDR_FLAG_TO) {
674 rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
675 if (rc)
676 return rc;
677 tag |= MCTP_HDR_FLAG_TO;
678 } else {
679 tag = req_tag;
680 }
681
682
683 skb->protocol = htons(ETH_P_MCTP);
684 skb->priority = 0;
685 skb_reset_transport_header(skb);
686 skb_push(skb, sizeof(struct mctp_hdr));
687 skb_reset_network_header(skb);
688 skb->dev = rt->dev->dev;
689
690 /* cb->net will have been set on initial ingress */
691 cb->src = saddr;
692
693 /* set up common header fields */
694 hdr = mctp_hdr(skb);
695 hdr->ver = 1;
696 hdr->dest = daddr;
697 hdr->src = saddr;
698
699 mtu = mctp_route_mtu(rt);
700
701 if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
702 hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
703 tag;
704 return mctp_do_route(rt, skb);
705 } else {
706 return mctp_do_fragment_route(rt, skb, mtu, tag);
707 }
708}
709
710/* route management */
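/* Add a route covering EIDs daddr_start through daddr_start + daddr_extent
 * on mdev. RTN_LOCAL routes feed the local input path, RTN_UNICAST routes
 * transmit via the device; an mtu of 0 means the device MTU applies.
 */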
711static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
712 unsigned int daddr_extent, unsigned int mtu,
713 unsigned char type)
714{
715 int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
716 struct net *net = dev_net(mdev->dev);
717 struct mctp_route *rt, *ert;
718
719 if (!mctp_address_ok(daddr_start))
720 return -EINVAL;
721
722 if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
723 return -EINVAL;
724
725 switch (type) {
726 case RTN_LOCAL:
727 rtfn = mctp_route_input;
728 break;
729 case RTN_UNICAST:
730 rtfn = mctp_route_output;
731 break;
732 default:
733 return -EINVAL;
734 }
735
736 rt = mctp_route_alloc();
737 if (!rt)
738 return -ENOMEM;
739
740 rt->min = daddr_start;
741 rt->max = daddr_start + daddr_extent;
742 rt->mtu = mtu;
743 rt->dev = mdev;
744 dev_hold(rt->dev->dev);
745 rt->type = type;
746 rt->output = rtfn;
747
748 ASSERT_RTNL();
749 /* Prevent duplicate identical routes. */
750 list_for_each_entry(ert, &net->mctp.routes, list) {
751 if (mctp_rt_compare_exact(rt, ert)) {
752 mctp_route_release(rt);
753 return -EEXIST;
754 }
755 }
756
757 list_add_rcu(&rt->list, &net->mctp.routes);
758
759 return 0;
760}
761
762static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
763 unsigned int daddr_extent)
764{
765 struct net *net = dev_net(mdev->dev);
766 struct mctp_route *rt, *tmp;
767 mctp_eid_t daddr_end;
768 bool dropped;
769
770 if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
771 return -EINVAL;
772
773 daddr_end = daddr_start + daddr_extent;
774 dropped = false;
775
776 ASSERT_RTNL();
777
778 list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
779 if (rt->dev == mdev &&
780 rt->min == daddr_start && rt->max == daddr_end) {
781 list_del_rcu(&rt->list);
782 /* TODO: immediate RTM_DELROUTE */
783 mctp_route_release(rt);
784 dropped = true;
785 }
786 }
787
788 return dropped ? 0 : -ENOENT;
789}
790
791int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
792{
793 return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
794}
795
796int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
797{
798 return mctp_route_remove(mdev, addr, 0);
799}
800
801/* removes all entries for a given device */
802void mctp_route_remove_dev(struct mctp_dev *mdev)
803{
804 struct net *net = dev_net(mdev->dev);
805 struct mctp_route *rt, *tmp;
806
807 ASSERT_RTNL();
808 list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
809 if (rt->dev == mdev) {
810 list_del_rcu(&rt->list);
811 /* TODO: immediate RTM_DELROUTE */
812 mctp_route_release(rt);
813 }
814 }
815}
816
817/* Incoming packet-handling */
818
819static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
820 struct packet_type *pt,
821 struct net_device *orig_dev)
822{
823 struct net *net = dev_net(dev);
824 struct mctp_skb_cb *cb;
825 struct mctp_route *rt;
826 struct mctp_hdr *mh;
827
828 /* basic non-data sanity checks */
829 if (dev->type != ARPHRD_MCTP)
830 goto err_drop;
831
832 if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
833 goto err_drop;
834
835 skb_reset_transport_header(skb);
836 skb_reset_network_header(skb);
837
838 /* We have enough for a header; decode and route */
839 mh = mctp_hdr(skb);
840 if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
841 goto err_drop;
842
843 cb = __mctp_cb(skb);
844 rcu_read_lock();
845 cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
846 rcu_read_unlock();
847
848 rt = mctp_route_lookup(net, cb->net, mh->dest);
849 if (!rt)
850 goto err_drop;
851
852 mctp_do_route(rt, skb);
853
854 return NET_RX_SUCCESS;
855
856err_drop:
857 kfree_skb(skb);
858 return NET_RX_DROP;
859}
860
861static struct packet_type mctp_packet_type = {
862 .type = cpu_to_be16(ETH_P_MCTP),
863 .func = mctp_pkttype_receive,
864};
865
866/* netlink interface */
867
868static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
869 [RTA_DST] = { .type = NLA_U8 },
870 [RTA_METRICS] = { .type = NLA_NESTED },
871 [RTA_OIF] = { .type = NLA_U32 },
872};
873
874/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
875 * tb must hold RTA_MAX+1 elements.
876 */
877static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
878 struct netlink_ext_ack *extack,
879 struct nlattr **tb, struct rtmsg **rtm,
880 struct mctp_dev **mdev, mctp_eid_t *daddr_start)
881{
882 struct net *net = sock_net(skb->sk);
883 struct net_device *dev;
884 unsigned int ifindex;
885 int rc;
886
887 rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
888 rta_mctp_policy, extack);
889 if (rc < 0) {
890 NL_SET_ERR_MSG(extack, "incorrect format");
891 return rc;
892 }
893
894 if (!tb[RTA_DST]) {
895 NL_SET_ERR_MSG(extack, "dst EID missing");
896 return -EINVAL;
897 }
898 *daddr_start = nla_get_u8(tb[RTA_DST]);
899
900 if (!tb[RTA_OIF]) {
901 NL_SET_ERR_MSG(extack, "ifindex missing");
902 return -EINVAL;
903 }
904 ifindex = nla_get_u32(tb[RTA_OIF]);
905
906 *rtm = nlmsg_data(nlh);
907 if ((*rtm)->rtm_family != AF_MCTP) {
908 NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
909 return -EINVAL;
910 }
911
912 dev = __dev_get_by_index(net, ifindex);
913 if (!dev) {
914 NL_SET_ERR_MSG(extack, "bad ifindex");
915 return -ENODEV;
916 }
917 *mdev = mctp_dev_get_rtnl(dev);
918 if (!*mdev)
919 return -ENODEV;
920
921 if (dev->flags & IFF_LOOPBACK) {
922 NL_SET_ERR_MSG(extack, "no routes to loopback");
923 return -EINVAL;
924 }
925
926 return 0;
927}
928
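/* RTM_NEWROUTE handler. rtm_dst_len is treated as the extent of the EID
 * range (max - min), not a prefix length; see mctp_fill_rtinfo().
 */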
929static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
930 struct netlink_ext_ack *extack)
931{
932 struct nlattr *tb[RTA_MAX + 1];
933 mctp_eid_t daddr_start;
934 struct mctp_dev *mdev;
935 struct rtmsg *rtm;
936 unsigned int mtu;
937 int rc;
938
939 rc = mctp_route_nlparse(skb, nlh, extack, tb,
940 &rtm, &mdev, &daddr_start);
941 if (rc < 0)
942 return rc;
943
944 if (rtm->rtm_type != RTN_UNICAST) {
945 NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
946 return -EINVAL;
947 }
948
949 /* TODO: parse mtu from nlparse */
950 mtu = 0;
951
952 if (rtm->rtm_type != RTN_UNICAST)
953 return -EINVAL;
954
955 rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
956 rtm->rtm_type);
957 return rc;
958}
959
960static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
961 struct netlink_ext_ack *extack)
962{
963 struct nlattr *tb[RTA_MAX + 1];
964 mctp_eid_t daddr_start;
965 struct mctp_dev *mdev;
966 struct rtmsg *rtm;
967 int rc;
968
969 rc = mctp_route_nlparse(skb, nlh, extack, tb,
970 &rtm, &mdev, &daddr_start);
971 if (rc < 0)
972 return rc;
973
974 /* we only have unicast routes */
975 if (rtm->rtm_type != RTN_UNICAST)
976 return -EINVAL;
977
978 rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
979 return rc;
980}
981
982static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
983 u32 portid, u32 seq, int event, unsigned int flags)
984{
985 struct nlmsghdr *nlh;
986 struct rtmsg *hdr;
987 void *metrics;
988
989 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
990 if (!nlh)
991 return -EMSGSIZE;
992
993 hdr = nlmsg_data(nlh);
994 hdr->rtm_family = AF_MCTP;
995
996 /* we use the _len fields as a number of EIDs, rather than
997 * a number of bits in the address
998 */
999 hdr->rtm_dst_len = rt->max - rt->min;
1000 hdr->rtm_src_len = 0;
1001 hdr->rtm_tos = 0;
1002 hdr->rtm_table = RT_TABLE_DEFAULT;
1003 hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
1004 hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
1005 hdr->rtm_type = rt->type;
1006
1007 if (nla_put_u8(skb, RTA_DST, rt->min))
1008 goto cancel;
1009
1010 metrics = nla_nest_start_noflag(skb, RTA_METRICS);
1011 if (!metrics)
1012 goto cancel;
1013
1014 if (rt->mtu) {
1015 if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
1016 goto cancel;
1017 }
1018
1019 nla_nest_end(skb, metrics);
1020
1021 if (rt->dev) {
1022 if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
1023 goto cancel;
1024 }
1025
1026 /* TODO: conditional neighbour physaddr? */
1027
1028 nlmsg_end(skb, nlh);
1029
1030 return 0;
1031
1032cancel:
1033 nlmsg_cancel(skb, nlh);
1034 return -EMSGSIZE;
1035}
1036
1037static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
1038{
1039 struct net *net = sock_net(skb->sk);
1040 struct mctp_route *rt;
1041 int s_idx, idx;
1042
1043 /* TODO: allow filtering on route data, possibly under
1044 * cb->strict_check
1045 */
1046
1047 /* TODO: change to struct overlay */
1048 s_idx = cb->args[0];
1049 idx = 0;
1050
1051 rcu_read_lock();
1052 list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
1053 if (idx++ < s_idx)
1054 continue;
1055 if (mctp_fill_rtinfo(skb, rt,
1056 NETLINK_CB(cb->skb).portid,
1057 cb->nlh->nlmsg_seq,
1058 RTM_NEWROUTE, NLM_F_MULTI) < 0)
1059 break;
1060 }
1061
1062 rcu_read_unlock();
1063 cb->args[0] = idx;
1064
1065 return skb->len;
1066}
1067
1068/* net namespace implementation */
1069static int __net_init mctp_routes_net_init(struct net *net)
1070{
1071 struct netns_mctp *ns = &net->mctp;
1072
1073 INIT_LIST_HEAD(&ns->routes);
1074 INIT_HLIST_HEAD(&ns->binds);
1075 mutex_init(&ns->bind_lock);
1076 INIT_HLIST_HEAD(&ns->keys);
1077 spin_lock_init(&ns->keys_lock);
1078 WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
1079 return 0;
1080}
1081
1082static void __net_exit mctp_routes_net_exit(struct net *net)
1083{
1084 struct mctp_route *rt;
1085
1086 rcu_read_lock();
1087 list_for_each_entry_rcu(rt, &net->mctp.routes, list)
1088 mctp_route_release(rt);
1089 rcu_read_unlock();
1090}
1091
1092static struct pernet_operations mctp_net_ops = {
1093 .init = mctp_routes_net_init,
1094 .exit = mctp_routes_net_exit,
1095};
1096
1097int __init mctp_routes_init(void)
1098{
1099 dev_add_pack(&mctp_packet_type);
1100
1101 rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
1102 NULL, mctp_dump_rtinfo, 0);
1103 rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
1104 mctp_newroute, NULL, 0);
1105 rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
1106 mctp_delroute, NULL, 0);
1107
1108 return register_pernet_subsys(&mctp_net_ops);
1109}
1110
1111void __exit mctp_routes_exit(void)
1112{
1113 unregister_pernet_subsys(&mctp_net_ops);
1114 rtnl_unregister(PF_MCTP, RTM_DELROUTE);
1115 rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
1116 rtnl_unregister(PF_MCTP, RTM_GETROUTE);
1117 dev_remove_pack(&mctp_packet_type);
1118}