/*
 * xfrm_state.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific functions
 *	Derek Atkins <derek@ihtfp.com>
 *		Add UDP Encapsulation
 *
 */

#include <linux/workqueue.h>
#include <net/xfrm.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/module.h>
#include <linux/bootmem.h>
#include <linux/vmalloc.h>
#include <linux/cache.h>
#include <asm/uaccess.h>

struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);

u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);

/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by daddr to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */

static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;
static struct hlist_head *xfrm_state_byspi __read_mostly;
static unsigned int xfrm_state_hmask __read_mostly;
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
static unsigned int xfrm_state_num;

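/* Editor's note: in addition to the two lookup tables described above,
 * every state is also chained by source address (xfrm_state_bysrc).
 * All three tables share the same geometry: xfrm_state_hmask + 1
 * power-of-two buckets, grown on demand (up to xfrm_state_hashmax)
 * by the resize work declared below.
 */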
static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask)
{
	unsigned int h;
	h = ntohl(addr->a4);
	h = (h ^ (h>>16)) & hmask;
	return h;
}

static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask)
{
	unsigned int h;
	h = ntohl(addr->a6[2]^addr->a6[3]);
	h = (h ^ (h>>16)) & hmask;
	return h;
}

static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask)
{
	return __xfrm4_dst_hash(addr, hmask);
}

static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask)
{
	return __xfrm6_dst_hash(addr, hmask);
}

static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_src_hash(addr, hmask);
	case AF_INET6:
		return __xfrm6_src_hash(addr, hmask);
	}
	return 0;
}

static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
{
	return __xfrm_src_hash(addr, family, xfrm_state_hmask);
}

static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_dst_hash(addr, hmask);
	case AF_INET6:
		return __xfrm6_dst_hash(addr, hmask);
	}
	return 0;
}

static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
{
	return __xfrm_dst_hash(addr, family, xfrm_state_hmask);
}

static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
					    unsigned int hmask)
{
	unsigned int h;
	h = ntohl(addr->a4^spi^proto);
	h = (h ^ (h>>10) ^ (h>>20)) & hmask;
	return h;
}

static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
					    unsigned int hmask)
{
	unsigned int h;
	h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
	h = (h ^ (h>>10) ^ (h>>20)) & hmask;
	return h;
}

static inline
unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
			 unsigned int hmask)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_spi_hash(addr, spi, proto, hmask);
	case AF_INET6:
		return __xfrm6_spi_hash(addr, spi, proto, hmask);
	}
	return 0;	/*XXX*/
}

static inline unsigned int
xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
{
	return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
}

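/* Editor's note: bucket arrays are allocated with kmalloc() while they
 * fit in a single page, with __vmalloc() when the "hashdist" boot option
 * is in effect, and with __get_free_pages() otherwise;
 * xfrm_state_hash_free() mirrors the same choice when an old table is
 * retired after a resize.
 */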
static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
{
	struct hlist_head *n;

	if (sz <= PAGE_SIZE)
		n = kmalloc(sz, GFP_KERNEL);
	else if (hashdist)
		n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
	else
		n = (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(sz));

	if (n)
		memset(n, 0, sz);

	return n;
}

static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
{
	if (sz <= PAGE_SIZE)
		kfree(n);
	else if (hashdist)
		vfree(n);
	else
		free_pages((unsigned long)n, get_order(sz));
}

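/* Editor's note: rehash every state found on one old bydst chain into
 * the new bydst, bysrc and byspi tables, using the new hash mask.
 * Called only from xfrm_hash_resize() with xfrm_state_lock held.
 */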
static void xfrm_hash_transfer(struct hlist_head *list,
			       struct hlist_head *ndsttable,
			       struct hlist_head *nsrctable,
			       struct hlist_head *nspitable,
			       unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_state *x;

	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
		unsigned int h;

		h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask);
		hlist_add_head(&x->bydst, ndsttable+h);

		h = __xfrm_src_hash(&x->props.saddr, x->props.family,
				    nhashmask);
		hlist_add_head(&x->bysrc, nsrctable+h);

		h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				    x->props.family, nhashmask);
		hlist_add_head(&x->byspi, nspitable+h);
	}
}

static unsigned long xfrm_hash_new_size(void)
{
	return ((xfrm_state_hmask + 1) << 1) *
		sizeof(struct hlist_head);
}

static DEFINE_MUTEX(hash_resize_mutex);

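/* Editor's note: grow the hash tables.  The new bucket arrays (twice the
 * current size) are allocated outside of any lock, the states are
 * transferred and the table pointers and mask are switched under
 * xfrm_state_lock, and the old arrays are freed after the lock is
 * dropped.  hash_resize_mutex keeps concurrent resize work items from
 * racing with each other.
 */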
static void xfrm_hash_resize(void *__unused)
{
	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
	unsigned long nsize, osize;
	unsigned int nhashmask, ohashmask;
	int i;

	mutex_lock(&hash_resize_mutex);

	nsize = xfrm_hash_new_size();
	ndst = xfrm_state_hash_alloc(nsize);
	if (!ndst)
		goto out_unlock;
	nsrc = xfrm_state_hash_alloc(nsize);
	if (!nsrc) {
		xfrm_state_hash_free(ndst, nsize);
		goto out_unlock;
	}
	nspi = xfrm_state_hash_alloc(nsize);
	if (!nspi) {
		xfrm_state_hash_free(ndst, nsize);
		xfrm_state_hash_free(nsrc, nsize);
		goto out_unlock;
	}

	spin_lock_bh(&xfrm_state_lock);

	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
	for (i = xfrm_state_hmask; i >= 0; i--)
		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
				   nhashmask);

	odst = xfrm_state_bydst;
	osrc = xfrm_state_bysrc;
	ospi = xfrm_state_byspi;
	ohashmask = xfrm_state_hmask;

	xfrm_state_bydst = ndst;
	xfrm_state_bysrc = nsrc;
	xfrm_state_byspi = nspi;
	xfrm_state_hmask = nhashmask;

	spin_unlock_bh(&xfrm_state_lock);

	osize = (ohashmask + 1) * sizeof(struct hlist_head);
	xfrm_state_hash_free(odst, osize);
	xfrm_state_hash_free(osrc, osize);
	xfrm_state_hash_free(ospi, osize);

out_unlock:
	mutex_unlock(&hash_resize_mutex);
}

static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);

DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

static struct work_struct xfrm_state_gc_work;
static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

static int xfrm_state_gc_flush_bundles;

int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);

static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
	if (del_timer(&x->timer))
		BUG();
	if (del_timer(&x->rtimer))
		BUG();
	kfree(x->aalg);
	kfree(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	if (x->mode)
		xfrm_put_mode(x->mode);
	if (x->type) {
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	security_xfrm_state_free(x);
	kfree(x);
}

static void xfrm_state_gc_task(void *data)
{
	struct xfrm_state *x;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	if (xfrm_state_gc_flush_bundles) {
		xfrm_state_gc_flush_bundles = 0;
		xfrm_flush_bundles();
	}

	spin_lock_bh(&xfrm_state_gc_lock);
	gc_list.first = xfrm_state_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
		xfrm_state_gc_destroy(x);

	wake_up(&km_waitq);
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

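/* Editor's note: per-state lifetime timer.  A hard add/use expiry deletes
 * the state (or merely marks a larval ACQ state expired so waiters can
 * retry); a soft expiry only warns the key managers via
 * km_state_expired() and leaves the state usable until the hard limit
 * hits.
 */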
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;
	int warn = 0;

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);
resched:
	if (next != LONG_MAX &&
	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
		xfrm_state_hold(x);
	goto out;

expired:
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}
	if (!__xfrm_state_delete(x) && x->id.spi)
		km_state_expired(x, 1, 0);

out:
	spin_unlock(&x->lock);
	xfrm_state_put(x);
}

static void xfrm_replay_timer_handler(unsigned long data);

struct xfrm_state *xfrm_state_alloc(void)
{
	struct xfrm_state *x;

	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

	if (x) {
		atomic_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_HLIST_NODE(&x->bydst);
		INIT_HLIST_NODE(&x->bysrc);
		INIT_HLIST_NODE(&x->byspi);
		init_timer(&x->timer);
		x->timer.function = xfrm_timer_handler;
		x->timer.data	  = (unsigned long)x;
		init_timer(&x->rtimer);
		x->rtimer.function = xfrm_replay_timer_handler;
		x->rtimer.data     = (unsigned long)x;
		x->curlft.add_time = (unsigned long)xtime.tv_sec;
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		x->replay_maxage = 0;
		x->replay_maxdiff = 0;
		spin_lock_init(&x->lock);
	}
	return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);

void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);

int __xfrm_state_delete(struct xfrm_state *x)
{
	int err = -ESRCH;

	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		hlist_del(&x->bydst);
		__xfrm_state_put(x);
		hlist_del(&x->bysrc);
		__xfrm_state_put(x);
		if (x->id.spi) {
			hlist_del(&x->byspi);
			__xfrm_state_put(x);
		}
		xfrm_state_num--;
		spin_unlock(&xfrm_state_lock);
		if (del_timer(&x->timer))
			__xfrm_state_put(x);
		if (del_timer(&x->rtimer))
			__xfrm_state_put(x);

		/* The number two in this test is the reference
		 * mentioned in the comment below plus the reference
		 * our caller holds.  A larger value means that
		 * there are DSTs attached to this xfrm_state.
		 */
		if (atomic_read(&x->refcnt) > 2) {
			xfrm_state_gc_flush_bundles = 1;
			schedule_work(&xfrm_state_gc_work);
		}

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		__xfrm_state_put(x);
		err = 0;
	}

	return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);

int xfrm_state_delete(struct xfrm_state *x)
{
	int err;

	spin_lock_bh(&x->lock);
	err = __xfrm_state_delete(x);
	spin_unlock_bh(&x->lock);

	return err;
}
EXPORT_SYMBOL(xfrm_state_delete);

void xfrm_state_flush(u8 proto)
{
	int i;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i < xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;
restart:
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    xfrm_id_proto_match(x->id.proto, proto)) {
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				xfrm_state_delete(x);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);

static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
		  struct xfrm_tmpl *tmpl,
		  xfrm_address_t *daddr, xfrm_address_t *saddr,
		  unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -1;
	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
	xfrm_state_put_afinfo(afinfo);
	return 0;
}

static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
{
	unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
	struct xfrm_state *x;
	struct hlist_node *entry;

	hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
		if (x->props.family != family ||
		    x->id.spi       != spi ||
		    x->id.proto     != proto)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4 != daddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)daddr,
					     (struct in6_addr *)
					     x->id.daddr.a6))
				continue;
			break;
		};

		xfrm_state_hold(x);
		return x;
	}

	return NULL;
}

static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
{
	unsigned int h = xfrm_src_hash(saddr, family);
	struct xfrm_state *x;
	struct hlist_node *entry;

	hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
		if (x->props.family != family ||
		    x->id.proto     != proto)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4 != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)daddr,
					     (struct in6_addr *)
					     x->id.daddr.a6) ||
			    !ipv6_addr_equal((struct in6_addr *)saddr,
					     (struct in6_addr *)
					     x->props.saddr.a6))
				continue;
			break;
		};

		xfrm_state_hold(x);
		return x;
	}

	return NULL;
}

static inline struct xfrm_state *
__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
{
	if (use_spi)
		return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
					   x->id.proto, family);
	else
		return __xfrm_state_lookup_byaddr(&x->id.daddr,
						  &x->props.saddr,
						  x->id.proto, family);
}

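/* Editor's note: main resolver used on output.  Walk the bydst chain for
 * a VALID state that matches the policy template, flow and selector,
 * preferring the freshest non-dying candidate.  If nothing matches and
 * no acquire is already pending, a larval XFRM_STATE_ACQ entry is
 * created and the key managers are asked (km_query) to negotiate a real
 * SA; the larval entry times out after XFRM_ACQ_EXPIRES seconds.
 */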
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned int h = xfrm_dst_hash(daddr, family);
	struct hlist_node *entry;
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;

	spin_lock_bh(&xfrm_state_lock);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family) ||
				    !security_xfrm_state_pol_flow_match(x, pol, fl))
					continue;
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family) &&
				    security_xfrm_state_pol_flow_match(x, pol, fl))
					error = -ESRCH;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress) {
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
					      tmpl->id.proto, family)) != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			goto out;
		}

		if (km_query(x, tmpl, pol) == 0) {
			x->km.state = XFRM_STATE_ACQ;
			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
			xfrm_state_hold(x);
			h = xfrm_src_hash(saddr, family);
			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
			xfrm_state_hold(x);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
				xfrm_state_hold(x);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			xfrm_state_hold(x);
			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
			add_timer(&x->timer);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}

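/* Editor's note: link a state into the bydst, bysrc and (for
 * SPI-carrying protocols) byspi tables and arm its timers.  If the new
 * entry collided with an existing chain and there are now more states
 * than buckets, the table grow work is scheduled, up to the
 * xfrm_state_hashmax limit.
 */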
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family);

	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
	xfrm_state_hold(x);

	h = xfrm_src_hash(&x->props.saddr, x->props.family);

	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
	xfrm_state_hold(x);

	if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				  x->props.family);

		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
	}

	if (!mod_timer(&x->timer, jiffies + HZ))
		xfrm_state_hold(x);

	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
		xfrm_state_hold(x);

	wake_up(&km_waitq);

	xfrm_state_num++;

	if (x->bydst.next != NULL &&
	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
	    xfrm_state_num > xfrm_state_hmask)
		schedule_work(&xfrm_hash_work);
}

void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);

	xfrm_flush_all_bundles();
}
EXPORT_SYMBOL(xfrm_state_insert);

/* xfrm_state_lock is held */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
	unsigned int h = xfrm_dst_hash(daddr, family);
	struct hlist_node *entry;
	struct xfrm_state *x;

	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.reqid  != reqid ||
		    x->props.mode   != mode ||
		    x->props.family != family ||
		    x->km.state     != XFRM_STATE_ACQ ||
		    x->id.spi       != 0)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4    != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
					     (struct in6_addr *)daddr) ||
			    !ipv6_addr_equal((struct in6_addr *)
					     x->props.saddr.a6,
					     (struct in6_addr *)saddr))
				continue;
			break;
		};

		xfrm_state_hold(x);
		return x;
	}

	if (!create)
		return NULL;

	x = xfrm_state_alloc();
	if (likely(x)) {
		switch (family) {
		case AF_INET:
			x->sel.daddr.a4 = daddr->a4;
			x->sel.saddr.a4 = saddr->a4;
			x->sel.prefixlen_d = 32;
			x->sel.prefixlen_s = 32;
			x->props.saddr.a4 = saddr->a4;
			x->id.daddr.a4 = daddr->a4;
			break;

		case AF_INET6:
			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
				       (struct in6_addr *)daddr);
			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
				       (struct in6_addr *)saddr);
			x->sel.prefixlen_d = 128;
			x->sel.prefixlen_s = 128;
			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
				       (struct in6_addr *)saddr);
			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
				       (struct in6_addr *)daddr);
			break;
		};

		x->km.state = XFRM_STATE_ACQ;
		x->id.proto = proto;
		x->props.family = family;
		x->props.mode = mode;
		x->props.reqid = reqid;
		x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
		xfrm_state_hold(x);
		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
		add_timer(&x->timer);
		xfrm_state_hold(x);
		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
		h = xfrm_src_hash(saddr, family);
		xfrm_state_hold(x);
		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
		wake_up(&km_waitq);
	}

	return x;
}

static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);

int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (!err)
		xfrm_flush_all_bundles();

	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);

int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		if (!mod_timer(&x1->timer, jiffies + HZ))
			xfrm_state_hold(x1);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);

int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		x->km.state = XFRM_STATE_EXPIRED;
		if (!mod_timer(&x->timer, jiffies))
			xfrm_state_hold(x);
		return -EINVAL;
	}

	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit)) {
		x->km.dying = 1;
		km_state_expired(x, 0, 0);
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);

static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
{
	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
		- skb_headroom(skb);

	if (nhead > 0)
		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);

	/* Check tail too... */
	return 0;
}

int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);
	if (err < 0)
		goto err;
	err = xfrm_state_check_space(x, skb);
err:
	return err;
}
EXPORT_SYMBOL(xfrm_state_check);

struct xfrm_state *
xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
		  unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_state_lookup(daddr, spi, proto, family);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup);

struct xfrm_state *
xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
			 u8 proto, unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);

struct xfrm_state *
xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
	      xfrm_address_t *daddr, xfrm_address_t *saddr,
	      int create, unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
	spin_unlock_bh(&xfrm_state_lock);

	return x;
}
EXPORT_SYMBOL(xfrm_find_acq);

#ifdef CONFIG_XFRM_SUB_POLICY
int
xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
	       unsigned short family)
{
	int err = 0;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	if (afinfo->tmpl_sort)
		err = afinfo->tmpl_sort(dst, src, n);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_tmpl_sort);

int
xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
		unsigned short family)
{
	int err = 0;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	if (afinfo->state_sort)
		err = afinfo->state_sort(dst, src, n);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_state_sort);
#endif

/* Silly enough, but I'm lazy to build resolution list */

static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
	int i;

	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;

		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (x->km.seq == seq &&
			    x->km.state == XFRM_STATE_ACQ) {
				xfrm_state_hold(x);
				return x;
			}
		}
	}
	return NULL;
}

struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_find_acq_byseq(seq);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_find_acq_byseq);

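/* Editor's note: acquire sequence numbers are handed out from a single
 * global counter; the (++acqseq ? : ++acqseq) expression simply skips
 * the value 0 when the counter wraps, so 0 can keep meaning "no
 * sequence number".
 */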
u32 xfrm_get_acqseq(void)
{
	u32 res;
	static u32 acqseq;
	static DEFINE_SPINLOCK(acqseq_lock);

	spin_lock_bh(&acqseq_lock);
	res = (++acqseq ? : ++acqseq);
	spin_unlock_bh(&acqseq_lock);
	return res;
}
EXPORT_SYMBOL(xfrm_get_acqseq);

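/* Editor's note: pick an SPI for a state that does not have one yet.
 * minspi is used as-is when the caller passes an exact value, otherwise
 * random values in [minspi, maxspi] are probed until an unused one is
 * found.  On success the state is hashed into the byspi table and
 * km_waitq listeners are woken.
 */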
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	unsigned int h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		for (h=0; h<maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);

int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto))
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);


void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated one of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
		xfrm_state_hold(x);
		x->xflags &= ~XFRM_TIME_DEFER;
	}
}
EXPORT_SYMBOL(xfrm_replay_notify);

static void xfrm_replay_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;

	spin_lock(&x->lock);

	if (x->km.state == XFRM_STATE_VALID) {
		if (xfrm_aevent_is_on())
			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
		else
			x->xflags |= XFRM_TIME_DEFER;
	}

	spin_unlock(&x->lock);
	xfrm_state_put(x);
}

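/* Editor's note: inbound anti-replay protection.  x->replay.seq tracks
 * the highest sequence number accepted so far and x->replay.bitmap
 * records which of the preceding replay_window sequence numbers have
 * been seen.  xfrm_replay_check() rejects zero, too-old and duplicate
 * numbers; xfrm_replay_advance() shifts the window forward on a new
 * high or sets the corresponding bit otherwise.
 */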
int xfrm_replay_check(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	if (unlikely(seq == 0))
		return -EINVAL;

	if (likely(seq > x->replay.seq))
		return 0;

	diff = x->replay.seq - seq;
	if (diff >= x->props.replay_window) {
		x->stats.replay_window++;
		return -EINVAL;
	}

	if (x->replay.bitmap & (1U << diff)) {
		x->stats.replay++;
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_replay_check);

void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	if (seq > x->replay.seq) {
		diff = seq - x->replay.seq;
		if (diff < x->props.replay_window)
			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
		else
			x->replay.bitmap = 1;
		x->replay.seq = seq;
	} else {
		diff = x->replay.seq - seq;
		x->replay.bitmap |= (1U << diff);
	}

	if (xfrm_aevent_is_on())
		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
EXPORT_SYMBOL(xfrm_replay_advance);

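/* Editor's note: registered key managers (e.g. af_key and xfrm_user) are
 * kept on xfrm_km_list under xfrm_km_lock; the km_* helpers below fan
 * events such as acquires, expiries and NAT mapping changes out to all
 * of them.
 */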
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);

void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
{
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		if (km->notify_policy)
			km->notify_policy(xp, dir, c);
	read_unlock(&xfrm_km_lock);
}

void km_state_notify(struct xfrm_state *x, struct km_event *c)
{
	struct xfrm_mgr *km;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		if (km->notify)
			km->notify(x, c);
	read_unlock(&xfrm_km_lock);
}

EXPORT_SYMBOL(km_policy_notify);
EXPORT_SYMBOL(km_state_notify);

void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
{
	struct km_event c;

	c.data.hard = hard;
	c.pid = pid;
	c.event = XFRM_MSG_EXPIRE;
	km_state_notify(x, &c);

	if (hard)
		wake_up(&km_waitq);
}

EXPORT_SYMBOL(km_state_expired);
/*
 * We send to all registered managers regardless of failure
 * We are happy with one success
*/
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
{
	int err = -EINVAL, acqret;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
		if (!acqret)
			err = acqret;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_query);

int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->new_mapping)
			err = km->new_mapping(x, ipaddr, sport);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_new_mapping);

void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
{
	struct km_event c;

	c.data.hard = hard;
	c.pid = pid;
	c.event = XFRM_MSG_POLEXPIRE;
	km_policy_notify(pol, dir, &c);

	if (hard)
		wake_up(&km_waitq);
}
EXPORT_SYMBOL(km_policy_expired);

int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
{
	int err = -EINVAL;
	int ret;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->report) {
			ret = km->report(proto, sel, addr);
			if (!ret)
				err = ret;
		}
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_report);

int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);

int xfrm_register_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_add_tail(&km->list, &xfrm_km_list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_register_km);

int xfrm_unregister_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_del(&km->list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);

int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_state_afinfo_lock);
	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else
		xfrm_state_afinfo[afinfo->family] = afinfo;
	write_unlock_bh(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);

int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_state_afinfo_lock);
	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			xfrm_state_afinfo[afinfo->family] = NULL;
	}
	write_unlock_bh(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}

static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}

/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);

/*
 * This function is NOT optimal.  For example, with ESP it will give an
 * MTU that's usually two bytes short of being optimal.  However, it will
 * usually give an answer that's a multiple of 4 provided the input is
 * also a multiple of 4.
 */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
	int res = mtu;

	res -= x->props.header_len;

	for (;;) {
		int m = res;

		if (m < 68)
			return 68;

		spin_lock_bh(&x->lock);
		if (x->km.state == XFRM_STATE_VALID &&
		    x->type && x->type->get_max_size)
			m = x->type->get_max_size(x, m);
		else
			m += x->props.header_len;
		spin_unlock_bh(&x->lock);

		if (m <= mtu)
			break;
		res -= (m - mtu);
	}

	return res;
}

int xfrm_init_state(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	int family = x->props.family;
	int err;

	err = -EAFNOSUPPORT;
	afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		goto error;

	err = 0;
	if (afinfo->init_flags)
		err = afinfo->init_flags(x);

	xfrm_state_put_afinfo(afinfo);

	if (err)
		goto error;

	err = -EPROTONOSUPPORT;
	x->type = xfrm_get_type(x->id.proto, family);
	if (x->type == NULL)
		goto error;

	err = x->type->init_state(x);
	if (err)
		goto error;

	x->mode = xfrm_get_mode(x->props.mode, family);
	if (x->mode == NULL)
		goto error;

	x->km.state = XFRM_STATE_VALID;

error:
	return err;
}

EXPORT_SYMBOL(xfrm_init_state);

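/* Editor's note: boot-time setup.  Each table starts with 8 buckets
 * (xfrm_state_hmask = 7); the tables are grown later by xfrm_hash_work
 * as states accumulate.
 */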
void __init xfrm_state_init(void)
{
	unsigned int sz;

	sz = sizeof(struct hlist_head) * 8;

	xfrm_state_bydst = xfrm_state_hash_alloc(sz);
	xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
	xfrm_state_byspi = xfrm_state_hash_alloc(sz);
	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
}
1686