]>
Commit | Line | Data |
---|---|---|
/*
 * xfrm4_policy.c
 *
 * Changes:
 *	Kazunori MIYAZAWA @USAGI
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific portion
 *
 */
10 | ||
66cdb3ca HX |
11 | #include <linux/err.h> |
12 | #include <linux/kernel.h> | |
aabc9761 | 13 | #include <linux/inetdevice.h> |
cc9ff19d | 14 | #include <linux/if_tunnel.h> |
45ff5a3f | 15 | #include <net/dst.h> |
1da177e4 LT |
16 | #include <net/xfrm.h> |
17 | #include <net/ip.h> | |
18 | ||
1da177e4 LT |
19 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; |
20 | ||
8f01cb08 DM |
21 | static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, |
22 | int tos, | |
23 | const xfrm_address_t *saddr, | |
24 | const xfrm_address_t *daddr) | |
1da177e4 | 25 | { |
66cdb3ca | 26 | struct rtable *rt; |
a1e59abf | 27 | |
8f01cb08 DM |
28 | memset(fl4, 0, sizeof(*fl4)); |
29 | fl4->daddr = daddr->a4; | |
30 | fl4->flowi4_tos = tos; | |
66cdb3ca | 31 | if (saddr) |
8f01cb08 | 32 | fl4->saddr = saddr->a4; |
66cdb3ca | 33 | |
8f01cb08 | 34 | rt = __ip_route_output_key(net, fl4); |
b23dd4fe DM |
35 | if (!IS_ERR(rt)) |
36 | return &rt->dst; | |
37 | ||
38 | return ERR_CAST(rt); | |
66cdb3ca HX |
39 | } |
40 | ||
8f01cb08 DM |
41 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, |
42 | const xfrm_address_t *saddr, | |
43 | const xfrm_address_t *daddr) | |
44 | { | |
45 | struct flowi4 fl4; | |
46 | ||
47 | return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr); | |
48 | } | |
49 | ||
fbda33b2 AD |
50 | static int xfrm4_get_saddr(struct net *net, |
51 | xfrm_address_t *saddr, xfrm_address_t *daddr) | |
66cdb3ca HX |
52 | { |
53 | struct dst_entry *dst; | |
8f01cb08 | 54 | struct flowi4 fl4; |
66cdb3ca | 55 | |
8f01cb08 | 56 | dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr); |
66cdb3ca HX |
57 | if (IS_ERR(dst)) |
58 | return -EHOSTUNREACH; | |
59 | ||
8f01cb08 | 60 | saddr->a4 = fl4.saddr; |
66cdb3ca HX |
61 | dst_release(dst); |
62 | return 0; | |
a1e59abf PM |
63 | } |
64 | ||
05d84025 | 65 | static int xfrm4_get_tos(const struct flowi *fl) |
1da177e4 | 66 | { |
7e1dc7b6 | 67 | return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */ |
25ee3286 | 68 | } |
1da177e4 | 69 | |
a1b05140 MN |
/*
 * afinfo ->init_path hook. Nothing needs to be set up on the path for
 * IPv4, so all arguments are ignored and success (0) is always returned.
 */
static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
			   int nfheader_len)
{
	(void)path;
	(void)dst;
	(void)nfheader_len;

	return 0;
}
75 | ||
87c1e12b | 76 | static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, |
0c7b3eef | 77 | const struct flowi *fl) |
25ee3286 HX |
78 | { |
79 | struct rtable *rt = (struct rtable *)xdst->route; | |
7e1dc7b6 | 80 | const struct flowi4 *fl4 = &fl->u.ip4; |
1da177e4 | 81 | |
b7323396 YZ |
82 | xdst->u.rt.rt_key_dst = fl4->daddr; |
83 | xdst->u.rt.rt_key_src = fl4->saddr; | |
84 | xdst->u.rt.rt_key_tos = fl4->flowi4_tos; | |
85 | xdst->u.rt.rt_route_iif = fl4->flowi4_iif; | |
86 | xdst->u.rt.rt_iif = fl4->flowi4_iif; | |
87 | xdst->u.rt.rt_oif = fl4->flowi4_oif; | |
88 | xdst->u.rt.rt_mark = fl4->flowi4_mark; | |
1da177e4 | 89 | |
25ee3286 HX |
90 | xdst->u.dst.dev = dev; |
91 | dev_hold(dev); | |
43372262 | 92 | |
97bab73f | 93 | rt_transfer_peer(&xdst->u.rt, rt); |
66cdb3ca | 94 | |
25ee3286 HX |
95 | /* Sheit... I remember I did this right. Apparently, |
96 | * it was magically lost, so this code needs audit */ | |
97 | xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | | |
98 | RTCF_LOCAL); | |
99 | xdst->u.rt.rt_type = rt->rt_type; | |
100 | xdst->u.rt.rt_src = rt->rt_src; | |
101 | xdst->u.rt.rt_dst = rt->rt_dst; | |
102 | xdst->u.rt.rt_gateway = rt->rt_gateway; | |
1da177e4 | 103 | |
1da177e4 | 104 | return 0; |
1da177e4 LT |
105 | } |
106 | ||
107 | static void | |
d5422efe | 108 | _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) |
1da177e4 | 109 | { |
b71d1d42 | 110 | const struct iphdr *iph = ip_hdr(skb); |
d56f90a7 | 111 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; |
7e1dc7b6 | 112 | struct flowi4 *fl4 = &fl->u.ip4; |
1da177e4 | 113 | |
7e1dc7b6 DM |
114 | memset(fl4, 0, sizeof(struct flowi4)); |
115 | fl4->flowi4_mark = skb->mark; | |
44b451f1 | 116 | |
56f8a75c | 117 | if (!ip_is_fragment(iph)) { |
1da177e4 LT |
118 | switch (iph->protocol) { |
119 | case IPPROTO_UDP: | |
ba4e58ec | 120 | case IPPROTO_UDPLITE: |
1da177e4 LT |
121 | case IPPROTO_TCP: |
122 | case IPPROTO_SCTP: | |
9e999993 | 123 | case IPPROTO_DCCP: |
c615c9f3 WY |
124 | if (xprth + 4 < skb->data || |
125 | pskb_may_pull(skb, xprth + 4 - skb->data)) { | |
8c689a6e | 126 | __be16 *ports = (__be16 *)xprth; |
1da177e4 | 127 | |
9cce96df DM |
128 | fl4->fl4_sport = ports[!!reverse]; |
129 | fl4->fl4_dport = ports[!reverse]; | |
1da177e4 LT |
130 | } |
131 | break; | |
132 | ||
133 | case IPPROTO_ICMP: | |
134 | if (pskb_may_pull(skb, xprth + 2 - skb->data)) { | |
135 | u8 *icmp = xprth; | |
136 | ||
9cce96df DM |
137 | fl4->fl4_icmp_type = icmp[0]; |
138 | fl4->fl4_icmp_code = icmp[1]; | |
1da177e4 LT |
139 | } |
140 | break; | |
141 | ||
142 | case IPPROTO_ESP: | |
143 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { | |
4324a174 | 144 | __be32 *ehdr = (__be32 *)xprth; |
1da177e4 | 145 | |
9cce96df | 146 | fl4->fl4_ipsec_spi = ehdr[0]; |
1da177e4 LT |
147 | } |
148 | break; | |
149 | ||
150 | case IPPROTO_AH: | |
151 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { | |
5e73ea1a | 152 | __be32 *ah_hdr = (__be32 *)xprth; |
1da177e4 | 153 | |
9cce96df | 154 | fl4->fl4_ipsec_spi = ah_hdr[1]; |
1da177e4 LT |
155 | } |
156 | break; | |
157 | ||
158 | case IPPROTO_COMP: | |
159 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { | |
4324a174 | 160 | __be16 *ipcomp_hdr = (__be16 *)xprth; |
1da177e4 | 161 | |
9cce96df | 162 | fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); |
1da177e4 LT |
163 | } |
164 | break; | |
cc9ff19d TT |
165 | |
166 | case IPPROTO_GRE: | |
167 | if (pskb_may_pull(skb, xprth + 12 - skb->data)) { | |
168 | __be16 *greflags = (__be16 *)xprth; | |
169 | __be32 *gre_hdr = (__be32 *)xprth; | |
170 | ||
171 | if (greflags[0] & GRE_KEY) { | |
172 | if (greflags[0] & GRE_CSUM) | |
173 | gre_hdr++; | |
9cce96df | 174 | fl4->fl4_gre_key = gre_hdr[1]; |
cc9ff19d TT |
175 | } |
176 | } | |
177 | break; | |
178 | ||
1da177e4 | 179 | default: |
9cce96df | 180 | fl4->fl4_ipsec_spi = 0; |
1da177e4 | 181 | break; |
3ff50b79 | 182 | } |
1da177e4 | 183 | } |
7e1dc7b6 DM |
184 | fl4->flowi4_proto = iph->protocol; |
185 | fl4->daddr = reverse ? iph->saddr : iph->daddr; | |
186 | fl4->saddr = reverse ? iph->daddr : iph->saddr; | |
187 | fl4->flowi4_tos = iph->tos; | |
1da177e4 LT |
188 | } |
189 | ||
569d3645 | 190 | static inline int xfrm4_garbage_collect(struct dst_ops *ops) |
1da177e4 | 191 | { |
d7c7544c AD |
192 | struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); |
193 | ||
194 | xfrm4_policy_afinfo.garbage_collect(net); | |
fc66f95c | 195 | return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); |
1da177e4 LT |
196 | } |
197 | ||
198 | static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) | |
199 | { | |
200 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; | |
201 | struct dst_entry *path = xdst->route; | |
202 | ||
203 | path->ops->update_pmtu(path, mtu); | |
204 | } | |
205 | ||
aabc9761 HX |
206 | static void xfrm4_dst_destroy(struct dst_entry *dst) |
207 | { | |
208 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; | |
209 | ||
62fa8a84 DM |
210 | dst_destroy_metrics_generic(dst); |
211 | ||
97bab73f DM |
212 | if (rt_has_peer(&xdst->u.rt)) { |
213 | struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt); | |
214 | inet_putpeer(peer); | |
215 | } | |
62fa8a84 | 216 | |
aabc9761 HX |
217 | xfrm_dst_destroy(xdst); |
218 | } | |
219 | ||
/*
 * dst_ops ->ifdown hook: only an actual unregistration of @dev
 * (@unregister non-zero) is passed on to xfrm_dst_ifdown(); otherwise this
 * is a no-op.
 */
static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			     int unregister)
{
	if (unregister)
		xfrm_dst_ifdown(dst, dev);
}
228 | ||
1da177e4 LT |
229 | static struct dst_ops xfrm4_dst_ops = { |
230 | .family = AF_INET, | |
09640e63 | 231 | .protocol = cpu_to_be16(ETH_P_IP), |
1da177e4 LT |
232 | .gc = xfrm4_garbage_collect, |
233 | .update_pmtu = xfrm4_update_pmtu, | |
62fa8a84 | 234 | .cow_metrics = dst_cow_metrics_generic, |
aabc9761 HX |
235 | .destroy = xfrm4_dst_destroy, |
236 | .ifdown = xfrm4_dst_ifdown, | |
862b82c6 | 237 | .local_out = __ip_local_out, |
1da177e4 | 238 | .gc_thresh = 1024, |
1da177e4 LT |
239 | }; |
240 | ||
241 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | |
242 | .family = AF_INET, | |
1da177e4 LT |
243 | .dst_ops = &xfrm4_dst_ops, |
244 | .dst_lookup = xfrm4_dst_lookup, | |
a1e59abf | 245 | .get_saddr = xfrm4_get_saddr, |
1da177e4 | 246 | .decode_session = _decode_session4, |
25ee3286 | 247 | .get_tos = xfrm4_get_tos, |
a1b05140 | 248 | .init_path = xfrm4_init_path, |
25ee3286 | 249 | .fill_dst = xfrm4_fill_dst, |
2774c131 | 250 | .blackhole_route = ipv4_blackhole_route, |
1da177e4 LT |
251 | }; |
252 | ||
#ifdef CONFIG_SYSCTL
/*
 * Sysctl table with a single integer knob, "xfrm4_gc_thresh", backed by the
 * gc_thresh of init_net's xfrm4 dst_ops. It is registered under "net/ipv4"
 * by xfrm4_init().
 */
static struct ctl_table xfrm4_policy_table[] = {
	{
		.procname       = "xfrm4_gc_thresh",
		.data           = &init_net.xfrm.xfrm4_dst_ops.gc_thresh,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec,
	},
	{ }	/* terminating empty entry */
};

/* Handle returned when the table above is registered; used to unregister. */
static struct ctl_table_header *sysctl_hdr;
#endif
a44a4a00 | 267 | |
1da177e4 LT |
268 | static void __init xfrm4_policy_init(void) |
269 | { | |
270 | xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); | |
271 | } | |
272 | ||
273 | static void __exit xfrm4_policy_fini(void) | |
274 | { | |
f816700a | 275 | #ifdef CONFIG_SYSCTL |
a44a4a00 NH |
276 | if (sysctl_hdr) |
277 | unregister_net_sysctl_table(sysctl_hdr); | |
f816700a | 278 | #endif |
1da177e4 LT |
279 | xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); |
280 | } | |
281 | ||
a33bc5c1 | 282 | void __init xfrm4_init(int rt_max_size) |
1da177e4 | 283 | { |
a33bc5c1 NH |
284 | /* |
285 | * Select a default value for the gc_thresh based on the main route | |
286 | * table hash size. It seems to me the worst case scenario is when | |
287 | * we have ipsec operating in transport mode, in which we create a | |
288 | * dst_entry per socket. The xfrm gc algorithm starts trying to remove | |
289 | * entries at gc_thresh, and prevents new allocations as 2*gc_thresh | |
290 | * so lets set an initial xfrm gc_thresh value at the rt_max_size/2. | |
291 | * That will let us store an ipsec connection per route table entry, | |
292 | * and start cleaning when were 1/2 full | |
293 | */ | |
294 | xfrm4_dst_ops.gc_thresh = rt_max_size/2; | |
fc66f95c | 295 | dst_entries_init(&xfrm4_dst_ops); |
d7c7544c AD |
296 | |
297 | xfrm4_state_init(); | |
298 | xfrm4_policy_init(); | |
f816700a | 299 | #ifdef CONFIG_SYSCTL |
ec8f23ce EB |
300 | sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4", |
301 | xfrm4_policy_table); | |
f816700a | 302 | #endif |
1da177e4 LT |
303 | } |
304 |