]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * xfrm4_policy.c | |
3 | * | |
4 | * Changes: | |
5 | * Kazunori MIYAZAWA @USAGI | |
6 | * YOSHIFUJI Hideaki @USAGI | |
7 | * Split up af-specific portion | |
8 | * | |
9 | */ | |
10 | ||
11 | #include <linux/err.h> | |
12 | #include <linux/kernel.h> | |
13 | #include <linux/inetdevice.h> | |
14 | #include <linux/if_tunnel.h> | |
15 | #include <net/dst.h> | |
16 | #include <net/xfrm.h> | |
17 | #include <net/ip.h> | |
18 | ||
19 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; | |
20 | ||
21 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, | |
22 | const xfrm_address_t *saddr, | |
23 | const xfrm_address_t *daddr) | |
24 | { | |
25 | struct flowi fl = { | |
26 | .fl4_dst = daddr->a4, | |
27 | .fl4_tos = tos, | |
28 | }; | |
29 | struct dst_entry *dst; | |
30 | struct rtable *rt; | |
31 | int err; | |
32 | ||
33 | if (saddr) | |
34 | fl.fl4_src = saddr->a4; | |
35 | ||
36 | err = __ip_route_output_key(net, &rt, &fl); | |
37 | dst = &rt->dst; | |
38 | if (err) | |
39 | dst = ERR_PTR(err); | |
40 | return dst; | |
41 | } | |
42 | ||
43 | static int xfrm4_get_saddr(struct net *net, | |
44 | xfrm_address_t *saddr, xfrm_address_t *daddr) | |
45 | { | |
46 | struct dst_entry *dst; | |
47 | struct rtable *rt; | |
48 | ||
49 | dst = xfrm4_dst_lookup(net, 0, NULL, daddr); | |
50 | if (IS_ERR(dst)) | |
51 | return -EHOSTUNREACH; | |
52 | ||
53 | rt = (struct rtable *)dst; | |
54 | saddr->a4 = rt->rt_src; | |
55 | dst_release(dst); | |
56 | return 0; | |
57 | } | |
58 | ||
59 | static int xfrm4_get_tos(const struct flowi *fl) | |
60 | { | |
61 | return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ | |
62 | } | |
63 | ||
/*
 * IPv4 implementation of the init_path hook: there is nothing to set up,
 * so all arguments are ignored and 0 is always returned.
 */
static int xfrm4_init_path(struct xfrm_dst *path,
			   struct dst_entry *dst,
			   int nfheader_len)
{
	return 0;
}
69 | ||
70 | static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |
71 | const struct flowi *fl) | |
72 | { | |
73 | struct rtable *rt = (struct rtable *)xdst->route; | |
74 | ||
75 | xdst->u.rt.fl = *fl; | |
76 | ||
77 | xdst->u.dst.dev = dev; | |
78 | dev_hold(dev); | |
79 | ||
80 | xdst->u.rt.peer = rt->peer; | |
81 | if (rt->peer) | |
82 | atomic_inc(&rt->peer->refcnt); | |
83 | ||
84 | /* Sheit... I remember I did this right. Apparently, | |
85 | * it was magically lost, so this code needs audit */ | |
86 | xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | | |
87 | RTCF_LOCAL); | |
88 | xdst->u.rt.rt_type = rt->rt_type; | |
89 | xdst->u.rt.rt_src = rt->rt_src; | |
90 | xdst->u.rt.rt_dst = rt->rt_dst; | |
91 | xdst->u.rt.rt_gateway = rt->rt_gateway; | |
92 | xdst->u.rt.rt_spec_dst = rt->rt_spec_dst; | |
93 | ||
94 | return 0; | |
95 | } | |
96 | ||
97 | static void | |
98 | _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |
99 | { | |
100 | struct iphdr *iph = ip_hdr(skb); | |
101 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; | |
102 | ||
103 | memset(fl, 0, sizeof(struct flowi)); | |
104 | fl->mark = skb->mark; | |
105 | ||
106 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { | |
107 | switch (iph->protocol) { | |
108 | case IPPROTO_UDP: | |
109 | case IPPROTO_UDPLITE: | |
110 | case IPPROTO_TCP: | |
111 | case IPPROTO_SCTP: | |
112 | case IPPROTO_DCCP: | |
113 | if (xprth + 4 < skb->data || | |
114 | pskb_may_pull(skb, xprth + 4 - skb->data)) { | |
115 | __be16 *ports = (__be16 *)xprth; | |
116 | ||
117 | fl->fl_ip_sport = ports[!!reverse]; | |
118 | fl->fl_ip_dport = ports[!reverse]; | |
119 | } | |
120 | break; | |
121 | ||
122 | case IPPROTO_ICMP: | |
123 | if (pskb_may_pull(skb, xprth + 2 - skb->data)) { | |
124 | u8 *icmp = xprth; | |
125 | ||
126 | fl->fl_icmp_type = icmp[0]; | |
127 | fl->fl_icmp_code = icmp[1]; | |
128 | } | |
129 | break; | |
130 | ||
131 | case IPPROTO_ESP: | |
132 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { | |
133 | __be32 *ehdr = (__be32 *)xprth; | |
134 | ||
135 | fl->fl_ipsec_spi = ehdr[0]; | |
136 | } | |
137 | break; | |
138 | ||
139 | case IPPROTO_AH: | |
140 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { | |
141 | __be32 *ah_hdr = (__be32*)xprth; | |
142 | ||
143 | fl->fl_ipsec_spi = ah_hdr[1]; | |
144 | } | |
145 | break; | |
146 | ||
147 | case IPPROTO_COMP: | |
148 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { | |
149 | __be16 *ipcomp_hdr = (__be16 *)xprth; | |
150 | ||
151 | fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); | |
152 | } | |
153 | break; | |
154 | ||
155 | case IPPROTO_GRE: | |
156 | if (pskb_may_pull(skb, xprth + 12 - skb->data)) { | |
157 | __be16 *greflags = (__be16 *)xprth; | |
158 | __be32 *gre_hdr = (__be32 *)xprth; | |
159 | ||
160 | if (greflags[0] & GRE_KEY) { | |
161 | if (greflags[0] & GRE_CSUM) | |
162 | gre_hdr++; | |
163 | fl->fl_gre_key = gre_hdr[1]; | |
164 | } | |
165 | } | |
166 | break; | |
167 | ||
168 | default: | |
169 | fl->fl_ipsec_spi = 0; | |
170 | break; | |
171 | } | |
172 | } | |
173 | fl->proto = iph->protocol; | |
174 | fl->fl4_dst = reverse ? iph->saddr : iph->daddr; | |
175 | fl->fl4_src = reverse ? iph->daddr : iph->saddr; | |
176 | fl->fl4_tos = iph->tos; | |
177 | } | |
178 | ||
179 | static inline int xfrm4_garbage_collect(struct dst_ops *ops) | |
180 | { | |
181 | struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); | |
182 | ||
183 | xfrm4_policy_afinfo.garbage_collect(net); | |
184 | return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); | |
185 | } | |
186 | ||
187 | static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) | |
188 | { | |
189 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; | |
190 | struct dst_entry *path = xdst->route; | |
191 | ||
192 | path->ops->update_pmtu(path, mtu); | |
193 | } | |
194 | ||
195 | static void xfrm4_dst_destroy(struct dst_entry *dst) | |
196 | { | |
197 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; | |
198 | ||
199 | dst_destroy_metrics_generic(dst); | |
200 | ||
201 | if (likely(xdst->u.rt.peer)) | |
202 | inet_putpeer(xdst->u.rt.peer); | |
203 | ||
204 | xfrm_dst_destroy(xdst); | |
205 | } | |
206 | ||
/*
 * Interface-down hook: only acts when the device is being unregistered,
 * in which case the common xfrm_dst_ifdown() handler is invoked.
 */
static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			     int unregister)
{
	if (unregister)
		xfrm_dst_ifdown(dst, dev);
}
215 | ||
216 | static struct dst_ops xfrm4_dst_ops = { | |
217 | .family = AF_INET, | |
218 | .protocol = cpu_to_be16(ETH_P_IP), | |
219 | .gc = xfrm4_garbage_collect, | |
220 | .update_pmtu = xfrm4_update_pmtu, | |
221 | .cow_metrics = dst_cow_metrics_generic, | |
222 | .destroy = xfrm4_dst_destroy, | |
223 | .ifdown = xfrm4_dst_ifdown, | |
224 | .local_out = __ip_local_out, | |
225 | .gc_thresh = 1024, | |
226 | }; | |
227 | ||
228 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | |
229 | .family = AF_INET, | |
230 | .dst_ops = &xfrm4_dst_ops, | |
231 | .dst_lookup = xfrm4_dst_lookup, | |
232 | .get_saddr = xfrm4_get_saddr, | |
233 | .decode_session = _decode_session4, | |
234 | .get_tos = xfrm4_get_tos, | |
235 | .init_path = xfrm4_init_path, | |
236 | .fill_dst = xfrm4_fill_dst, | |
237 | .blackhole_route = ipv4_blackhole_route, | |
238 | }; | |
239 | ||
#ifdef CONFIG_SYSCTL
/* Handle for the registered sysctl table; set in xfrm4_init(). */
static struct ctl_table_header *sysctl_hdr;

/*
 * Sysctl entries for IPv4 xfrm: a single integer knob, "xfrm4_gc_thresh",
 * backed by init_net's xfrm4 dst_ops gc_thresh field.
 */
static struct ctl_table xfrm4_policy_table[] = {
	{
		.procname	= "xfrm4_gc_thresh",
		.mode		= 0644,
		.maxlen		= sizeof(int),
		.data		= &init_net.xfrm.xfrm4_dst_ops.gc_thresh,
		.proc_handler	= proc_dointvec,
	},
	{ }	/* terminating empty entry */
};
#endif
254 | ||
255 | static void __init xfrm4_policy_init(void) | |
256 | { | |
257 | xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); | |
258 | } | |
259 | ||
260 | static void __exit xfrm4_policy_fini(void) | |
261 | { | |
262 | #ifdef CONFIG_SYSCTL | |
263 | if (sysctl_hdr) | |
264 | unregister_net_sysctl_table(sysctl_hdr); | |
265 | #endif | |
266 | xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); | |
267 | } | |
268 | ||
269 | void __init xfrm4_init(int rt_max_size) | |
270 | { | |
271 | /* | |
272 | * Select a default value for the gc_thresh based on the main route | |
273 | * table hash size. It seems to me the worst case scenario is when | |
274 | * we have ipsec operating in transport mode, in which we create a | |
275 | * dst_entry per socket. The xfrm gc algorithm starts trying to remove | |
276 | * entries at gc_thresh, and prevents new allocations as 2*gc_thresh | |
277 | * so lets set an initial xfrm gc_thresh value at the rt_max_size/2. | |
278 | * That will let us store an ipsec connection per route table entry, | |
279 | * and start cleaning when were 1/2 full | |
280 | */ | |
281 | xfrm4_dst_ops.gc_thresh = rt_max_size/2; | |
282 | dst_entries_init(&xfrm4_dst_ops); | |
283 | ||
284 | xfrm4_state_init(); | |
285 | xfrm4_policy_init(); | |
286 | #ifdef CONFIG_SYSCTL | |
287 | sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, | |
288 | xfrm4_policy_table); | |
289 | #endif | |
290 | } | |
291 |