// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;

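/* Select a usable source address on @out toward the packet's next hop
 * and set up source NAT to it, carrying over the proto range from the
 * original masquerade range.
 */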
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

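	/* Remember the output interface; the notifiers below use this to
	 * flush these conntracks when the device or its address goes away.
	 */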
	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range. */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto = range->min_proto;
	newrange.max_proto = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

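/* Iterator callback: true if conntrack @i was masqueraded via the
 * interface whose ifindex was passed in @ifindex.
 */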
static int device_cmp(struct nf_conn *i, void *ifindex)
{
	const struct nf_conn_nat *nat = nfct_nat(i);

	if (!nat)
		return 0;
	return nat->masq_index == (int)(long)ifindex;
}

static int masq_device_event(struct notifier_block *this,
			     unsigned long event,
			     void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_DOWN) {
		/* Device was downed. Search entire table for
		 * conntracks which were associated with that device,
		 * and forget them.
		 */

		nf_ct_iterate_cleanup_net(net, device_cmp,
					  (void *)(long)dev->ifindex, 0, 0);
	}

	return NOTIFY_DONE;
}

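/* Iterator callback: true if @ct was masqueraded via the device that
 * owns the in_ifaddr passed in @ptr and its reply-direction
 * destination is that address.
 */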
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)dev->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ifa->ifa_address == tuple->dst.u3.ip;
}

static int masq_inet_event(struct notifier_block *this,
			   unsigned long event,
			   void *ptr)
{
	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
	struct net *net = dev_net(idev->dev);

	/* The masq_dev_notifier will catch the case of the device going
	 * down. So if the inetdev is dead and being destroyed we have
	 * no work to do. Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
	if (idev->dead)
		return NOTIFY_DONE;

	if (event == NETDEV_DOWN)
		nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);

	return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
	.notifier_call = masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
	.notifier_call = masq_inet_event,
};

#if IS_ENABLED(CONFIG_IPV6)
static atomic_t v6_worker_count __read_mostly;

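/* ipv6 can be built as a module, so source address selection has to go
 * through the nf_ipv6_ops indirection rather than calling
 * ipv6_dev_get_saddr() directly.
 */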
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

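/* IPv6 counterpart of nf_nat_masquerade_ipv4: pick a source address on
 * @out toward the packet's destination and set up source NAT to it.
 */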
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6 = src;
	newrange.max_addr.in6 = src;
	newrange.min_proto = range->min_proto;
	newrange.max_proto = range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

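/* State for one deferred cleanup: which netns to walk, which address
 * was removed, and the ifindex it lived on.
 */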
struct masq_dev_work {
	struct work_struct work;
	struct net *net;
	struct in6_addr addr;
	int ifindex;
};

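/* Iterator callback: IPv6 analog of inet_cmp(), matching against the
 * address and ifindex captured in the work item.
 */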
static int inet6_cmp(struct nf_conn *ct, void *work)
{
	struct masq_dev_work *w = (struct masq_dev_work *)work;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)w->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
}

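/* Runs in process context; performs the conntrack table walk that the
 * atomic inet6addr notifier cannot, then drops the references taken
 * when the work item was queued.
 */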
static void iterate_cleanup_work(struct work_struct *work)
{
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);

	put_net(w->net);
	kfree(w);
	atomic_dec(&v6_worker_count);
	module_put(THIS_MODULE);
}

/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
 *
 * Defer it to the system workqueue.
 *
 * As we can have 'a lot' of inet_events (depending on amount of ipv6
 * addresses being deleted), we also need to limit work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	struct masq_dev_work *w;
	struct net *net;

	if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;
	net = maybe_get_net(dev_net(dev));
	if (!net)
		return NOTIFY_DONE;

	if (!try_module_get(THIS_MODULE))
		goto err_module;

	w = kmalloc(sizeof(*w), GFP_ATOMIC);
	if (w) {
		atomic_inc(&v6_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = dev->ifindex;
		w->net = net;
		w->addr = ifa->addr;
		schedule_work(&w->work);

		return NOTIFY_DONE;
	}

	module_put(THIS_MODULE);
err_module:
	put_net(net);
	return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
	.notifier_call = masq_inet6_event,
};

static int nf_nat_masquerade_ipv6_register_notifier(void)
{
	return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif

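/* Notifier registration is refcounted so that multiple masquerade users
 * can share one set of notifiers: only the first caller registers them,
 * and only the last caller unregisters them again.
 */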
int nf_nat_masquerade_inet_register_notifiers(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	ret = nf_nat_masquerade_ipv6_register_notifier();
	if (ret)
		goto err_unreg_inet;

	mutex_unlock(&masq_mutex);
	return ret;
err_unreg_inet:
	unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);

void nf_nat_masquerade_inet_unregister_notifiers(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifiers still have clients */
	if (--masq_refcnt > 0)
		goto out_unlock;

	unregister_netdevice_notifier(&masq_dev_notifier);
	unregister_inetaddr_notifier(&masq_inet_notifier);
#if IS_ENABLED(CONFIG_IPV6)
	unregister_inet6addr_notifier(&masq_inet6_notifier);
#endif
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);