2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
43 #ifdef CONFIG_IP_VS_IPV6
45 #include <net/ip6_route.h>
47 #include <net/route.h>
49 #include <net/genetlink.h>
51 #include <asm/uaccess.h>
53 #include <net/ip_vs.h>
/*
 * Mutex serializing the IPVS sockopt handlers. A sleeping lock is used
 * because [gs]etsockopt may sleep.
 */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Read/write lock for the service tables. */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
61 /* sysctl variables */
#ifdef CONFIG_IP_VS_DEBUG
/* Debug level reported by ip_vs_get_debug_level(); starts at 0. */
static int sysctl_ip_vs_debug_level = 0;

/*
 * Return the current value of sysctl_ip_vs_debug_level.
 * Only compiled when CONFIG_IP_VS_DEBUG is defined.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
/* Forward declaration; the definition appears later in this file. */
static void __ip_vs_del_service(struct ip_vs_service *svc);
77 #ifdef CONFIG_IP_VS_IPV6
78 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
79 static bool __ip_vs_addr_is_local_v6(struct net
*net
,
80 const struct in6_addr
*addr
)
85 struct dst_entry
*dst
= ip6_route_output(net
, NULL
, &fl6
);
88 is_local
= !dst
->error
&& dst
->dev
&& (dst
->dev
->flags
& IFF_LOOPBACK
);
97 * update_defense_level is called from keventd and from sysctl,
98 * so it needs to protect itself from softirqs
100 static void update_defense_level(struct netns_ipvs
*ipvs
)
103 static int old_secure_tcp
= 0;
108 /* we only count free and buffered memory (in pages) */
110 availmem
= i
.freeram
+ i
.bufferram
;
111 /* however in linux 2.5 the i.bufferram is total page cache size,
113 /* si_swapinfo(&i); */
114 /* availmem = availmem - (i.totalswap - i.freeswap); */
116 nomem
= (availmem
< ipvs
->sysctl_amemthresh
);
121 spin_lock(&ipvs
->dropentry_lock
);
122 switch (ipvs
->sysctl_drop_entry
) {
124 atomic_set(&ipvs
->dropentry
, 0);
128 atomic_set(&ipvs
->dropentry
, 1);
129 ipvs
->sysctl_drop_entry
= 2;
131 atomic_set(&ipvs
->dropentry
, 0);
136 atomic_set(&ipvs
->dropentry
, 1);
138 atomic_set(&ipvs
->dropentry
, 0);
139 ipvs
->sysctl_drop_entry
= 1;
143 atomic_set(&ipvs
->dropentry
, 1);
146 spin_unlock(&ipvs
->dropentry_lock
);
149 spin_lock(&ipvs
->droppacket_lock
);
150 switch (ipvs
->sysctl_drop_packet
) {
156 ipvs
->drop_rate
= ipvs
->drop_counter
157 = ipvs
->sysctl_amemthresh
/
158 (ipvs
->sysctl_amemthresh
-availmem
);
159 ipvs
->sysctl_drop_packet
= 2;
166 ipvs
->drop_rate
= ipvs
->drop_counter
167 = ipvs
->sysctl_amemthresh
/
168 (ipvs
->sysctl_amemthresh
-availmem
);
171 ipvs
->sysctl_drop_packet
= 1;
175 ipvs
->drop_rate
= ipvs
->sysctl_am_droprate
;
178 spin_unlock(&ipvs
->droppacket_lock
);
181 spin_lock(&ipvs
->securetcp_lock
);
182 switch (ipvs
->sysctl_secure_tcp
) {
184 if (old_secure_tcp
>= 2)
189 if (old_secure_tcp
< 2)
191 ipvs
->sysctl_secure_tcp
= 2;
193 if (old_secure_tcp
>= 2)
199 if (old_secure_tcp
< 2)
202 if (old_secure_tcp
>= 2)
204 ipvs
->sysctl_secure_tcp
= 1;
208 if (old_secure_tcp
< 2)
212 old_secure_tcp
= ipvs
->sysctl_secure_tcp
;
214 ip_vs_protocol_timeout_change(ipvs
,
215 ipvs
->sysctl_secure_tcp
> 1);
216 spin_unlock(&ipvs
->securetcp_lock
);
/*
 *	Timer for checking the defense
 *
 *	Delay handed to schedule_delayed_work() each time
 *	defense_work_handler() re-arms itself. The expansion is
 *	parenthesized so the macro stays correct when it is used inside
 *	a larger expression (e.g. as a divisor).
 */
#define DEFENSE_TIMER_PERIOD	(1 * HZ)
227 static void defense_work_handler(struct work_struct
*work
)
229 struct netns_ipvs
*ipvs
=
230 container_of(work
, struct netns_ipvs
, defense_work
.work
);
232 update_defense_level(ipvs
);
233 if (atomic_read(&ipvs
->dropentry
))
234 ip_vs_random_dropentry(ipvs
->net
);
235 schedule_delayed_work(&ipvs
->defense_work
, DEFENSE_TIMER_PERIOD
);
240 ip_vs_use_count_inc(void)
242 return try_module_get(THIS_MODULE
);
246 ip_vs_use_count_dec(void)
248 module_put(THIS_MODULE
);
253 * Hash table: for virtual service lookups
/*
 * Geometry of the virtual-service hash tables: 2^IP_VS_SVC_TAB_BITS
 * buckets, plus the mask used to reduce a hash value to a bucket index.
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
259 /* the service table hashed by <protocol, addr, port> */
260 static struct list_head ip_vs_svc_table
[IP_VS_SVC_TAB_SIZE
];
261 /* the service table hashed by fwmark */
262 static struct list_head ip_vs_svc_fwm_table
[IP_VS_SVC_TAB_SIZE
];
266 * Returns hash value for virtual service
268 static inline unsigned int
269 ip_vs_svc_hashkey(struct net
*net
, int af
, unsigned int proto
,
270 const union nf_inet_addr
*addr
, __be16 port
)
272 register unsigned int porth
= ntohs(port
);
273 __be32 addr_fold
= addr
->ip
;
276 #ifdef CONFIG_IP_VS_IPV6
278 addr_fold
= addr
->ip6
[0]^addr
->ip6
[1]^
279 addr
->ip6
[2]^addr
->ip6
[3];
281 ahash
= ntohl(addr_fold
);
282 ahash
^= ((size_t) net
>> 8);
284 return (proto
^ ahash
^ (porth
>> IP_VS_SVC_TAB_BITS
) ^ porth
) &
289 * Returns hash value of fwmark for virtual service lookup
291 static inline unsigned int ip_vs_svc_fwm_hashkey(struct net
*net
, __u32 fwmark
)
293 return (((size_t)net
>>8) ^ fwmark
) & IP_VS_SVC_TAB_MASK
;
297 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
298 * or in the ip_vs_svc_fwm_table by fwmark.
299 * Should be called with locked tables.
301 static int ip_vs_svc_hash(struct ip_vs_service
*svc
)
305 if (svc
->flags
& IP_VS_SVC_F_HASHED
) {
306 pr_err("%s(): request for already hashed, called from %pF\n",
307 __func__
, __builtin_return_address(0));
311 if (svc
->fwmark
== 0) {
313 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
315 hash
= ip_vs_svc_hashkey(svc
->net
, svc
->af
, svc
->protocol
,
316 &svc
->addr
, svc
->port
);
317 list_add(&svc
->s_list
, &ip_vs_svc_table
[hash
]);
320 * Hash it by fwmark in svc_fwm_table
322 hash
= ip_vs_svc_fwm_hashkey(svc
->net
, svc
->fwmark
);
323 list_add(&svc
->f_list
, &ip_vs_svc_fwm_table
[hash
]);
326 svc
->flags
|= IP_VS_SVC_F_HASHED
;
327 /* increase its refcnt because it is referenced by the svc table */
328 atomic_inc(&svc
->refcnt
);
334 * Unhashes a service from svc_table / svc_fwm_table.
335 * Should be called with locked tables.
337 static int ip_vs_svc_unhash(struct ip_vs_service
*svc
)
339 if (!(svc
->flags
& IP_VS_SVC_F_HASHED
)) {
340 pr_err("%s(): request for unhash flagged, called from %pF\n",
341 __func__
, __builtin_return_address(0));
345 if (svc
->fwmark
== 0) {
346 /* Remove it from the svc_table table */
347 list_del(&svc
->s_list
);
349 /* Remove it from the svc_fwm_table table */
350 list_del(&svc
->f_list
);
353 svc
->flags
&= ~IP_VS_SVC_F_HASHED
;
354 atomic_dec(&svc
->refcnt
);
360 * Get service by {netns, proto,addr,port} in the service table.
362 static inline struct ip_vs_service
*
363 __ip_vs_service_find(struct net
*net
, int af
, __u16 protocol
,
364 const union nf_inet_addr
*vaddr
, __be16 vport
)
367 struct ip_vs_service
*svc
;
369 /* Check for "full" addressed entries */
370 hash
= ip_vs_svc_hashkey(net
, af
, protocol
, vaddr
, vport
);
372 list_for_each_entry(svc
, &ip_vs_svc_table
[hash
], s_list
){
374 && ip_vs_addr_equal(af
, &svc
->addr
, vaddr
)
375 && (svc
->port
== vport
)
376 && (svc
->protocol
== protocol
)
377 && net_eq(svc
->net
, net
)) {
388 * Get service by {fwmark} in the service table.
390 static inline struct ip_vs_service
*
391 __ip_vs_svc_fwm_find(struct net
*net
, int af
, __u32 fwmark
)
394 struct ip_vs_service
*svc
;
396 /* Check for fwmark addressed entries */
397 hash
= ip_vs_svc_fwm_hashkey(net
, fwmark
);
399 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[hash
], f_list
) {
400 if (svc
->fwmark
== fwmark
&& svc
->af
== af
401 && net_eq(svc
->net
, net
)) {
410 struct ip_vs_service
*
411 ip_vs_service_get(struct net
*net
, int af
, __u32 fwmark
, __u16 protocol
,
412 const union nf_inet_addr
*vaddr
, __be16 vport
)
414 struct ip_vs_service
*svc
;
415 struct netns_ipvs
*ipvs
= net_ipvs(net
);
417 read_lock(&__ip_vs_svc_lock
);
420 * Check the table hashed by fwmark first
423 svc
= __ip_vs_svc_fwm_find(net
, af
, fwmark
);
429 * Check the table hashed by <protocol,addr,port>
430 * for "full" addressed entries
432 svc
= __ip_vs_service_find(net
, af
, protocol
, vaddr
, vport
);
435 && protocol
== IPPROTO_TCP
436 && atomic_read(&ipvs
->ftpsvc_counter
)
437 && (vport
== FTPDATA
|| ntohs(vport
) >= PROT_SOCK
)) {
439 * Check if ftp service entry exists, the packet
440 * might belong to FTP data connections.
442 svc
= __ip_vs_service_find(net
, af
, protocol
, vaddr
, FTPPORT
);
446 && atomic_read(&ipvs
->nullsvc_counter
)) {
448 * Check if the catch-all port (port zero) exists
450 svc
= __ip_vs_service_find(net
, af
, protocol
, vaddr
, 0);
455 atomic_inc(&svc
->usecnt
);
456 read_unlock(&__ip_vs_svc_lock
);
458 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
459 fwmark
, ip_vs_proto_name(protocol
),
460 IP_VS_DBG_ADDR(af
, vaddr
), ntohs(vport
),
461 svc
? "hit" : "not hit");
468 __ip_vs_bind_svc(struct ip_vs_dest
*dest
, struct ip_vs_service
*svc
)
470 atomic_inc(&svc
->refcnt
);
475 __ip_vs_unbind_svc(struct ip_vs_dest
*dest
)
477 struct ip_vs_service
*svc
= dest
->svc
;
480 if (atomic_dec_and_test(&svc
->refcnt
)) {
481 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
483 IP_VS_DBG_ADDR(svc
->af
, &svc
->addr
),
484 ntohs(svc
->port
), atomic_read(&svc
->usecnt
));
485 free_percpu(svc
->stats
.cpustats
);
492 * Returns hash value for real service
494 static inline unsigned int ip_vs_rs_hashkey(int af
,
495 const union nf_inet_addr
*addr
,
498 register unsigned int porth
= ntohs(port
);
499 __be32 addr_fold
= addr
->ip
;
501 #ifdef CONFIG_IP_VS_IPV6
503 addr_fold
= addr
->ip6
[0]^addr
->ip6
[1]^
504 addr
->ip6
[2]^addr
->ip6
[3];
507 return (ntohl(addr_fold
)^(porth
>>IP_VS_RTAB_BITS
)^porth
)
511 /* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
512 static void ip_vs_rs_hash(struct netns_ipvs
*ipvs
, struct ip_vs_dest
*dest
)
516 if (dest
->in_rs_table
)
520 * Hash by proto,addr,port,
521 * which are the parameters of the real service.
523 hash
= ip_vs_rs_hashkey(dest
->af
, &dest
->addr
, dest
->port
);
525 hlist_add_head_rcu(&dest
->d_list
, &ipvs
->rs_table
[hash
]);
526 dest
->in_rs_table
= 1;
529 /* Unhash ip_vs_dest from rs_table. */
530 static void ip_vs_rs_unhash(struct ip_vs_dest
*dest
)
533 * Remove it from the rs_table table.
535 if (dest
->in_rs_table
) {
536 hlist_del_rcu(&dest
->d_list
);
537 dest
->in_rs_table
= 0;
541 /* Check if real service by <proto,addr,port> is present */
542 bool ip_vs_has_real_service(struct net
*net
, int af
, __u16 protocol
,
543 const union nf_inet_addr
*daddr
, __be16 dport
)
545 struct netns_ipvs
*ipvs
= net_ipvs(net
);
547 struct ip_vs_dest
*dest
;
549 /* Check for "full" addressed entries */
550 hash
= ip_vs_rs_hashkey(af
, daddr
, dport
);
553 hlist_for_each_entry_rcu(dest
, &ipvs
->rs_table
[hash
], d_list
) {
554 if (dest
->port
== dport
&&
556 ip_vs_addr_equal(af
, &dest
->addr
, daddr
) &&
557 (dest
->protocol
== protocol
|| dest
->vfwmark
)) {
569 * Lookup destination by {addr,port} in the given service
571 static struct ip_vs_dest
*
572 ip_vs_lookup_dest(struct ip_vs_service
*svc
, const union nf_inet_addr
*daddr
,
575 struct ip_vs_dest
*dest
;
578 * Find the destination for the given service
580 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
581 if ((dest
->af
== svc
->af
)
582 && ip_vs_addr_equal(svc
->af
, &dest
->addr
, daddr
)
583 && (dest
->port
== dport
)) {
/*
 * Find destination by {daddr,dport,vaddr,protocol}
 * Created to be used in ip_vs_process_message() in
 * the backup synchronization daemon. It finds the
 * destination to be bound to the received connection.
 */
599 struct ip_vs_dest
*ip_vs_find_dest(struct net
*net
, int af
,
600 const union nf_inet_addr
*daddr
,
602 const union nf_inet_addr
*vaddr
,
603 __be16 vport
, __u16 protocol
, __u32 fwmark
,
606 struct ip_vs_dest
*dest
;
607 struct ip_vs_service
*svc
;
610 svc
= ip_vs_service_get(net
, af
, fwmark
, protocol
, vaddr
, vport
);
613 if (fwmark
&& (flags
& IP_VS_CONN_F_FWD_MASK
) != IP_VS_CONN_F_MASQ
)
615 dest
= ip_vs_lookup_dest(svc
, daddr
, port
);
617 dest
= ip_vs_lookup_dest(svc
, daddr
, port
^ dport
);
619 ip_vs_dest_hold(dest
);
620 ip_vs_service_put(svc
);
624 void ip_vs_dest_dst_rcu_free(struct rcu_head
*head
)
626 struct ip_vs_dest_dst
*dest_dst
= container_of(head
,
627 struct ip_vs_dest_dst
,
630 dst_release(dest_dst
->dst_cache
);
634 /* Release dest_dst and dst_cache for dest in user context */
635 static void __ip_vs_dst_cache_reset(struct ip_vs_dest
*dest
)
637 struct ip_vs_dest_dst
*old
;
639 old
= rcu_dereference_protected(dest
->dest_dst
, 1);
641 RCU_INIT_POINTER(dest
->dest_dst
, NULL
);
642 call_rcu(&old
->rcu_head
, ip_vs_dest_dst_rcu_free
);
647 * Lookup dest by {svc,addr,port} in the destination trash.
648 * The destination trash is used to hold the destinations that are removed
649 * from the service table but are still referenced by some conn entries.
650 * The reason to add the destination trash is when the dest is temporary
651 * down (either by administrator or by monitor program), the dest can be
652 * picked back from the trash, the remaining connections to the dest can
653 * continue, and the counting information of the dest is also useful for
656 static struct ip_vs_dest
*
657 ip_vs_trash_get_dest(struct ip_vs_service
*svc
, const union nf_inet_addr
*daddr
,
660 struct ip_vs_dest
*dest
, *nxt
;
661 struct netns_ipvs
*ipvs
= net_ipvs(svc
->net
);
664 * Find the destination in trash
666 list_for_each_entry_safe(dest
, nxt
, &ipvs
->dest_trash
, n_list
) {
667 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
670 IP_VS_DBG_ADDR(svc
->af
, &dest
->addr
),
672 atomic_read(&dest
->refcnt
));
673 if (dest
->af
== svc
->af
&&
674 ip_vs_addr_equal(svc
->af
, &dest
->addr
, daddr
) &&
675 dest
->port
== dport
&&
676 dest
->vfwmark
== svc
->fwmark
&&
677 dest
->protocol
== svc
->protocol
&&
679 (ip_vs_addr_equal(svc
->af
, &dest
->vaddr
, &svc
->addr
) &&
680 dest
->vport
== svc
->port
))) {
686 * Try to purge the destination from trash if not referenced
688 if (atomic_read(&dest
->refcnt
) == 1) {
689 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
692 IP_VS_DBG_ADDR(svc
->af
, &dest
->addr
),
694 list_del(&dest
->n_list
);
695 __ip_vs_dst_cache_reset(dest
);
696 __ip_vs_unbind_svc(dest
);
697 free_percpu(dest
->stats
.cpustats
);
698 kfree_rcu(dest
, rcu_head
);
707 * Clean up all the destinations in the trash
708 * Called by the ip_vs_control_cleanup()
710 * When the ip_vs_control_clearup is activated by ipvs module exit,
711 * the service tables must have been flushed and all the connections
712 * are expired, and the refcnt of each destination in the trash must
713 * be 1, so we simply release them here.
715 static void ip_vs_trash_cleanup(struct net
*net
)
717 struct ip_vs_dest
*dest
, *nxt
;
718 struct netns_ipvs
*ipvs
= net_ipvs(net
);
720 list_for_each_entry_safe(dest
, nxt
, &ipvs
->dest_trash
, n_list
) {
721 list_del(&dest
->n_list
);
722 __ip_vs_dst_cache_reset(dest
);
723 __ip_vs_unbind_svc(dest
);
724 free_percpu(dest
->stats
.cpustats
);
725 kfree_rcu(dest
, rcu_head
);
730 ip_vs_copy_stats(struct ip_vs_stats_user
*dst
, struct ip_vs_stats
*src
)
732 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
734 spin_lock_bh(&src
->lock
);
736 IP_VS_SHOW_STATS_COUNTER(conns
);
737 IP_VS_SHOW_STATS_COUNTER(inpkts
);
738 IP_VS_SHOW_STATS_COUNTER(outpkts
);
739 IP_VS_SHOW_STATS_COUNTER(inbytes
);
740 IP_VS_SHOW_STATS_COUNTER(outbytes
);
742 ip_vs_read_estimator(dst
, src
);
744 spin_unlock_bh(&src
->lock
);
748 ip_vs_zero_stats(struct ip_vs_stats
*stats
)
750 spin_lock_bh(&stats
->lock
);
752 /* get current counters as zero point, rates are zeroed */
754 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
756 IP_VS_ZERO_STATS_COUNTER(conns
);
757 IP_VS_ZERO_STATS_COUNTER(inpkts
);
758 IP_VS_ZERO_STATS_COUNTER(outpkts
);
759 IP_VS_ZERO_STATS_COUNTER(inbytes
);
760 IP_VS_ZERO_STATS_COUNTER(outbytes
);
762 ip_vs_zero_estimator(stats
);
764 spin_unlock_bh(&stats
->lock
);
768 * Update a destination in the given service
771 __ip_vs_update_dest(struct ip_vs_service
*svc
, struct ip_vs_dest
*dest
,
772 struct ip_vs_dest_user_kern
*udest
, int add
)
774 struct netns_ipvs
*ipvs
= net_ipvs(svc
->net
);
777 /* set the weight and the flags */
778 atomic_set(&dest
->weight
, udest
->weight
);
779 conn_flags
= udest
->conn_flags
& IP_VS_CONN_F_DEST_MASK
;
780 conn_flags
|= IP_VS_CONN_F_INACTIVE
;
782 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
783 if ((conn_flags
& IP_VS_CONN_F_FWD_MASK
) != IP_VS_CONN_F_MASQ
) {
784 conn_flags
|= IP_VS_CONN_F_NOOUTPUT
;
787 * Put the real service in rs_table if not present.
788 * For now only for NAT!
790 ip_vs_rs_hash(ipvs
, dest
);
792 atomic_set(&dest
->conn_flags
, conn_flags
);
794 /* bind the service */
796 __ip_vs_bind_svc(dest
, svc
);
798 if (dest
->svc
!= svc
) {
799 __ip_vs_unbind_svc(dest
);
800 ip_vs_zero_stats(&dest
->stats
);
801 __ip_vs_bind_svc(dest
, svc
);
805 /* set the dest status flags */
806 dest
->flags
|= IP_VS_DEST_F_AVAILABLE
;
808 if (udest
->u_threshold
== 0 || udest
->u_threshold
> dest
->u_threshold
)
809 dest
->flags
&= ~IP_VS_DEST_F_OVERLOAD
;
810 dest
->u_threshold
= udest
->u_threshold
;
811 dest
->l_threshold
= udest
->l_threshold
;
813 spin_lock_bh(&dest
->dst_lock
);
814 __ip_vs_dst_cache_reset(dest
);
815 spin_unlock_bh(&dest
->dst_lock
);
818 ip_vs_start_estimator(svc
->net
, &dest
->stats
);
820 write_lock_bh(&__ip_vs_svc_lock
);
822 /* Wait until all other svc users go away */
823 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
826 list_add(&dest
->n_list
, &svc
->destinations
);
828 if (svc
->scheduler
->add_dest
)
829 svc
->scheduler
->add_dest(svc
, dest
);
831 if (svc
->scheduler
->upd_dest
)
832 svc
->scheduler
->upd_dest(svc
, dest
);
835 /* call the update_service, because server weight may be changed */
836 if (svc
->scheduler
->update_service
)
837 svc
->scheduler
->update_service(svc
);
839 write_unlock_bh(&__ip_vs_svc_lock
);
844 * Create a destination for the given service
847 ip_vs_new_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
,
848 struct ip_vs_dest
**dest_p
)
850 struct ip_vs_dest
*dest
;
855 #ifdef CONFIG_IP_VS_IPV6
856 if (svc
->af
== AF_INET6
) {
857 atype
= ipv6_addr_type(&udest
->addr
.in6
);
858 if ((!(atype
& IPV6_ADDR_UNICAST
) ||
859 atype
& IPV6_ADDR_LINKLOCAL
) &&
860 !__ip_vs_addr_is_local_v6(svc
->net
, &udest
->addr
.in6
))
865 atype
= inet_addr_type(svc
->net
, udest
->addr
.ip
);
866 if (atype
!= RTN_LOCAL
&& atype
!= RTN_UNICAST
)
870 dest
= kzalloc(sizeof(struct ip_vs_dest
), GFP_KERNEL
);
874 dest
->stats
.cpustats
= alloc_percpu(struct ip_vs_cpu_stats
);
875 if (!dest
->stats
.cpustats
)
879 dest
->protocol
= svc
->protocol
;
880 dest
->vaddr
= svc
->addr
;
881 dest
->vport
= svc
->port
;
882 dest
->vfwmark
= svc
->fwmark
;
883 ip_vs_addr_copy(svc
->af
, &dest
->addr
, &udest
->addr
);
884 dest
->port
= udest
->port
;
886 atomic_set(&dest
->activeconns
, 0);
887 atomic_set(&dest
->inactconns
, 0);
888 atomic_set(&dest
->persistconns
, 0);
889 atomic_set(&dest
->refcnt
, 1);
891 INIT_HLIST_NODE(&dest
->d_list
);
892 spin_lock_init(&dest
->dst_lock
);
893 spin_lock_init(&dest
->stats
.lock
);
894 __ip_vs_update_dest(svc
, dest
, udest
, 1);
908 * Add a destination into an existing service
911 ip_vs_add_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
)
913 struct ip_vs_dest
*dest
;
914 union nf_inet_addr daddr
;
915 __be16 dport
= udest
->port
;
920 if (udest
->weight
< 0) {
921 pr_err("%s(): server weight less than zero\n", __func__
);
925 if (udest
->l_threshold
> udest
->u_threshold
) {
926 pr_err("%s(): lower threshold is higher than upper threshold\n",
931 ip_vs_addr_copy(svc
->af
, &daddr
, &udest
->addr
);
934 * Check if the dest already exists in the list
936 dest
= ip_vs_lookup_dest(svc
, &daddr
, dport
);
939 IP_VS_DBG(1, "%s(): dest already exists\n", __func__
);
944 * Check if the dest already exists in the trash and
945 * is from the same service
947 dest
= ip_vs_trash_get_dest(svc
, &daddr
, dport
);
950 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
951 "dest->refcnt=%d, service %u/%s:%u\n",
952 IP_VS_DBG_ADDR(svc
->af
, &daddr
), ntohs(dport
),
953 atomic_read(&dest
->refcnt
),
955 IP_VS_DBG_ADDR(svc
->af
, &dest
->vaddr
),
959 * Get the destination from the trash
961 list_del(&dest
->n_list
);
963 __ip_vs_update_dest(svc
, dest
, udest
, 1);
967 * Allocate and initialize the dest structure
969 ret
= ip_vs_new_dest(svc
, udest
, &dest
);
978 * Edit a destination in the given service
981 ip_vs_edit_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
)
983 struct ip_vs_dest
*dest
;
984 union nf_inet_addr daddr
;
985 __be16 dport
= udest
->port
;
989 if (udest
->weight
< 0) {
990 pr_err("%s(): server weight less than zero\n", __func__
);
994 if (udest
->l_threshold
> udest
->u_threshold
) {
995 pr_err("%s(): lower threshold is higher than upper threshold\n",
1000 ip_vs_addr_copy(svc
->af
, &daddr
, &udest
->addr
);
1003 * Lookup the destination list
1005 dest
= ip_vs_lookup_dest(svc
, &daddr
, dport
);
1008 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__
);
1012 __ip_vs_update_dest(svc
, dest
, udest
, 0);
1020 * Delete a destination (must be already unlinked from the service)
1022 static void __ip_vs_del_dest(struct net
*net
, struct ip_vs_dest
*dest
)
1024 struct netns_ipvs
*ipvs
= net_ipvs(net
);
1026 ip_vs_stop_estimator(net
, &dest
->stats
);
1029 * Remove it from the d-linked list with the real services.
1031 ip_vs_rs_unhash(dest
);
1034 * Decrease the refcnt of the dest, and free the dest
1035 * if nobody refers to it (refcnt=0). Otherwise, throw
1036 * the destination into the trash.
1038 if (atomic_dec_and_test(&dest
->refcnt
)) {
1039 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1041 IP_VS_DBG_ADDR(dest
->af
, &dest
->addr
),
1043 __ip_vs_dst_cache_reset(dest
);
1044 /* simply decrease svc->refcnt here, let the caller check
1045 and release the service if nobody refers to it.
1046 Only user context can release destination and service,
1047 and only one user context can update virtual service at a
1048 time, so the operation here is OK */
1049 atomic_dec(&dest
->svc
->refcnt
);
1050 free_percpu(dest
->stats
.cpustats
);
1051 kfree_rcu(dest
, rcu_head
);
1053 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1054 "dest->refcnt=%d\n",
1055 IP_VS_DBG_ADDR(dest
->af
, &dest
->addr
),
1057 atomic_read(&dest
->refcnt
));
1058 list_add(&dest
->n_list
, &ipvs
->dest_trash
);
1059 ip_vs_dest_hold(dest
);
1065 * Unlink a destination from the given service
1067 static void __ip_vs_unlink_dest(struct ip_vs_service
*svc
,
1068 struct ip_vs_dest
*dest
,
1071 dest
->flags
&= ~IP_VS_DEST_F_AVAILABLE
;
1074 * Remove it from the d-linked destination list.
1076 list_del(&dest
->n_list
);
1079 if (svcupd
&& svc
->scheduler
->del_dest
)
1080 svc
->scheduler
->del_dest(svc
, dest
);
1083 * Call the update_service function of its scheduler
1085 if (svcupd
&& svc
->scheduler
->update_service
)
1086 svc
->scheduler
->update_service(svc
);
1091 * Delete a destination server in the given service
1094 ip_vs_del_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
)
1096 struct ip_vs_dest
*dest
;
1097 __be16 dport
= udest
->port
;
1101 dest
= ip_vs_lookup_dest(svc
, &udest
->addr
, dport
);
1104 IP_VS_DBG(1, "%s(): destination not found!\n", __func__
);
1108 write_lock_bh(&__ip_vs_svc_lock
);
1111 * Wait until all other svc users go away.
1113 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1116 * Unlink dest from the service
1118 __ip_vs_unlink_dest(svc
, dest
, 1);
1120 write_unlock_bh(&__ip_vs_svc_lock
);
1123 * Delete the destination
1125 __ip_vs_del_dest(svc
->net
, dest
);
1134 * Add a service into the service hash table
1137 ip_vs_add_service(struct net
*net
, struct ip_vs_service_user_kern
*u
,
1138 struct ip_vs_service
**svc_p
)
1141 struct ip_vs_scheduler
*sched
= NULL
;
1142 struct ip_vs_pe
*pe
= NULL
;
1143 struct ip_vs_service
*svc
= NULL
;
1144 struct netns_ipvs
*ipvs
= net_ipvs(net
);
1146 /* increase the module use count */
1147 ip_vs_use_count_inc();
1149 /* Lookup the scheduler by 'u->sched_name' */
1150 sched
= ip_vs_scheduler_get(u
->sched_name
);
1151 if (sched
== NULL
) {
1152 pr_info("Scheduler module ip_vs_%s not found\n", u
->sched_name
);
1157 if (u
->pe_name
&& *u
->pe_name
) {
1158 pe
= ip_vs_pe_getbyname(u
->pe_name
);
1160 pr_info("persistence engine module ip_vs_pe_%s "
1161 "not found\n", u
->pe_name
);
1167 #ifdef CONFIG_IP_VS_IPV6
1168 if (u
->af
== AF_INET6
&& (u
->netmask
< 1 || u
->netmask
> 128)) {
1174 svc
= kzalloc(sizeof(struct ip_vs_service
), GFP_KERNEL
);
1176 IP_VS_DBG(1, "%s(): no memory\n", __func__
);
1180 svc
->stats
.cpustats
= alloc_percpu(struct ip_vs_cpu_stats
);
1181 if (!svc
->stats
.cpustats
) {
1186 /* I'm the first user of the service */
1187 atomic_set(&svc
->usecnt
, 0);
1188 atomic_set(&svc
->refcnt
, 0);
1191 svc
->protocol
= u
->protocol
;
1192 ip_vs_addr_copy(svc
->af
, &svc
->addr
, &u
->addr
);
1193 svc
->port
= u
->port
;
1194 svc
->fwmark
= u
->fwmark
;
1195 svc
->flags
= u
->flags
;
1196 svc
->timeout
= u
->timeout
* HZ
;
1197 svc
->netmask
= u
->netmask
;
1200 INIT_LIST_HEAD(&svc
->destinations
);
1201 rwlock_init(&svc
->sched_lock
);
1202 spin_lock_init(&svc
->stats
.lock
);
1204 /* Bind the scheduler */
1205 ret
= ip_vs_bind_scheduler(svc
, sched
);
1210 /* Bind the ct retriever */
1211 ip_vs_bind_pe(svc
, pe
);
1214 /* Update the virtual service counters */
1215 if (svc
->port
== FTPPORT
)
1216 atomic_inc(&ipvs
->ftpsvc_counter
);
1217 else if (svc
->port
== 0)
1218 atomic_inc(&ipvs
->nullsvc_counter
);
1220 ip_vs_start_estimator(net
, &svc
->stats
);
1222 /* Count only IPv4 services for old get/setsockopt interface */
1223 if (svc
->af
== AF_INET
)
1224 ipvs
->num_services
++;
1226 /* Hash the service into the service table */
1227 write_lock_bh(&__ip_vs_svc_lock
);
1228 ip_vs_svc_hash(svc
);
1229 write_unlock_bh(&__ip_vs_svc_lock
);
1232 /* Now there is a service - full throttle */
1239 ip_vs_unbind_scheduler(svc
);
1242 ip_vs_app_inc_put(svc
->inc
);
1245 if (svc
->stats
.cpustats
)
1246 free_percpu(svc
->stats
.cpustats
);
1249 ip_vs_scheduler_put(sched
);
1252 /* decrease the module use count */
1253 ip_vs_use_count_dec();
1260 * Edit a service and bind it with a new scheduler
1263 ip_vs_edit_service(struct ip_vs_service
*svc
, struct ip_vs_service_user_kern
*u
)
1265 struct ip_vs_scheduler
*sched
, *old_sched
;
1266 struct ip_vs_pe
*pe
= NULL
, *old_pe
= NULL
;
1270 * Lookup the scheduler, by 'u->sched_name'
1272 sched
= ip_vs_scheduler_get(u
->sched_name
);
1273 if (sched
== NULL
) {
1274 pr_info("Scheduler module ip_vs_%s not found\n", u
->sched_name
);
1279 if (u
->pe_name
&& *u
->pe_name
) {
1280 pe
= ip_vs_pe_getbyname(u
->pe_name
);
1282 pr_info("persistence engine module ip_vs_pe_%s "
1283 "not found\n", u
->pe_name
);
1290 #ifdef CONFIG_IP_VS_IPV6
1291 if (u
->af
== AF_INET6
&& (u
->netmask
< 1 || u
->netmask
> 128)) {
1297 write_lock_bh(&__ip_vs_svc_lock
);
1300 * Wait until all other svc users go away.
1302 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1305 * Set the flags and timeout value
1307 svc
->flags
= u
->flags
| IP_VS_SVC_F_HASHED
;
1308 svc
->timeout
= u
->timeout
* HZ
;
1309 svc
->netmask
= u
->netmask
;
1311 old_sched
= svc
->scheduler
;
1312 if (sched
!= old_sched
) {
1314 * Unbind the old scheduler
1316 if ((ret
= ip_vs_unbind_scheduler(svc
))) {
1322 * Bind the new scheduler
1324 if ((ret
= ip_vs_bind_scheduler(svc
, sched
))) {
1326 * If ip_vs_bind_scheduler fails, restore the old
1328 * The main reason of failure is out of memory.
1330 * The question is if the old scheduler can be
1331 * restored all the time. TODO: if it cannot be
1332 * restored some time, we must delete the service,
1333 * otherwise the system may crash.
1335 ip_vs_bind_scheduler(svc
, old_sched
);
1343 ip_vs_unbind_pe(svc
);
1344 ip_vs_bind_pe(svc
, pe
);
1348 write_unlock_bh(&__ip_vs_svc_lock
);
1350 ip_vs_scheduler_put(old_sched
);
1351 ip_vs_pe_put(old_pe
);
1357 * Delete a service from the service list
1358 * - The service must be unlinked, unlocked and not referenced!
1359 * - We are called under _bh lock
1361 static void __ip_vs_del_service(struct ip_vs_service
*svc
)
1363 struct ip_vs_dest
*dest
, *nxt
;
1364 struct ip_vs_scheduler
*old_sched
;
1365 struct ip_vs_pe
*old_pe
;
1366 struct netns_ipvs
*ipvs
= net_ipvs(svc
->net
);
1368 pr_info("%s: enter\n", __func__
);
1370 /* Count only IPv4 services for old get/setsockopt interface */
1371 if (svc
->af
== AF_INET
)
1372 ipvs
->num_services
--;
1374 ip_vs_stop_estimator(svc
->net
, &svc
->stats
);
1376 /* Unbind scheduler */
1377 old_sched
= svc
->scheduler
;
1378 ip_vs_unbind_scheduler(svc
);
1379 ip_vs_scheduler_put(old_sched
);
1381 /* Unbind persistence engine */
1383 ip_vs_unbind_pe(svc
);
1384 ip_vs_pe_put(old_pe
);
1386 /* Unbind app inc */
1388 ip_vs_app_inc_put(svc
->inc
);
1393 * Unlink the whole destination list
1395 list_for_each_entry_safe(dest
, nxt
, &svc
->destinations
, n_list
) {
1396 __ip_vs_unlink_dest(svc
, dest
, 0);
1397 __ip_vs_del_dest(svc
->net
, dest
);
1401 * Update the virtual service counters
1403 if (svc
->port
== FTPPORT
)
1404 atomic_dec(&ipvs
->ftpsvc_counter
);
1405 else if (svc
->port
== 0)
1406 atomic_dec(&ipvs
->nullsvc_counter
);
1409 * Free the service if nobody refers to it
1411 if (atomic_read(&svc
->refcnt
) == 0) {
1412 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1414 IP_VS_DBG_ADDR(svc
->af
, &svc
->addr
),
1415 ntohs(svc
->port
), atomic_read(&svc
->usecnt
));
1416 free_percpu(svc
->stats
.cpustats
);
1420 /* decrease the module use count */
1421 ip_vs_use_count_dec();
1425 * Unlink a service from list and try to delete it if its refcnt reached 0
1427 static void ip_vs_unlink_service(struct ip_vs_service
*svc
)
1430 * Unhash it from the service table
1432 write_lock_bh(&__ip_vs_svc_lock
);
1434 ip_vs_svc_unhash(svc
);
1437 * Wait until all the svc users go away.
1439 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1441 __ip_vs_del_service(svc
);
1443 write_unlock_bh(&__ip_vs_svc_lock
);
1447 * Delete a service from the service list
1449 static int ip_vs_del_service(struct ip_vs_service
*svc
)
1453 ip_vs_unlink_service(svc
);
1460 * Flush all the virtual services
1462 static int ip_vs_flush(struct net
*net
)
1465 struct ip_vs_service
*svc
, *nxt
;
1468 * Flush the service table hashed by <netns,protocol,addr,port>
1470 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1471 list_for_each_entry_safe(svc
, nxt
, &ip_vs_svc_table
[idx
],
1473 if (net_eq(svc
->net
, net
))
1474 ip_vs_unlink_service(svc
);
1479 * Flush the service table hashed by fwmark
1481 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1482 list_for_each_entry_safe(svc
, nxt
,
1483 &ip_vs_svc_fwm_table
[idx
], f_list
) {
1484 if (net_eq(svc
->net
, net
))
1485 ip_vs_unlink_service(svc
);
1493 * Delete service by {netns} in the service table.
1494 * Called by __ip_vs_cleanup()
1496 void ip_vs_service_net_cleanup(struct net
*net
)
1499 /* Check for "full" addressed entries */
1500 mutex_lock(&__ip_vs_mutex
);
1502 mutex_unlock(&__ip_vs_mutex
);
1506 /* Put all references for device (dst_cache) */
1508 ip_vs_forget_dev(struct ip_vs_dest
*dest
, struct net_device
*dev
)
1510 spin_lock_bh(&dest
->dst_lock
);
1511 if (dest
->dest_dst
&& dest
->dest_dst
->dst_cache
->dev
== dev
) {
1512 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1514 IP_VS_DBG_ADDR(dest
->af
, &dest
->addr
),
1516 atomic_read(&dest
->refcnt
));
1517 __ip_vs_dst_cache_reset(dest
);
1519 spin_unlock_bh(&dest
->dst_lock
);
1522 /* Netdev event receiver
1523 * Currently only NETDEV_DOWN is handled to release refs to cached dsts
1525 static int ip_vs_dst_event(struct notifier_block
*this, unsigned long event
,
1528 struct net_device
*dev
= ptr
;
1529 struct net
*net
= dev_net(dev
);
1530 struct netns_ipvs
*ipvs
= net_ipvs(net
);
1531 struct ip_vs_service
*svc
;
1532 struct ip_vs_dest
*dest
;
1535 if (event
!= NETDEV_DOWN
|| !ipvs
)
1537 IP_VS_DBG(3, "%s() dev=%s\n", __func__
, dev
->name
);
1539 mutex_lock(&__ip_vs_mutex
);
1540 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1541 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
1542 if (net_eq(svc
->net
, net
)) {
1543 list_for_each_entry(dest
, &svc
->destinations
,
1545 ip_vs_forget_dev(dest
, dev
);
1550 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
1551 if (net_eq(svc
->net
, net
)) {
1552 list_for_each_entry(dest
, &svc
->destinations
,
1554 ip_vs_forget_dev(dest
, dev
);
1561 list_for_each_entry(dest
, &ipvs
->dest_trash
, n_list
) {
1562 ip_vs_forget_dev(dest
, dev
);
1564 mutex_unlock(&__ip_vs_mutex
);
1570 * Zero counters in a service or all services
1572 static int ip_vs_zero_service(struct ip_vs_service
*svc
)
1574 struct ip_vs_dest
*dest
;
1576 write_lock_bh(&__ip_vs_svc_lock
);
1577 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
1578 ip_vs_zero_stats(&dest
->stats
);
1580 ip_vs_zero_stats(&svc
->stats
);
1581 write_unlock_bh(&__ip_vs_svc_lock
);
1585 static int ip_vs_zero_all(struct net
*net
)
1588 struct ip_vs_service
*svc
;
1590 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1591 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
1592 if (net_eq(svc
->net
, net
))
1593 ip_vs_zero_service(svc
);
1597 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1598 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
1599 if (net_eq(svc
->net
, net
))
1600 ip_vs_zero_service(svc
);
1604 ip_vs_zero_stats(&net_ipvs(net
)->tot_stats
);
1608 #ifdef CONFIG_SYSCTL
1611 static int three
= 3;
1614 proc_do_defense_mode(ctl_table
*table
, int write
,
1615 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1617 struct net
*net
= current
->nsproxy
->net_ns
;
1618 int *valp
= table
->data
;
1622 rc
= proc_dointvec(table
, write
, buffer
, lenp
, ppos
);
1623 if (write
&& (*valp
!= val
)) {
1624 if ((*valp
< 0) || (*valp
> 3)) {
1625 /* Restore the correct value */
1628 update_defense_level(net_ipvs(net
));
1635 proc_do_sync_threshold(ctl_table
*table
, int write
,
1636 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1638 int *valp
= table
->data
;
1642 /* backup the value first */
1643 memcpy(val
, valp
, sizeof(val
));
1645 rc
= proc_dointvec(table
, write
, buffer
, lenp
, ppos
);
1646 if (write
&& (valp
[0] < 0 || valp
[1] < 0 ||
1647 (valp
[0] >= valp
[1] && valp
[1]))) {
1648 /* Restore the correct value */
1649 memcpy(valp
, val
, sizeof(val
));
1655 proc_do_sync_mode(ctl_table
*table
, int write
,
1656 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1658 int *valp
= table
->data
;
1662 rc
= proc_dointvec(table
, write
, buffer
, lenp
, ppos
);
1663 if (write
&& (*valp
!= val
)) {
1664 if ((*valp
< 0) || (*valp
> 1)) {
1665 /* Restore the correct value */
1673 proc_do_sync_ports(ctl_table
*table
, int write
,
1674 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1676 int *valp
= table
->data
;
1680 rc
= proc_dointvec(table
, write
, buffer
, lenp
, ppos
);
1681 if (write
&& (*valp
!= val
)) {
1682 if (*valp
< 1 || !is_power_of_2(*valp
)) {
1683 /* Restore the correct value */
1691 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1692 * Do not change order or insert new entries without
1693 * align with netns init in ip_vs_control_net_init()
1696 static struct ctl_table vs_vars
[] = {
1698 .procname
= "amemthresh",
1699 .maxlen
= sizeof(int),
1701 .proc_handler
= proc_dointvec
,
1704 .procname
= "am_droprate",
1705 .maxlen
= sizeof(int),
1707 .proc_handler
= proc_dointvec
,
1710 .procname
= "drop_entry",
1711 .maxlen
= sizeof(int),
1713 .proc_handler
= proc_do_defense_mode
,
1716 .procname
= "drop_packet",
1717 .maxlen
= sizeof(int),
1719 .proc_handler
= proc_do_defense_mode
,
1721 #ifdef CONFIG_IP_VS_NFCT
1723 .procname
= "conntrack",
1724 .maxlen
= sizeof(int),
1726 .proc_handler
= &proc_dointvec
,
1730 .procname
= "secure_tcp",
1731 .maxlen
= sizeof(int),
1733 .proc_handler
= proc_do_defense_mode
,
1736 .procname
= "snat_reroute",
1737 .maxlen
= sizeof(int),
1739 .proc_handler
= &proc_dointvec
,
1742 .procname
= "sync_version",
1743 .maxlen
= sizeof(int),
1745 .proc_handler
= &proc_do_sync_mode
,
1748 .procname
= "sync_ports",
1749 .maxlen
= sizeof(int),
1751 .proc_handler
= &proc_do_sync_ports
,
1754 .procname
= "sync_qlen_max",
1755 .maxlen
= sizeof(int),
1757 .proc_handler
= proc_dointvec
,
1760 .procname
= "sync_sock_size",
1761 .maxlen
= sizeof(int),
1763 .proc_handler
= proc_dointvec
,
1766 .procname
= "cache_bypass",
1767 .maxlen
= sizeof(int),
1769 .proc_handler
= proc_dointvec
,
1772 .procname
= "expire_nodest_conn",
1773 .maxlen
= sizeof(int),
1775 .proc_handler
= proc_dointvec
,
1778 .procname
= "expire_quiescent_template",
1779 .maxlen
= sizeof(int),
1781 .proc_handler
= proc_dointvec
,
1784 .procname
= "sync_threshold",
1786 sizeof(((struct netns_ipvs
*)0)->sysctl_sync_threshold
),
1788 .proc_handler
= proc_do_sync_threshold
,
1791 .procname
= "sync_refresh_period",
1792 .maxlen
= sizeof(int),
1794 .proc_handler
= proc_dointvec_jiffies
,
1797 .procname
= "sync_retries",
1798 .maxlen
= sizeof(int),
1800 .proc_handler
= proc_dointvec_minmax
,
1805 .procname
= "nat_icmp_send",
1806 .maxlen
= sizeof(int),
1808 .proc_handler
= proc_dointvec
,
1811 .procname
= "pmtu_disc",
1812 .maxlen
= sizeof(int),
1814 .proc_handler
= proc_dointvec
,
1817 .procname
= "backup_only",
1818 .maxlen
= sizeof(int),
1820 .proc_handler
= proc_dointvec
,
1822 #ifdef CONFIG_IP_VS_DEBUG
1824 .procname
= "debug_level",
1825 .data
= &sysctl_ip_vs_debug_level
,
1826 .maxlen
= sizeof(int),
1828 .proc_handler
= proc_dointvec
,
1833 .procname
= "timeout_established",
1834 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_ESTABLISHED
],
1835 .maxlen
= sizeof(int),
1837 .proc_handler
= proc_dointvec_jiffies
,
1840 .procname
= "timeout_synsent",
1841 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYN_SENT
],
1842 .maxlen
= sizeof(int),
1844 .proc_handler
= proc_dointvec_jiffies
,
1847 .procname
= "timeout_synrecv",
1848 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYN_RECV
],
1849 .maxlen
= sizeof(int),
1851 .proc_handler
= proc_dointvec_jiffies
,
1854 .procname
= "timeout_finwait",
1855 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_FIN_WAIT
],
1856 .maxlen
= sizeof(int),
1858 .proc_handler
= proc_dointvec_jiffies
,
1861 .procname
= "timeout_timewait",
1862 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_TIME_WAIT
],
1863 .maxlen
= sizeof(int),
1865 .proc_handler
= proc_dointvec_jiffies
,
1868 .procname
= "timeout_close",
1869 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_CLOSE
],
1870 .maxlen
= sizeof(int),
1872 .proc_handler
= proc_dointvec_jiffies
,
1875 .procname
= "timeout_closewait",
1876 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_CLOSE_WAIT
],
1877 .maxlen
= sizeof(int),
1879 .proc_handler
= proc_dointvec_jiffies
,
1882 .procname
= "timeout_lastack",
1883 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_LAST_ACK
],
1884 .maxlen
= sizeof(int),
1886 .proc_handler
= proc_dointvec_jiffies
,
1889 .procname
= "timeout_listen",
1890 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_LISTEN
],
1891 .maxlen
= sizeof(int),
1893 .proc_handler
= proc_dointvec_jiffies
,
1896 .procname
= "timeout_synack",
1897 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYNACK
],
1898 .maxlen
= sizeof(int),
1900 .proc_handler
= proc_dointvec_jiffies
,
1903 .procname
= "timeout_udp",
1904 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_UDP
],
1905 .maxlen
= sizeof(int),
1907 .proc_handler
= proc_dointvec_jiffies
,
1910 .procname
= "timeout_icmp",
1911 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_ICMP
],
1912 .maxlen
= sizeof(int),
1914 .proc_handler
= proc_dointvec_jiffies
,
1922 #ifdef CONFIG_PROC_FS
1925 struct seq_net_private p
; /* Do not move this, netns depends upon it*/
1926 struct list_head
*table
;
1931 * Write the contents of the VS rule table to a PROCfs file.
1932 * (It is kept just for backward compatibility)
1934 static inline const char *ip_vs_fwd_name(unsigned int flags
)
1936 switch (flags
& IP_VS_CONN_F_FWD_MASK
) {
1937 case IP_VS_CONN_F_LOCALNODE
:
1939 case IP_VS_CONN_F_TUNNEL
:
1941 case IP_VS_CONN_F_DROUTE
:
1949 /* Get the Nth entry in the two lists */
1950 static struct ip_vs_service
*ip_vs_info_array(struct seq_file
*seq
, loff_t pos
)
1952 struct net
*net
= seq_file_net(seq
);
1953 struct ip_vs_iter
*iter
= seq
->private;
1955 struct ip_vs_service
*svc
;
1957 /* look in hash by protocol */
1958 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1959 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
1960 if (net_eq(svc
->net
, net
) && pos
-- == 0) {
1961 iter
->table
= ip_vs_svc_table
;
1968 /* keep looking in fwmark */
1969 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1970 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
1971 if (net_eq(svc
->net
, net
) && pos
-- == 0) {
1972 iter
->table
= ip_vs_svc_fwm_table
;
1982 static void *ip_vs_info_seq_start(struct seq_file
*seq
, loff_t
*pos
)
1983 __acquires(__ip_vs_svc_lock
)
1986 read_lock_bh(&__ip_vs_svc_lock
);
1987 return *pos
? ip_vs_info_array(seq
, *pos
- 1) : SEQ_START_TOKEN
;
1991 static void *ip_vs_info_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
1993 struct list_head
*e
;
1994 struct ip_vs_iter
*iter
;
1995 struct ip_vs_service
*svc
;
1998 if (v
== SEQ_START_TOKEN
)
1999 return ip_vs_info_array(seq
,0);
2002 iter
= seq
->private;
2004 if (iter
->table
== ip_vs_svc_table
) {
2005 /* next service in table hashed by protocol */
2006 if ((e
= svc
->s_list
.next
) != &ip_vs_svc_table
[iter
->bucket
])
2007 return list_entry(e
, struct ip_vs_service
, s_list
);
2010 while (++iter
->bucket
< IP_VS_SVC_TAB_SIZE
) {
2011 list_for_each_entry(svc
,&ip_vs_svc_table
[iter
->bucket
],
2017 iter
->table
= ip_vs_svc_fwm_table
;
2022 /* next service in hashed by fwmark */
2023 if ((e
= svc
->f_list
.next
) != &ip_vs_svc_fwm_table
[iter
->bucket
])
2024 return list_entry(e
, struct ip_vs_service
, f_list
);
2027 while (++iter
->bucket
< IP_VS_SVC_TAB_SIZE
) {
2028 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[iter
->bucket
],
2036 static void ip_vs_info_seq_stop(struct seq_file
*seq
, void *v
)
2037 __releases(__ip_vs_svc_lock
)
2039 read_unlock_bh(&__ip_vs_svc_lock
);
2043 static int ip_vs_info_seq_show(struct seq_file
*seq
, void *v
)
2045 if (v
== SEQ_START_TOKEN
) {
2047 "IP Virtual Server version %d.%d.%d (size=%d)\n",
2048 NVERSION(IP_VS_VERSION_CODE
), ip_vs_conn_tab_size
);
2050 "Prot LocalAddress:Port Scheduler Flags\n");
2052 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2054 const struct ip_vs_service
*svc
= v
;
2055 const struct ip_vs_iter
*iter
= seq
->private;
2056 const struct ip_vs_dest
*dest
;
2058 if (iter
->table
== ip_vs_svc_table
) {
2059 #ifdef CONFIG_IP_VS_IPV6
2060 if (svc
->af
== AF_INET6
)
2061 seq_printf(seq
, "%s [%pI6]:%04X %s ",
2062 ip_vs_proto_name(svc
->protocol
),
2065 svc
->scheduler
->name
);
2068 seq_printf(seq
, "%s %08X:%04X %s %s ",
2069 ip_vs_proto_name(svc
->protocol
),
2070 ntohl(svc
->addr
.ip
),
2072 svc
->scheduler
->name
,
2073 (svc
->flags
& IP_VS_SVC_F_ONEPACKET
)?"ops ":"");
2075 seq_printf(seq
, "FWM %08X %s %s",
2076 svc
->fwmark
, svc
->scheduler
->name
,
2077 (svc
->flags
& IP_VS_SVC_F_ONEPACKET
)?"ops ":"");
2080 if (svc
->flags
& IP_VS_SVC_F_PERSISTENT
)
2081 seq_printf(seq
, "persistent %d %08X\n",
2083 ntohl(svc
->netmask
));
2085 seq_putc(seq
, '\n');
2087 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
2088 #ifdef CONFIG_IP_VS_IPV6
2089 if (dest
->af
== AF_INET6
)
2092 " %-7s %-6d %-10d %-10d\n",
2095 ip_vs_fwd_name(atomic_read(&dest
->conn_flags
)),
2096 atomic_read(&dest
->weight
),
2097 atomic_read(&dest
->activeconns
),
2098 atomic_read(&dest
->inactconns
));
2103 "%-7s %-6d %-10d %-10d\n",
2104 ntohl(dest
->addr
.ip
),
2106 ip_vs_fwd_name(atomic_read(&dest
->conn_flags
)),
2107 atomic_read(&dest
->weight
),
2108 atomic_read(&dest
->activeconns
),
2109 atomic_read(&dest
->inactconns
));
2116 static const struct seq_operations ip_vs_info_seq_ops
= {
2117 .start
= ip_vs_info_seq_start
,
2118 .next
= ip_vs_info_seq_next
,
2119 .stop
= ip_vs_info_seq_stop
,
2120 .show
= ip_vs_info_seq_show
,
2123 static int ip_vs_info_open(struct inode
*inode
, struct file
*file
)
2125 return seq_open_net(inode
, file
, &ip_vs_info_seq_ops
,
2126 sizeof(struct ip_vs_iter
));
2129 static const struct file_operations ip_vs_info_fops
= {
2130 .owner
= THIS_MODULE
,
2131 .open
= ip_vs_info_open
,
2133 .llseek
= seq_lseek
,
2134 .release
= seq_release_net
,
2137 static int ip_vs_stats_show(struct seq_file
*seq
, void *v
)
2139 struct net
*net
= seq_file_single_net(seq
);
2140 struct ip_vs_stats_user show
;
2142 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2144 " Total Incoming Outgoing Incoming Outgoing\n");
2146 " Conns Packets Packets Bytes Bytes\n");
2148 ip_vs_copy_stats(&show
, &net_ipvs(net
)->tot_stats
);
2149 seq_printf(seq
, "%8X %8X %8X %16LX %16LX\n\n", show
.conns
,
2150 show
.inpkts
, show
.outpkts
,
2151 (unsigned long long) show
.inbytes
,
2152 (unsigned long long) show
.outbytes
);
2154 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2156 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2157 seq_printf(seq
, "%8X %8X %8X %16X %16X\n",
2158 show
.cps
, show
.inpps
, show
.outpps
,
2159 show
.inbps
, show
.outbps
);
2164 static int ip_vs_stats_seq_open(struct inode
*inode
, struct file
*file
)
2166 return single_open_net(inode
, file
, ip_vs_stats_show
);
2169 static const struct file_operations ip_vs_stats_fops
= {
2170 .owner
= THIS_MODULE
,
2171 .open
= ip_vs_stats_seq_open
,
2173 .llseek
= seq_lseek
,
2174 .release
= single_release_net
,
2177 static int ip_vs_stats_percpu_show(struct seq_file
*seq
, void *v
)
2179 struct net
*net
= seq_file_single_net(seq
);
2180 struct ip_vs_stats
*tot_stats
= &net_ipvs(net
)->tot_stats
;
2181 struct ip_vs_cpu_stats
*cpustats
= tot_stats
->cpustats
;
2182 struct ip_vs_stats_user rates
;
2185 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2187 " Total Incoming Outgoing Incoming Outgoing\n");
2189 "CPU Conns Packets Packets Bytes Bytes\n");
2191 for_each_possible_cpu(i
) {
2192 struct ip_vs_cpu_stats
*u
= per_cpu_ptr(cpustats
, i
);
2194 __u64 inbytes
, outbytes
;
2197 start
= u64_stats_fetch_begin_bh(&u
->syncp
);
2198 inbytes
= u
->ustats
.inbytes
;
2199 outbytes
= u
->ustats
.outbytes
;
2200 } while (u64_stats_fetch_retry_bh(&u
->syncp
, start
));
2202 seq_printf(seq
, "%3X %8X %8X %8X %16LX %16LX\n",
2203 i
, u
->ustats
.conns
, u
->ustats
.inpkts
,
2204 u
->ustats
.outpkts
, (__u64
)inbytes
,
2208 spin_lock_bh(&tot_stats
->lock
);
2210 seq_printf(seq
, " ~ %8X %8X %8X %16LX %16LX\n\n",
2211 tot_stats
->ustats
.conns
, tot_stats
->ustats
.inpkts
,
2212 tot_stats
->ustats
.outpkts
,
2213 (unsigned long long) tot_stats
->ustats
.inbytes
,
2214 (unsigned long long) tot_stats
->ustats
.outbytes
);
2216 ip_vs_read_estimator(&rates
, tot_stats
);
2218 spin_unlock_bh(&tot_stats
->lock
);
2220 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2222 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2223 seq_printf(seq
, " %8X %8X %8X %16X %16X\n",
2233 static int ip_vs_stats_percpu_seq_open(struct inode
*inode
, struct file
*file
)
2235 return single_open_net(inode
, file
, ip_vs_stats_percpu_show
);
2238 static const struct file_operations ip_vs_stats_percpu_fops
= {
2239 .owner
= THIS_MODULE
,
2240 .open
= ip_vs_stats_percpu_seq_open
,
2242 .llseek
= seq_lseek
,
2243 .release
= single_release_net
,
2248 * Set timeout values for tcp tcpfin udp in the timeout_table.
2250 static int ip_vs_set_timeout(struct net
*net
, struct ip_vs_timeout_user
*u
)
2252 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2253 struct ip_vs_proto_data
*pd
;
2256 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2261 #ifdef CONFIG_IP_VS_PROTO_TCP
2262 if (u
->tcp_timeout
) {
2263 pd
= ip_vs_proto_data_get(net
, IPPROTO_TCP
);
2264 pd
->timeout_table
[IP_VS_TCP_S_ESTABLISHED
]
2265 = u
->tcp_timeout
* HZ
;
2268 if (u
->tcp_fin_timeout
) {
2269 pd
= ip_vs_proto_data_get(net
, IPPROTO_TCP
);
2270 pd
->timeout_table
[IP_VS_TCP_S_FIN_WAIT
]
2271 = u
->tcp_fin_timeout
* HZ
;
2275 #ifdef CONFIG_IP_VS_PROTO_UDP
2276 if (u
->udp_timeout
) {
2277 pd
= ip_vs_proto_data_get(net
, IPPROTO_UDP
);
2278 pd
->timeout_table
[IP_VS_UDP_S_NORMAL
]
2279 = u
->udp_timeout
* HZ
;
2286 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2287 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2288 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2289 sizeof(struct ip_vs_dest_user))
2290 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2291 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2292 #define MAX_ARG_LEN SVCDEST_ARG_LEN
2294 static const unsigned char set_arglen
[SET_CMDID(IP_VS_SO_SET_MAX
)+1] = {
2295 [SET_CMDID(IP_VS_SO_SET_ADD
)] = SERVICE_ARG_LEN
,
2296 [SET_CMDID(IP_VS_SO_SET_EDIT
)] = SERVICE_ARG_LEN
,
2297 [SET_CMDID(IP_VS_SO_SET_DEL
)] = SERVICE_ARG_LEN
,
2298 [SET_CMDID(IP_VS_SO_SET_FLUSH
)] = 0,
2299 [SET_CMDID(IP_VS_SO_SET_ADDDEST
)] = SVCDEST_ARG_LEN
,
2300 [SET_CMDID(IP_VS_SO_SET_DELDEST
)] = SVCDEST_ARG_LEN
,
2301 [SET_CMDID(IP_VS_SO_SET_EDITDEST
)] = SVCDEST_ARG_LEN
,
2302 [SET_CMDID(IP_VS_SO_SET_TIMEOUT
)] = TIMEOUT_ARG_LEN
,
2303 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON
)] = DAEMON_ARG_LEN
,
2304 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON
)] = DAEMON_ARG_LEN
,
2305 [SET_CMDID(IP_VS_SO_SET_ZERO
)] = SERVICE_ARG_LEN
,
2308 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern
*usvc
,
2309 struct ip_vs_service_user
*usvc_compat
)
2311 memset(usvc
, 0, sizeof(*usvc
));
2314 usvc
->protocol
= usvc_compat
->protocol
;
2315 usvc
->addr
.ip
= usvc_compat
->addr
;
2316 usvc
->port
= usvc_compat
->port
;
2317 usvc
->fwmark
= usvc_compat
->fwmark
;
2319 /* Deep copy of sched_name is not needed here */
2320 usvc
->sched_name
= usvc_compat
->sched_name
;
2322 usvc
->flags
= usvc_compat
->flags
;
2323 usvc
->timeout
= usvc_compat
->timeout
;
2324 usvc
->netmask
= usvc_compat
->netmask
;
2327 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern
*udest
,
2328 struct ip_vs_dest_user
*udest_compat
)
2330 memset(udest
, 0, sizeof(*udest
));
2332 udest
->addr
.ip
= udest_compat
->addr
;
2333 udest
->port
= udest_compat
->port
;
2334 udest
->conn_flags
= udest_compat
->conn_flags
;
2335 udest
->weight
= udest_compat
->weight
;
2336 udest
->u_threshold
= udest_compat
->u_threshold
;
2337 udest
->l_threshold
= udest_compat
->l_threshold
;
2341 do_ip_vs_set_ctl(struct sock
*sk
, int cmd
, void __user
*user
, unsigned int len
)
2343 struct net
*net
= sock_net(sk
);
2345 unsigned char arg
[MAX_ARG_LEN
];
2346 struct ip_vs_service_user
*usvc_compat
;
2347 struct ip_vs_service_user_kern usvc
;
2348 struct ip_vs_service
*svc
;
2349 struct ip_vs_dest_user
*udest_compat
;
2350 struct ip_vs_dest_user_kern udest
;
2351 struct netns_ipvs
*ipvs
= net_ipvs(net
);
2353 if (!ns_capable(sock_net(sk
)->user_ns
, CAP_NET_ADMIN
))
2356 if (cmd
< IP_VS_BASE_CTL
|| cmd
> IP_VS_SO_SET_MAX
)
2358 if (len
< 0 || len
> MAX_ARG_LEN
)
2360 if (len
!= set_arglen
[SET_CMDID(cmd
)]) {
2361 pr_err("set_ctl: len %u != %u\n",
2362 len
, set_arglen
[SET_CMDID(cmd
)]);
2366 if (copy_from_user(arg
, user
, len
) != 0)
2369 /* increase the module use count */
2370 ip_vs_use_count_inc();
2372 /* Handle daemons since they have another lock */
2373 if (cmd
== IP_VS_SO_SET_STARTDAEMON
||
2374 cmd
== IP_VS_SO_SET_STOPDAEMON
) {
2375 struct ip_vs_daemon_user
*dm
= (struct ip_vs_daemon_user
*)arg
;
2377 if (mutex_lock_interruptible(&ipvs
->sync_mutex
)) {
2381 if (cmd
== IP_VS_SO_SET_STARTDAEMON
)
2382 ret
= start_sync_thread(net
, dm
->state
, dm
->mcast_ifn
,
2385 ret
= stop_sync_thread(net
, dm
->state
);
2386 mutex_unlock(&ipvs
->sync_mutex
);
2390 if (mutex_lock_interruptible(&__ip_vs_mutex
)) {
2395 if (cmd
== IP_VS_SO_SET_FLUSH
) {
2396 /* Flush the virtual service */
2397 ret
= ip_vs_flush(net
);
2399 } else if (cmd
== IP_VS_SO_SET_TIMEOUT
) {
2400 /* Set timeout values for (tcp tcpfin udp) */
2401 ret
= ip_vs_set_timeout(net
, (struct ip_vs_timeout_user
*)arg
);
2405 usvc_compat
= (struct ip_vs_service_user
*)arg
;
2406 udest_compat
= (struct ip_vs_dest_user
*)(usvc_compat
+ 1);
2408 /* We only use the new structs internally, so copy userspace compat
2409 * structs to extended internal versions */
2410 ip_vs_copy_usvc_compat(&usvc
, usvc_compat
);
2411 ip_vs_copy_udest_compat(&udest
, udest_compat
);
2413 if (cmd
== IP_VS_SO_SET_ZERO
) {
2414 /* if no service address is set, zero counters in all */
2415 if (!usvc
.fwmark
&& !usvc
.addr
.ip
&& !usvc
.port
) {
2416 ret
= ip_vs_zero_all(net
);
2421 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2422 if (usvc
.protocol
!= IPPROTO_TCP
&& usvc
.protocol
!= IPPROTO_UDP
&&
2423 usvc
.protocol
!= IPPROTO_SCTP
) {
2424 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2425 usvc
.protocol
, &usvc
.addr
.ip
,
2426 ntohs(usvc
.port
), usvc
.sched_name
);
2431 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2432 if (usvc
.fwmark
== 0)
2433 svc
= __ip_vs_service_find(net
, usvc
.af
, usvc
.protocol
,
2434 &usvc
.addr
, usvc
.port
);
2436 svc
= __ip_vs_svc_fwm_find(net
, usvc
.af
, usvc
.fwmark
);
2438 if (cmd
!= IP_VS_SO_SET_ADD
2439 && (svc
== NULL
|| svc
->protocol
!= usvc
.protocol
)) {
2445 case IP_VS_SO_SET_ADD
:
2449 ret
= ip_vs_add_service(net
, &usvc
, &svc
);
2451 case IP_VS_SO_SET_EDIT
:
2452 ret
= ip_vs_edit_service(svc
, &usvc
);
2454 case IP_VS_SO_SET_DEL
:
2455 ret
= ip_vs_del_service(svc
);
2459 case IP_VS_SO_SET_ZERO
:
2460 ret
= ip_vs_zero_service(svc
);
2462 case IP_VS_SO_SET_ADDDEST
:
2463 ret
= ip_vs_add_dest(svc
, &udest
);
2465 case IP_VS_SO_SET_EDITDEST
:
2466 ret
= ip_vs_edit_dest(svc
, &udest
);
2468 case IP_VS_SO_SET_DELDEST
:
2469 ret
= ip_vs_del_dest(svc
, &udest
);
2476 mutex_unlock(&__ip_vs_mutex
);
2478 /* decrease the module use count */
2479 ip_vs_use_count_dec();
2486 ip_vs_copy_service(struct ip_vs_service_entry
*dst
, struct ip_vs_service
*src
)
2488 dst
->protocol
= src
->protocol
;
2489 dst
->addr
= src
->addr
.ip
;
2490 dst
->port
= src
->port
;
2491 dst
->fwmark
= src
->fwmark
;
2492 strlcpy(dst
->sched_name
, src
->scheduler
->name
, sizeof(dst
->sched_name
));
2493 dst
->flags
= src
->flags
;
2494 dst
->timeout
= src
->timeout
/ HZ
;
2495 dst
->netmask
= src
->netmask
;
2496 dst
->num_dests
= src
->num_dests
;
2497 ip_vs_copy_stats(&dst
->stats
, &src
->stats
);
2501 __ip_vs_get_service_entries(struct net
*net
,
2502 const struct ip_vs_get_services
*get
,
2503 struct ip_vs_get_services __user
*uptr
)
2506 struct ip_vs_service
*svc
;
2507 struct ip_vs_service_entry entry
;
2510 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2511 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
2512 /* Only expose IPv4 entries to old interface */
2513 if (svc
->af
!= AF_INET
|| !net_eq(svc
->net
, net
))
2516 if (count
>= get
->num_services
)
2518 memset(&entry
, 0, sizeof(entry
));
2519 ip_vs_copy_service(&entry
, svc
);
2520 if (copy_to_user(&uptr
->entrytable
[count
],
2521 &entry
, sizeof(entry
))) {
2529 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2530 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
2531 /* Only expose IPv4 entries to old interface */
2532 if (svc
->af
!= AF_INET
|| !net_eq(svc
->net
, net
))
2535 if (count
>= get
->num_services
)
2537 memset(&entry
, 0, sizeof(entry
));
2538 ip_vs_copy_service(&entry
, svc
);
2539 if (copy_to_user(&uptr
->entrytable
[count
],
2540 &entry
, sizeof(entry
))) {
2552 __ip_vs_get_dest_entries(struct net
*net
, const struct ip_vs_get_dests
*get
,
2553 struct ip_vs_get_dests __user
*uptr
)
2555 struct ip_vs_service
*svc
;
2556 union nf_inet_addr addr
= { .ip
= get
->addr
};
2560 svc
= __ip_vs_svc_fwm_find(net
, AF_INET
, get
->fwmark
);
2562 svc
= __ip_vs_service_find(net
, AF_INET
, get
->protocol
, &addr
,
2567 struct ip_vs_dest
*dest
;
2568 struct ip_vs_dest_entry entry
;
2570 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
2571 if (count
>= get
->num_dests
)
2574 entry
.addr
= dest
->addr
.ip
;
2575 entry
.port
= dest
->port
;
2576 entry
.conn_flags
= atomic_read(&dest
->conn_flags
);
2577 entry
.weight
= atomic_read(&dest
->weight
);
2578 entry
.u_threshold
= dest
->u_threshold
;
2579 entry
.l_threshold
= dest
->l_threshold
;
2580 entry
.activeconns
= atomic_read(&dest
->activeconns
);
2581 entry
.inactconns
= atomic_read(&dest
->inactconns
);
2582 entry
.persistconns
= atomic_read(&dest
->persistconns
);
2583 ip_vs_copy_stats(&entry
.stats
, &dest
->stats
);
2584 if (copy_to_user(&uptr
->entrytable
[count
],
2585 &entry
, sizeof(entry
))) {
2597 __ip_vs_get_timeouts(struct net
*net
, struct ip_vs_timeout_user
*u
)
2599 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2600 struct ip_vs_proto_data
*pd
;
2603 memset(u
, 0, sizeof (*u
));
2605 #ifdef CONFIG_IP_VS_PROTO_TCP
2606 pd
= ip_vs_proto_data_get(net
, IPPROTO_TCP
);
2607 u
->tcp_timeout
= pd
->timeout_table
[IP_VS_TCP_S_ESTABLISHED
] / HZ
;
2608 u
->tcp_fin_timeout
= pd
->timeout_table
[IP_VS_TCP_S_FIN_WAIT
] / HZ
;
2610 #ifdef CONFIG_IP_VS_PROTO_UDP
2611 pd
= ip_vs_proto_data_get(net
, IPPROTO_UDP
);
2613 pd
->timeout_table
[IP_VS_UDP_S_NORMAL
] / HZ
;
2618 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2619 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2620 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2621 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2622 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2623 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2624 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2626 static const unsigned char get_arglen
[GET_CMDID(IP_VS_SO_GET_MAX
)+1] = {
2627 [GET_CMDID(IP_VS_SO_GET_VERSION
)] = 64,
2628 [GET_CMDID(IP_VS_SO_GET_INFO
)] = GET_INFO_ARG_LEN
,
2629 [GET_CMDID(IP_VS_SO_GET_SERVICES
)] = GET_SERVICES_ARG_LEN
,
2630 [GET_CMDID(IP_VS_SO_GET_SERVICE
)] = GET_SERVICE_ARG_LEN
,
2631 [GET_CMDID(IP_VS_SO_GET_DESTS
)] = GET_DESTS_ARG_LEN
,
2632 [GET_CMDID(IP_VS_SO_GET_TIMEOUT
)] = GET_TIMEOUT_ARG_LEN
,
2633 [GET_CMDID(IP_VS_SO_GET_DAEMON
)] = GET_DAEMON_ARG_LEN
,
2637 do_ip_vs_get_ctl(struct sock
*sk
, int cmd
, void __user
*user
, int *len
)
2639 unsigned char arg
[128];
2641 unsigned int copylen
;
2642 struct net
*net
= sock_net(sk
);
2643 struct netns_ipvs
*ipvs
= net_ipvs(net
);
2646 if (!ns_capable(sock_net(sk
)->user_ns
, CAP_NET_ADMIN
))
2649 if (cmd
< IP_VS_BASE_CTL
|| cmd
> IP_VS_SO_GET_MAX
)
2652 if (*len
< get_arglen
[GET_CMDID(cmd
)]) {
2653 pr_err("get_ctl: len %u < %u\n",
2654 *len
, get_arglen
[GET_CMDID(cmd
)]);
2658 copylen
= get_arglen
[GET_CMDID(cmd
)];
2662 if (copy_from_user(arg
, user
, copylen
) != 0)
2665 * Handle daemons first since it has its own locking
2667 if (cmd
== IP_VS_SO_GET_DAEMON
) {
2668 struct ip_vs_daemon_user d
[2];
2670 memset(&d
, 0, sizeof(d
));
2671 if (mutex_lock_interruptible(&ipvs
->sync_mutex
))
2672 return -ERESTARTSYS
;
2674 if (ipvs
->sync_state
& IP_VS_STATE_MASTER
) {
2675 d
[0].state
= IP_VS_STATE_MASTER
;
2676 strlcpy(d
[0].mcast_ifn
, ipvs
->master_mcast_ifn
,
2677 sizeof(d
[0].mcast_ifn
));
2678 d
[0].syncid
= ipvs
->master_syncid
;
2680 if (ipvs
->sync_state
& IP_VS_STATE_BACKUP
) {
2681 d
[1].state
= IP_VS_STATE_BACKUP
;
2682 strlcpy(d
[1].mcast_ifn
, ipvs
->backup_mcast_ifn
,
2683 sizeof(d
[1].mcast_ifn
));
2684 d
[1].syncid
= ipvs
->backup_syncid
;
2686 if (copy_to_user(user
, &d
, sizeof(d
)) != 0)
2688 mutex_unlock(&ipvs
->sync_mutex
);
2692 if (mutex_lock_interruptible(&__ip_vs_mutex
))
2693 return -ERESTARTSYS
;
2696 case IP_VS_SO_GET_VERSION
:
2700 sprintf(buf
, "IP Virtual Server version %d.%d.%d (size=%d)",
2701 NVERSION(IP_VS_VERSION_CODE
), ip_vs_conn_tab_size
);
2702 if (copy_to_user(user
, buf
, strlen(buf
)+1) != 0) {
2706 *len
= strlen(buf
)+1;
2710 case IP_VS_SO_GET_INFO
:
2712 struct ip_vs_getinfo info
;
2713 info
.version
= IP_VS_VERSION_CODE
;
2714 info
.size
= ip_vs_conn_tab_size
;
2715 info
.num_services
= ipvs
->num_services
;
2716 if (copy_to_user(user
, &info
, sizeof(info
)) != 0)
2721 case IP_VS_SO_GET_SERVICES
:
2723 struct ip_vs_get_services
*get
;
2726 get
= (struct ip_vs_get_services
*)arg
;
2727 size
= sizeof(*get
) +
2728 sizeof(struct ip_vs_service_entry
) * get
->num_services
;
2730 pr_err("length: %u != %u\n", *len
, size
);
2734 ret
= __ip_vs_get_service_entries(net
, get
, user
);
2738 case IP_VS_SO_GET_SERVICE
:
2740 struct ip_vs_service_entry
*entry
;
2741 struct ip_vs_service
*svc
;
2742 union nf_inet_addr addr
;
2744 entry
= (struct ip_vs_service_entry
*)arg
;
2745 addr
.ip
= entry
->addr
;
2747 svc
= __ip_vs_svc_fwm_find(net
, AF_INET
, entry
->fwmark
);
2749 svc
= __ip_vs_service_find(net
, AF_INET
,
2750 entry
->protocol
, &addr
,
2753 ip_vs_copy_service(entry
, svc
);
2754 if (copy_to_user(user
, entry
, sizeof(*entry
)) != 0)
2761 case IP_VS_SO_GET_DESTS
:
2763 struct ip_vs_get_dests
*get
;
2766 get
= (struct ip_vs_get_dests
*)arg
;
2767 size
= sizeof(*get
) +
2768 sizeof(struct ip_vs_dest_entry
) * get
->num_dests
;
2770 pr_err("length: %u != %u\n", *len
, size
);
2774 ret
= __ip_vs_get_dest_entries(net
, get
, user
);
2778 case IP_VS_SO_GET_TIMEOUT
:
2780 struct ip_vs_timeout_user t
;
2782 __ip_vs_get_timeouts(net
, &t
);
2783 if (copy_to_user(user
, &t
, sizeof(t
)) != 0)
2793 mutex_unlock(&__ip_vs_mutex
);
2798 static struct nf_sockopt_ops ip_vs_sockopts
= {
2800 .set_optmin
= IP_VS_BASE_CTL
,
2801 .set_optmax
= IP_VS_SO_SET_MAX
+1,
2802 .set
= do_ip_vs_set_ctl
,
2803 .get_optmin
= IP_VS_BASE_CTL
,
2804 .get_optmax
= IP_VS_SO_GET_MAX
+1,
2805 .get
= do_ip_vs_get_ctl
,
2806 .owner
= THIS_MODULE
,
2810 * Generic Netlink interface
2813 /* IPVS genetlink family */
2814 static struct genl_family ip_vs_genl_family
= {
2815 .id
= GENL_ID_GENERATE
,
2817 .name
= IPVS_GENL_NAME
,
2818 .version
= IPVS_GENL_VERSION
,
2819 .maxattr
= IPVS_CMD_MAX
,
2820 .netnsok
= true, /* Make ipvsadm to work on netns */
2823 /* Policy used for first-level command attributes */
2824 static const struct nla_policy ip_vs_cmd_policy
[IPVS_CMD_ATTR_MAX
+ 1] = {
2825 [IPVS_CMD_ATTR_SERVICE
] = { .type
= NLA_NESTED
},
2826 [IPVS_CMD_ATTR_DEST
] = { .type
= NLA_NESTED
},
2827 [IPVS_CMD_ATTR_DAEMON
] = { .type
= NLA_NESTED
},
2828 [IPVS_CMD_ATTR_TIMEOUT_TCP
] = { .type
= NLA_U32
},
2829 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
] = { .type
= NLA_U32
},
2830 [IPVS_CMD_ATTR_TIMEOUT_UDP
] = { .type
= NLA_U32
},
2833 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2834 static const struct nla_policy ip_vs_daemon_policy
[IPVS_DAEMON_ATTR_MAX
+ 1] = {
2835 [IPVS_DAEMON_ATTR_STATE
] = { .type
= NLA_U32
},
2836 [IPVS_DAEMON_ATTR_MCAST_IFN
] = { .type
= NLA_NUL_STRING
,
2837 .len
= IP_VS_IFNAME_MAXLEN
},
2838 [IPVS_DAEMON_ATTR_SYNC_ID
] = { .type
= NLA_U32
},
2841 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2842 static const struct nla_policy ip_vs_svc_policy
[IPVS_SVC_ATTR_MAX
+ 1] = {
2843 [IPVS_SVC_ATTR_AF
] = { .type
= NLA_U16
},
2844 [IPVS_SVC_ATTR_PROTOCOL
] = { .type
= NLA_U16
},
2845 [IPVS_SVC_ATTR_ADDR
] = { .type
= NLA_BINARY
,
2846 .len
= sizeof(union nf_inet_addr
) },
2847 [IPVS_SVC_ATTR_PORT
] = { .type
= NLA_U16
},
2848 [IPVS_SVC_ATTR_FWMARK
] = { .type
= NLA_U32
},
2849 [IPVS_SVC_ATTR_SCHED_NAME
] = { .type
= NLA_NUL_STRING
,
2850 .len
= IP_VS_SCHEDNAME_MAXLEN
},
2851 [IPVS_SVC_ATTR_PE_NAME
] = { .type
= NLA_NUL_STRING
,
2852 .len
= IP_VS_PENAME_MAXLEN
},
2853 [IPVS_SVC_ATTR_FLAGS
] = { .type
= NLA_BINARY
,
2854 .len
= sizeof(struct ip_vs_flags
) },
2855 [IPVS_SVC_ATTR_TIMEOUT
] = { .type
= NLA_U32
},
2856 [IPVS_SVC_ATTR_NETMASK
] = { .type
= NLA_U32
},
2857 [IPVS_SVC_ATTR_STATS
] = { .type
= NLA_NESTED
},
2860 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2861 static const struct nla_policy ip_vs_dest_policy
[IPVS_DEST_ATTR_MAX
+ 1] = {
2862 [IPVS_DEST_ATTR_ADDR
] = { .type
= NLA_BINARY
,
2863 .len
= sizeof(union nf_inet_addr
) },
2864 [IPVS_DEST_ATTR_PORT
] = { .type
= NLA_U16
},
2865 [IPVS_DEST_ATTR_FWD_METHOD
] = { .type
= NLA_U32
},
2866 [IPVS_DEST_ATTR_WEIGHT
] = { .type
= NLA_U32
},
2867 [IPVS_DEST_ATTR_U_THRESH
] = { .type
= NLA_U32
},
2868 [IPVS_DEST_ATTR_L_THRESH
] = { .type
= NLA_U32
},
2869 [IPVS_DEST_ATTR_ACTIVE_CONNS
] = { .type
= NLA_U32
},
2870 [IPVS_DEST_ATTR_INACT_CONNS
] = { .type
= NLA_U32
},
2871 [IPVS_DEST_ATTR_PERSIST_CONNS
] = { .type
= NLA_U32
},
2872 [IPVS_DEST_ATTR_STATS
] = { .type
= NLA_NESTED
},
2875 static int ip_vs_genl_fill_stats(struct sk_buff
*skb
, int container_type
,
2876 struct ip_vs_stats
*stats
)
2878 struct ip_vs_stats_user ustats
;
2879 struct nlattr
*nl_stats
= nla_nest_start(skb
, container_type
);
2883 ip_vs_copy_stats(&ustats
, stats
);
2885 if (nla_put_u32(skb
, IPVS_STATS_ATTR_CONNS
, ustats
.conns
) ||
2886 nla_put_u32(skb
, IPVS_STATS_ATTR_INPKTS
, ustats
.inpkts
) ||
2887 nla_put_u32(skb
, IPVS_STATS_ATTR_OUTPKTS
, ustats
.outpkts
) ||
2888 nla_put_u64(skb
, IPVS_STATS_ATTR_INBYTES
, ustats
.inbytes
) ||
2889 nla_put_u64(skb
, IPVS_STATS_ATTR_OUTBYTES
, ustats
.outbytes
) ||
2890 nla_put_u32(skb
, IPVS_STATS_ATTR_CPS
, ustats
.cps
) ||
2891 nla_put_u32(skb
, IPVS_STATS_ATTR_INPPS
, ustats
.inpps
) ||
2892 nla_put_u32(skb
, IPVS_STATS_ATTR_OUTPPS
, ustats
.outpps
) ||
2893 nla_put_u32(skb
, IPVS_STATS_ATTR_INBPS
, ustats
.inbps
) ||
2894 nla_put_u32(skb
, IPVS_STATS_ATTR_OUTBPS
, ustats
.outbps
))
2895 goto nla_put_failure
;
2896 nla_nest_end(skb
, nl_stats
);
2901 nla_nest_cancel(skb
, nl_stats
);
2905 static int ip_vs_genl_fill_service(struct sk_buff
*skb
,
2906 struct ip_vs_service
*svc
)
2908 struct nlattr
*nl_service
;
2909 struct ip_vs_flags flags
= { .flags
= svc
->flags
,
2912 nl_service
= nla_nest_start(skb
, IPVS_CMD_ATTR_SERVICE
);
2916 if (nla_put_u16(skb
, IPVS_SVC_ATTR_AF
, svc
->af
))
2917 goto nla_put_failure
;
2919 if (nla_put_u32(skb
, IPVS_SVC_ATTR_FWMARK
, svc
->fwmark
))
2920 goto nla_put_failure
;
2922 if (nla_put_u16(skb
, IPVS_SVC_ATTR_PROTOCOL
, svc
->protocol
) ||
2923 nla_put(skb
, IPVS_SVC_ATTR_ADDR
, sizeof(svc
->addr
), &svc
->addr
) ||
2924 nla_put_u16(skb
, IPVS_SVC_ATTR_PORT
, svc
->port
))
2925 goto nla_put_failure
;
2928 if (nla_put_string(skb
, IPVS_SVC_ATTR_SCHED_NAME
, svc
->scheduler
->name
) ||
2930 nla_put_string(skb
, IPVS_SVC_ATTR_PE_NAME
, svc
->pe
->name
)) ||
2931 nla_put(skb
, IPVS_SVC_ATTR_FLAGS
, sizeof(flags
), &flags
) ||
2932 nla_put_u32(skb
, IPVS_SVC_ATTR_TIMEOUT
, svc
->timeout
/ HZ
) ||
2933 nla_put_u32(skb
, IPVS_SVC_ATTR_NETMASK
, svc
->netmask
))
2934 goto nla_put_failure
;
2935 if (ip_vs_genl_fill_stats(skb
, IPVS_SVC_ATTR_STATS
, &svc
->stats
))
2936 goto nla_put_failure
;
2938 nla_nest_end(skb
, nl_service
);
2943 nla_nest_cancel(skb
, nl_service
);
2947 static int ip_vs_genl_dump_service(struct sk_buff
*skb
,
2948 struct ip_vs_service
*svc
,
2949 struct netlink_callback
*cb
)
2953 hdr
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
2954 &ip_vs_genl_family
, NLM_F_MULTI
,
2955 IPVS_CMD_NEW_SERVICE
);
2959 if (ip_vs_genl_fill_service(skb
, svc
) < 0)
2960 goto nla_put_failure
;
2962 return genlmsg_end(skb
, hdr
);
2965 genlmsg_cancel(skb
, hdr
);
2969 static int ip_vs_genl_dump_services(struct sk_buff
*skb
,
2970 struct netlink_callback
*cb
)
2973 int start
= cb
->args
[0];
2974 struct ip_vs_service
*svc
;
2975 struct net
*net
= skb_sknet(skb
);
2977 mutex_lock(&__ip_vs_mutex
);
2978 for (i
= 0; i
< IP_VS_SVC_TAB_SIZE
; i
++) {
2979 list_for_each_entry(svc
, &ip_vs_svc_table
[i
], s_list
) {
2980 if (++idx
<= start
|| !net_eq(svc
->net
, net
))
2982 if (ip_vs_genl_dump_service(skb
, svc
, cb
) < 0) {
2984 goto nla_put_failure
;
2989 for (i
= 0; i
< IP_VS_SVC_TAB_SIZE
; i
++) {
2990 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[i
], f_list
) {
2991 if (++idx
<= start
|| !net_eq(svc
->net
, net
))
2993 if (ip_vs_genl_dump_service(skb
, svc
, cb
) < 0) {
2995 goto nla_put_failure
;
3001 mutex_unlock(&__ip_vs_mutex
);
3007 static int ip_vs_genl_parse_service(struct net
*net
,
3008 struct ip_vs_service_user_kern
*usvc
,
3009 struct nlattr
*nla
, int full_entry
,
3010 struct ip_vs_service
**ret_svc
)
3012 struct nlattr
*attrs
[IPVS_SVC_ATTR_MAX
+ 1];
3013 struct nlattr
*nla_af
, *nla_port
, *nla_fwmark
, *nla_protocol
, *nla_addr
;
3014 struct ip_vs_service
*svc
;
3016 /* Parse mandatory identifying service fields first */
3018 nla_parse_nested(attrs
, IPVS_SVC_ATTR_MAX
, nla
, ip_vs_svc_policy
))
3021 nla_af
= attrs
[IPVS_SVC_ATTR_AF
];
3022 nla_protocol
= attrs
[IPVS_SVC_ATTR_PROTOCOL
];
3023 nla_addr
= attrs
[IPVS_SVC_ATTR_ADDR
];
3024 nla_port
= attrs
[IPVS_SVC_ATTR_PORT
];
3025 nla_fwmark
= attrs
[IPVS_SVC_ATTR_FWMARK
];
3027 if (!(nla_af
&& (nla_fwmark
|| (nla_port
&& nla_protocol
&& nla_addr
))))
3030 memset(usvc
, 0, sizeof(*usvc
));
3032 usvc
->af
= nla_get_u16(nla_af
);
3033 #ifdef CONFIG_IP_VS_IPV6
3034 if (usvc
->af
!= AF_INET
&& usvc
->af
!= AF_INET6
)
3036 if (usvc
->af
!= AF_INET
)
3038 return -EAFNOSUPPORT
;
3041 usvc
->protocol
= IPPROTO_TCP
;
3042 usvc
->fwmark
= nla_get_u32(nla_fwmark
);
3044 usvc
->protocol
= nla_get_u16(nla_protocol
);
3045 nla_memcpy(&usvc
->addr
, nla_addr
, sizeof(usvc
->addr
));
3046 usvc
->port
= nla_get_u16(nla_port
);
3051 svc
= __ip_vs_svc_fwm_find(net
, usvc
->af
, usvc
->fwmark
);
3053 svc
= __ip_vs_service_find(net
, usvc
->af
, usvc
->protocol
,
3054 &usvc
->addr
, usvc
->port
);
3057 /* If a full entry was requested, check for the additional fields */
3059 struct nlattr
*nla_sched
, *nla_flags
, *nla_pe
, *nla_timeout
,
3061 struct ip_vs_flags flags
;
3063 nla_sched
= attrs
[IPVS_SVC_ATTR_SCHED_NAME
];
3064 nla_pe
= attrs
[IPVS_SVC_ATTR_PE_NAME
];
3065 nla_flags
= attrs
[IPVS_SVC_ATTR_FLAGS
];
3066 nla_timeout
= attrs
[IPVS_SVC_ATTR_TIMEOUT
];
3067 nla_netmask
= attrs
[IPVS_SVC_ATTR_NETMASK
];
3069 if (!(nla_sched
&& nla_flags
&& nla_timeout
&& nla_netmask
))
3072 nla_memcpy(&flags
, nla_flags
, sizeof(flags
));
3074 /* prefill flags from service if it already exists */
3076 usvc
->flags
= svc
->flags
;
3078 /* set new flags from userland */
3079 usvc
->flags
= (usvc
->flags
& ~flags
.mask
) |
3080 (flags
.flags
& flags
.mask
);
3081 usvc
->sched_name
= nla_data(nla_sched
);
3082 usvc
->pe_name
= nla_pe
? nla_data(nla_pe
) : NULL
;
3083 usvc
->timeout
= nla_get_u32(nla_timeout
);
3084 usvc
->netmask
= nla_get_u32(nla_netmask
);
3090 static struct ip_vs_service
*ip_vs_genl_find_service(struct net
*net
,
3093 struct ip_vs_service_user_kern usvc
;
3094 struct ip_vs_service
*svc
;
3097 ret
= ip_vs_genl_parse_service(net
, &usvc
, nla
, 0, &svc
);
3098 return ret
? ERR_PTR(ret
) : svc
;
3101 static int ip_vs_genl_fill_dest(struct sk_buff
*skb
, struct ip_vs_dest
*dest
)
3103 struct nlattr
*nl_dest
;
3105 nl_dest
= nla_nest_start(skb
, IPVS_CMD_ATTR_DEST
);
3109 if (nla_put(skb
, IPVS_DEST_ATTR_ADDR
, sizeof(dest
->addr
), &dest
->addr
) ||
3110 nla_put_u16(skb
, IPVS_DEST_ATTR_PORT
, dest
->port
) ||
3111 nla_put_u32(skb
, IPVS_DEST_ATTR_FWD_METHOD
,
3112 (atomic_read(&dest
->conn_flags
) &
3113 IP_VS_CONN_F_FWD_MASK
)) ||
3114 nla_put_u32(skb
, IPVS_DEST_ATTR_WEIGHT
,
3115 atomic_read(&dest
->weight
)) ||
3116 nla_put_u32(skb
, IPVS_DEST_ATTR_U_THRESH
, dest
->u_threshold
) ||
3117 nla_put_u32(skb
, IPVS_DEST_ATTR_L_THRESH
, dest
->l_threshold
) ||
3118 nla_put_u32(skb
, IPVS_DEST_ATTR_ACTIVE_CONNS
,
3119 atomic_read(&dest
->activeconns
)) ||
3120 nla_put_u32(skb
, IPVS_DEST_ATTR_INACT_CONNS
,
3121 atomic_read(&dest
->inactconns
)) ||
3122 nla_put_u32(skb
, IPVS_DEST_ATTR_PERSIST_CONNS
,
3123 atomic_read(&dest
->persistconns
)))
3124 goto nla_put_failure
;
3125 if (ip_vs_genl_fill_stats(skb
, IPVS_DEST_ATTR_STATS
, &dest
->stats
))
3126 goto nla_put_failure
;
3128 nla_nest_end(skb
, nl_dest
);
3133 nla_nest_cancel(skb
, nl_dest
);
3137 static int ip_vs_genl_dump_dest(struct sk_buff
*skb
, struct ip_vs_dest
*dest
,
3138 struct netlink_callback
*cb
)
3142 hdr
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
3143 &ip_vs_genl_family
, NLM_F_MULTI
,
3148 if (ip_vs_genl_fill_dest(skb
, dest
) < 0)
3149 goto nla_put_failure
;
3151 return genlmsg_end(skb
, hdr
);
3154 genlmsg_cancel(skb
, hdr
);
3158 static int ip_vs_genl_dump_dests(struct sk_buff
*skb
,
3159 struct netlink_callback
*cb
)
3162 int start
= cb
->args
[0];
3163 struct ip_vs_service
*svc
;
3164 struct ip_vs_dest
*dest
;
3165 struct nlattr
*attrs
[IPVS_CMD_ATTR_MAX
+ 1];
3166 struct net
*net
= skb_sknet(skb
);
3168 mutex_lock(&__ip_vs_mutex
);
3170 /* Try to find the service for which to dump destinations */
3171 if (nlmsg_parse(cb
->nlh
, GENL_HDRLEN
, attrs
,
3172 IPVS_CMD_ATTR_MAX
, ip_vs_cmd_policy
))
3176 svc
= ip_vs_genl_find_service(net
, attrs
[IPVS_CMD_ATTR_SERVICE
]);
3177 if (IS_ERR(svc
) || svc
== NULL
)
3180 /* Dump the destinations */
3181 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
3184 if (ip_vs_genl_dump_dest(skb
, dest
, cb
) < 0) {
3186 goto nla_put_failure
;
3194 mutex_unlock(&__ip_vs_mutex
);
3199 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern
*udest
,
3200 struct nlattr
*nla
, int full_entry
)
3202 struct nlattr
*attrs
[IPVS_DEST_ATTR_MAX
+ 1];
3203 struct nlattr
*nla_addr
, *nla_port
;
3205 /* Parse mandatory identifying destination fields first */
3207 nla_parse_nested(attrs
, IPVS_DEST_ATTR_MAX
, nla
, ip_vs_dest_policy
))
3210 nla_addr
= attrs
[IPVS_DEST_ATTR_ADDR
];
3211 nla_port
= attrs
[IPVS_DEST_ATTR_PORT
];
3213 if (!(nla_addr
&& nla_port
))
3216 memset(udest
, 0, sizeof(*udest
));
3218 nla_memcpy(&udest
->addr
, nla_addr
, sizeof(udest
->addr
));
3219 udest
->port
= nla_get_u16(nla_port
);
3221 /* If a full entry was requested, check for the additional fields */
3223 struct nlattr
*nla_fwd
, *nla_weight
, *nla_u_thresh
,
3226 nla_fwd
= attrs
[IPVS_DEST_ATTR_FWD_METHOD
];
3227 nla_weight
= attrs
[IPVS_DEST_ATTR_WEIGHT
];
3228 nla_u_thresh
= attrs
[IPVS_DEST_ATTR_U_THRESH
];
3229 nla_l_thresh
= attrs
[IPVS_DEST_ATTR_L_THRESH
];
3231 if (!(nla_fwd
&& nla_weight
&& nla_u_thresh
&& nla_l_thresh
))
3234 udest
->conn_flags
= nla_get_u32(nla_fwd
)
3235 & IP_VS_CONN_F_FWD_MASK
;
3236 udest
->weight
= nla_get_u32(nla_weight
);
3237 udest
->u_threshold
= nla_get_u32(nla_u_thresh
);
3238 udest
->l_threshold
= nla_get_u32(nla_l_thresh
);
3244 static int ip_vs_genl_fill_daemon(struct sk_buff
*skb
, __be32 state
,
3245 const char *mcast_ifn
, __be32 syncid
)
3247 struct nlattr
*nl_daemon
;
3249 nl_daemon
= nla_nest_start(skb
, IPVS_CMD_ATTR_DAEMON
);
3253 if (nla_put_u32(skb
, IPVS_DAEMON_ATTR_STATE
, state
) ||
3254 nla_put_string(skb
, IPVS_DAEMON_ATTR_MCAST_IFN
, mcast_ifn
) ||
3255 nla_put_u32(skb
, IPVS_DAEMON_ATTR_SYNC_ID
, syncid
))
3256 goto nla_put_failure
;
3257 nla_nest_end(skb
, nl_daemon
);
3262 nla_nest_cancel(skb
, nl_daemon
);
3266 static int ip_vs_genl_dump_daemon(struct sk_buff
*skb
, __be32 state
,
3267 const char *mcast_ifn
, __be32 syncid
,
3268 struct netlink_callback
*cb
)
3271 hdr
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
3272 &ip_vs_genl_family
, NLM_F_MULTI
,
3273 IPVS_CMD_NEW_DAEMON
);
3277 if (ip_vs_genl_fill_daemon(skb
, state
, mcast_ifn
, syncid
))
3278 goto nla_put_failure
;
3280 return genlmsg_end(skb
, hdr
);
3283 genlmsg_cancel(skb
, hdr
);
3287 static int ip_vs_genl_dump_daemons(struct sk_buff
*skb
,
3288 struct netlink_callback
*cb
)
3290 struct net
*net
= skb_sknet(skb
);
3291 struct netns_ipvs
*ipvs
= net_ipvs(net
);
3293 mutex_lock(&ipvs
->sync_mutex
);
3294 if ((ipvs
->sync_state
& IP_VS_STATE_MASTER
) && !cb
->args
[0]) {
3295 if (ip_vs_genl_dump_daemon(skb
, IP_VS_STATE_MASTER
,
3296 ipvs
->master_mcast_ifn
,
3297 ipvs
->master_syncid
, cb
) < 0)
3298 goto nla_put_failure
;
3303 if ((ipvs
->sync_state
& IP_VS_STATE_BACKUP
) && !cb
->args
[1]) {
3304 if (ip_vs_genl_dump_daemon(skb
, IP_VS_STATE_BACKUP
,
3305 ipvs
->backup_mcast_ifn
,
3306 ipvs
->backup_syncid
, cb
) < 0)
3307 goto nla_put_failure
;
3313 mutex_unlock(&ipvs
->sync_mutex
);
3318 static int ip_vs_genl_new_daemon(struct net
*net
, struct nlattr
**attrs
)
3320 if (!(attrs
[IPVS_DAEMON_ATTR_STATE
] &&
3321 attrs
[IPVS_DAEMON_ATTR_MCAST_IFN
] &&
3322 attrs
[IPVS_DAEMON_ATTR_SYNC_ID
]))
3325 return start_sync_thread(net
,
3326 nla_get_u32(attrs
[IPVS_DAEMON_ATTR_STATE
]),
3327 nla_data(attrs
[IPVS_DAEMON_ATTR_MCAST_IFN
]),
3328 nla_get_u32(attrs
[IPVS_DAEMON_ATTR_SYNC_ID
]));
3331 static int ip_vs_genl_del_daemon(struct net
*net
, struct nlattr
**attrs
)
3333 if (!attrs
[IPVS_DAEMON_ATTR_STATE
])
3336 return stop_sync_thread(net
,
3337 nla_get_u32(attrs
[IPVS_DAEMON_ATTR_STATE
]));
3340 static int ip_vs_genl_set_config(struct net
*net
, struct nlattr
**attrs
)
3342 struct ip_vs_timeout_user t
;
3344 __ip_vs_get_timeouts(net
, &t
);
3346 if (attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP
])
3347 t
.tcp_timeout
= nla_get_u32(attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP
]);
3349 if (attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
])
3351 nla_get_u32(attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
]);
3353 if (attrs
[IPVS_CMD_ATTR_TIMEOUT_UDP
])
3354 t
.udp_timeout
= nla_get_u32(attrs
[IPVS_CMD_ATTR_TIMEOUT_UDP
]);
3356 return ip_vs_set_timeout(net
, &t
);
3359 static int ip_vs_genl_set_daemon(struct sk_buff
*skb
, struct genl_info
*info
)
3363 struct netns_ipvs
*ipvs
;
3365 net
= skb_sknet(skb
);
3366 ipvs
= net_ipvs(net
);
3367 cmd
= info
->genlhdr
->cmd
;
3369 if (cmd
== IPVS_CMD_NEW_DAEMON
|| cmd
== IPVS_CMD_DEL_DAEMON
) {
3370 struct nlattr
*daemon_attrs
[IPVS_DAEMON_ATTR_MAX
+ 1];
3372 mutex_lock(&ipvs
->sync_mutex
);
3373 if (!info
->attrs
[IPVS_CMD_ATTR_DAEMON
] ||
3374 nla_parse_nested(daemon_attrs
, IPVS_DAEMON_ATTR_MAX
,
3375 info
->attrs
[IPVS_CMD_ATTR_DAEMON
],
3376 ip_vs_daemon_policy
)) {
3381 if (cmd
== IPVS_CMD_NEW_DAEMON
)
3382 ret
= ip_vs_genl_new_daemon(net
, daemon_attrs
);
3384 ret
= ip_vs_genl_del_daemon(net
, daemon_attrs
);
3386 mutex_unlock(&ipvs
->sync_mutex
);
3391 static int ip_vs_genl_set_cmd(struct sk_buff
*skb
, struct genl_info
*info
)
3393 struct ip_vs_service
*svc
= NULL
;
3394 struct ip_vs_service_user_kern usvc
;
3395 struct ip_vs_dest_user_kern udest
;
3397 int need_full_svc
= 0, need_full_dest
= 0;
3400 net
= skb_sknet(skb
);
3401 cmd
= info
->genlhdr
->cmd
;
3403 mutex_lock(&__ip_vs_mutex
);
3405 if (cmd
== IPVS_CMD_FLUSH
) {
3406 ret
= ip_vs_flush(net
);
3408 } else if (cmd
== IPVS_CMD_SET_CONFIG
) {
3409 ret
= ip_vs_genl_set_config(net
, info
->attrs
);
3411 } else if (cmd
== IPVS_CMD_ZERO
&&
3412 !info
->attrs
[IPVS_CMD_ATTR_SERVICE
]) {
3413 ret
= ip_vs_zero_all(net
);
3417 /* All following commands require a service argument, so check if we
3418 * received a valid one. We need a full service specification when
3419 * adding / editing a service. Only identifying members otherwise. */
3420 if (cmd
== IPVS_CMD_NEW_SERVICE
|| cmd
== IPVS_CMD_SET_SERVICE
)
3423 ret
= ip_vs_genl_parse_service(net
, &usvc
,
3424 info
->attrs
[IPVS_CMD_ATTR_SERVICE
],
3425 need_full_svc
, &svc
);
3429 /* Unless we're adding a new service, the service must already exist */
3430 if ((cmd
!= IPVS_CMD_NEW_SERVICE
) && (svc
== NULL
)) {
3435 /* Destination commands require a valid destination argument. For
3436 * adding / editing a destination, we need a full destination
3438 if (cmd
== IPVS_CMD_NEW_DEST
|| cmd
== IPVS_CMD_SET_DEST
||
3439 cmd
== IPVS_CMD_DEL_DEST
) {
3440 if (cmd
!= IPVS_CMD_DEL_DEST
)
3443 ret
= ip_vs_genl_parse_dest(&udest
,
3444 info
->attrs
[IPVS_CMD_ATTR_DEST
],
3451 case IPVS_CMD_NEW_SERVICE
:
3453 ret
= ip_vs_add_service(net
, &usvc
, &svc
);
3457 case IPVS_CMD_SET_SERVICE
:
3458 ret
= ip_vs_edit_service(svc
, &usvc
);
3460 case IPVS_CMD_DEL_SERVICE
:
3461 ret
= ip_vs_del_service(svc
);
3462 /* do not use svc, it can be freed */
3464 case IPVS_CMD_NEW_DEST
:
3465 ret
= ip_vs_add_dest(svc
, &udest
);
3467 case IPVS_CMD_SET_DEST
:
3468 ret
= ip_vs_edit_dest(svc
, &udest
);
3470 case IPVS_CMD_DEL_DEST
:
3471 ret
= ip_vs_del_dest(svc
, &udest
);
3474 ret
= ip_vs_zero_service(svc
);
3481 mutex_unlock(&__ip_vs_mutex
);
3486 static int ip_vs_genl_get_cmd(struct sk_buff
*skb
, struct genl_info
*info
)
3488 struct sk_buff
*msg
;
3490 int ret
, cmd
, reply_cmd
;
3493 net
= skb_sknet(skb
);
3494 cmd
= info
->genlhdr
->cmd
;
3496 if (cmd
== IPVS_CMD_GET_SERVICE
)
3497 reply_cmd
= IPVS_CMD_NEW_SERVICE
;
3498 else if (cmd
== IPVS_CMD_GET_INFO
)
3499 reply_cmd
= IPVS_CMD_SET_INFO
;
3500 else if (cmd
== IPVS_CMD_GET_CONFIG
)
3501 reply_cmd
= IPVS_CMD_SET_CONFIG
;
3503 pr_err("unknown Generic Netlink command\n");
3507 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
3511 mutex_lock(&__ip_vs_mutex
);
3513 reply
= genlmsg_put_reply(msg
, info
, &ip_vs_genl_family
, 0, reply_cmd
);
3515 goto nla_put_failure
;
3518 case IPVS_CMD_GET_SERVICE
:
3520 struct ip_vs_service
*svc
;
3522 svc
= ip_vs_genl_find_service(net
,
3523 info
->attrs
[IPVS_CMD_ATTR_SERVICE
]);
3528 ret
= ip_vs_genl_fill_service(msg
, svc
);
3530 goto nla_put_failure
;
3539 case IPVS_CMD_GET_CONFIG
:
3541 struct ip_vs_timeout_user t
;
3543 __ip_vs_get_timeouts(net
, &t
);
3544 #ifdef CONFIG_IP_VS_PROTO_TCP
3545 if (nla_put_u32(msg
, IPVS_CMD_ATTR_TIMEOUT_TCP
,
3547 nla_put_u32(msg
, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
,
3549 goto nla_put_failure
;
3551 #ifdef CONFIG_IP_VS_PROTO_UDP
3552 if (nla_put_u32(msg
, IPVS_CMD_ATTR_TIMEOUT_UDP
, t
.udp_timeout
))
3553 goto nla_put_failure
;
3559 case IPVS_CMD_GET_INFO
:
3560 if (nla_put_u32(msg
, IPVS_INFO_ATTR_VERSION
,
3561 IP_VS_VERSION_CODE
) ||
3562 nla_put_u32(msg
, IPVS_INFO_ATTR_CONN_TAB_SIZE
,
3563 ip_vs_conn_tab_size
))
3564 goto nla_put_failure
;
3568 genlmsg_end(msg
, reply
);
3569 ret
= genlmsg_reply(msg
, info
);
3573 pr_err("not enough space in Netlink message\n");
3579 mutex_unlock(&__ip_vs_mutex
);
3585 static struct genl_ops ip_vs_genl_ops
[] __read_mostly
= {
3587 .cmd
= IPVS_CMD_NEW_SERVICE
,
3588 .flags
= GENL_ADMIN_PERM
,
3589 .policy
= ip_vs_cmd_policy
,
3590 .doit
= ip_vs_genl_set_cmd
,
3593 .cmd
= IPVS_CMD_SET_SERVICE
,
3594 .flags
= GENL_ADMIN_PERM
,
3595 .policy
= ip_vs_cmd_policy
,
3596 .doit
= ip_vs_genl_set_cmd
,
3599 .cmd
= IPVS_CMD_DEL_SERVICE
,
3600 .flags
= GENL_ADMIN_PERM
,
3601 .policy
= ip_vs_cmd_policy
,
3602 .doit
= ip_vs_genl_set_cmd
,
3605 .cmd
= IPVS_CMD_GET_SERVICE
,
3606 .flags
= GENL_ADMIN_PERM
,
3607 .doit
= ip_vs_genl_get_cmd
,
3608 .dumpit
= ip_vs_genl_dump_services
,
3609 .policy
= ip_vs_cmd_policy
,
3612 .cmd
= IPVS_CMD_NEW_DEST
,
3613 .flags
= GENL_ADMIN_PERM
,
3614 .policy
= ip_vs_cmd_policy
,
3615 .doit
= ip_vs_genl_set_cmd
,
3618 .cmd
= IPVS_CMD_SET_DEST
,
3619 .flags
= GENL_ADMIN_PERM
,
3620 .policy
= ip_vs_cmd_policy
,
3621 .doit
= ip_vs_genl_set_cmd
,
3624 .cmd
= IPVS_CMD_DEL_DEST
,
3625 .flags
= GENL_ADMIN_PERM
,
3626 .policy
= ip_vs_cmd_policy
,
3627 .doit
= ip_vs_genl_set_cmd
,
3630 .cmd
= IPVS_CMD_GET_DEST
,
3631 .flags
= GENL_ADMIN_PERM
,
3632 .policy
= ip_vs_cmd_policy
,
3633 .dumpit
= ip_vs_genl_dump_dests
,
3636 .cmd
= IPVS_CMD_NEW_DAEMON
,
3637 .flags
= GENL_ADMIN_PERM
,
3638 .policy
= ip_vs_cmd_policy
,
3639 .doit
= ip_vs_genl_set_daemon
,
3642 .cmd
= IPVS_CMD_DEL_DAEMON
,
3643 .flags
= GENL_ADMIN_PERM
,
3644 .policy
= ip_vs_cmd_policy
,
3645 .doit
= ip_vs_genl_set_daemon
,
3648 .cmd
= IPVS_CMD_GET_DAEMON
,
3649 .flags
= GENL_ADMIN_PERM
,
3650 .dumpit
= ip_vs_genl_dump_daemons
,
3653 .cmd
= IPVS_CMD_SET_CONFIG
,
3654 .flags
= GENL_ADMIN_PERM
,
3655 .policy
= ip_vs_cmd_policy
,
3656 .doit
= ip_vs_genl_set_cmd
,
3659 .cmd
= IPVS_CMD_GET_CONFIG
,
3660 .flags
= GENL_ADMIN_PERM
,
3661 .doit
= ip_vs_genl_get_cmd
,
3664 .cmd
= IPVS_CMD_GET_INFO
,
3665 .flags
= GENL_ADMIN_PERM
,
3666 .doit
= ip_vs_genl_get_cmd
,
3669 .cmd
= IPVS_CMD_ZERO
,
3670 .flags
= GENL_ADMIN_PERM
,
3671 .policy
= ip_vs_cmd_policy
,
3672 .doit
= ip_vs_genl_set_cmd
,
3675 .cmd
= IPVS_CMD_FLUSH
,
3676 .flags
= GENL_ADMIN_PERM
,
3677 .doit
= ip_vs_genl_set_cmd
,
3681 static int __init
ip_vs_genl_register(void)
3683 return genl_register_family_with_ops(&ip_vs_genl_family
,
3684 ip_vs_genl_ops
, ARRAY_SIZE(ip_vs_genl_ops
));
3687 static void ip_vs_genl_unregister(void)
3689 genl_unregister_family(&ip_vs_genl_family
);
3692 /* End of Generic Netlink interface definitions */
3695 * per netns intit/exit func.
3697 #ifdef CONFIG_SYSCTL
3698 static int __net_init
ip_vs_control_net_init_sysctl(struct net
*net
)
3701 struct netns_ipvs
*ipvs
= net_ipvs(net
);
3702 struct ctl_table
*tbl
;
3704 atomic_set(&ipvs
->dropentry
, 0);
3705 spin_lock_init(&ipvs
->dropentry_lock
);
3706 spin_lock_init(&ipvs
->droppacket_lock
);
3707 spin_lock_init(&ipvs
->securetcp_lock
);
3709 if (!net_eq(net
, &init_net
)) {
3710 tbl
= kmemdup(vs_vars
, sizeof(vs_vars
), GFP_KERNEL
);
3714 /* Don't export sysctls to unprivileged users */
3715 if (net
->user_ns
!= &init_user_ns
)
3716 tbl
[0].procname
= NULL
;
3719 /* Initialize sysctl defaults */
3721 ipvs
->sysctl_amemthresh
= 1024;
3722 tbl
[idx
++].data
= &ipvs
->sysctl_amemthresh
;
3723 ipvs
->sysctl_am_droprate
= 10;
3724 tbl
[idx
++].data
= &ipvs
->sysctl_am_droprate
;
3725 tbl
[idx
++].data
= &ipvs
->sysctl_drop_entry
;
3726 tbl
[idx
++].data
= &ipvs
->sysctl_drop_packet
;
3727 #ifdef CONFIG_IP_VS_NFCT
3728 tbl
[idx
++].data
= &ipvs
->sysctl_conntrack
;
3730 tbl
[idx
++].data
= &ipvs
->sysctl_secure_tcp
;
3731 ipvs
->sysctl_snat_reroute
= 1;
3732 tbl
[idx
++].data
= &ipvs
->sysctl_snat_reroute
;
3733 ipvs
->sysctl_sync_ver
= 1;
3734 tbl
[idx
++].data
= &ipvs
->sysctl_sync_ver
;
3735 ipvs
->sysctl_sync_ports
= 1;
3736 tbl
[idx
++].data
= &ipvs
->sysctl_sync_ports
;
3737 ipvs
->sysctl_sync_qlen_max
= nr_free_buffer_pages() / 32;
3738 tbl
[idx
++].data
= &ipvs
->sysctl_sync_qlen_max
;
3739 ipvs
->sysctl_sync_sock_size
= 0;
3740 tbl
[idx
++].data
= &ipvs
->sysctl_sync_sock_size
;
3741 tbl
[idx
++].data
= &ipvs
->sysctl_cache_bypass
;
3742 tbl
[idx
++].data
= &ipvs
->sysctl_expire_nodest_conn
;
3743 tbl
[idx
++].data
= &ipvs
->sysctl_expire_quiescent_template
;
3744 ipvs
->sysctl_sync_threshold
[0] = DEFAULT_SYNC_THRESHOLD
;
3745 ipvs
->sysctl_sync_threshold
[1] = DEFAULT_SYNC_PERIOD
;
3746 tbl
[idx
].data
= &ipvs
->sysctl_sync_threshold
;
3747 tbl
[idx
++].maxlen
= sizeof(ipvs
->sysctl_sync_threshold
);
3748 ipvs
->sysctl_sync_refresh_period
= DEFAULT_SYNC_REFRESH_PERIOD
;
3749 tbl
[idx
++].data
= &ipvs
->sysctl_sync_refresh_period
;
3750 ipvs
->sysctl_sync_retries
= clamp_t(int, DEFAULT_SYNC_RETRIES
, 0, 3);
3751 tbl
[idx
++].data
= &ipvs
->sysctl_sync_retries
;
3752 tbl
[idx
++].data
= &ipvs
->sysctl_nat_icmp_send
;
3753 ipvs
->sysctl_pmtu_disc
= 1;
3754 tbl
[idx
++].data
= &ipvs
->sysctl_pmtu_disc
;
3755 tbl
[idx
++].data
= &ipvs
->sysctl_backup_only
;
3758 ipvs
->sysctl_hdr
= register_net_sysctl(net
, "net/ipv4/vs", tbl
);
3759 if (ipvs
->sysctl_hdr
== NULL
) {
3760 if (!net_eq(net
, &init_net
))
3764 ip_vs_start_estimator(net
, &ipvs
->tot_stats
);
3765 ipvs
->sysctl_tbl
= tbl
;
3766 /* Schedule defense work */
3767 INIT_DELAYED_WORK(&ipvs
->defense_work
, defense_work_handler
);
3768 schedule_delayed_work(&ipvs
->defense_work
, DEFENSE_TIMER_PERIOD
);
3773 static void __net_exit
ip_vs_control_net_cleanup_sysctl(struct net
*net
)
3775 struct netns_ipvs
*ipvs
= net_ipvs(net
);
3777 cancel_delayed_work_sync(&ipvs
->defense_work
);
3778 cancel_work_sync(&ipvs
->defense_work
.work
);
3779 unregister_net_sysctl_table(ipvs
->sysctl_hdr
);
3784 static int __net_init
ip_vs_control_net_init_sysctl(struct net
*net
) { return 0; }
3785 static void __net_exit
ip_vs_control_net_cleanup_sysctl(struct net
*net
) { }
3789 static struct notifier_block ip_vs_dst_notifier
= {
3790 .notifier_call
= ip_vs_dst_event
,
3793 int __net_init
ip_vs_control_net_init(struct net
*net
)
3796 struct netns_ipvs
*ipvs
= net_ipvs(net
);
3798 /* Initialize rs_table */
3799 for (idx
= 0; idx
< IP_VS_RTAB_SIZE
; idx
++)
3800 INIT_HLIST_HEAD(&ipvs
->rs_table
[idx
]);
3802 INIT_LIST_HEAD(&ipvs
->dest_trash
);
3803 atomic_set(&ipvs
->ftpsvc_counter
, 0);
3804 atomic_set(&ipvs
->nullsvc_counter
, 0);
3807 ipvs
->tot_stats
.cpustats
= alloc_percpu(struct ip_vs_cpu_stats
);
3808 if (!ipvs
->tot_stats
.cpustats
)
3811 spin_lock_init(&ipvs
->tot_stats
.lock
);
3813 proc_create("ip_vs", 0, net
->proc_net
, &ip_vs_info_fops
);
3814 proc_create("ip_vs_stats", 0, net
->proc_net
, &ip_vs_stats_fops
);
3815 proc_create("ip_vs_stats_percpu", 0, net
->proc_net
,
3816 &ip_vs_stats_percpu_fops
);
3818 if (ip_vs_control_net_init_sysctl(net
))
3824 free_percpu(ipvs
->tot_stats
.cpustats
);
3828 void __net_exit
ip_vs_control_net_cleanup(struct net
*net
)
3830 struct netns_ipvs
*ipvs
= net_ipvs(net
);
3832 ip_vs_trash_cleanup(net
);
3833 ip_vs_stop_estimator(net
, &ipvs
->tot_stats
);
3834 ip_vs_control_net_cleanup_sysctl(net
);
3835 remove_proc_entry("ip_vs_stats_percpu", net
->proc_net
);
3836 remove_proc_entry("ip_vs_stats", net
->proc_net
);
3837 remove_proc_entry("ip_vs", net
->proc_net
);
3838 free_percpu(ipvs
->tot_stats
.cpustats
);
3841 int __init
ip_vs_register_nl_ioctl(void)
3845 ret
= nf_register_sockopt(&ip_vs_sockopts
);
3847 pr_err("cannot register sockopt.\n");
3851 ret
= ip_vs_genl_register();
3853 pr_err("cannot register Generic Netlink interface.\n");
3859 nf_unregister_sockopt(&ip_vs_sockopts
);
3864 void ip_vs_unregister_nl_ioctl(void)
3866 ip_vs_genl_unregister();
3867 nf_unregister_sockopt(&ip_vs_sockopts
);
3870 int __init
ip_vs_control_init(void)
3877 /* Initialize svc_table, ip_vs_svc_fwm_table */
3878 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
3879 INIT_LIST_HEAD(&ip_vs_svc_table
[idx
]);
3880 INIT_LIST_HEAD(&ip_vs_svc_fwm_table
[idx
]);
3883 smp_wmb(); /* Do we really need it now ? */
3885 ret
= register_netdevice_notifier(&ip_vs_dst_notifier
);
3894 void ip_vs_control_cleanup(void)
3897 unregister_netdevice_notifier(&ip_vs_dst_notifier
);