1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20 
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23 
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35 
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39 
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50 
51 #include <asm/uaccess.h>
52 
53 #include <net/ip_vs.h>
54 
/*
 * Mutex (a sleeping lock, not a semaphore) for the IPVS sockopts:
 * [gs]etsockopt may sleep.
 */
static DEFINE_MUTEX(__ip_vs_mutex);

/*
 * Lock for the service tables: taken for reading by the service lookup,
 * for writing when a service's destination list is modified.
 */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60 
61 /* sysctl variables */
62 
63 #ifdef CONFIG_IP_VS_DEBUG
/* debug level, only compiled in when CONFIG_IP_VS_DEBUG is set */
static int sysctl_ip_vs_debug_level;

/*
 *	Accessor for the debug level kept in sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
70 #endif
71 
72 
/*  Prototypes (forward declarations) */
static void __ip_vs_del_service(struct ip_vs_service *svc);
75 
76 
77 #ifdef CONFIG_IP_VS_IPV6
78 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
__ip_vs_addr_is_local_v6(struct net * net,const struct in6_addr * addr)79 static int __ip_vs_addr_is_local_v6(struct net *net,
80 				    const struct in6_addr *addr)
81 {
82 	struct rt6_info *rt;
83 	struct flowi6 fl6 = {
84 		.daddr = *addr,
85 	};
86 
87 	rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
88 	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
89 		return 1;
90 
91 	return 0;
92 }
93 #endif
94 
95 #ifdef CONFIG_SYSCTL
96 /*
97  *	update_defense_level is called from keventd and from sysctl,
98  *	so it needs to protect itself from softirqs
99  */
update_defense_level(struct netns_ipvs * ipvs)100 static void update_defense_level(struct netns_ipvs *ipvs)
101 {
102 	struct sysinfo i;
103 	static int old_secure_tcp = 0;
104 	int availmem;
105 	int nomem;
106 	int to_change = -1;
107 
108 	/* we only count free and buffered memory (in pages) */
109 	si_meminfo(&i);
110 	availmem = i.freeram + i.bufferram;
111 	/* however in linux 2.5 the i.bufferram is total page cache size,
112 	   we need adjust it */
113 	/* si_swapinfo(&i); */
114 	/* availmem = availmem - (i.totalswap - i.freeswap); */
115 
116 	nomem = (availmem < ipvs->sysctl_amemthresh);
117 
118 	local_bh_disable();
119 
120 	/* drop_entry */
121 	spin_lock(&ipvs->dropentry_lock);
122 	switch (ipvs->sysctl_drop_entry) {
123 	case 0:
124 		atomic_set(&ipvs->dropentry, 0);
125 		break;
126 	case 1:
127 		if (nomem) {
128 			atomic_set(&ipvs->dropentry, 1);
129 			ipvs->sysctl_drop_entry = 2;
130 		} else {
131 			atomic_set(&ipvs->dropentry, 0);
132 		}
133 		break;
134 	case 2:
135 		if (nomem) {
136 			atomic_set(&ipvs->dropentry, 1);
137 		} else {
138 			atomic_set(&ipvs->dropentry, 0);
139 			ipvs->sysctl_drop_entry = 1;
140 		};
141 		break;
142 	case 3:
143 		atomic_set(&ipvs->dropentry, 1);
144 		break;
145 	}
146 	spin_unlock(&ipvs->dropentry_lock);
147 
148 	/* drop_packet */
149 	spin_lock(&ipvs->droppacket_lock);
150 	switch (ipvs->sysctl_drop_packet) {
151 	case 0:
152 		ipvs->drop_rate = 0;
153 		break;
154 	case 1:
155 		if (nomem) {
156 			ipvs->drop_rate = ipvs->drop_counter
157 				= ipvs->sysctl_amemthresh /
158 				(ipvs->sysctl_amemthresh-availmem);
159 			ipvs->sysctl_drop_packet = 2;
160 		} else {
161 			ipvs->drop_rate = 0;
162 		}
163 		break;
164 	case 2:
165 		if (nomem) {
166 			ipvs->drop_rate = ipvs->drop_counter
167 				= ipvs->sysctl_amemthresh /
168 				(ipvs->sysctl_amemthresh-availmem);
169 		} else {
170 			ipvs->drop_rate = 0;
171 			ipvs->sysctl_drop_packet = 1;
172 		}
173 		break;
174 	case 3:
175 		ipvs->drop_rate = ipvs->sysctl_am_droprate;
176 		break;
177 	}
178 	spin_unlock(&ipvs->droppacket_lock);
179 
180 	/* secure_tcp */
181 	spin_lock(&ipvs->securetcp_lock);
182 	switch (ipvs->sysctl_secure_tcp) {
183 	case 0:
184 		if (old_secure_tcp >= 2)
185 			to_change = 0;
186 		break;
187 	case 1:
188 		if (nomem) {
189 			if (old_secure_tcp < 2)
190 				to_change = 1;
191 			ipvs->sysctl_secure_tcp = 2;
192 		} else {
193 			if (old_secure_tcp >= 2)
194 				to_change = 0;
195 		}
196 		break;
197 	case 2:
198 		if (nomem) {
199 			if (old_secure_tcp < 2)
200 				to_change = 1;
201 		} else {
202 			if (old_secure_tcp >= 2)
203 				to_change = 0;
204 			ipvs->sysctl_secure_tcp = 1;
205 		}
206 		break;
207 	case 3:
208 		if (old_secure_tcp < 2)
209 			to_change = 1;
210 		break;
211 	}
212 	old_secure_tcp = ipvs->sysctl_secure_tcp;
213 	if (to_change >= 0)
214 		ip_vs_protocol_timeout_change(ipvs,
215 					      ipvs->sysctl_secure_tcp > 1);
216 	spin_unlock(&ipvs->securetcp_lock);
217 
218 	local_bh_enable();
219 }
220 
221 
222 /*
223  *	Timer for checking the defense
224  */
225 #define DEFENSE_TIMER_PERIOD	1*HZ
226 
defense_work_handler(struct work_struct * work)227 static void defense_work_handler(struct work_struct *work)
228 {
229 	struct netns_ipvs *ipvs =
230 		container_of(work, struct netns_ipvs, defense_work.work);
231 
232 	update_defense_level(ipvs);
233 	if (atomic_read(&ipvs->dropentry))
234 		ip_vs_random_dropentry(ipvs->net);
235 	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
236 }
237 #endif
238 
239 int
ip_vs_use_count_inc(void)240 ip_vs_use_count_inc(void)
241 {
242 	return try_module_get(THIS_MODULE);
243 }
244 
245 void
ip_vs_use_count_dec(void)246 ip_vs_use_count_dec(void)
247 {
248 	module_put(THIS_MODULE);
249 }
250 
251 
/*
 *	Hash table: for virtual service lookups
 *
 *	IP_VS_SVC_TAB_SIZE is the number of buckets (2^IP_VS_SVC_TAB_BITS),
 *	IP_VS_SVC_TAB_MASK reduces a hash value to a bucket index.
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263 
264 
265 /*
266  *	Returns hash value for virtual service
267  */
268 static inline unsigned
ip_vs_svc_hashkey(struct net * net,int af,unsigned proto,const union nf_inet_addr * addr,__be16 port)269 ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
270 		  const union nf_inet_addr *addr, __be16 port)
271 {
272 	register unsigned porth = ntohs(port);
273 	__be32 addr_fold = addr->ip;
274 
275 #ifdef CONFIG_IP_VS_IPV6
276 	if (af == AF_INET6)
277 		addr_fold = addr->ip6[0]^addr->ip6[1]^
278 			    addr->ip6[2]^addr->ip6[3];
279 #endif
280 	addr_fold ^= ((size_t)net>>8);
281 
282 	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
283 		& IP_VS_SVC_TAB_MASK;
284 }
285 
286 /*
287  *	Returns hash value of fwmark for virtual service lookup
288  */
ip_vs_svc_fwm_hashkey(struct net * net,__u32 fwmark)289 static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
290 {
291 	return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
292 }
293 
294 /*
295  *	Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
296  *	or in the ip_vs_svc_fwm_table by fwmark.
297  *	Should be called with locked tables.
298  */
ip_vs_svc_hash(struct ip_vs_service * svc)299 static int ip_vs_svc_hash(struct ip_vs_service *svc)
300 {
301 	unsigned hash;
302 
303 	if (svc->flags & IP_VS_SVC_F_HASHED) {
304 		pr_err("%s(): request for already hashed, called from %pF\n",
305 		       __func__, __builtin_return_address(0));
306 		return 0;
307 	}
308 
309 	if (svc->fwmark == 0) {
310 		/*
311 		 *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
312 		 */
313 		hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314 					 &svc->addr, svc->port);
315 		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
316 	} else {
317 		/*
318 		 *  Hash it by fwmark in svc_fwm_table
319 		 */
320 		hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
321 		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322 	}
323 
324 	svc->flags |= IP_VS_SVC_F_HASHED;
325 	/* increase its refcnt because it is referenced by the svc table */
326 	atomic_inc(&svc->refcnt);
327 	return 1;
328 }
329 
330 
331 /*
332  *	Unhashes a service from svc_table / svc_fwm_table.
333  *	Should be called with locked tables.
334  */
ip_vs_svc_unhash(struct ip_vs_service * svc)335 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
336 {
337 	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
338 		pr_err("%s(): request for unhash flagged, called from %pF\n",
339 		       __func__, __builtin_return_address(0));
340 		return 0;
341 	}
342 
343 	if (svc->fwmark == 0) {
344 		/* Remove it from the svc_table table */
345 		list_del(&svc->s_list);
346 	} else {
347 		/* Remove it from the svc_fwm_table table */
348 		list_del(&svc->f_list);
349 	}
350 
351 	svc->flags &= ~IP_VS_SVC_F_HASHED;
352 	atomic_dec(&svc->refcnt);
353 	return 1;
354 }
355 
356 
357 /*
358  *	Get service by {netns, proto,addr,port} in the service table.
359  */
360 static inline struct ip_vs_service *
__ip_vs_service_find(struct net * net,int af,__u16 protocol,const union nf_inet_addr * vaddr,__be16 vport)361 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
362 		     const union nf_inet_addr *vaddr, __be16 vport)
363 {
364 	unsigned hash;
365 	struct ip_vs_service *svc;
366 
367 	/* Check for "full" addressed entries */
368 	hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
369 
370 	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
371 		if ((svc->af == af)
372 		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
373 		    && (svc->port == vport)
374 		    && (svc->protocol == protocol)
375 		    && net_eq(svc->net, net)) {
376 			/* HIT */
377 			return svc;
378 		}
379 	}
380 
381 	return NULL;
382 }
383 
384 
385 /*
386  *	Get service by {fwmark} in the service table.
387  */
388 static inline struct ip_vs_service *
__ip_vs_svc_fwm_find(struct net * net,int af,__u32 fwmark)389 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
390 {
391 	unsigned hash;
392 	struct ip_vs_service *svc;
393 
394 	/* Check for fwmark addressed entries */
395 	hash = ip_vs_svc_fwm_hashkey(net, fwmark);
396 
397 	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398 		if (svc->fwmark == fwmark && svc->af == af
399 		    && net_eq(svc->net, net)) {
400 			/* HIT */
401 			return svc;
402 		}
403 	}
404 
405 	return NULL;
406 }
407 
408 struct ip_vs_service *
ip_vs_service_get(struct net * net,int af,__u32 fwmark,__u16 protocol,const union nf_inet_addr * vaddr,__be16 vport)409 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
410 		  const union nf_inet_addr *vaddr, __be16 vport)
411 {
412 	struct ip_vs_service *svc;
413 	struct netns_ipvs *ipvs = net_ipvs(net);
414 
415 	read_lock(&__ip_vs_svc_lock);
416 
417 	/*
418 	 *	Check the table hashed by fwmark first
419 	 */
420 	if (fwmark) {
421 		svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422 		if (svc)
423 			goto out;
424 	}
425 
426 	/*
427 	 *	Check the table hashed by <protocol,addr,port>
428 	 *	for "full" addressed entries
429 	 */
430 	svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
431 
432 	if (svc == NULL
433 	    && protocol == IPPROTO_TCP
434 	    && atomic_read(&ipvs->ftpsvc_counter)
435 	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
436 		/*
437 		 * Check if ftp service entry exists, the packet
438 		 * might belong to FTP data connections.
439 		 */
440 		svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
441 	}
442 
443 	if (svc == NULL
444 	    && atomic_read(&ipvs->nullsvc_counter)) {
445 		/*
446 		 * Check if the catch-all port (port zero) exists
447 		 */
448 		svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
449 	}
450 
451   out:
452 	if (svc)
453 		atomic_inc(&svc->usecnt);
454 	read_unlock(&__ip_vs_svc_lock);
455 
456 	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
457 		      fwmark, ip_vs_proto_name(protocol),
458 		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
459 		      svc ? "hit" : "not hit");
460 
461 	return svc;
462 }
463 
464 
465 static inline void
__ip_vs_bind_svc(struct ip_vs_dest * dest,struct ip_vs_service * svc)466 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
467 {
468 	atomic_inc(&svc->refcnt);
469 	dest->svc = svc;
470 }
471 
472 static void
__ip_vs_unbind_svc(struct ip_vs_dest * dest)473 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
474 {
475 	struct ip_vs_service *svc = dest->svc;
476 
477 	dest->svc = NULL;
478 	if (atomic_dec_and_test(&svc->refcnt)) {
479 		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
480 			      svc->fwmark,
481 			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
482 			      ntohs(svc->port), atomic_read(&svc->usecnt));
483 		free_percpu(svc->stats.cpustats);
484 		kfree(svc);
485 	}
486 }
487 
488 
489 /*
490  *	Returns hash value for real service
491  */
ip_vs_rs_hashkey(int af,const union nf_inet_addr * addr,__be16 port)492 static inline unsigned ip_vs_rs_hashkey(int af,
493 					    const union nf_inet_addr *addr,
494 					    __be16 port)
495 {
496 	register unsigned porth = ntohs(port);
497 	__be32 addr_fold = addr->ip;
498 
499 #ifdef CONFIG_IP_VS_IPV6
500 	if (af == AF_INET6)
501 		addr_fold = addr->ip6[0]^addr->ip6[1]^
502 			    addr->ip6[2]^addr->ip6[3];
503 #endif
504 
505 	return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
506 		& IP_VS_RTAB_MASK;
507 }
508 
509 /*
510  *	Hashes ip_vs_dest in rs_table by <proto,addr,port>.
511  *	should be called with locked tables.
512  */
ip_vs_rs_hash(struct netns_ipvs * ipvs,struct ip_vs_dest * dest)513 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
514 {
515 	unsigned hash;
516 
517 	if (!list_empty(&dest->d_list)) {
518 		return 0;
519 	}
520 
521 	/*
522 	 *	Hash by proto,addr,port,
523 	 *	which are the parameters of the real service.
524 	 */
525 	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526 
527 	list_add(&dest->d_list, &ipvs->rs_table[hash]);
528 
529 	return 1;
530 }
531 
532 /*
533  *	UNhashes ip_vs_dest from rs_table.
534  *	should be called with locked tables.
535  */
ip_vs_rs_unhash(struct ip_vs_dest * dest)536 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537 {
538 	/*
539 	 * Remove it from the rs_table table.
540 	 */
541 	if (!list_empty(&dest->d_list)) {
542 		list_del(&dest->d_list);
543 		INIT_LIST_HEAD(&dest->d_list);
544 	}
545 
546 	return 1;
547 }
548 
549 /*
550  *	Lookup real service by <proto,addr,port> in the real service table.
551  */
552 struct ip_vs_dest *
ip_vs_lookup_real_service(struct net * net,int af,__u16 protocol,const union nf_inet_addr * daddr,__be16 dport)553 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
554 			  const union nf_inet_addr *daddr,
555 			  __be16 dport)
556 {
557 	struct netns_ipvs *ipvs = net_ipvs(net);
558 	unsigned hash;
559 	struct ip_vs_dest *dest;
560 
561 	/*
562 	 *	Check for "full" addressed entries
563 	 *	Return the first found entry
564 	 */
565 	hash = ip_vs_rs_hashkey(af, daddr, dport);
566 
567 	read_lock(&ipvs->rs_lock);
568 	list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
569 		if ((dest->af == af)
570 		    && ip_vs_addr_equal(af, &dest->addr, daddr)
571 		    && (dest->port == dport)
572 		    && ((dest->protocol == protocol) ||
573 			dest->vfwmark)) {
574 			/* HIT */
575 			read_unlock(&ipvs->rs_lock);
576 			return dest;
577 		}
578 	}
579 	read_unlock(&ipvs->rs_lock);
580 
581 	return NULL;
582 }
583 
584 /*
585  *	Lookup destination by {addr,port} in the given service
586  */
587 static struct ip_vs_dest *
ip_vs_lookup_dest(struct ip_vs_service * svc,const union nf_inet_addr * daddr,__be16 dport)588 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
589 		  __be16 dport)
590 {
591 	struct ip_vs_dest *dest;
592 
593 	/*
594 	 * Find the destination for the given service
595 	 */
596 	list_for_each_entry(dest, &svc->destinations, n_list) {
597 		if ((dest->af == svc->af)
598 		    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
599 		    && (dest->port == dport)) {
600 			/* HIT */
601 			return dest;
602 		}
603 	}
604 
605 	return NULL;
606 }
607 
608 /*
609  * Find destination by {daddr,dport,vaddr,protocol}
610  * Cretaed to be used in ip_vs_process_message() in
611  * the backup synchronization daemon. It finds the
612  * destination to be bound to the received connection
613  * on the backup.
614  *
615  * ip_vs_lookup_real_service() looked promissing, but
616  * seems not working as expected.
617  */
ip_vs_find_dest(struct net * net,int af,const union nf_inet_addr * daddr,__be16 dport,const union nf_inet_addr * vaddr,__be16 vport,__u16 protocol,__u32 fwmark)618 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
619 				   const union nf_inet_addr *daddr,
620 				   __be16 dport,
621 				   const union nf_inet_addr *vaddr,
622 				   __be16 vport, __u16 protocol, __u32 fwmark)
623 {
624 	struct ip_vs_dest *dest;
625 	struct ip_vs_service *svc;
626 
627 	svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
628 	if (!svc)
629 		return NULL;
630 	dest = ip_vs_lookup_dest(svc, daddr, dport);
631 	if (dest)
632 		atomic_inc(&dest->refcnt);
633 	ip_vs_service_put(svc);
634 	return dest;
635 }
636 
637 /*
638  *  Lookup dest by {svc,addr,port} in the destination trash.
639  *  The destination trash is used to hold the destinations that are removed
640  *  from the service table but are still referenced by some conn entries.
641  *  The reason to add the destination trash is when the dest is temporary
642  *  down (either by administrator or by monitor program), the dest can be
643  *  picked back from the trash, the remaining connections to the dest can
644  *  continue, and the counting information of the dest is also useful for
645  *  scheduling.
646  */
647 static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service * svc,const union nf_inet_addr * daddr,__be16 dport)648 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
649 		     __be16 dport)
650 {
651 	struct ip_vs_dest *dest, *nxt;
652 	struct netns_ipvs *ipvs = net_ipvs(svc->net);
653 
654 	/*
655 	 * Find the destination in trash
656 	 */
657 	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
658 		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
659 			      "dest->refcnt=%d\n",
660 			      dest->vfwmark,
661 			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
662 			      ntohs(dest->port),
663 			      atomic_read(&dest->refcnt));
664 		if (dest->af == svc->af &&
665 		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
666 		    dest->port == dport &&
667 		    dest->vfwmark == svc->fwmark &&
668 		    dest->protocol == svc->protocol &&
669 		    (svc->fwmark ||
670 		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
671 		      dest->vport == svc->port))) {
672 			/* HIT */
673 			return dest;
674 		}
675 
676 		/*
677 		 * Try to purge the destination from trash if not referenced
678 		 */
679 		if (atomic_read(&dest->refcnt) == 1) {
680 			IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
681 				      "from trash\n",
682 				      dest->vfwmark,
683 				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
684 				      ntohs(dest->port));
685 			list_del(&dest->n_list);
686 			ip_vs_dst_reset(dest);
687 			__ip_vs_unbind_svc(dest);
688 			free_percpu(dest->stats.cpustats);
689 			kfree(dest);
690 		}
691 	}
692 
693 	return NULL;
694 }
695 
696 
697 /*
698  *  Clean up all the destinations in the trash
699  *  Called by the ip_vs_control_cleanup()
700  *
701  *  When the ip_vs_control_clearup is activated by ipvs module exit,
702  *  the service tables must have been flushed and all the connections
703  *  are expired, and the refcnt of each destination in the trash must
704  *  be 1, so we simply release them here.
705  */
ip_vs_trash_cleanup(struct net * net)706 static void ip_vs_trash_cleanup(struct net *net)
707 {
708 	struct ip_vs_dest *dest, *nxt;
709 	struct netns_ipvs *ipvs = net_ipvs(net);
710 
711 	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
712 		list_del(&dest->n_list);
713 		ip_vs_dst_reset(dest);
714 		__ip_vs_unbind_svc(dest);
715 		free_percpu(dest->stats.cpustats);
716 		kfree(dest);
717 	}
718 }
719 
720 static void
ip_vs_copy_stats(struct ip_vs_stats_user * dst,struct ip_vs_stats * src)721 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
722 {
723 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
724 
725 	spin_lock_bh(&src->lock);
726 
727 	IP_VS_SHOW_STATS_COUNTER(conns);
728 	IP_VS_SHOW_STATS_COUNTER(inpkts);
729 	IP_VS_SHOW_STATS_COUNTER(outpkts);
730 	IP_VS_SHOW_STATS_COUNTER(inbytes);
731 	IP_VS_SHOW_STATS_COUNTER(outbytes);
732 
733 	ip_vs_read_estimator(dst, src);
734 
735 	spin_unlock_bh(&src->lock);
736 }
737 
738 static void
ip_vs_zero_stats(struct ip_vs_stats * stats)739 ip_vs_zero_stats(struct ip_vs_stats *stats)
740 {
741 	spin_lock_bh(&stats->lock);
742 
743 	/* get current counters as zero point, rates are zeroed */
744 
745 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
746 
747 	IP_VS_ZERO_STATS_COUNTER(conns);
748 	IP_VS_ZERO_STATS_COUNTER(inpkts);
749 	IP_VS_ZERO_STATS_COUNTER(outpkts);
750 	IP_VS_ZERO_STATS_COUNTER(inbytes);
751 	IP_VS_ZERO_STATS_COUNTER(outbytes);
752 
753 	ip_vs_zero_estimator(stats);
754 
755 	spin_unlock_bh(&stats->lock);
756 }
757 
758 /*
759  *	Update a destination in the given service
760  */
761 static void
__ip_vs_update_dest(struct ip_vs_service * svc,struct ip_vs_dest * dest,struct ip_vs_dest_user_kern * udest,int add)762 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
763 		    struct ip_vs_dest_user_kern *udest, int add)
764 {
765 	struct netns_ipvs *ipvs = net_ipvs(svc->net);
766 	int conn_flags;
767 
768 	/* set the weight and the flags */
769 	atomic_set(&dest->weight, udest->weight);
770 	conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
771 	conn_flags |= IP_VS_CONN_F_INACTIVE;
772 
773 	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
774 	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
775 		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
776 	} else {
777 		/*
778 		 *    Put the real service in rs_table if not present.
779 		 *    For now only for NAT!
780 		 */
781 		write_lock_bh(&ipvs->rs_lock);
782 		ip_vs_rs_hash(ipvs, dest);
783 		write_unlock_bh(&ipvs->rs_lock);
784 	}
785 	atomic_set(&dest->conn_flags, conn_flags);
786 
787 	/* bind the service */
788 	if (!dest->svc) {
789 		__ip_vs_bind_svc(dest, svc);
790 	} else {
791 		if (dest->svc != svc) {
792 			__ip_vs_unbind_svc(dest);
793 			ip_vs_zero_stats(&dest->stats);
794 			__ip_vs_bind_svc(dest, svc);
795 		}
796 	}
797 
798 	/* set the dest status flags */
799 	dest->flags |= IP_VS_DEST_F_AVAILABLE;
800 
801 	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
802 		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
803 	dest->u_threshold = udest->u_threshold;
804 	dest->l_threshold = udest->l_threshold;
805 
806 	spin_lock_bh(&dest->dst_lock);
807 	ip_vs_dst_reset(dest);
808 	spin_unlock_bh(&dest->dst_lock);
809 
810 	if (add)
811 		ip_vs_start_estimator(svc->net, &dest->stats);
812 
813 	write_lock_bh(&__ip_vs_svc_lock);
814 
815 	/* Wait until all other svc users go away */
816 	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
817 
818 	if (add) {
819 		list_add(&dest->n_list, &svc->destinations);
820 		svc->num_dests++;
821 	}
822 
823 	/* call the update_service, because server weight may be changed */
824 	if (svc->scheduler->update_service)
825 		svc->scheduler->update_service(svc);
826 
827 	write_unlock_bh(&__ip_vs_svc_lock);
828 }
829 
830 
831 /*
832  *	Create a destination for the given service
833  */
834 static int
ip_vs_new_dest(struct ip_vs_service * svc,struct ip_vs_dest_user_kern * udest,struct ip_vs_dest ** dest_p)835 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
836 	       struct ip_vs_dest **dest_p)
837 {
838 	struct ip_vs_dest *dest;
839 	unsigned atype;
840 
841 	EnterFunction(2);
842 
843 #ifdef CONFIG_IP_VS_IPV6
844 	if (svc->af == AF_INET6) {
845 		atype = ipv6_addr_type(&udest->addr.in6);
846 		if ((!(atype & IPV6_ADDR_UNICAST) ||
847 			atype & IPV6_ADDR_LINKLOCAL) &&
848 			!__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
849 			return -EINVAL;
850 	} else
851 #endif
852 	{
853 		atype = inet_addr_type(svc->net, udest->addr.ip);
854 		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
855 			return -EINVAL;
856 	}
857 
858 	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
859 	if (dest == NULL) {
860 		pr_err("%s(): no memory.\n", __func__);
861 		return -ENOMEM;
862 	}
863 	dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
864 	if (!dest->stats.cpustats) {
865 		pr_err("%s() alloc_percpu failed\n", __func__);
866 		goto err_alloc;
867 	}
868 
869 	dest->af = svc->af;
870 	dest->protocol = svc->protocol;
871 	dest->vaddr = svc->addr;
872 	dest->vport = svc->port;
873 	dest->vfwmark = svc->fwmark;
874 	ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
875 	dest->port = udest->port;
876 
877 	atomic_set(&dest->activeconns, 0);
878 	atomic_set(&dest->inactconns, 0);
879 	atomic_set(&dest->persistconns, 0);
880 	atomic_set(&dest->refcnt, 1);
881 
882 	INIT_LIST_HEAD(&dest->d_list);
883 	spin_lock_init(&dest->dst_lock);
884 	spin_lock_init(&dest->stats.lock);
885 	__ip_vs_update_dest(svc, dest, udest, 1);
886 
887 	*dest_p = dest;
888 
889 	LeaveFunction(2);
890 	return 0;
891 
892 err_alloc:
893 	kfree(dest);
894 	return -ENOMEM;
895 }
896 
897 
898 /*
899  *	Add a destination into an existing service
900  */
901 static int
ip_vs_add_dest(struct ip_vs_service * svc,struct ip_vs_dest_user_kern * udest)902 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
903 {
904 	struct ip_vs_dest *dest;
905 	union nf_inet_addr daddr;
906 	__be16 dport = udest->port;
907 	int ret;
908 
909 	EnterFunction(2);
910 
911 	if (udest->weight < 0) {
912 		pr_err("%s(): server weight less than zero\n", __func__);
913 		return -ERANGE;
914 	}
915 
916 	if (udest->l_threshold > udest->u_threshold) {
917 		pr_err("%s(): lower threshold is higher than upper threshold\n",
918 			__func__);
919 		return -ERANGE;
920 	}
921 
922 	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
923 
924 	/*
925 	 * Check if the dest already exists in the list
926 	 */
927 	dest = ip_vs_lookup_dest(svc, &daddr, dport);
928 
929 	if (dest != NULL) {
930 		IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
931 		return -EEXIST;
932 	}
933 
934 	/*
935 	 * Check if the dest already exists in the trash and
936 	 * is from the same service
937 	 */
938 	dest = ip_vs_trash_get_dest(svc, &daddr, dport);
939 
940 	if (dest != NULL) {
941 		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
942 			      "dest->refcnt=%d, service %u/%s:%u\n",
943 			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
944 			      atomic_read(&dest->refcnt),
945 			      dest->vfwmark,
946 			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
947 			      ntohs(dest->vport));
948 
949 		/*
950 		 * Get the destination from the trash
951 		 */
952 		list_del(&dest->n_list);
953 
954 		__ip_vs_update_dest(svc, dest, udest, 1);
955 		ret = 0;
956 	} else {
957 		/*
958 		 * Allocate and initialize the dest structure
959 		 */
960 		ret = ip_vs_new_dest(svc, udest, &dest);
961 	}
962 	LeaveFunction(2);
963 
964 	return ret;
965 }
966 
967 
968 /*
969  *	Edit a destination in the given service
970  */
971 static int
ip_vs_edit_dest(struct ip_vs_service * svc,struct ip_vs_dest_user_kern * udest)972 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
973 {
974 	struct ip_vs_dest *dest;
975 	union nf_inet_addr daddr;
976 	__be16 dport = udest->port;
977 
978 	EnterFunction(2);
979 
980 	if (udest->weight < 0) {
981 		pr_err("%s(): server weight less than zero\n", __func__);
982 		return -ERANGE;
983 	}
984 
985 	if (udest->l_threshold > udest->u_threshold) {
986 		pr_err("%s(): lower threshold is higher than upper threshold\n",
987 			__func__);
988 		return -ERANGE;
989 	}
990 
991 	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
992 
993 	/*
994 	 *  Lookup the destination list
995 	 */
996 	dest = ip_vs_lookup_dest(svc, &daddr, dport);
997 
998 	if (dest == NULL) {
999 		IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1000 		return -ENOENT;
1001 	}
1002 
1003 	__ip_vs_update_dest(svc, dest, udest, 0);
1004 	LeaveFunction(2);
1005 
1006 	return 0;
1007 }
1008 
1009 
1010 /*
1011  *	Delete a destination (must be already unlinked from the service)
1012  */
__ip_vs_del_dest(struct net * net,struct ip_vs_dest * dest)1013 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1014 {
1015 	struct netns_ipvs *ipvs = net_ipvs(net);
1016 
1017 	ip_vs_stop_estimator(net, &dest->stats);
1018 
1019 	/*
1020 	 *  Remove it from the d-linked list with the real services.
1021 	 */
1022 	write_lock_bh(&ipvs->rs_lock);
1023 	ip_vs_rs_unhash(dest);
1024 	write_unlock_bh(&ipvs->rs_lock);
1025 
1026 	/*
1027 	 *  Decrease the refcnt of the dest, and free the dest
1028 	 *  if nobody refers to it (refcnt=0). Otherwise, throw
1029 	 *  the destination into the trash.
1030 	 */
1031 	if (atomic_dec_and_test(&dest->refcnt)) {
1032 		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1033 			      dest->vfwmark,
1034 			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
1035 			      ntohs(dest->port));
1036 		ip_vs_dst_reset(dest);
1037 		/* simply decrease svc->refcnt here, let the caller check
1038 		   and release the service if nobody refers to it.
1039 		   Only user context can release destination and service,
1040 		   and only one user context can update virtual service at a
1041 		   time, so the operation here is OK */
1042 		atomic_dec(&dest->svc->refcnt);
1043 		free_percpu(dest->stats.cpustats);
1044 		kfree(dest);
1045 	} else {
1046 		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1047 			      "dest->refcnt=%d\n",
1048 			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
1049 			      ntohs(dest->port),
1050 			      atomic_read(&dest->refcnt));
1051 		list_add(&dest->n_list, &ipvs->dest_trash);
1052 		atomic_inc(&dest->refcnt);
1053 	}
1054 }
1055 
1056 
1057 /*
1058  *	Unlink a destination from the given service
1059  */
__ip_vs_unlink_dest(struct ip_vs_service * svc,struct ip_vs_dest * dest,int svcupd)1060 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1061 				struct ip_vs_dest *dest,
1062 				int svcupd)
1063 {
1064 	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1065 
1066 	/*
1067 	 *  Remove it from the d-linked destination list.
1068 	 */
1069 	list_del(&dest->n_list);
1070 	svc->num_dests--;
1071 
1072 	/*
1073 	 *  Call the update_service function of its scheduler
1074 	 */
1075 	if (svcupd && svc->scheduler->update_service)
1076 			svc->scheduler->update_service(svc);
1077 }
1078 
1079 
1080 /*
1081  *	Delete a destination server in the given service
1082  */
1083 static int
ip_vs_del_dest(struct ip_vs_service * svc,struct ip_vs_dest_user_kern * udest)1084 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1085 {
1086 	struct ip_vs_dest *dest;
1087 	__be16 dport = udest->port;
1088 
1089 	EnterFunction(2);
1090 
1091 	dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1092 
1093 	if (dest == NULL) {
1094 		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1095 		return -ENOENT;
1096 	}
1097 
1098 	write_lock_bh(&__ip_vs_svc_lock);
1099 
1100 	/*
1101 	 *	Wait until all other svc users go away.
1102 	 */
1103 	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1104 
1105 	/*
1106 	 *	Unlink dest from the service
1107 	 */
1108 	__ip_vs_unlink_dest(svc, dest, 1);
1109 
1110 	write_unlock_bh(&__ip_vs_svc_lock);
1111 
1112 	/*
1113 	 *	Delete the destination
1114 	 */
1115 	__ip_vs_del_dest(svc->net, dest);
1116 
1117 	LeaveFunction(2);
1118 
1119 	return 0;
1120 }
1121 
1122 
1123 /*
1124  *	Add a service into the service hash table
1125  */
1126 static int
ip_vs_add_service(struct net * net,struct ip_vs_service_user_kern * u,struct ip_vs_service ** svc_p)1127 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1128 		  struct ip_vs_service **svc_p)
1129 {
1130 	int ret = 0;
1131 	struct ip_vs_scheduler *sched = NULL;
1132 	struct ip_vs_pe *pe = NULL;
1133 	struct ip_vs_service *svc = NULL;
1134 	struct netns_ipvs *ipvs = net_ipvs(net);
1135 
1136 	/* increase the module use count */
1137 	ip_vs_use_count_inc();
1138 
1139 	/* Lookup the scheduler by 'u->sched_name' */
1140 	sched = ip_vs_scheduler_get(u->sched_name);
1141 	if (sched == NULL) {
1142 		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1143 		ret = -ENOENT;
1144 		goto out_err;
1145 	}
1146 
1147 	if (u->pe_name && *u->pe_name) {
1148 		pe = ip_vs_pe_getbyname(u->pe_name);
1149 		if (pe == NULL) {
1150 			pr_info("persistence engine module ip_vs_pe_%s "
1151 				"not found\n", u->pe_name);
1152 			ret = -ENOENT;
1153 			goto out_err;
1154 		}
1155 	}
1156 
1157 #ifdef CONFIG_IP_VS_IPV6
1158 	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1159 		ret = -EINVAL;
1160 		goto out_err;
1161 	}
1162 #endif
1163 
1164 	svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1165 	if (svc == NULL) {
1166 		IP_VS_DBG(1, "%s(): no memory\n", __func__);
1167 		ret = -ENOMEM;
1168 		goto out_err;
1169 	}
1170 	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1171 	if (!svc->stats.cpustats) {
1172 		pr_err("%s() alloc_percpu failed\n", __func__);
1173 		goto out_err;
1174 	}
1175 
1176 	/* I'm the first user of the service */
1177 	atomic_set(&svc->usecnt, 0);
1178 	atomic_set(&svc->refcnt, 0);
1179 
1180 	svc->af = u->af;
1181 	svc->protocol = u->protocol;
1182 	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1183 	svc->port = u->port;
1184 	svc->fwmark = u->fwmark;
1185 	svc->flags = u->flags;
1186 	svc->timeout = u->timeout * HZ;
1187 	svc->netmask = u->netmask;
1188 	svc->net = net;
1189 
1190 	INIT_LIST_HEAD(&svc->destinations);
1191 	rwlock_init(&svc->sched_lock);
1192 	spin_lock_init(&svc->stats.lock);
1193 
1194 	/* Bind the scheduler */
1195 	ret = ip_vs_bind_scheduler(svc, sched);
1196 	if (ret)
1197 		goto out_err;
1198 	sched = NULL;
1199 
1200 	/* Bind the ct retriever */
1201 	ip_vs_bind_pe(svc, pe);
1202 	pe = NULL;
1203 
1204 	/* Update the virtual service counters */
1205 	if (svc->port == FTPPORT)
1206 		atomic_inc(&ipvs->ftpsvc_counter);
1207 	else if (svc->port == 0)
1208 		atomic_inc(&ipvs->nullsvc_counter);
1209 
1210 	ip_vs_start_estimator(net, &svc->stats);
1211 
1212 	/* Count only IPv4 services for old get/setsockopt interface */
1213 	if (svc->af == AF_INET)
1214 		ipvs->num_services++;
1215 
1216 	/* Hash the service into the service table */
1217 	write_lock_bh(&__ip_vs_svc_lock);
1218 	ip_vs_svc_hash(svc);
1219 	write_unlock_bh(&__ip_vs_svc_lock);
1220 
1221 	*svc_p = svc;
1222 	/* Now there is a service - full throttle */
1223 	ipvs->enable = 1;
1224 	return 0;
1225 
1226 
1227  out_err:
1228 	if (svc != NULL) {
1229 		ip_vs_unbind_scheduler(svc);
1230 		if (svc->inc) {
1231 			local_bh_disable();
1232 			ip_vs_app_inc_put(svc->inc);
1233 			local_bh_enable();
1234 		}
1235 		if (svc->stats.cpustats)
1236 			free_percpu(svc->stats.cpustats);
1237 		kfree(svc);
1238 	}
1239 	ip_vs_scheduler_put(sched);
1240 	ip_vs_pe_put(pe);
1241 
1242 	/* decrease the module use count */
1243 	ip_vs_use_count_dec();
1244 
1245 	return ret;
1246 }
1247 
1248 
1249 /*
1250  *	Edit a service and bind it with a new scheduler
1251  */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	/*
	 * Apply the settings in 'u' (flags, timeout, netmask, scheduler,
	 * persistence engine) to the existing service 'svc'.
	 * Returns 0 on success, -ENOENT when the scheduler or persistence
	 * engine is not found, -EINVAL for an IPv6 netmask outside 1..128,
	 * or the error from unbinding/binding the scheduler.
	 */
	struct ip_vs_scheduler *sched, *old_sched;
	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		return -ENOENT;
	}
	/*
	 * old_sched is whatever gets put at 'out'.  Until the service is
	 * actually switched it aliases the scheduler just looked up, so
	 * the early error exits release that one.
	 */
	old_sched = sched;

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out;
		}
		/* Same convention as old_sched: put at 'out' on early exit */
		old_pe = pe;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
		ret = -EINVAL;
		goto out;
	}
#endif

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	/*
	 * From here on old_sched is the scheduler currently bound to the
	 * service.  If it is the same as 'sched', the put at 'out' is
	 * applied to that same scheduler (presumably dropping the extra
	 * reference from the lookup above -- confirm in
	 * ip_vs_scheduler_get()).
	 */
	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			/* Failed: the new scheduler is the one to put */
			old_sched = sched;
			goto out_unlock;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
			goto out_unlock;
		}
	}

	/*
	 * Swap the persistence engine when it changed; 'pe' may be NULL
	 * here, in which case NULL is what gets passed to ip_vs_bind_pe().
	 */
	old_pe = svc->pe;
	if (pe != old_pe) {
		ip_vs_unbind_pe(svc);
		ip_vs_bind_pe(svc, pe);
	}

  out_unlock:
	write_unlock_bh(&__ip_vs_svc_lock);
  out:
	/* Put whichever scheduler / pe old_sched and old_pe ended up naming */
	ip_vs_scheduler_put(old_sched);
	ip_vs_pe_put(old_pe);
	return ret;
}
1344 
1345 
1346 /*
1347  *	Delete a service from the service list
1348  *	- The service must be unlinked, unlocked and not referenced!
1349  *	- We are called under _bh lock
1350  */
__ip_vs_del_service(struct ip_vs_service * svc)1351 static void __ip_vs_del_service(struct ip_vs_service *svc)
1352 {
1353 	struct ip_vs_dest *dest, *nxt;
1354 	struct ip_vs_scheduler *old_sched;
1355 	struct ip_vs_pe *old_pe;
1356 	struct netns_ipvs *ipvs = net_ipvs(svc->net);
1357 
1358 	pr_info("%s: enter\n", __func__);
1359 
1360 	/* Count only IPv4 services for old get/setsockopt interface */
1361 	if (svc->af == AF_INET)
1362 		ipvs->num_services--;
1363 
1364 	ip_vs_stop_estimator(svc->net, &svc->stats);
1365 
1366 	/* Unbind scheduler */
1367 	old_sched = svc->scheduler;
1368 	ip_vs_unbind_scheduler(svc);
1369 	ip_vs_scheduler_put(old_sched);
1370 
1371 	/* Unbind persistence engine */
1372 	old_pe = svc->pe;
1373 	ip_vs_unbind_pe(svc);
1374 	ip_vs_pe_put(old_pe);
1375 
1376 	/* Unbind app inc */
1377 	if (svc->inc) {
1378 		ip_vs_app_inc_put(svc->inc);
1379 		svc->inc = NULL;
1380 	}
1381 
1382 	/*
1383 	 *    Unlink the whole destination list
1384 	 */
1385 	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1386 		__ip_vs_unlink_dest(svc, dest, 0);
1387 		__ip_vs_del_dest(svc->net, dest);
1388 	}
1389 
1390 	/*
1391 	 *    Update the virtual service counters
1392 	 */
1393 	if (svc->port == FTPPORT)
1394 		atomic_dec(&ipvs->ftpsvc_counter);
1395 	else if (svc->port == 0)
1396 		atomic_dec(&ipvs->nullsvc_counter);
1397 
1398 	/*
1399 	 *    Free the service if nobody refers to it
1400 	 */
1401 	if (atomic_read(&svc->refcnt) == 0) {
1402 		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1403 			      svc->fwmark,
1404 			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
1405 			      ntohs(svc->port), atomic_read(&svc->usecnt));
1406 		free_percpu(svc->stats.cpustats);
1407 		kfree(svc);
1408 	}
1409 
1410 	/* decrease the module use count */
1411 	ip_vs_use_count_dec();
1412 }
1413 
1414 /*
1415  * Unlink a service from list and try to delete it if its refcnt reached 0
1416  */
static void ip_vs_unlink_service(struct ip_vs_service *svc)
{
	/*
	 * Unhash it from the service table
	 */
	write_lock_bh(&__ip_vs_svc_lock);

	ip_vs_svc_unhash(svc);

	/*
	 * Wait until all the svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	/*
	 * Tear the service down while still holding the write lock
	 * (__ip_vs_del_service() expects to be called under the _bh lock).
	 */
	__ip_vs_del_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);
}
1435 
1436 /*
1437  *	Delete a service from the service list
1438  */
ip_vs_del_service(struct ip_vs_service * svc)1439 static int ip_vs_del_service(struct ip_vs_service *svc)
1440 {
1441 	if (svc == NULL)
1442 		return -EEXIST;
1443 	ip_vs_unlink_service(svc);
1444 
1445 	return 0;
1446 }
1447 
1448 
1449 /*
1450  *	Flush all the virtual services
1451  */
ip_vs_flush(struct net * net)1452 static int ip_vs_flush(struct net *net)
1453 {
1454 	int idx;
1455 	struct ip_vs_service *svc, *nxt;
1456 
1457 	/*
1458 	 * Flush the service table hashed by <netns,protocol,addr,port>
1459 	 */
1460 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1461 		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1462 					 s_list) {
1463 			if (net_eq(svc->net, net))
1464 				ip_vs_unlink_service(svc);
1465 		}
1466 	}
1467 
1468 	/*
1469 	 * Flush the service table hashed by fwmark
1470 	 */
1471 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1472 		list_for_each_entry_safe(svc, nxt,
1473 					 &ip_vs_svc_fwm_table[idx], f_list) {
1474 			if (net_eq(svc->net, net))
1475 				ip_vs_unlink_service(svc);
1476 		}
1477 	}
1478 
1479 	return 0;
1480 }
1481 
1482 /*
1483  *	Delete service by {netns} in the service table.
1484  *	Called by __ip_vs_cleanup()
1485  */
void __ip_vs_service_cleanup(struct net *net)
{
	EnterFunction(2);
	/* Check for "full" addressed entries */
	/* Flush every service of this netns while holding __ip_vs_mutex */
	mutex_lock(&__ip_vs_mutex);
	ip_vs_flush(net);
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
}
1495 /*
 * Release the dst held by dst_cache
1497  */
1498 static inline void
__ip_vs_dev_reset(struct ip_vs_dest * dest,struct net_device * dev)1499 __ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1500 {
1501 	spin_lock_bh(&dest->dst_lock);
1502 	if (dest->dst_cache && dest->dst_cache->dev == dev) {
1503 		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1504 			      dev->name,
1505 			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
1506 			      ntohs(dest->port),
1507 			      atomic_read(&dest->refcnt));
1508 		ip_vs_dst_reset(dest);
1509 	}
1510 	spin_unlock_bh(&dest->dst_lock);
1511 
1512 }
1513 /*
1514  * Netdev event receiver
 * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
 * a device that is being unregistered, it must be released.
1517  */
ip_vs_dst_event(struct notifier_block * this,unsigned long event,void * ptr)1518 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1519 			    void *ptr)
1520 {
1521 	struct net_device *dev = ptr;
1522 	struct net *net = dev_net(dev);
1523 	struct ip_vs_service *svc;
1524 	struct ip_vs_dest *dest;
1525 	unsigned int idx;
1526 
1527 	if (event != NETDEV_UNREGISTER)
1528 		return NOTIFY_DONE;
1529 	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1530 	EnterFunction(2);
1531 	mutex_lock(&__ip_vs_mutex);
1532 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1533 		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1534 			if (net_eq(svc->net, net)) {
1535 				list_for_each_entry(dest, &svc->destinations,
1536 						    n_list) {
1537 					__ip_vs_dev_reset(dest, dev);
1538 				}
1539 			}
1540 		}
1541 
1542 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1543 			if (net_eq(svc->net, net)) {
1544 				list_for_each_entry(dest, &svc->destinations,
1545 						    n_list) {
1546 					__ip_vs_dev_reset(dest, dev);
1547 				}
1548 			}
1549 
1550 		}
1551 	}
1552 
1553 	list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) {
1554 		__ip_vs_dev_reset(dest, dev);
1555 	}
1556 	mutex_unlock(&__ip_vs_mutex);
1557 	LeaveFunction(2);
1558 	return NOTIFY_DONE;
1559 }
1560 
1561 /*
1562  *	Zero counters in a service or all services
1563  */
ip_vs_zero_service(struct ip_vs_service * svc)1564 static int ip_vs_zero_service(struct ip_vs_service *svc)
1565 {
1566 	struct ip_vs_dest *dest;
1567 
1568 	write_lock_bh(&__ip_vs_svc_lock);
1569 	list_for_each_entry(dest, &svc->destinations, n_list) {
1570 		ip_vs_zero_stats(&dest->stats);
1571 	}
1572 	ip_vs_zero_stats(&svc->stats);
1573 	write_unlock_bh(&__ip_vs_svc_lock);
1574 	return 0;
1575 }
1576 
ip_vs_zero_all(struct net * net)1577 static int ip_vs_zero_all(struct net *net)
1578 {
1579 	int idx;
1580 	struct ip_vs_service *svc;
1581 
1582 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1583 		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1584 			if (net_eq(svc->net, net))
1585 				ip_vs_zero_service(svc);
1586 		}
1587 	}
1588 
1589 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1590 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1591 			if (net_eq(svc->net, net))
1592 				ip_vs_zero_service(svc);
1593 		}
1594 	}
1595 
1596 	ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1597 	return 0;
1598 }
1599 
1600 #ifdef CONFIG_SYSCTL
1601 static int
proc_do_defense_mode(ctl_table * table,int write,void __user * buffer,size_t * lenp,loff_t * ppos)1602 proc_do_defense_mode(ctl_table *table, int write,
1603 		     void __user *buffer, size_t *lenp, loff_t *ppos)
1604 {
1605 	struct net *net = current->nsproxy->net_ns;
1606 	int *valp = table->data;
1607 	int val = *valp;
1608 	int rc;
1609 
1610 	rc = proc_dointvec(table, write, buffer, lenp, ppos);
1611 	if (write && (*valp != val)) {
1612 		if ((*valp < 0) || (*valp > 3)) {
1613 			/* Restore the correct value */
1614 			*valp = val;
1615 		} else {
1616 			update_defense_level(net_ipvs(net));
1617 		}
1618 	}
1619 	return rc;
1620 }
1621 
1622 static int
proc_do_sync_threshold(ctl_table * table,int write,void __user * buffer,size_t * lenp,loff_t * ppos)1623 proc_do_sync_threshold(ctl_table *table, int write,
1624 		       void __user *buffer, size_t *lenp, loff_t *ppos)
1625 {
1626 	int *valp = table->data;
1627 	int val[2];
1628 	int rc;
1629 
1630 	/* backup the value first */
1631 	memcpy(val, valp, sizeof(val));
1632 
1633 	rc = proc_dointvec(table, write, buffer, lenp, ppos);
1634 	if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1635 		/* Restore the correct value */
1636 		memcpy(valp, val, sizeof(val));
1637 	}
1638 	return rc;
1639 }
1640 
1641 static int
proc_do_sync_mode(ctl_table * table,int write,void __user * buffer,size_t * lenp,loff_t * ppos)1642 proc_do_sync_mode(ctl_table *table, int write,
1643 		     void __user *buffer, size_t *lenp, loff_t *ppos)
1644 {
1645 	int *valp = table->data;
1646 	int val = *valp;
1647 	int rc;
1648 
1649 	rc = proc_dointvec(table, write, buffer, lenp, ppos);
1650 	if (write && (*valp != val)) {
1651 		if ((*valp < 0) || (*valp > 1)) {
1652 			/* Restore the correct value */
1653 			*valp = val;
1654 		} else {
1655 			struct net *net = current->nsproxy->net_ns;
1656 			ip_vs_sync_switch_mode(net, val);
1657 		}
1658 	}
1659 	return rc;
1660 }
1661 
1662 /*
1663  *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not change the order or insert new entries without
 *	aligning with the netns init in __ip_vs_control_init()
1666  */
1667 
static struct ctl_table vs_vars[] = {
	/*
	 * Apart from debug_level, the live entries below carry no .data
	 * pointer in this template.
	 * NOTE(review): presumably .data is filled in per netns by
	 * __ip_vs_control_init(), which is why entry order matters -- confirm.
	 */
	{
		.procname	= "amemthresh",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "am_droprate",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* Defense knobs: writes go through proc_do_defense_mode (range 0..3) */
	{
		.procname	= "drop_entry",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
	{
		.procname	= "drop_packet",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
#ifdef CONFIG_IP_VS_NFCT
	{
		.procname	= "conntrack",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
	{
		.procname	= "secure_tcp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
	{
		.procname	= "snat_reroute",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	/* Validated by proc_do_sync_mode: only 0 or 1 is accepted */
	{
		.procname	= "sync_version",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_sync_mode,
	},
	{
		.procname	= "cache_bypass",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expire_nodest_conn",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expire_quiescent_template",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* Sized from the netns field; proc_do_sync_threshold checks the pair */
	{
		.procname	= "sync_threshold",
		.maxlen		=
			sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
		.mode		= 0644,
		.proc_handler	= proc_do_sync_threshold,
	},
	{
		.procname	= "nat_icmp_send",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_IP_VS_DEBUG
	/* The one entry backed by a file-scope variable rather than netns data */
	{
		.procname	= "debug_level",
		.data		= &sysctl_ip_vs_debug_level,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	/* The timeout_* entries below are compiled out */
#if 0
	{
		.procname	= "timeout_established",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_synsent",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_synrecv",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_finwait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_timewait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_close",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_closewait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_lastack",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_listen",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_synack",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_udp",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_icmp",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
#endif
	/* Empty entry terminates the table */
	{ }
};
1847 
/* sysctl path components "net", "ipv4", "vs"; the empty entry terminates it */
const struct ctl_path net_vs_ctl_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ .procname = "vs", },
	{ }
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1855 #endif
1856 
1857 #ifdef CONFIG_PROC_FS
1858 
/* Private iterator state of the service listing seq_file */
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct list_head *table;   /* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;                /* bucket index of the current entry in 'table' */
};
1864 
1865 /*
1866  *	Write the contents of the VS rule table to a PROCfs file.
1867  *	(It is kept just for backward compatibility)
1868  */
ip_vs_fwd_name(unsigned flags)1869 static inline const char *ip_vs_fwd_name(unsigned flags)
1870 {
1871 	switch (flags & IP_VS_CONN_F_FWD_MASK) {
1872 	case IP_VS_CONN_F_LOCALNODE:
1873 		return "Local";
1874 	case IP_VS_CONN_F_TUNNEL:
1875 		return "Tunnel";
1876 	case IP_VS_CONN_F_DROUTE:
1877 		return "Route";
1878 	default:
1879 		return "Masq";
1880 	}
1881 }
1882 
1883 
1884 /* Get the Nth entry in the two lists */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
	struct ip_vs_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct ip_vs_service *svc;
	int bucket;

	/*
	 * Count through the protocol-hashed table first.  Only services of
	 * this netns consume a position; the one at position 'pos' is
	 * returned and its table and bucket are recorded in the iterator.
	 */
	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++) {
		list_for_each_entry(svc, &ip_vs_svc_table[bucket], s_list) {
			if (!net_eq(svc->net, net))
				continue;
			if (pos-- != 0)
				continue;
			iter->table = ip_vs_svc_table;
			iter->bucket = bucket;
			return svc;
		}
	}

	/* Not there: carry on counting through the fwmark table */
	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[bucket], f_list) {
			if (!net_eq(svc->net, net))
				continue;
			if (pos-- != 0)
				continue;
			iter->table = ip_vs_svc_fwm_table;
			iter->bucket = bucket;
			return svc;
		}
	}

	/* 'pos' is past the last service of this netns */
	return NULL;
}
1916 
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(__ip_vs_svc_lock)
{
	/* The read lock stays held until ip_vs_info_seq_stop() drops it */
	read_lock_bh(&__ip_vs_svc_lock);

	/* Position 0 is the header token; service N sits at position N + 1 */
	if (*pos == 0)
		return SEQ_START_TOKEN;

	return ip_vs_info_array(seq, *pos - 1);
}
1924 
1925 
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	/*
	 * Advance the service listing: bump *pos and return the service
	 * that follows 'v', or NULL when both tables are exhausted.
	 *
	 * The service tables are shared by all network namespaces, so
	 * entries that belong to another netns are skipped here, exactly
	 * as ip_vs_info_array() skips them.  This keeps foreign services
	 * out of the listing and keeps *pos consistent with the way
	 * ip_vs_info_array() counts positions when a read is resumed.
	 */
	struct net *net = seq_file_net(seq);
	struct list_head *e, *head;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* rest of the current bucket in the protocol-hashed table */
		head = &ip_vs_svc_table[iter->bucket];
		for (e = svc->s_list.next; e != head; e = e->next) {
			svc = list_entry(e, struct ip_vs_service, s_list);
			if (net_eq(svc->net, net))
				return svc;
		}

		/* then the following buckets */
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
					    s_list) {
				if (net_eq(svc->net, net))
					return svc;
			}
		}

		/* protocol table done: restart from bucket 0 of fwmark table */
		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* rest of the current bucket in the fwmark-hashed table */
	head = &ip_vs_svc_fwm_table[iter->bucket];
	for (e = svc->f_list.next; e != head; e = e->next) {
		svc = list_entry(e, struct ip_vs_service, f_list);
		if (net_eq(svc->net, net))
			return svc;
	}

 scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
				    f_list) {
			if (net_eq(svc->net, net))
				return svc;
		}
	}

	return NULL;
}
1970 
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
	/* Drops the read lock taken in ip_vs_info_seq_start() */
	read_unlock_bh(&__ip_vs_svc_lock);
}
1976 
1977 
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	/*
	 * Emit one record of the listing: the three header lines for the
	 * start token, otherwise one service line followed by one line
	 * per destination of that service.  Always returns 0.
	 */
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;

		/* iter->table tells which table the service came from */
		if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
			/*
			 * NOTE(review): unlike the IPv4 and fwmark lines,
			 * the IPv6 line has no slot for the "ops " flag --
			 * confirm whether that is intended.
			 */
			if (svc->af == AF_INET6)
				seq_printf(seq, "%s  [%pI6]:%04X %s ",
					   ip_vs_proto_name(svc->protocol),
					   &svc->addr.in6,
					   ntohs(svc->port),
					   svc->scheduler->name);
			else
#endif
				seq_printf(seq, "%s  %08X:%04X %s %s ",
					   ip_vs_proto_name(svc->protocol),
					   ntohl(svc->addr.ip),
					   ntohs(svc->port),
					   svc->scheduler->name,
					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		} else {
			seq_printf(seq, "FWM  %08X %s %s",
				   svc->fwmark, svc->scheduler->name,
				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		}

		/* Persistent services append timeout and netmask to the line */
		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				svc->timeout,
				ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		/* One "->" line per destination, addresses and ports in hex */
		list_for_each_entry(dest, &svc->destinations, n_list) {
#ifdef CONFIG_IP_VS_IPV6
			if (dest->af == AF_INET6)
				seq_printf(seq,
					   "  -> [%pI6]:%04X"
					   "      %-7s %-6d %-10d %-10d\n",
					   &dest->addr.in6,
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));
			else
#endif
				seq_printf(seq,
					   "  -> %08X:%04X      "
					   "%-7s %-6d %-10d %-10d\n",
					   ntohl(dest->addr.ip),
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));

		}
	}
	return 0;
}
2050 
/* seq_file callbacks of the service listing; start locks, stop unlocks */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
2057 
/* Open handler: netns-aware seq_file with a struct ip_vs_iter as private data */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip_vs_info_seq_ops,
			sizeof(struct ip_vs_iter));
}
2063 
/* File operations of the service listing (seq_file based, per netns) */
static const struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
2071 
2072 #endif
2073 
2074 #ifdef CONFIG_PROC_FS
ip_vs_stats_show(struct seq_file * seq,void * v)2075 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2076 {
2077 	struct net *net = seq_file_single_net(seq);
2078 	struct ip_vs_stats_user show;
2079 
2080 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2081 	seq_puts(seq,
2082 		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
2083 	seq_printf(seq,
2084 		   "   Conns  Packets  Packets            Bytes            Bytes\n");
2085 
2086 	ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2087 	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2088 		   show.inpkts, show.outpkts,
2089 		   (unsigned long long) show.inbytes,
2090 		   (unsigned long long) show.outbytes);
2091 
2092 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2093 	seq_puts(seq,
2094 		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2095 	seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2096 			show.cps, show.inpps, show.outpps,
2097 			show.inbps, show.outbps);
2098 
2099 	return 0;
2100 }
2101 
/* Open handler: single-record, netns-aware seq_file showing the totals */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ip_vs_stats_show);
}
2106 
/* File operations of the totals view (single_open based, per netns) */
static const struct file_operations ip_vs_stats_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
2114 
ip_vs_stats_percpu_show(struct seq_file * seq,void * v)2115 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2116 {
2117 	struct net *net = seq_file_single_net(seq);
2118 	struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2119 	struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2120 	struct ip_vs_stats_user rates;
2121 	int i;
2122 
2123 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2124 	seq_puts(seq,
2125 		 "       Total Incoming Outgoing         Incoming         Outgoing\n");
2126 	seq_printf(seq,
2127 		   "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2128 
2129 	for_each_possible_cpu(i) {
2130 		struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2131 		unsigned int start;
2132 		__u64 inbytes, outbytes;
2133 
2134 		do {
2135 			start = u64_stats_fetch_begin_bh(&u->syncp);
2136 			inbytes = u->ustats.inbytes;
2137 			outbytes = u->ustats.outbytes;
2138 		} while (u64_stats_fetch_retry_bh(&u->syncp, start));
2139 
2140 		seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2141 			   i, u->ustats.conns, u->ustats.inpkts,
2142 			   u->ustats.outpkts, (__u64)inbytes,
2143 			   (__u64)outbytes);
2144 	}
2145 
2146 	spin_lock_bh(&tot_stats->lock);
2147 
2148 	seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2149 		   tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2150 		   tot_stats->ustats.outpkts,
2151 		   (unsigned long long) tot_stats->ustats.inbytes,
2152 		   (unsigned long long) tot_stats->ustats.outbytes);
2153 
2154 	ip_vs_read_estimator(&rates, tot_stats);
2155 
2156 	spin_unlock_bh(&tot_stats->lock);
2157 
2158 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2159 	seq_puts(seq,
2160 		   "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2161 	seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2162 			rates.cps,
2163 			rates.inpps,
2164 			rates.outpps,
2165 			rates.inbps,
2166 			rates.outbps);
2167 
2168 	return 0;
2169 }
2170 
ip_vs_stats_percpu_seq_open(struct inode * inode,struct file * file)2171 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2172 {
2173 	return single_open_net(inode, file, ip_vs_stats_percpu_show);
2174 }
2175 
2176 static const struct file_operations ip_vs_stats_percpu_fops = {
2177 	.owner = THIS_MODULE,
2178 	.open = ip_vs_stats_percpu_seq_open,
2179 	.read = seq_read,
2180 	.llseek = seq_lseek,
2181 	.release = single_release_net,
2182 };
2183 #endif
2184 
2185 /*
2186  *	Set timeout values for tcp tcpfin udp in the timeout_table.
2187  */
ip_vs_set_timeout(struct net * net,struct ip_vs_timeout_user * u)2188 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2189 {
2190 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2191 	struct ip_vs_proto_data *pd;
2192 #endif
2193 
2194 	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2195 		  u->tcp_timeout,
2196 		  u->tcp_fin_timeout,
2197 		  u->udp_timeout);
2198 
2199 #ifdef CONFIG_IP_VS_PROTO_TCP
2200 	if (u->tcp_timeout) {
2201 		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2202 		pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2203 			= u->tcp_timeout * HZ;
2204 	}
2205 
2206 	if (u->tcp_fin_timeout) {
2207 		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2208 		pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2209 			= u->tcp_fin_timeout * HZ;
2210 	}
2211 #endif
2212 
2213 #ifdef CONFIG_IP_VS_PROTO_UDP
2214 	if (u->udp_timeout) {
2215 		pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2216 		pd->timeout_table[IP_VS_UDP_S_NORMAL]
2217 			= u->udp_timeout * HZ;
2218 	}
2219 #endif
2220 	return 0;
2221 }
2222 
2223 
2224 #define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
2225 #define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
2226 #define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
2227 				 sizeof(struct ip_vs_dest_user))
2228 #define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
2229 #define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
2230 #define MAX_ARG_LEN		SVCDEST_ARG_LEN
2231 
2232 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2233 	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
2234 	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
2235 	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
2236 	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
2237 	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
2238 	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
2239 	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
2240 	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
2241 	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
2242 	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
2243 	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
2244 };
2245 
ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern * usvc,struct ip_vs_service_user * usvc_compat)2246 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2247 				  struct ip_vs_service_user *usvc_compat)
2248 {
2249 	memset(usvc, 0, sizeof(*usvc));
2250 
2251 	usvc->af		= AF_INET;
2252 	usvc->protocol		= usvc_compat->protocol;
2253 	usvc->addr.ip		= usvc_compat->addr;
2254 	usvc->port		= usvc_compat->port;
2255 	usvc->fwmark		= usvc_compat->fwmark;
2256 
2257 	/* Deep copy of sched_name is not needed here */
2258 	usvc->sched_name	= usvc_compat->sched_name;
2259 
2260 	usvc->flags		= usvc_compat->flags;
2261 	usvc->timeout		= usvc_compat->timeout;
2262 	usvc->netmask		= usvc_compat->netmask;
2263 }
2264 
ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern * udest,struct ip_vs_dest_user * udest_compat)2265 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2266 				   struct ip_vs_dest_user *udest_compat)
2267 {
2268 	memset(udest, 0, sizeof(*udest));
2269 
2270 	udest->addr.ip		= udest_compat->addr;
2271 	udest->port		= udest_compat->port;
2272 	udest->conn_flags	= udest_compat->conn_flags;
2273 	udest->weight		= udest_compat->weight;
2274 	udest->u_threshold	= udest_compat->u_threshold;
2275 	udest->l_threshold	= udest_compat->l_threshold;
2276 }
2277 
2278 static int
do_ip_vs_set_ctl(struct sock * sk,int cmd,void __user * user,unsigned int len)2279 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2280 {
2281 	struct net *net = sock_net(sk);
2282 	int ret;
2283 	unsigned char arg[MAX_ARG_LEN];
2284 	struct ip_vs_service_user *usvc_compat;
2285 	struct ip_vs_service_user_kern usvc;
2286 	struct ip_vs_service *svc;
2287 	struct ip_vs_dest_user *udest_compat;
2288 	struct ip_vs_dest_user_kern udest;
2289 
2290 	if (!capable(CAP_NET_ADMIN))
2291 		return -EPERM;
2292 
2293 	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2294 		return -EINVAL;
2295 	if (len < 0 || len >  MAX_ARG_LEN)
2296 		return -EINVAL;
2297 	if (len != set_arglen[SET_CMDID(cmd)]) {
2298 		pr_err("set_ctl: len %u != %u\n",
2299 		       len, set_arglen[SET_CMDID(cmd)]);
2300 		return -EINVAL;
2301 	}
2302 
2303 	if (copy_from_user(arg, user, len) != 0)
2304 		return -EFAULT;
2305 
2306 	/* increase the module use count */
2307 	ip_vs_use_count_inc();
2308 
2309 	if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2310 		ret = -ERESTARTSYS;
2311 		goto out_dec;
2312 	}
2313 
2314 	if (cmd == IP_VS_SO_SET_FLUSH) {
2315 		/* Flush the virtual service */
2316 		ret = ip_vs_flush(net);
2317 		goto out_unlock;
2318 	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2319 		/* Set timeout values for (tcp tcpfin udp) */
2320 		ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2321 		goto out_unlock;
2322 	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2323 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2324 		ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2325 					dm->syncid);
2326 		goto out_unlock;
2327 	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2328 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2329 		ret = stop_sync_thread(net, dm->state);
2330 		goto out_unlock;
2331 	}
2332 
2333 	usvc_compat = (struct ip_vs_service_user *)arg;
2334 	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2335 
2336 	/* We only use the new structs internally, so copy userspace compat
2337 	 * structs to extended internal versions */
2338 	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2339 	ip_vs_copy_udest_compat(&udest, udest_compat);
2340 
2341 	if (cmd == IP_VS_SO_SET_ZERO) {
2342 		/* if no service address is set, zero counters in all */
2343 		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2344 			ret = ip_vs_zero_all(net);
2345 			goto out_unlock;
2346 		}
2347 	}
2348 
2349 	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2350 	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2351 	    usvc.protocol != IPPROTO_SCTP) {
2352 		pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2353 		       usvc.protocol, &usvc.addr.ip,
2354 		       ntohs(usvc.port), usvc.sched_name);
2355 		ret = -EFAULT;
2356 		goto out_unlock;
2357 	}
2358 
2359 	/* Lookup the exact service by <protocol, addr, port> or fwmark */
2360 	if (usvc.fwmark == 0)
2361 		svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2362 					   &usvc.addr, usvc.port);
2363 	else
2364 		svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2365 
2366 	if (cmd != IP_VS_SO_SET_ADD
2367 	    && (svc == NULL || svc->protocol != usvc.protocol)) {
2368 		ret = -ESRCH;
2369 		goto out_unlock;
2370 	}
2371 
2372 	switch (cmd) {
2373 	case IP_VS_SO_SET_ADD:
2374 		if (svc != NULL)
2375 			ret = -EEXIST;
2376 		else
2377 			ret = ip_vs_add_service(net, &usvc, &svc);
2378 		break;
2379 	case IP_VS_SO_SET_EDIT:
2380 		ret = ip_vs_edit_service(svc, &usvc);
2381 		break;
2382 	case IP_VS_SO_SET_DEL:
2383 		ret = ip_vs_del_service(svc);
2384 		if (!ret)
2385 			goto out_unlock;
2386 		break;
2387 	case IP_VS_SO_SET_ZERO:
2388 		ret = ip_vs_zero_service(svc);
2389 		break;
2390 	case IP_VS_SO_SET_ADDDEST:
2391 		ret = ip_vs_add_dest(svc, &udest);
2392 		break;
2393 	case IP_VS_SO_SET_EDITDEST:
2394 		ret = ip_vs_edit_dest(svc, &udest);
2395 		break;
2396 	case IP_VS_SO_SET_DELDEST:
2397 		ret = ip_vs_del_dest(svc, &udest);
2398 		break;
2399 	default:
2400 		ret = -EINVAL;
2401 	}
2402 
2403   out_unlock:
2404 	mutex_unlock(&__ip_vs_mutex);
2405   out_dec:
2406 	/* decrease the module use count */
2407 	ip_vs_use_count_dec();
2408 
2409 	return ret;
2410 }
2411 
2412 
2413 static void
ip_vs_copy_service(struct ip_vs_service_entry * dst,struct ip_vs_service * src)2414 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2415 {
2416 	dst->protocol = src->protocol;
2417 	dst->addr = src->addr.ip;
2418 	dst->port = src->port;
2419 	dst->fwmark = src->fwmark;
2420 	strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2421 	dst->flags = src->flags;
2422 	dst->timeout = src->timeout / HZ;
2423 	dst->netmask = src->netmask;
2424 	dst->num_dests = src->num_dests;
2425 	ip_vs_copy_stats(&dst->stats, &src->stats);
2426 }
2427 
2428 static inline int
__ip_vs_get_service_entries(struct net * net,const struct ip_vs_get_services * get,struct ip_vs_get_services __user * uptr)2429 __ip_vs_get_service_entries(struct net *net,
2430 			    const struct ip_vs_get_services *get,
2431 			    struct ip_vs_get_services __user *uptr)
2432 {
2433 	int idx, count=0;
2434 	struct ip_vs_service *svc;
2435 	struct ip_vs_service_entry entry;
2436 	int ret = 0;
2437 
2438 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2439 		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2440 			/* Only expose IPv4 entries to old interface */
2441 			if (svc->af != AF_INET || !net_eq(svc->net, net))
2442 				continue;
2443 
2444 			if (count >= get->num_services)
2445 				goto out;
2446 			memset(&entry, 0, sizeof(entry));
2447 			ip_vs_copy_service(&entry, svc);
2448 			if (copy_to_user(&uptr->entrytable[count],
2449 					 &entry, sizeof(entry))) {
2450 				ret = -EFAULT;
2451 				goto out;
2452 			}
2453 			count++;
2454 		}
2455 	}
2456 
2457 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2458 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2459 			/* Only expose IPv4 entries to old interface */
2460 			if (svc->af != AF_INET || !net_eq(svc->net, net))
2461 				continue;
2462 
2463 			if (count >= get->num_services)
2464 				goto out;
2465 			memset(&entry, 0, sizeof(entry));
2466 			ip_vs_copy_service(&entry, svc);
2467 			if (copy_to_user(&uptr->entrytable[count],
2468 					 &entry, sizeof(entry))) {
2469 				ret = -EFAULT;
2470 				goto out;
2471 			}
2472 			count++;
2473 		}
2474 	}
2475   out:
2476 	return ret;
2477 }
2478 
2479 static inline int
__ip_vs_get_dest_entries(struct net * net,const struct ip_vs_get_dests * get,struct ip_vs_get_dests __user * uptr)2480 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2481 			 struct ip_vs_get_dests __user *uptr)
2482 {
2483 	struct ip_vs_service *svc;
2484 	union nf_inet_addr addr = { .ip = get->addr };
2485 	int ret = 0;
2486 
2487 	if (get->fwmark)
2488 		svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2489 	else
2490 		svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2491 					   get->port);
2492 
2493 	if (svc) {
2494 		int count = 0;
2495 		struct ip_vs_dest *dest;
2496 		struct ip_vs_dest_entry entry;
2497 
2498 		list_for_each_entry(dest, &svc->destinations, n_list) {
2499 			if (count >= get->num_dests)
2500 				break;
2501 
2502 			entry.addr = dest->addr.ip;
2503 			entry.port = dest->port;
2504 			entry.conn_flags = atomic_read(&dest->conn_flags);
2505 			entry.weight = atomic_read(&dest->weight);
2506 			entry.u_threshold = dest->u_threshold;
2507 			entry.l_threshold = dest->l_threshold;
2508 			entry.activeconns = atomic_read(&dest->activeconns);
2509 			entry.inactconns = atomic_read(&dest->inactconns);
2510 			entry.persistconns = atomic_read(&dest->persistconns);
2511 			ip_vs_copy_stats(&entry.stats, &dest->stats);
2512 			if (copy_to_user(&uptr->entrytable[count],
2513 					 &entry, sizeof(entry))) {
2514 				ret = -EFAULT;
2515 				break;
2516 			}
2517 			count++;
2518 		}
2519 	} else
2520 		ret = -ESRCH;
2521 	return ret;
2522 }
2523 
/*
 * Read the tcp, tcpfin and udp timeouts of @net into @u, each divided by
 * HZ. A field whose protocol is not compiled in is not written at all, so
 * a caller that needs defined values there has to initialise *u first.
 */
static inline void
__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
	struct ip_vs_proto_data *pd;
#endif

#ifdef CONFIG_IP_VS_PROTO_TCP
	/* both TCP values come from the same protocol data */
	pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
	u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
	u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
	u->udp_timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}
2542 
2543 
2544 #define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
2545 #define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
2546 #define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
2547 #define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
2548 #define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
2549 #define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
2550 #define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2551 
2552 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2553 	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
2554 	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
2555 	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
2556 	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
2557 	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
2558 	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
2559 	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
2560 };
2561 
2562 static int
do_ip_vs_get_ctl(struct sock * sk,int cmd,void __user * user,int * len)2563 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2564 {
2565 	unsigned char arg[128];
2566 	int ret = 0;
2567 	unsigned int copylen;
2568 	struct net *net = sock_net(sk);
2569 	struct netns_ipvs *ipvs = net_ipvs(net);
2570 
2571 	BUG_ON(!net);
2572 	if (!capable(CAP_NET_ADMIN))
2573 		return -EPERM;
2574 
2575 	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2576 		return -EINVAL;
2577 
2578 	if (*len < get_arglen[GET_CMDID(cmd)]) {
2579 		pr_err("get_ctl: len %u < %u\n",
2580 		       *len, get_arglen[GET_CMDID(cmd)]);
2581 		return -EINVAL;
2582 	}
2583 
2584 	copylen = get_arglen[GET_CMDID(cmd)];
2585 	if (copylen > 128)
2586 		return -EINVAL;
2587 
2588 	if (copy_from_user(arg, user, copylen) != 0)
2589 		return -EFAULT;
2590 
2591 	if (mutex_lock_interruptible(&__ip_vs_mutex))
2592 		return -ERESTARTSYS;
2593 
2594 	switch (cmd) {
2595 	case IP_VS_SO_GET_VERSION:
2596 	{
2597 		char buf[64];
2598 
2599 		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2600 			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2601 		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2602 			ret = -EFAULT;
2603 			goto out;
2604 		}
2605 		*len = strlen(buf)+1;
2606 	}
2607 	break;
2608 
2609 	case IP_VS_SO_GET_INFO:
2610 	{
2611 		struct ip_vs_getinfo info;
2612 		info.version = IP_VS_VERSION_CODE;
2613 		info.size = ip_vs_conn_tab_size;
2614 		info.num_services = ipvs->num_services;
2615 		if (copy_to_user(user, &info, sizeof(info)) != 0)
2616 			ret = -EFAULT;
2617 	}
2618 	break;
2619 
2620 	case IP_VS_SO_GET_SERVICES:
2621 	{
2622 		struct ip_vs_get_services *get;
2623 		int size;
2624 
2625 		get = (struct ip_vs_get_services *)arg;
2626 		size = sizeof(*get) +
2627 			sizeof(struct ip_vs_service_entry) * get->num_services;
2628 		if (*len != size) {
2629 			pr_err("length: %u != %u\n", *len, size);
2630 			ret = -EINVAL;
2631 			goto out;
2632 		}
2633 		ret = __ip_vs_get_service_entries(net, get, user);
2634 	}
2635 	break;
2636 
2637 	case IP_VS_SO_GET_SERVICE:
2638 	{
2639 		struct ip_vs_service_entry *entry;
2640 		struct ip_vs_service *svc;
2641 		union nf_inet_addr addr;
2642 
2643 		entry = (struct ip_vs_service_entry *)arg;
2644 		addr.ip = entry->addr;
2645 		if (entry->fwmark)
2646 			svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2647 		else
2648 			svc = __ip_vs_service_find(net, AF_INET,
2649 						   entry->protocol, &addr,
2650 						   entry->port);
2651 		if (svc) {
2652 			ip_vs_copy_service(entry, svc);
2653 			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2654 				ret = -EFAULT;
2655 		} else
2656 			ret = -ESRCH;
2657 	}
2658 	break;
2659 
2660 	case IP_VS_SO_GET_DESTS:
2661 	{
2662 		struct ip_vs_get_dests *get;
2663 		int size;
2664 
2665 		get = (struct ip_vs_get_dests *)arg;
2666 		size = sizeof(*get) +
2667 			sizeof(struct ip_vs_dest_entry) * get->num_dests;
2668 		if (*len != size) {
2669 			pr_err("length: %u != %u\n", *len, size);
2670 			ret = -EINVAL;
2671 			goto out;
2672 		}
2673 		ret = __ip_vs_get_dest_entries(net, get, user);
2674 	}
2675 	break;
2676 
2677 	case IP_VS_SO_GET_TIMEOUT:
2678 	{
2679 		struct ip_vs_timeout_user t;
2680 
2681 		__ip_vs_get_timeouts(net, &t);
2682 		if (copy_to_user(user, &t, sizeof(t)) != 0)
2683 			ret = -EFAULT;
2684 	}
2685 	break;
2686 
2687 	case IP_VS_SO_GET_DAEMON:
2688 	{
2689 		struct ip_vs_daemon_user d[2];
2690 
2691 		memset(&d, 0, sizeof(d));
2692 		if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2693 			d[0].state = IP_VS_STATE_MASTER;
2694 			strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2695 				sizeof(d[0].mcast_ifn));
2696 			d[0].syncid = ipvs->master_syncid;
2697 		}
2698 		if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2699 			d[1].state = IP_VS_STATE_BACKUP;
2700 			strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2701 				sizeof(d[1].mcast_ifn));
2702 			d[1].syncid = ipvs->backup_syncid;
2703 		}
2704 		if (copy_to_user(user, &d, sizeof(d)) != 0)
2705 			ret = -EFAULT;
2706 	}
2707 	break;
2708 
2709 	default:
2710 		ret = -EINVAL;
2711 	}
2712 
2713   out:
2714 	mutex_unlock(&__ip_vs_mutex);
2715 	return ret;
2716 }
2717 
2718 
2719 static struct nf_sockopt_ops ip_vs_sockopts = {
2720 	.pf		= PF_INET,
2721 	.set_optmin	= IP_VS_BASE_CTL,
2722 	.set_optmax	= IP_VS_SO_SET_MAX+1,
2723 	.set		= do_ip_vs_set_ctl,
2724 	.get_optmin	= IP_VS_BASE_CTL,
2725 	.get_optmax	= IP_VS_SO_GET_MAX+1,
2726 	.get		= do_ip_vs_get_ctl,
2727 	.owner		= THIS_MODULE,
2728 };
2729 
2730 /*
2731  * Generic Netlink interface
2732  */
2733 
2734 /* IPVS genetlink family */
2735 static struct genl_family ip_vs_genl_family = {
2736 	.id		= GENL_ID_GENERATE,
2737 	.hdrsize	= 0,
2738 	.name		= IPVS_GENL_NAME,
2739 	.version	= IPVS_GENL_VERSION,
2740 	.maxattr	= IPVS_CMD_MAX,
2741 	.netnsok        = true,         /* Make ipvsadm to work on netns */
2742 };
2743 
2744 /* Policy used for first-level command attributes */
2745 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2746 	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
2747 	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
2748 	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
2749 	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
2750 	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
2751 	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
2752 };
2753 
2754 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2755 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2756 	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
2757 	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
2758 					    .len = IP_VS_IFNAME_MAXLEN },
2759 	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
2760 };
2761 
2762 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2763 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2764 	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
2765 	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
2766 	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
2767 					    .len = sizeof(union nf_inet_addr) },
2768 	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
2769 	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
2770 	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
2771 					    .len = IP_VS_SCHEDNAME_MAXLEN },
2772 	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
2773 					    .len = IP_VS_PENAME_MAXLEN },
2774 	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
2775 					    .len = sizeof(struct ip_vs_flags) },
2776 	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
2777 	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
2778 	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
2779 };
2780 
2781 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2782 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2783 	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
2784 					    .len = sizeof(union nf_inet_addr) },
2785 	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
2786 	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
2787 	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
2788 	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
2789 	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
2790 	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
2791 	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
2792 	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
2793 	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
2794 };
2795 
ip_vs_genl_fill_stats(struct sk_buff * skb,int container_type,struct ip_vs_stats * stats)2796 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2797 				 struct ip_vs_stats *stats)
2798 {
2799 	struct ip_vs_stats_user ustats;
2800 	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2801 	if (!nl_stats)
2802 		return -EMSGSIZE;
2803 
2804 	ip_vs_copy_stats(&ustats, stats);
2805 
2806 	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2807 	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2808 	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2809 	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2810 	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2811 	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2812 	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2813 	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2814 	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2815 	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
2816 
2817 	nla_nest_end(skb, nl_stats);
2818 
2819 	return 0;
2820 
2821 nla_put_failure:
2822 	nla_nest_cancel(skb, nl_stats);
2823 	return -EMSGSIZE;
2824 }
2825 
ip_vs_genl_fill_service(struct sk_buff * skb,struct ip_vs_service * svc)2826 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2827 				   struct ip_vs_service *svc)
2828 {
2829 	struct nlattr *nl_service;
2830 	struct ip_vs_flags flags = { .flags = svc->flags,
2831 				     .mask = ~0 };
2832 
2833 	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2834 	if (!nl_service)
2835 		return -EMSGSIZE;
2836 
2837 	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2838 
2839 	if (svc->fwmark) {
2840 		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2841 	} else {
2842 		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2843 		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2844 		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2845 	}
2846 
2847 	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2848 	if (svc->pe)
2849 		NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2850 	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2851 	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2852 	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2853 
2854 	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2855 		goto nla_put_failure;
2856 
2857 	nla_nest_end(skb, nl_service);
2858 
2859 	return 0;
2860 
2861 nla_put_failure:
2862 	nla_nest_cancel(skb, nl_service);
2863 	return -EMSGSIZE;
2864 }
2865 
ip_vs_genl_dump_service(struct sk_buff * skb,struct ip_vs_service * svc,struct netlink_callback * cb)2866 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2867 				   struct ip_vs_service *svc,
2868 				   struct netlink_callback *cb)
2869 {
2870 	void *hdr;
2871 
2872 	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2873 			  &ip_vs_genl_family, NLM_F_MULTI,
2874 			  IPVS_CMD_NEW_SERVICE);
2875 	if (!hdr)
2876 		return -EMSGSIZE;
2877 
2878 	if (ip_vs_genl_fill_service(skb, svc) < 0)
2879 		goto nla_put_failure;
2880 
2881 	return genlmsg_end(skb, hdr);
2882 
2883 nla_put_failure:
2884 	genlmsg_cancel(skb, hdr);
2885 	return -EMSGSIZE;
2886 }
2887 
ip_vs_genl_dump_services(struct sk_buff * skb,struct netlink_callback * cb)2888 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2889 				    struct netlink_callback *cb)
2890 {
2891 	int idx = 0, i;
2892 	int start = cb->args[0];
2893 	struct ip_vs_service *svc;
2894 	struct net *net = skb_sknet(skb);
2895 
2896 	mutex_lock(&__ip_vs_mutex);
2897 	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2898 		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2899 			if (++idx <= start || !net_eq(svc->net, net))
2900 				continue;
2901 			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2902 				idx--;
2903 				goto nla_put_failure;
2904 			}
2905 		}
2906 	}
2907 
2908 	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2909 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2910 			if (++idx <= start || !net_eq(svc->net, net))
2911 				continue;
2912 			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2913 				idx--;
2914 				goto nla_put_failure;
2915 			}
2916 		}
2917 	}
2918 
2919 nla_put_failure:
2920 	mutex_unlock(&__ip_vs_mutex);
2921 	cb->args[0] = idx;
2922 
2923 	return skb->len;
2924 }
2925 
ip_vs_genl_parse_service(struct net * net,struct ip_vs_service_user_kern * usvc,struct nlattr * nla,int full_entry,struct ip_vs_service ** ret_svc)2926 static int ip_vs_genl_parse_service(struct net *net,
2927 				    struct ip_vs_service_user_kern *usvc,
2928 				    struct nlattr *nla, int full_entry,
2929 				    struct ip_vs_service **ret_svc)
2930 {
2931 	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2932 	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2933 	struct ip_vs_service *svc;
2934 
2935 	/* Parse mandatory identifying service fields first */
2936 	if (nla == NULL ||
2937 	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2938 		return -EINVAL;
2939 
2940 	nla_af		= attrs[IPVS_SVC_ATTR_AF];
2941 	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
2942 	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
2943 	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
2944 	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
2945 
2946 	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2947 		return -EINVAL;
2948 
2949 	memset(usvc, 0, sizeof(*usvc));
2950 
2951 	usvc->af = nla_get_u16(nla_af);
2952 #ifdef CONFIG_IP_VS_IPV6
2953 	if (usvc->af != AF_INET && usvc->af != AF_INET6)
2954 #else
2955 	if (usvc->af != AF_INET)
2956 #endif
2957 		return -EAFNOSUPPORT;
2958 
2959 	if (nla_fwmark) {
2960 		usvc->protocol = IPPROTO_TCP;
2961 		usvc->fwmark = nla_get_u32(nla_fwmark);
2962 	} else {
2963 		usvc->protocol = nla_get_u16(nla_protocol);
2964 		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2965 		usvc->port = nla_get_u16(nla_port);
2966 		usvc->fwmark = 0;
2967 	}
2968 
2969 	if (usvc->fwmark)
2970 		svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2971 	else
2972 		svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2973 					   &usvc->addr, usvc->port);
2974 	*ret_svc = svc;
2975 
2976 	/* If a full entry was requested, check for the additional fields */
2977 	if (full_entry) {
2978 		struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2979 			      *nla_netmask;
2980 		struct ip_vs_flags flags;
2981 
2982 		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2983 		nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2984 		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2985 		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2986 		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2987 
2988 		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2989 			return -EINVAL;
2990 
2991 		nla_memcpy(&flags, nla_flags, sizeof(flags));
2992 
2993 		/* prefill flags from service if it already exists */
2994 		if (svc)
2995 			usvc->flags = svc->flags;
2996 
2997 		/* set new flags from userland */
2998 		usvc->flags = (usvc->flags & ~flags.mask) |
2999 			      (flags.flags & flags.mask);
3000 		usvc->sched_name = nla_data(nla_sched);
3001 		usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3002 		usvc->timeout = nla_get_u32(nla_timeout);
3003 		usvc->netmask = nla_get_u32(nla_netmask);
3004 	}
3005 
3006 	return 0;
3007 }
3008 
ip_vs_genl_find_service(struct net * net,struct nlattr * nla)3009 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3010 						     struct nlattr *nla)
3011 {
3012 	struct ip_vs_service_user_kern usvc;
3013 	struct ip_vs_service *svc;
3014 	int ret;
3015 
3016 	ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3017 	return ret ? ERR_PTR(ret) : svc;
3018 }
3019 
ip_vs_genl_fill_dest(struct sk_buff * skb,struct ip_vs_dest * dest)3020 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3021 {
3022 	struct nlattr *nl_dest;
3023 
3024 	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3025 	if (!nl_dest)
3026 		return -EMSGSIZE;
3027 
3028 	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
3029 	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
3030 
3031 	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3032 		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
3033 	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
3034 	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
3035 	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
3036 	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3037 		    atomic_read(&dest->activeconns));
3038 	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3039 		    atomic_read(&dest->inactconns));
3040 	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3041 		    atomic_read(&dest->persistconns));
3042 
3043 	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3044 		goto nla_put_failure;
3045 
3046 	nla_nest_end(skb, nl_dest);
3047 
3048 	return 0;
3049 
3050 nla_put_failure:
3051 	nla_nest_cancel(skb, nl_dest);
3052 	return -EMSGSIZE;
3053 }
3054 
ip_vs_genl_dump_dest(struct sk_buff * skb,struct ip_vs_dest * dest,struct netlink_callback * cb)3055 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3056 				struct netlink_callback *cb)
3057 {
3058 	void *hdr;
3059 
3060 	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3061 			  &ip_vs_genl_family, NLM_F_MULTI,
3062 			  IPVS_CMD_NEW_DEST);
3063 	if (!hdr)
3064 		return -EMSGSIZE;
3065 
3066 	if (ip_vs_genl_fill_dest(skb, dest) < 0)
3067 		goto nla_put_failure;
3068 
3069 	return genlmsg_end(skb, hdr);
3070 
3071 nla_put_failure:
3072 	genlmsg_cancel(skb, hdr);
3073 	return -EMSGSIZE;
3074 }
3075 
ip_vs_genl_dump_dests(struct sk_buff * skb,struct netlink_callback * cb)3076 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3077 				 struct netlink_callback *cb)
3078 {
3079 	int idx = 0;
3080 	int start = cb->args[0];
3081 	struct ip_vs_service *svc;
3082 	struct ip_vs_dest *dest;
3083 	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3084 	struct net *net = skb_sknet(skb);
3085 
3086 	mutex_lock(&__ip_vs_mutex);
3087 
3088 	/* Try to find the service for which to dump destinations */
3089 	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3090 			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3091 		goto out_err;
3092 
3093 
3094 	svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3095 	if (IS_ERR(svc) || svc == NULL)
3096 		goto out_err;
3097 
3098 	/* Dump the destinations */
3099 	list_for_each_entry(dest, &svc->destinations, n_list) {
3100 		if (++idx <= start)
3101 			continue;
3102 		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3103 			idx--;
3104 			goto nla_put_failure;
3105 		}
3106 	}
3107 
3108 nla_put_failure:
3109 	cb->args[0] = idx;
3110 
3111 out_err:
3112 	mutex_unlock(&__ip_vs_mutex);
3113 
3114 	return skb->len;
3115 }
3116 
ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern * udest,struct nlattr * nla,int full_entry)3117 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3118 				 struct nlattr *nla, int full_entry)
3119 {
3120 	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3121 	struct nlattr *nla_addr, *nla_port;
3122 
3123 	/* Parse mandatory identifying destination fields first */
3124 	if (nla == NULL ||
3125 	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3126 		return -EINVAL;
3127 
3128 	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
3129 	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
3130 
3131 	if (!(nla_addr && nla_port))
3132 		return -EINVAL;
3133 
3134 	memset(udest, 0, sizeof(*udest));
3135 
3136 	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3137 	udest->port = nla_get_u16(nla_port);
3138 
3139 	/* If a full entry was requested, check for the additional fields */
3140 	if (full_entry) {
3141 		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3142 			      *nla_l_thresh;
3143 
3144 		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
3145 		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
3146 		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
3147 		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
3148 
3149 		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3150 			return -EINVAL;
3151 
3152 		udest->conn_flags = nla_get_u32(nla_fwd)
3153 				    & IP_VS_CONN_F_FWD_MASK;
3154 		udest->weight = nla_get_u32(nla_weight);
3155 		udest->u_threshold = nla_get_u32(nla_u_thresh);
3156 		udest->l_threshold = nla_get_u32(nla_l_thresh);
3157 	}
3158 
3159 	return 0;
3160 }
3161 
/*
 * Append a sync daemon description to @skb as a nested
 * IPVS_CMD_ATTR_DAEMON attribute holding its state, multicast interface
 * name and sync id.
 *
 * Returns 0 on success or -EMSGSIZE if the nest cannot be opened or an
 * attribute cannot be added; a partially written nest is cancelled.
 */
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
				  const char *mcast_ifn, __be32 syncid)
{
	struct nlattr *nest = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);

	if (nest == NULL)
		return -EMSGSIZE;

	/*
	 * The NLA_PUT*() helpers are expected to branch to the
	 * nla_put_failure label when an attribute does not fit.
	 */
	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
3183 
/*
 * Emit one IPVS_CMD_NEW_DAEMON multipart message describing a sync daemon
 * into the dump buffer @skb, addressed to the requester recorded in @cb.
 *
 * Returns the result of genlmsg_end() on success, or -EMSGSIZE if either
 * the message header or the daemon payload does not fit.  A message whose
 * payload failed to fit is cancelled before returning.
 */
static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
				  const char *mcast_ifn, __be32 syncid,
				  struct netlink_callback *cb)
{
	void *msg_hdr;

	msg_hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
			      cb->nlh->nlmsg_seq, &ip_vs_genl_family,
			      NLM_F_MULTI, IPVS_CMD_NEW_DAEMON);
	if (msg_hdr == NULL)
		return -EMSGSIZE;

	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) {
		/* Drop the header again so no half-built message is sent */
		genlmsg_cancel(skb, msg_hdr);
		return -EMSGSIZE;
	}

	return genlmsg_end(skb, msg_hdr);
}
3204 
ip_vs_genl_dump_daemons(struct sk_buff * skb,struct netlink_callback * cb)3205 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3206 				   struct netlink_callback *cb)
3207 {
3208 	struct net *net = skb_sknet(skb);
3209 	struct netns_ipvs *ipvs = net_ipvs(net);
3210 
3211 	mutex_lock(&__ip_vs_mutex);
3212 	if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3213 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3214 					   ipvs->master_mcast_ifn,
3215 					   ipvs->master_syncid, cb) < 0)
3216 			goto nla_put_failure;
3217 
3218 		cb->args[0] = 1;
3219 	}
3220 
3221 	if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3222 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3223 					   ipvs->backup_mcast_ifn,
3224 					   ipvs->backup_syncid, cb) < 0)
3225 			goto nla_put_failure;
3226 
3227 		cb->args[1] = 1;
3228 	}
3229 
3230 nla_put_failure:
3231 	mutex_unlock(&__ip_vs_mutex);
3232 
3233 	return skb->len;
3234 }
3235 
ip_vs_genl_new_daemon(struct net * net,struct nlattr ** attrs)3236 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3237 {
3238 	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3239 	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3240 	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3241 		return -EINVAL;
3242 
3243 	return start_sync_thread(net,
3244 				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3245 				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3246 				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3247 }
3248 
ip_vs_genl_del_daemon(struct net * net,struct nlattr ** attrs)3249 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3250 {
3251 	if (!attrs[IPVS_DAEMON_ATTR_STATE])
3252 		return -EINVAL;
3253 
3254 	return stop_sync_thread(net,
3255 				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3256 }
3257 
ip_vs_genl_set_config(struct net * net,struct nlattr ** attrs)3258 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3259 {
3260 	struct ip_vs_timeout_user t;
3261 
3262 	__ip_vs_get_timeouts(net, &t);
3263 
3264 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3265 		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3266 
3267 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3268 		t.tcp_fin_timeout =
3269 			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3270 
3271 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3272 		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3273 
3274 	return ip_vs_set_timeout(net, &t);
3275 }
3276 
ip_vs_genl_set_cmd(struct sk_buff * skb,struct genl_info * info)3277 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3278 {
3279 	struct ip_vs_service *svc = NULL;
3280 	struct ip_vs_service_user_kern usvc;
3281 	struct ip_vs_dest_user_kern udest;
3282 	int ret = 0, cmd;
3283 	int need_full_svc = 0, need_full_dest = 0;
3284 	struct net *net;
3285 	struct netns_ipvs *ipvs;
3286 
3287 	net = skb_sknet(skb);
3288 	ipvs = net_ipvs(net);
3289 	cmd = info->genlhdr->cmd;
3290 
3291 	mutex_lock(&__ip_vs_mutex);
3292 
3293 	if (cmd == IPVS_CMD_FLUSH) {
3294 		ret = ip_vs_flush(net);
3295 		goto out;
3296 	} else if (cmd == IPVS_CMD_SET_CONFIG) {
3297 		ret = ip_vs_genl_set_config(net, info->attrs);
3298 		goto out;
3299 	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
3300 		   cmd == IPVS_CMD_DEL_DAEMON) {
3301 
3302 		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3303 
3304 		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3305 		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3306 				     info->attrs[IPVS_CMD_ATTR_DAEMON],
3307 				     ip_vs_daemon_policy)) {
3308 			ret = -EINVAL;
3309 			goto out;
3310 		}
3311 
3312 		if (cmd == IPVS_CMD_NEW_DAEMON)
3313 			ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3314 		else
3315 			ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3316 		goto out;
3317 	} else if (cmd == IPVS_CMD_ZERO &&
3318 		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3319 		ret = ip_vs_zero_all(net);
3320 		goto out;
3321 	}
3322 
3323 	/* All following commands require a service argument, so check if we
3324 	 * received a valid one. We need a full service specification when
3325 	 * adding / editing a service. Only identifying members otherwise. */
3326 	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3327 		need_full_svc = 1;
3328 
3329 	ret = ip_vs_genl_parse_service(net, &usvc,
3330 				       info->attrs[IPVS_CMD_ATTR_SERVICE],
3331 				       need_full_svc, &svc);
3332 	if (ret)
3333 		goto out;
3334 
3335 	/* Unless we're adding a new service, the service must already exist */
3336 	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3337 		ret = -ESRCH;
3338 		goto out;
3339 	}
3340 
3341 	/* Destination commands require a valid destination argument. For
3342 	 * adding / editing a destination, we need a full destination
3343 	 * specification. */
3344 	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3345 	    cmd == IPVS_CMD_DEL_DEST) {
3346 		if (cmd != IPVS_CMD_DEL_DEST)
3347 			need_full_dest = 1;
3348 
3349 		ret = ip_vs_genl_parse_dest(&udest,
3350 					    info->attrs[IPVS_CMD_ATTR_DEST],
3351 					    need_full_dest);
3352 		if (ret)
3353 			goto out;
3354 	}
3355 
3356 	switch (cmd) {
3357 	case IPVS_CMD_NEW_SERVICE:
3358 		if (svc == NULL)
3359 			ret = ip_vs_add_service(net, &usvc, &svc);
3360 		else
3361 			ret = -EEXIST;
3362 		break;
3363 	case IPVS_CMD_SET_SERVICE:
3364 		ret = ip_vs_edit_service(svc, &usvc);
3365 		break;
3366 	case IPVS_CMD_DEL_SERVICE:
3367 		ret = ip_vs_del_service(svc);
3368 		/* do not use svc, it can be freed */
3369 		break;
3370 	case IPVS_CMD_NEW_DEST:
3371 		ret = ip_vs_add_dest(svc, &udest);
3372 		break;
3373 	case IPVS_CMD_SET_DEST:
3374 		ret = ip_vs_edit_dest(svc, &udest);
3375 		break;
3376 	case IPVS_CMD_DEL_DEST:
3377 		ret = ip_vs_del_dest(svc, &udest);
3378 		break;
3379 	case IPVS_CMD_ZERO:
3380 		ret = ip_vs_zero_service(svc);
3381 		break;
3382 	default:
3383 		ret = -EINVAL;
3384 	}
3385 
3386 out:
3387 	mutex_unlock(&__ip_vs_mutex);
3388 
3389 	return ret;
3390 }
3391 
ip_vs_genl_get_cmd(struct sk_buff * skb,struct genl_info * info)3392 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3393 {
3394 	struct sk_buff *msg;
3395 	void *reply;
3396 	int ret, cmd, reply_cmd;
3397 	struct net *net;
3398 	struct netns_ipvs *ipvs;
3399 
3400 	net = skb_sknet(skb);
3401 	ipvs = net_ipvs(net);
3402 	cmd = info->genlhdr->cmd;
3403 
3404 	if (cmd == IPVS_CMD_GET_SERVICE)
3405 		reply_cmd = IPVS_CMD_NEW_SERVICE;
3406 	else if (cmd == IPVS_CMD_GET_INFO)
3407 		reply_cmd = IPVS_CMD_SET_INFO;
3408 	else if (cmd == IPVS_CMD_GET_CONFIG)
3409 		reply_cmd = IPVS_CMD_SET_CONFIG;
3410 	else {
3411 		pr_err("unknown Generic Netlink command\n");
3412 		return -EINVAL;
3413 	}
3414 
3415 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3416 	if (!msg)
3417 		return -ENOMEM;
3418 
3419 	mutex_lock(&__ip_vs_mutex);
3420 
3421 	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3422 	if (reply == NULL)
3423 		goto nla_put_failure;
3424 
3425 	switch (cmd) {
3426 	case IPVS_CMD_GET_SERVICE:
3427 	{
3428 		struct ip_vs_service *svc;
3429 
3430 		svc = ip_vs_genl_find_service(net,
3431 					      info->attrs[IPVS_CMD_ATTR_SERVICE]);
3432 		if (IS_ERR(svc)) {
3433 			ret = PTR_ERR(svc);
3434 			goto out_err;
3435 		} else if (svc) {
3436 			ret = ip_vs_genl_fill_service(msg, svc);
3437 			if (ret)
3438 				goto nla_put_failure;
3439 		} else {
3440 			ret = -ESRCH;
3441 			goto out_err;
3442 		}
3443 
3444 		break;
3445 	}
3446 
3447 	case IPVS_CMD_GET_CONFIG:
3448 	{
3449 		struct ip_vs_timeout_user t;
3450 
3451 		__ip_vs_get_timeouts(net, &t);
3452 #ifdef CONFIG_IP_VS_PROTO_TCP
3453 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3454 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3455 			    t.tcp_fin_timeout);
3456 #endif
3457 #ifdef CONFIG_IP_VS_PROTO_UDP
3458 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3459 #endif
3460 
3461 		break;
3462 	}
3463 
3464 	case IPVS_CMD_GET_INFO:
3465 		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3466 		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3467 			    ip_vs_conn_tab_size);
3468 		break;
3469 	}
3470 
3471 	genlmsg_end(msg, reply);
3472 	ret = genlmsg_reply(msg, info);
3473 	goto out;
3474 
3475 nla_put_failure:
3476 	pr_err("not enough space in Netlink message\n");
3477 	ret = -EMSGSIZE;
3478 
3479 out_err:
3480 	nlmsg_free(msg);
3481 out:
3482 	mutex_unlock(&__ip_vs_mutex);
3483 
3484 	return ret;
3485 }
3486 
3487 
3488 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3489 	{
3490 		.cmd	= IPVS_CMD_NEW_SERVICE,
3491 		.flags	= GENL_ADMIN_PERM,
3492 		.policy	= ip_vs_cmd_policy,
3493 		.doit	= ip_vs_genl_set_cmd,
3494 	},
3495 	{
3496 		.cmd	= IPVS_CMD_SET_SERVICE,
3497 		.flags	= GENL_ADMIN_PERM,
3498 		.policy	= ip_vs_cmd_policy,
3499 		.doit	= ip_vs_genl_set_cmd,
3500 	},
3501 	{
3502 		.cmd	= IPVS_CMD_DEL_SERVICE,
3503 		.flags	= GENL_ADMIN_PERM,
3504 		.policy	= ip_vs_cmd_policy,
3505 		.doit	= ip_vs_genl_set_cmd,
3506 	},
3507 	{
3508 		.cmd	= IPVS_CMD_GET_SERVICE,
3509 		.flags	= GENL_ADMIN_PERM,
3510 		.doit	= ip_vs_genl_get_cmd,
3511 		.dumpit	= ip_vs_genl_dump_services,
3512 		.policy	= ip_vs_cmd_policy,
3513 	},
3514 	{
3515 		.cmd	= IPVS_CMD_NEW_DEST,
3516 		.flags	= GENL_ADMIN_PERM,
3517 		.policy	= ip_vs_cmd_policy,
3518 		.doit	= ip_vs_genl_set_cmd,
3519 	},
3520 	{
3521 		.cmd	= IPVS_CMD_SET_DEST,
3522 		.flags	= GENL_ADMIN_PERM,
3523 		.policy	= ip_vs_cmd_policy,
3524 		.doit	= ip_vs_genl_set_cmd,
3525 	},
3526 	{
3527 		.cmd	= IPVS_CMD_DEL_DEST,
3528 		.flags	= GENL_ADMIN_PERM,
3529 		.policy	= ip_vs_cmd_policy,
3530 		.doit	= ip_vs_genl_set_cmd,
3531 	},
3532 	{
3533 		.cmd	= IPVS_CMD_GET_DEST,
3534 		.flags	= GENL_ADMIN_PERM,
3535 		.policy	= ip_vs_cmd_policy,
3536 		.dumpit	= ip_vs_genl_dump_dests,
3537 	},
3538 	{
3539 		.cmd	= IPVS_CMD_NEW_DAEMON,
3540 		.flags	= GENL_ADMIN_PERM,
3541 		.policy	= ip_vs_cmd_policy,
3542 		.doit	= ip_vs_genl_set_cmd,
3543 	},
3544 	{
3545 		.cmd	= IPVS_CMD_DEL_DAEMON,
3546 		.flags	= GENL_ADMIN_PERM,
3547 		.policy	= ip_vs_cmd_policy,
3548 		.doit	= ip_vs_genl_set_cmd,
3549 	},
3550 	{
3551 		.cmd	= IPVS_CMD_GET_DAEMON,
3552 		.flags	= GENL_ADMIN_PERM,
3553 		.dumpit	= ip_vs_genl_dump_daemons,
3554 	},
3555 	{
3556 		.cmd	= IPVS_CMD_SET_CONFIG,
3557 		.flags	= GENL_ADMIN_PERM,
3558 		.policy	= ip_vs_cmd_policy,
3559 		.doit	= ip_vs_genl_set_cmd,
3560 	},
3561 	{
3562 		.cmd	= IPVS_CMD_GET_CONFIG,
3563 		.flags	= GENL_ADMIN_PERM,
3564 		.doit	= ip_vs_genl_get_cmd,
3565 	},
3566 	{
3567 		.cmd	= IPVS_CMD_GET_INFO,
3568 		.flags	= GENL_ADMIN_PERM,
3569 		.doit	= ip_vs_genl_get_cmd,
3570 	},
3571 	{
3572 		.cmd	= IPVS_CMD_ZERO,
3573 		.flags	= GENL_ADMIN_PERM,
3574 		.policy	= ip_vs_cmd_policy,
3575 		.doit	= ip_vs_genl_set_cmd,
3576 	},
3577 	{
3578 		.cmd	= IPVS_CMD_FLUSH,
3579 		.flags	= GENL_ADMIN_PERM,
3580 		.doit	= ip_vs_genl_set_cmd,
3581 	},
3582 };
3583 
ip_vs_genl_register(void)3584 static int __init ip_vs_genl_register(void)
3585 {
3586 	return genl_register_family_with_ops(&ip_vs_genl_family,
3587 		ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3588 }
3589 
ip_vs_genl_unregister(void)3590 static void ip_vs_genl_unregister(void)
3591 {
3592 	genl_unregister_family(&ip_vs_genl_family);
3593 }
3594 
3595 /* End of Generic Netlink interface definitions */
3596 
3597 /*
 * per netns init/exit func.
3599  */
3600 #ifdef CONFIG_SYSCTL
__ip_vs_control_init_sysctl(struct net * net)3601 int __net_init __ip_vs_control_init_sysctl(struct net *net)
3602 {
3603 	int idx;
3604 	struct netns_ipvs *ipvs = net_ipvs(net);
3605 	struct ctl_table *tbl;
3606 
3607 	atomic_set(&ipvs->dropentry, 0);
3608 	spin_lock_init(&ipvs->dropentry_lock);
3609 	spin_lock_init(&ipvs->droppacket_lock);
3610 	spin_lock_init(&ipvs->securetcp_lock);
3611 
3612 	if (!net_eq(net, &init_net)) {
3613 		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3614 		if (tbl == NULL)
3615 			return -ENOMEM;
3616 	} else
3617 		tbl = vs_vars;
3618 	/* Initialize sysctl defaults */
3619 	idx = 0;
3620 	ipvs->sysctl_amemthresh = 1024;
3621 	tbl[idx++].data = &ipvs->sysctl_amemthresh;
3622 	ipvs->sysctl_am_droprate = 10;
3623 	tbl[idx++].data = &ipvs->sysctl_am_droprate;
3624 	tbl[idx++].data = &ipvs->sysctl_drop_entry;
3625 	tbl[idx++].data = &ipvs->sysctl_drop_packet;
3626 #ifdef CONFIG_IP_VS_NFCT
3627 	tbl[idx++].data = &ipvs->sysctl_conntrack;
3628 #endif
3629 	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3630 	ipvs->sysctl_snat_reroute = 1;
3631 	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3632 	ipvs->sysctl_sync_ver = 1;
3633 	tbl[idx++].data = &ipvs->sysctl_sync_ver;
3634 	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3635 	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3636 	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3637 	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3638 	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3639 	tbl[idx].data = &ipvs->sysctl_sync_threshold;
3640 	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3641 	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3642 
3643 
3644 	ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3645 						     tbl);
3646 	if (ipvs->sysctl_hdr == NULL) {
3647 		if (!net_eq(net, &init_net))
3648 			kfree(tbl);
3649 		return -ENOMEM;
3650 	}
3651 	ip_vs_start_estimator(net, &ipvs->tot_stats);
3652 	ipvs->sysctl_tbl = tbl;
3653 	/* Schedule defense work */
3654 	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3655 	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3656 
3657 	return 0;
3658 }
3659 
__ip_vs_control_cleanup_sysctl(struct net * net)3660 void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
3661 {
3662 	struct netns_ipvs *ipvs = net_ipvs(net);
3663 
3664 	cancel_delayed_work_sync(&ipvs->defense_work);
3665 	cancel_work_sync(&ipvs->defense_work.work);
3666 	unregister_net_sysctl_table(ipvs->sysctl_hdr);
3667 }
3668 
3669 #else
3670 
__ip_vs_control_init_sysctl(struct net * net)3671 int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
__ip_vs_control_cleanup_sysctl(struct net * net)3672 void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
3673 
3674 #endif
3675 
/*
 * Notifier block whose callback is ip_vs_dst_event(); it is registered on
 * the netdevice notifier chain by ip_vs_control_init().
 */
static struct notifier_block ip_vs_dst_notifier = {
	.notifier_call = ip_vs_dst_event,
};
3679 
__ip_vs_control_init(struct net * net)3680 int __net_init __ip_vs_control_init(struct net *net)
3681 {
3682 	int idx;
3683 	struct netns_ipvs *ipvs = net_ipvs(net);
3684 
3685 	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3686 
3687 	/* Initialize rs_table */
3688 	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3689 		INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3690 
3691 	INIT_LIST_HEAD(&ipvs->dest_trash);
3692 	atomic_set(&ipvs->ftpsvc_counter, 0);
3693 	atomic_set(&ipvs->nullsvc_counter, 0);
3694 
3695 	/* procfs stats */
3696 	ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3697 	if (!ipvs->tot_stats.cpustats) {
3698 		pr_err("%s(): alloc_percpu.\n", __func__);
3699 		return -ENOMEM;
3700 	}
3701 	spin_lock_init(&ipvs->tot_stats.lock);
3702 
3703 	proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3704 	proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3705 	proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3706 			     &ip_vs_stats_percpu_fops);
3707 
3708 	if (__ip_vs_control_init_sysctl(net))
3709 		goto err;
3710 
3711 	return 0;
3712 
3713 err:
3714 	free_percpu(ipvs->tot_stats.cpustats);
3715 	return -ENOMEM;
3716 }
3717 
/*
 * Per-namespace teardown of the IPVS control state.
 *
 * Cleans the destination trash, stops the total-stats estimator, runs the
 * sysctl teardown, removes the three proc entries created by
 * __ip_vs_control_init() and frees the per-CPU total statistics.
 */
void __net_exit __ip_vs_control_cleanup(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	ip_vs_trash_cleanup(net);
	ip_vs_stop_estimator(net, &ipvs->tot_stats);
	__ip_vs_control_cleanup_sysctl(net);
	/* proc entries are removed in reverse order of their creation */
	proc_net_remove(net, "ip_vs_stats_percpu");
	proc_net_remove(net, "ip_vs_stats");
	proc_net_remove(net, "ip_vs");
	/* freed last, after the proc entries are gone */
	free_percpu(ipvs->tot_stats.cpustats);
}
3730 
ip_vs_control_init(void)3731 int __init ip_vs_control_init(void)
3732 {
3733 	int idx;
3734 	int ret;
3735 
3736 	EnterFunction(2);
3737 
3738 	/* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3739 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3740 		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3741 		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3742 	}
3743 
3744 	smp_wmb();	/* Do we really need it now ? */
3745 
3746 	ret = nf_register_sockopt(&ip_vs_sockopts);
3747 	if (ret) {
3748 		pr_err("cannot register sockopt.\n");
3749 		goto err_sock;
3750 	}
3751 
3752 	ret = ip_vs_genl_register();
3753 	if (ret) {
3754 		pr_err("cannot register Generic Netlink interface.\n");
3755 		goto err_genl;
3756 	}
3757 
3758 	ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3759 	if (ret < 0)
3760 		goto err_notf;
3761 
3762 	LeaveFunction(2);
3763 	return 0;
3764 
3765 err_notf:
3766 	ip_vs_genl_unregister();
3767 err_genl:
3768 	nf_unregister_sockopt(&ip_vs_sockopts);
3769 err_sock:
3770 	return ret;
3771 }
3772 
3773 
ip_vs_control_cleanup(void)3774 void ip_vs_control_cleanup(void)
3775 {
3776 	EnterFunction(2);
3777 	ip_vs_genl_unregister();
3778 	nf_unregister_sockopt(&ip_vs_sockopts);
3779 	LeaveFunction(2);
3780 }
3781