1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Version:     $Id: ip_vs_ctl.c,v 1.30.2.3 2003/07/29 14:37:12 wensong Exp $
9  *
10  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
11  *              Peter Kese <peter.kese@ijs.si>
12  *              Julian Anastasov <ja@ssi.bg>
13  *
14  *              This program is free software; you can redistribute it and/or
15  *              modify it under the terms of the GNU General Public License
16  *              as published by the Free Software Foundation; either version
17  *              2 of the License, or (at your option) any later version.
18  *
19  * Changes:
20  *
21  */
22 
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/types.h>
26 #include <linux/fs.h>
27 #include <linux/sysctl.h>
28 #include <linux/proc_fs.h>
29 #include <linux/timer.h>
30 #include <linux/swap.h>
31 #include <linux/proc_fs.h>
32 
33 #include <linux/netfilter.h>
34 #include <linux/netfilter_ipv4.h>
35 
36 #include <net/ip.h>
37 #include <net/sock.h>
38 
39 #include <asm/uaccess.h>
40 
41 #include <net/ip_vs.h>
42 
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DECLARE_MUTEX(__ip_vs_mutex);

/* lock for service table */
rwlock_t __ip_vs_svc_lock = RW_LOCK_UNLOCKED;

/* lock for table with the real services */
static rwlock_t __ip_vs_rs_lock = RW_LOCK_UNLOCKED;

/* lock for state and timeout tables */
static rwlock_t __ip_vs_securetcp_lock = RW_LOCK_UNLOCKED;

/* lock for drop entry handling */
static spinlock_t __ip_vs_dropentry_lock = SPIN_LOCK_UNLOCKED;

/* lock for drop packet handling */
static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED;

/* 1/rate drop and drop-entry variables, managed by update_defense_level() */
int ip_vs_drop_rate = 0;
int ip_vs_drop_counter = 0;
atomic_t ip_vs_dropentry = ATOMIC_INIT(0);

/* number of virtual services */
static int ip_vs_num_services = 0;

/* sysctl variables */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
static int sysctl_ip_vs_secure_tcp = 0;
/* available-memory threshold (in pages) below which defenses engage */
static int sysctl_ip_vs_amemthresh = 2048;
/* fixed drop rate used when drop_packet mode is 3 (always on) */
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold = 3;
int sysctl_ip_vs_nat_icmp_send = 0;
80 
#ifdef CONFIG_IP_VS_DEBUG
/* runtime debug verbosity; read by the IP_VS_DBG macros */
static int sysctl_ip_vs_debug_level = 0;

/* Return the current IPVS debug level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
89 
90 /*
91  *	update_defense_level is called from timer bh and from sysctl.
92  */
update_defense_level(void)93 static void update_defense_level(void)
94 {
95 	struct sysinfo i;
96 	int availmem;
97 	int nomem;
98 
99 	/* we only count free and buffered memory (in pages) */
100 	si_meminfo(&i);
101 	availmem = i.freeram + i.bufferram;
102 
103 	nomem = (availmem < sysctl_ip_vs_amemthresh);
104 
105 	/* drop_entry */
106 	spin_lock(&__ip_vs_dropentry_lock);
107 	switch (sysctl_ip_vs_drop_entry) {
108 	case 0:
109 		atomic_set(&ip_vs_dropentry, 0);
110 		break;
111 	case 1:
112 		if (nomem) {
113 			atomic_set(&ip_vs_dropentry, 1);
114 			sysctl_ip_vs_drop_entry = 2;
115 		} else {
116 			atomic_set(&ip_vs_dropentry, 0);
117 		}
118 		break;
119 	case 2:
120 		if (nomem) {
121 			atomic_set(&ip_vs_dropentry, 1);
122 		} else {
123 			atomic_set(&ip_vs_dropentry, 0);
124 			sysctl_ip_vs_drop_entry = 1;
125 		};
126 		break;
127 	case 3:
128 		atomic_set(&ip_vs_dropentry, 1);
129 		break;
130 	}
131 	spin_unlock(&__ip_vs_dropentry_lock);
132 
133 	/* drop_packet */
134 	spin_lock(&__ip_vs_droppacket_lock);
135 	switch (sysctl_ip_vs_drop_packet) {
136 	case 0:
137 		ip_vs_drop_rate = 0;
138 		break;
139 	case 1:
140 		if (nomem) {
141 			ip_vs_drop_rate = ip_vs_drop_counter
142 				= sysctl_ip_vs_amemthresh /
143 				(sysctl_ip_vs_amemthresh - availmem);
144 			sysctl_ip_vs_drop_packet = 2;
145 		} else {
146 			ip_vs_drop_rate = 0;
147 		}
148 		break;
149 	case 2:
150 		if (nomem) {
151 			ip_vs_drop_rate = ip_vs_drop_counter
152 				= sysctl_ip_vs_amemthresh /
153 				(sysctl_ip_vs_amemthresh - availmem);
154 		} else {
155 			ip_vs_drop_rate = 0;
156 			sysctl_ip_vs_drop_packet = 1;
157 		}
158 		break;
159 	case 3:
160 		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
161 		break;
162 	}
163 	spin_unlock(&__ip_vs_droppacket_lock);
164 
165 	/* secure_tcp */
166 	write_lock(&__ip_vs_securetcp_lock);
167 	switch (sysctl_ip_vs_secure_tcp) {
168 	case 0:
169 		ip_vs_secure_tcp_set(0);
170 		break;
171 	case 1:
172 		if (nomem) {
173 			ip_vs_secure_tcp_set(1);
174 			sysctl_ip_vs_secure_tcp = 2;
175 		} else {
176 			ip_vs_secure_tcp_set(0);
177 		}
178 		break;
179 	case 2:
180 		if (nomem) {
181 			ip_vs_secure_tcp_set(1);
182 		} else {
183 			ip_vs_secure_tcp_set(0);
184 			sysctl_ip_vs_secure_tcp = 1;
185 		}
186 		break;
187 	case 3:
188 		ip_vs_secure_tcp_set(1);
189 		break;
190 	}
191 	write_unlock(&__ip_vs_securetcp_lock);
192 }
193 
194 
/*
 *	Timer for checking the defense
 */
static struct timer_list defense_timer;
#define DEFENSE_TIMER_PERIOD	1*HZ

/*
 * Periodic re-evaluation of the defense strategies (once per second).
 * Runs in timer bottom-half context and re-arms itself via mod_timer().
 */
static void defense_timer_handler(unsigned long data)
{
	update_defense_level();
	/* when the drop_entry defense is active, randomly expire entries */
	if (atomic_read(&ip_vs_dropentry))
		ip_vs_random_dropentry();

	mod_timer(&defense_timer, jiffies + DEFENSE_TIMER_PERIOD);
}
209 
210 
/*
 *  Hash table: for virtual service lookups
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)	/* 256 buckets */
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 *  Hash table: for real service lookups
 */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)	/* 16 buckets */
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];

/*
 * Trash for destinations: removed destinations that are still referenced
 * by connection entries are parked here (see ip_vs_trash_get_dest()).
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 * FTP & NULL virtual service counters
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
242 
243 
244 /*
245  *  Returns hash value for virtual service
246  */
247 static __inline__ unsigned
ip_vs_svc_hashkey(unsigned proto,__u32 addr,__u16 port)248 ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
249 {
250 	register unsigned porth = ntohs(port);
251 
252 	return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
253 		& IP_VS_SVC_TAB_MASK;
254 }
255 
/*
 *  Returns hash value of fwmark for virtual service lookup
 */
static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
{
	/* fwmark values are typically small, so the low bits suffice */
	return fwmark & IP_VS_SVC_TAB_MASK;
}
263 
264 /*
265  *  Hashes ip_vs_service in the ip_vs_svc_table by <proto,addr,port>
266  *  or in the ip_vs_svc_fwm_table by fwmark.
267  *  Should be called with locked tables.
268  *  Returns bool success.
269  */
ip_vs_svc_hash(struct ip_vs_service * svc)270 static int ip_vs_svc_hash(struct ip_vs_service *svc)
271 {
272 	unsigned hash;
273 
274 	if (svc->flags & IP_VS_SVC_F_HASHED) {
275 		IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
276 			  "called from %p\n", __builtin_return_address(0));
277 		return 0;
278 	}
279 
280 	if (svc->fwmark == 0) {
281 		/*
282 		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
283 		 */
284 		hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
285 		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
286 	} else {
287 		/*
288 		 *  Hash it by fwmark in ip_vs_svc_fwm_table
289 		 */
290 		hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
291 		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
292 	}
293 
294 	svc->flags |= IP_VS_SVC_F_HASHED;
295 	/* increase its refcnt because it is referenced by the svc table */
296 	atomic_inc(&svc->refcnt);
297 	return 1;
298 }
299 
300 
301 /*
302  *  Unhashes ip_vs_service from ip_vs_svc_table/ip_vs_svc_fwm_table.
303  *  Should be called with locked tables.
304  *  Returns bool success.
305  */
ip_vs_svc_unhash(struct ip_vs_service * svc)306 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
307 {
308 	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
309 		IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
310 			  "called from %p\n", __builtin_return_address(0));
311 		return 0;
312 	}
313 
314 	if (svc->fwmark == 0) {
315 		/*
316 		 * Remove it from the ip_vs_svc_table table.
317 		 */
318 		list_del(&svc->s_list);
319 	} else {
320 		/*
321 		 * Remove it from the ip_vs_svc_fwm_table table.
322 		 */
323 		list_del(&svc->f_list);
324 	}
325 
326 	svc->flags &= ~IP_VS_SVC_F_HASHED;
327 	atomic_dec(&svc->refcnt);
328 	return 1;
329 }
330 
331 
332 /*
333  *  Get service by {proto,addr,port} in the service table.
334  */
335 static __inline__ struct ip_vs_service *
__ip_vs_service_get(__u16 protocol,__u32 vaddr,__u16 vport)336 __ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
337 {
338 	unsigned hash;
339 	struct ip_vs_service *svc;
340 	struct list_head *l,*e;
341 
342 	/*
343 	 *	Check for "full" addressed entries
344 	 */
345 	hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
346 
347 	l = &ip_vs_svc_table[hash];
348 	for (e=l->next; e!=l; e=e->next) {
349 		svc = list_entry(e, struct ip_vs_service, s_list);
350 		if ((svc->addr == vaddr)
351 		    && (svc->port == vport)
352 		    && (svc->protocol == protocol)) {
353 			/* HIT */
354 			atomic_inc(&svc->usecnt);
355 			return svc;
356 		}
357 	}
358 
359 	return NULL;
360 }
361 
362 
363 /*
364  *  Get service by {fwmark} in the service table.
365  */
static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
{
	struct list_head *head, *p;
	struct ip_vs_service *svc;

	/* scan the fwmark bucket */
	head = &ip_vs_svc_fwm_table[ip_vs_svc_fwm_hashkey(fwmark)];
	for (p = head->next; p != head; p = p->next) {
		svc = list_entry(p, struct ip_vs_service, f_list);
		if (svc->fwmark == fwmark) {
			/* HIT: the caller now holds a use reference */
			atomic_inc(&svc->usecnt);
			return svc;
		}
	}

	return NULL;
}
389 
/*
 *  Look up the virtual service for an incoming packet.
 *  Lookup order: the fwmark table first (when fwmark is non-zero), then
 *  the <protocol,addr,port> table, then the FTP control service as a
 *  fallback for possible FTP data connections, and finally the
 *  catch-all (port zero) service.
 *  On a hit the service is returned with its usecnt already incremented
 *  (done by the __ip_vs_*_get helpers).
 */
struct ip_vs_service *
ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
{
	struct ip_vs_service *svc;

	read_lock(&__ip_vs_svc_lock);

	/*
	 *	Check the table hashed by fwmark first
	 */
	if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
		goto out;

	/*
	 *	Check the table hashed by <protocol,addr,port>
	 *	for "full" addressed entries
	 */
	svc = __ip_vs_service_get(protocol, vaddr, vport);

	if (svc == NULL
	    && protocol == IPPROTO_TCP
	    && atomic_read(&ip_vs_ftpsvc_counter)
	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
		/*
		 * Check if ftp service entry exists, the packet
		 * might belong to FTP data connections.
		 */
		svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
	}

	if (svc == NULL
	    && atomic_read(&ip_vs_nullsvc_counter)) {
		/*
		 * Check if the catch-all port (port zero) exists
		 */
		svc = __ip_vs_service_get(protocol, vaddr, 0);
	}

  out:
	read_unlock(&__ip_vs_svc_lock);

	IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
		  fwmark, ip_vs_proto_name(protocol),
		  NIPQUAD(vaddr), ntohs(vport),
		  svc?"hit":"not hit");

	return svc;
}
438 
439 
/*
 *  Bind a destination to its virtual service, taking a reference on
 *  the service for the dest->svc back-pointer.
 */
static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	atomic_inc(&svc->refcnt);
	dest->svc = svc;
}
446 
/*
 *  Drop a destination's reference to its service.  Frees the service
 *  when this was the last reference (a dest in the trash may hold the
 *  final refcnt after the service was deleted).
 */
static inline void
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = dest->svc;

	dest->svc = NULL;
	if (atomic_dec_and_test(&svc->refcnt))
		kfree(svc);
}
456 
457 /*
458  *  Returns hash value for real service
459  */
static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
{
	register unsigned hport = ntohs(port);

	/* fold host-order address and port into the bucket index */
	unsigned key = ntohl(addr) ^ (hport >> IP_VS_RTAB_BITS) ^ hport;

	return key & IP_VS_RTAB_MASK;
}
467 
468 /*
469  *  Hashes ip_vs_dest in ip_vs_rtable by proto,addr,port.
470  *  should be called with locked tables.
471  *  returns bool success.
472  */
ip_vs_rs_hash(struct ip_vs_dest * dest)473 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
474 {
475 	unsigned hash;
476 
477 	if (!list_empty(&dest->d_list)) {
478 		return 0;
479 	}
480 
481 	/*
482 	 *	Hash by proto,addr,port,
483 	 *	which are the parameters of the real service.
484 	 */
485 	hash = ip_vs_rs_hashkey(dest->addr, dest->port);
486 	list_add(&dest->d_list, &ip_vs_rtable[hash]);
487 
488 	return 1;
489 }
490 
491 /*
492  *  UNhashes ip_vs_dest from ip_vs_rtable.
493  *  should be called with locked tables.
494  *  returns bool success.
495  */
ip_vs_rs_unhash(struct ip_vs_dest * dest)496 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
497 {
498 	/*
499 	 * Remove it from the ip_vs_rtable table.
500 	 */
501 	if (!list_empty(&dest->d_list)) {
502 		list_del(&dest->d_list);
503 		INIT_LIST_HEAD(&dest->d_list);
504 	}
505 
506 	return 1;
507 }
508 
509 /*
510  *  Lookup real service by {proto,addr,port} in the real service table.
511  */
512 struct ip_vs_dest *
ip_vs_lookup_real_service(__u16 protocol,__u32 daddr,__u16 dport)513 ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
514 {
515 	unsigned hash;
516 	struct ip_vs_dest *dest;
517 	struct list_head *l,*e;
518 
519 	/*
520 	 *	Check for "full" addressed entries
521 	 *	Return the first found entry
522 	 */
523 	hash = ip_vs_rs_hashkey(daddr, dport);
524 
525 	l = &ip_vs_rtable[hash];
526 
527 	read_lock(&__ip_vs_rs_lock);
528 	for (e=l->next; e!=l; e=e->next) {
529 		dest = list_entry(e, struct ip_vs_dest, d_list);
530 		if ((dest->addr == daddr)
531 		    && (dest->port == dport)
532 		    && ((dest->protocol == protocol) ||
533 			dest->vfwmark)) {
534 			/* HIT */
535 			read_unlock(&__ip_vs_rs_lock);
536 			return dest;
537 		}
538 	}
539 	read_unlock(&__ip_vs_rs_lock);
540 
541 	return NULL;
542 }
543 
544 /*
545  *  Lookup destination by {addr,port} in the given service
546  */
547 static struct ip_vs_dest *
ip_vs_lookup_dest(struct ip_vs_service * svc,__u32 daddr,__u16 dport)548 ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
549 {
550 	struct ip_vs_dest *dest;
551 	struct list_head *l, *e;
552 
553 	/*
554 	 * Find the destination for the given service
555 	 */
556 	l = &svc->destinations;
557 	for (e=l->next; e!=l; e=e->next) {
558 		dest = list_entry(e, struct ip_vs_dest, n_list);
559 		if ((dest->addr == daddr) && (dest->port == dport)) {
560 			/* HIT */
561 			return dest;
562 		}
563 	}
564 
565 	return NULL;
566 }
567 
568 
569 /*
570  *  Lookup dest by {svc,addr,port} in the destination trash.
571  *  The destination trash is used to hold the destinations that are removed
572  *  from the service table but are still referenced by some conn entries.
573  *  The reason to add the destination trash is when the dest is temporary
574  *  down (either by administrator or by monitor program), the dest can be
575  *  picked back from the trash, the remaining connections to the dest can
576  *  continue, and the counting information of the dest is also useful for
577  *  scheduling.
578  */
static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
{
	struct ip_vs_dest *dest;
	struct list_head *l, *e;

	/*
	 * Find the destination in trash
	 */
	l = &ip_vs_dest_trash;

	for (e=l->next; e!=l; e=e->next) {
		dest = list_entry(e, struct ip_vs_dest, n_list);
		IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
			  "refcnt=%d\n",
			  dest->vfwmark,
			  NIPQUAD(dest->addr), ntohs(dest->port),
			  atomic_read(&dest->refcnt));
		/* match on <addr,port> plus the identity of the owning
		   service: same fwmark, or (for non-fwmark services) the
		   same virtual <addr,port> */
		if (dest->addr == daddr &&
		    dest->port == dport &&
		    dest->vfwmark == svc->fwmark &&
		    dest->protocol == svc->protocol &&
		    (svc->fwmark ||
		     (dest->vaddr == svc->addr &&
		      dest->vport == svc->port))) {
			/* HIT */
			return dest;
		}

		/*
		 * Try to purge the destination from trash if not referenced
		 * (refcnt==1 means only the trash list itself holds it)
		 */
		if (atomic_read(&dest->refcnt) == 1) {
			IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
				  "from trash\n",
				  dest->vfwmark,
				  NIPQUAD(dest->addr), ntohs(dest->port));
			/* step back before deleting so the loop's e->next
			   continues correctly from the previous element */
			e = e->prev;
			list_del(&dest->n_list);
			__ip_vs_dst_reset(dest);
			__ip_vs_unbind_svc(dest);
			kfree(dest);
		}
	}

	return NULL;
}
626 
627 
628 /*
629  *  Clean up all the destinations in the trash
630  *  Called by the ip_vs_control_cleanup()
631  *
632  *  When the ip_vs_control_clearup is activated by ipvs module exit,
633  *  the service tables must have been flushed and all the connections
634  *  are expired, and the refcnt of each destination in the trash must
635  *  be 1, so we simply release them here.
636  */
ip_vs_trash_cleanup(void)637 static void ip_vs_trash_cleanup(void)
638 {
639 	struct ip_vs_dest *dest;
640 	struct list_head *l;
641 
642 	l = &ip_vs_dest_trash;
643 
644 	while (l->next != l) {
645 		dest = list_entry(l->next, struct ip_vs_dest, n_list);
646 		list_del(&dest->n_list);
647 		__ip_vs_dst_reset(dest);
648 		__ip_vs_unbind_svc(dest);
649 		kfree(dest);
650 	}
651 }
652 
653 
/*
 *  Zero an ip_vs_stats record in place.  Only the bytes that precede
 *  the embedded spinlock are cleared (the memset length is the byte
 *  offset of stats->lock), so the lock itself survives.  This relies
 *  on the counters being laid out before the lock in the struct —
 *  NOTE(review): assumed from this memset idiom; confirm against the
 *  struct ip_vs_stats definition.
 */
static inline void
__ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);
	memset(stats, 0, (char *)&stats->lock - (char *)stats);
	spin_unlock_bh(&stats->lock);
	ip_vs_zero_estimator(stats);
}
662 
/*
 *  Update a destination in the given service
 *
 *  Applies the user-supplied parameters (weight, forwarding flags) to
 *  the destination and (re)binds it to its service.  Used when adding
 *  a new dest, editing an existing one, or resurrecting one from trash.
 */
static void __ip_vs_update_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				struct ip_vs_rule_user *ur)
{
	int conn_flags;

	/*
	 *    Set the weight and the flags
	 */
	atomic_set(&dest->weight, ur->weight);

	/* new connections start out counted as inactive */
	conn_flags = ur->conn_flags | IP_VS_CONN_F_INACTIVE;

	/*
	 *    Check if local node and update the flags
	 */
	if (inet_addr_type(ur->daddr) == RTN_LOCAL) {
		conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
			| IP_VS_CONN_F_LOCALNODE;
	}

	/*
	 *    Set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading
	 */
	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
	} else {
		/*
		 *    Put the real service in ip_vs_rtable if not present.
		 *    For now only for NAT!
		 */
		write_lock_bh(&__ip_vs_rs_lock);
		ip_vs_rs_hash(dest);
		write_unlock_bh(&__ip_vs_rs_lock);
	}
	atomic_set(&dest->conn_flags, conn_flags);

	/* bind the service */
	if (!dest->svc) {
		__ip_vs_bind_svc(dest, svc);
	} else {
		if (dest->svc != svc) {
			/* moving to another service: rebind and restart
			   the statistics */
			__ip_vs_unbind_svc(dest);
			__ip_vs_zero_stats(&dest->stats);
			__ip_vs_bind_svc(dest, svc);
		}
	}

	/* set the dest status flags */
	dest->flags |= IP_VS_DEST_F_AVAILABLE;
}
717 
718 
/*
 *  Create a destination for the given service
 *
 *  Allocates and initializes a new ip_vs_dest from the user rule 'ur',
 *  returning it through *destp.  Returns 0 on success, -EINVAL for a
 *  non-local/non-unicast address, -ENOMEM on allocation failure.
 */
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_rule_user *ur,
	       struct ip_vs_dest **destp)
{
	struct ip_vs_dest *dest;
	unsigned atype;

	EnterFunction(2);

	/* real servers must be local or unicast-routable */
	atype = inet_addr_type(ur->daddr);
	if (atype != RTN_LOCAL && atype != RTN_UNICAST)
		return -EINVAL;

	*destp = dest = (struct ip_vs_dest*)
		kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
	if (dest == NULL) {
		IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
		return -ENOMEM;
	}
	memset(dest, 0, sizeof(struct ip_vs_dest));

	/* record the identity of the virtual service it belongs to */
	dest->protocol = svc->protocol;
	dest->vaddr = svc->addr;
	dest->vport = svc->port;
	dest->vfwmark = svc->fwmark;
	dest->addr = ur->daddr;
	dest->port = ur->dport;

	atomic_set(&dest->activeconns, 0);
	atomic_set(&dest->inactconns, 0);
	/* the caller (ip_vs_add_dest) takes the first reference */
	atomic_set(&dest->refcnt, 0);

	INIT_LIST_HEAD(&dest->d_list);
	dest->dst_lock = SPIN_LOCK_UNLOCKED;
	dest->stats.lock = SPIN_LOCK_UNLOCKED;
	__ip_vs_update_dest(svc, dest, ur);
	ip_vs_new_estimator(&dest->stats);

	LeaveFunction(2);
	return 0;
}
763 
764 
/*
 *  Add a destination into an existing service
 *
 *  Returns -ERANGE for a negative weight, -EEXIST if the dest is already
 *  in the service, 0 on success.  A matching dest found in the trash is
 *  resurrected instead of allocating a new one.
 */
static int ip_vs_add_dest(struct ip_vs_service *svc,
			  struct ip_vs_rule_user *ur)
{
	struct ip_vs_dest *dest;
	__u32 daddr = ur->daddr;
	__u16 dport = ur->dport;
	int ret;

	EnterFunction(2);

	if (ur->weight < 0) {
		IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
		return -ERANGE;
	}

	/*
	 * Check if the dest already exists in the list
	 */
	dest = ip_vs_lookup_dest(svc, daddr, dport);
	if (dest != NULL) {
		IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
		return -EEXIST;
	}

	/*
	 * Check if the dest already exists in the trash and
	 * is from the same service
	 */
	dest = ip_vs_trash_get_dest(svc, daddr, dport);
	if (dest != NULL) {
		IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
			  "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
			  NIPQUAD(daddr), ntohs(dport),
			  atomic_read(&dest->refcnt),
			  dest->vfwmark,
			  NIPQUAD(dest->vaddr),
			  ntohs(dest->vport));
		/* resurrect the trashed dest with the new parameters */
		__ip_vs_update_dest(svc, dest, ur);

		/*
		 * Get the destination from the trash
		 */
		list_del(&dest->n_list);

		/* its estimator was killed when it was trashed
		   (see __ip_vs_del_dest); start a fresh one */
		ip_vs_new_estimator(&dest->stats);

		write_lock_bh(&__ip_vs_svc_lock);

		/*
		 * Wait until all other svc users go away.
		 */
		while (atomic_read(&svc->usecnt) > 1) {};

		list_add(&dest->n_list, &svc->destinations);
		svc->num_dests++;

		/* call the update_service function of its scheduler */
		svc->scheduler->update_service(svc);

		write_unlock_bh(&__ip_vs_svc_lock);
		return 0;
	}

	/*
	 * Allocate and initialize the dest structure
	 */
	ret = ip_vs_new_dest(svc, ur, &dest);
	if (ret) {
		return ret;
	}

	/*
	 * Add the dest entry into the list
	 * (this reference is held by the service's destination list)
	 */
	atomic_inc(&dest->refcnt);

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	while (atomic_read(&svc->usecnt) > 1) {};

	list_add(&dest->n_list, &svc->destinations);
	svc->num_dests++;

	/* call the update_service function of its scheduler */
	svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	LeaveFunction(2);

	return 0;
}
863 
864 
/*
 *  Edit a destination in the given service
 *
 *  Returns -ERANGE for a negative weight, -ENOENT if the dest is not
 *  in the service, 0 on success.
 */
static int ip_vs_edit_dest(struct ip_vs_service *svc,
			   struct ip_vs_rule_user *ur)
{
	struct ip_vs_dest *dest;
	__u32 daddr = ur->daddr;
	__u16 dport = ur->dport;

	EnterFunction(2);

	if (ur->weight < 0) {
		IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
		return -ERANGE;
	}

	/*
	 *  Lookup the destination list
	 */
	dest = ip_vs_lookup_dest(svc, daddr, dport);
	if (dest == NULL) {
		IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
		return -ENOENT;
	}

	/* apply the new weight/flags to the destination */
	__ip_vs_update_dest(svc, dest, ur);

	write_lock_bh(&__ip_vs_svc_lock);

	/* Wait until all other svc users go away */
	while (atomic_read(&svc->usecnt) > 1) {};

	/* call the update_service, because server weight may be changed */
	svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	LeaveFunction(2);

	return 0;
}
907 
908 
/*
 *  Delete a destination (must be already unlinked from the service)
 *
 *  Frees the dest when no connection still refers to it; otherwise
 *  parks it in the trash so remaining connections can drain.
 */
static void __ip_vs_del_dest(struct ip_vs_dest *dest)
{
	ip_vs_kill_estimator(&dest->stats);

	/*
	 *  Remove it from the d-linked list with the real services.
	 */
	write_lock_bh(&__ip_vs_rs_lock);
	ip_vs_rs_unhash(dest);
	write_unlock_bh(&__ip_vs_rs_lock);

	/*
	 *  Decrease the refcnt of the dest, and free the dest
	 *  if nobody refers to it (refcnt=0). Otherwise, throw
	 *  the destination into the trash.
	 */
	if (atomic_dec_and_test(&dest->refcnt)) {
		__ip_vs_dst_reset(dest);
		/* simply decrease svc->refcnt here, let the caller check
		   and release the service if nobody refers to it.
		   Only user context can release destination and service,
		   and only one user context can update virtual service at a
		   time, so the operation here is OK */
		atomic_dec(&dest->svc->refcnt);
		kfree(dest);
	} else {
		/* still referenced by connections: keep it in the trash,
		   taking an extra reference for the trash list itself */
		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
			  NIPQUAD(dest->addr), ntohs(dest->port),
			  atomic_read(&dest->refcnt));
		list_add(&dest->n_list, &ip_vs_dest_trash);
		atomic_inc(&dest->refcnt);
	}
}
945 
946 
947 /*
948  *  Unlink a destination from the given service
949  */
__ip_vs_unlink_dest(struct ip_vs_service * svc,struct ip_vs_dest * dest,int svcupd)950 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
951 				struct ip_vs_dest *dest,
952 				int svcupd)
953 {
954 	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
955 
956 	/*
957 	 *  Remove it from the d-linked destination list.
958 	 */
959 	list_del(&dest->n_list);
960 	svc->num_dests--;
961 	if (svcupd) {
962 		/*
963 		 *  Call the update_service function of its scheduler
964 		 */
965 		svc->scheduler->update_service(svc);
966 	}
967 }
968 
969 
/*
 *  Delete a destination server in the given service
 *
 *  Returns -ENOENT if the dest is not in the service, 0 on success.
 *  The dest itself is freed or moved to the trash by __ip_vs_del_dest().
 */
static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_rule_user *ur)
{
	struct ip_vs_dest *dest;
	__u32 daddr = ur->daddr;
	__u16 dport = ur->dport;

	EnterFunction(2);

	dest = ip_vs_lookup_dest(svc, daddr, dport);
	if (dest == NULL) {
		IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
		return -ENOENT;
	}

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 *	Wait until all other svc users go away.
	 */
	while (atomic_read(&svc->usecnt) > 1) {};

	/*
	 *	Unlink dest from the service
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	write_unlock_bh(&__ip_vs_svc_lock);

	/*
	 *	Delete the destination
	 */
	__ip_vs_del_dest(dest);

	LeaveFunction(2);

	return 0;
}
1010 
1011 
/*
 *  Add a service into the service hash table
 *
 *  On success the new service, with usecnt 1, is returned through
 *  *svc_p.  Returns -ENOENT when the scheduler cannot be found and
 *  -ENOMEM on allocation failure.
 */
static int
ip_vs_add_service(struct ip_vs_rule_user *ur, struct ip_vs_service **svc_p)
{
	int ret = 0;
	struct ip_vs_scheduler *sched;
	struct ip_vs_service *svc = NULL;

	/* hold the module while the service exists; the matching
	   MOD_DEC_USE_COUNT is in __ip_vs_del_service() */
	MOD_INC_USE_COUNT;

	/*
	 * Lookup the scheduler, by 'ur->sched_name'
	 */
	sched = ip_vs_scheduler_get(ur->sched_name);
	if (sched == NULL) {
		IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
			   ur->sched_name);
		ret = -ENOENT;
		goto out_mod_dec;
	}

	svc = (struct ip_vs_service*)
		kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
	if (svc == NULL) {
		IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
		ret = -ENOMEM;
		goto out_err;
	}
	memset(svc, 0, sizeof(struct ip_vs_service));

	/* copy the service parameters from the user rule */
	svc->protocol = ur->protocol;
	svc->addr = ur->vaddr;
	svc->port = ur->vport;
	svc->fwmark = ur->vfwmark;
	svc->flags = ur->vs_flags;
	svc->timeout = ur->timeout * HZ;
	svc->netmask = ur->netmask;

	INIT_LIST_HEAD(&svc->destinations);
	svc->sched_lock = RW_LOCK_UNLOCKED;
	svc->stats.lock = SPIN_LOCK_UNLOCKED;

	/*
	 *    Bind the scheduler
	 */
	ret = ip_vs_bind_scheduler(svc, sched);
	if (ret) {
		goto out_err;
	}

	/*
	 *    Update the virtual service counters
	 */
	if (svc->port == FTPPORT)
		atomic_inc(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_inc(&ip_vs_nullsvc_counter);

	/*
	 *    I'm the first user of the service
	 */
	atomic_set(&svc->usecnt, 1);
	atomic_set(&svc->refcnt, 0);

	ip_vs_new_estimator(&svc->stats);
	ip_vs_num_services++;

	/*
	 *    Hash the service into the service table
	 */
	write_lock_bh(&__ip_vs_svc_lock);
	ip_vs_svc_hash(svc);
	write_unlock_bh(&__ip_vs_svc_lock);

	*svc_p = svc;
	return 0;

  out_err:
	if (svc)
		kfree(svc);
	ip_vs_scheduler_put(sched);
  out_mod_dec:
	MOD_DEC_USE_COUNT;
	return ret;
}
1099 
1100 
/*
 *	Edit a service and bind it with a new scheduler
 *
 *	Updates the flags/timeout/netmask of an existing service and,
 *	if the scheduler name changed, swaps the scheduler in place.
 */
static int ip_vs_edit_service(struct ip_vs_service *svc,
			      struct ip_vs_rule_user *ur)
{
	struct ip_vs_scheduler *sched, *old_sched;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'ur->sched_name'
	 */
	sched = ip_vs_scheduler_get(ur->sched_name);
	if (sched == NULL) {
		IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
			   ur->sched_name);
		return -ENOENT;
	}

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	while (atomic_read(&svc->usecnt) > 1) {};

	/*
	 * Set the flags and timeout value
	 * (keep IP_VS_SVC_F_HASHED: the service stays in the table)
	 */
	svc->flags = ur->vs_flags | IP_VS_SVC_F_HASHED;
	svc->timeout = ur->timeout * HZ;
	svc->netmask = ur->netmask;

	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			/* keep 'sched' in old_sched so that the reference
			   we took above is released below */
			old_sched = sched;
			goto out;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
		}
	}

  out:
	write_unlock_bh(&__ip_vs_svc_lock);

	/* release the scheduler reference we no longer need */
	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	return ret;
}
1171 
1172 
1173 /*
1174  *  Delete a service from the service list
1175  *  The service must be unlinked, unlocked and not referenced!
1176  */
__ip_vs_del_service(struct ip_vs_service * svc)1177 static void __ip_vs_del_service(struct ip_vs_service *svc)
1178 {
1179 	struct list_head *l;
1180 	struct ip_vs_dest *dest;
1181 	struct ip_vs_scheduler *old_sched;
1182 
1183 	ip_vs_num_services--;
1184 	ip_vs_kill_estimator(&svc->stats);
1185 
1186 	/*
1187 	 *    Unbind scheduler
1188 	 */
1189 	old_sched = svc->scheduler;
1190 	ip_vs_unbind_scheduler(svc);
1191 	if (old_sched && old_sched->module)
1192 		__MOD_DEC_USE_COUNT(old_sched->module);
1193 
1194 	/*
1195 	 *    Unlink the whole destination list
1196 	 */
1197 	l = &svc->destinations;
1198 	while (l->next != l) {
1199 		dest = list_entry(l->next, struct ip_vs_dest, n_list);
1200 		__ip_vs_unlink_dest(svc, dest, 0);
1201 		__ip_vs_del_dest(dest);
1202 	}
1203 
1204 	/*
1205 	 *    Update the virtual service counters
1206 	 */
1207 	if (svc->port == FTPPORT)
1208 		atomic_dec(&ip_vs_ftpsvc_counter);
1209 	else if (svc->port == 0)
1210 		atomic_dec(&ip_vs_nullsvc_counter);
1211 
1212 	/*
1213 	 *    Free the service if nobody refers to it
1214 	 */
1215 	if (atomic_read(&svc->refcnt) == 0)
1216 		kfree(svc);
1217 	MOD_DEC_USE_COUNT;
1218 }
1219 
1220 /*
1221  *  Delete a service from the service list
1222  */
ip_vs_del_service(struct ip_vs_service * svc)1223 static int ip_vs_del_service(struct ip_vs_service *svc)
1224 {
1225 	if (svc == NULL)
1226 		return -EEXIST;
1227 
1228 	/*
1229 	 * Unhash it from the service table
1230 	 */
1231 	write_lock_bh(&__ip_vs_svc_lock);
1232 
1233 	ip_vs_svc_unhash(svc);
1234 
1235 	/*
1236 	 * Wait until all the svc users go away.
1237 	 */
1238 	while (atomic_read(&svc->usecnt) > 1) {};
1239 
1240 	__ip_vs_del_service(svc);
1241 
1242 	write_unlock_bh(&__ip_vs_svc_lock);
1243 
1244 	return 0;
1245 }
1246 
1247 
1248 /*
1249  *  Flush all the virtual services
1250  */
ip_vs_flush(void)1251 static int ip_vs_flush(void)
1252 {
1253 	int idx;
1254 	struct ip_vs_service *svc;
1255 	struct list_head *l;
1256 
1257 	/*
1258 	 * Flush the service table hashed by <protocol,addr,port>
1259 	 */
1260 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1261 		l = &ip_vs_svc_table[idx];
1262 		while (l->next != l) {
1263 			svc = list_entry(l->next,struct ip_vs_service,s_list);
1264 			write_lock_bh(&__ip_vs_svc_lock);
1265 			ip_vs_svc_unhash(svc);
1266 			/*
1267 			 * Wait until all the svc users go away.
1268 			 */
1269 			while (atomic_read(&svc->usecnt) > 0) {};
1270 			__ip_vs_del_service(svc);
1271 			write_unlock_bh(&__ip_vs_svc_lock);
1272 		}
1273 	}
1274 
1275 	/*
1276 	 * Flush the service table hashed by fwmark
1277 	 */
1278 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1279 		l = &ip_vs_svc_fwm_table[idx];
1280 		while (l->next != l) {
1281 			svc = list_entry(l->next,struct ip_vs_service,f_list);
1282 			write_lock_bh(&__ip_vs_svc_lock);
1283 			ip_vs_svc_unhash(svc);
1284 			/*
1285 			 * Wait until all the svc users go away.
1286 			 */
1287 			while (atomic_read(&svc->usecnt) > 0) {};
1288 			__ip_vs_del_service(svc);
1289 			write_unlock_bh(&__ip_vs_svc_lock);
1290 		}
1291 	}
1292 
1293 	return 0;
1294 }
1295 
1296 
1297 /*
1298  *  Zero counters in a service or all services
1299  */
ip_vs_zero_service(struct ip_vs_service * svc)1300 static int ip_vs_zero_service(struct ip_vs_service *svc)
1301 {
1302 	struct list_head *l;
1303 	struct ip_vs_dest *dest;
1304 
1305 	write_lock_bh(&__ip_vs_svc_lock);
1306 	list_for_each (l, &svc->destinations) {
1307 		dest = list_entry(l, struct ip_vs_dest, n_list);
1308 		__ip_vs_zero_stats(&dest->stats);
1309 	}
1310 	__ip_vs_zero_stats(&svc->stats);
1311 	write_unlock_bh(&__ip_vs_svc_lock);
1312 	return 0;
1313 }
1314 
ip_vs_zero_all(void)1315 static int ip_vs_zero_all(void)
1316 {
1317 	int idx;
1318 	struct list_head *l;
1319 	struct ip_vs_service *svc;
1320 
1321 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1322 		list_for_each (l, &ip_vs_svc_table[idx]) {
1323 			svc = list_entry(l, struct ip_vs_service, s_list);
1324 			ip_vs_zero_service(svc);
1325 		}
1326 	}
1327 
1328 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1329 		list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1330 			svc = list_entry(l, struct ip_vs_service, f_list);
1331 			ip_vs_zero_service(svc);
1332 		}
1333 	}
1334 
1335 	__ip_vs_zero_stats(&ip_vs_stats);
1336 	return 0;
1337 }
1338 
1339 
/*
 *	sysctl handler for the defense-strategy variables (drop_entry,
 *	drop_packet, secure_tcp).  Delegates to proc_dointvec(), then on a
 *	successful write clamps the value to the valid range 0..3 and, if
 *	it changed legally, re-runs update_defense_level() with BHs off.
 */
static int ip_vs_sysctl_defense_mode(ctl_table *ctl, int write,
	struct file * filp, void *buffer, size_t *lenp)
{
	int *valp = ctl->data;
	int val = *valp;	/* remember the pre-write value */
	int ret;

	ret = proc_dointvec(ctl, write, filp, buffer, lenp);
	if (write && (*valp != val)) {
		if ((*valp < 0) || (*valp > 3)) {
			/* Restore the correct value */
			*valp = val;
		} else {
			local_bh_disable();
			update_defense_level();
			local_bh_enable();
		}
	}
	return ret;
}
1360 
1361 
1362 /*
1363  *      IPVS sysctl table
1364  */
1365 struct ip_vs_sysctl_table {
1366 	struct ctl_table_header *sysctl_header;
1367 	ctl_table vs_vars[NET_IPV4_VS_LAST];
1368 	ctl_table vs_dir[2];
1369 	ctl_table ipv4_dir[2];
1370 	ctl_table root_dir[2];
1371 };
1372 
1373 
/*
 * Static initializer for the IPVS sysctl tree.  Each leaf uses the
 * positional ctl_table layout of this kernel generation:
 *   { ctl_name, procname, data, maxlen, mode, child, proc_handler }
 * (child is NULL for leaves; the defense-mode entries use the custom
 * ip_vs_sysctl_defense_mode handler, timeouts use proc_dointvec_jiffies).
 */
static struct ip_vs_sysctl_table ipv4_vs_table = {
	NULL,
	{{NET_IPV4_VS_AMEMTHRESH, "amemthresh",
	  &sysctl_ip_vs_amemthresh, sizeof(int), 0644, NULL,
	  &proc_dointvec},
#ifdef CONFIG_IP_VS_DEBUG
	 {NET_IPV4_VS_DEBUG_LEVEL, "debug_level",
	  &sysctl_ip_vs_debug_level, sizeof(int), 0644, NULL,
	  &proc_dointvec},
#endif
	 {NET_IPV4_VS_AMDROPRATE, "am_droprate",
	  &sysctl_ip_vs_am_droprate, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_IPV4_VS_DROP_ENTRY, "drop_entry",
	  &sysctl_ip_vs_drop_entry, sizeof(int), 0644, NULL,
	  &ip_vs_sysctl_defense_mode},
	 {NET_IPV4_VS_DROP_PACKET, "drop_packet",
	  &sysctl_ip_vs_drop_packet, sizeof(int), 0644, NULL,
	  &ip_vs_sysctl_defense_mode},
	 {NET_IPV4_VS_SECURE_TCP, "secure_tcp",
	  &sysctl_ip_vs_secure_tcp, sizeof(int), 0644, NULL,
	  &ip_vs_sysctl_defense_mode},
	 /* per-state timeouts of the DoS-defense timeout table (jiffies) */
	 {NET_IPV4_VS_TO_ES, "timeout_established",
	  &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_TO_SS, "timeout_synsent",
	  &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_TO_SR, "timeout_synrecv",
	  &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_TO_FW, "timeout_finwait",
	  &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_TO_TW, "timeout_timewait",
	  &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_TO_CL, "timeout_close",
	  &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_TO_CW, "timeout_closewait",
	  &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_TO_LA, "timeout_lastack",
	  &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_TO_LI, "timeout_listen",
	  &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_TO_SA, "timeout_synack",
	  &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_TO_UDP, "timeout_udp",
	  &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_TO_ICMP, "timeout_icmp",
	  &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	 {NET_IPV4_VS_CACHE_BYPASS, "cache_bypass",
	  &sysctl_ip_vs_cache_bypass, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn",
	  &sysctl_ip_vs_expire_nodest_conn, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold",
	  &sysctl_ip_vs_sync_threshold, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send",
	  &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, "expire_quiescent_template",
	  &sysctl_ip_vs_expire_quiescent_template, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {0}},
	/* directory chain: vs -> ipv4 -> net, each zero-terminated */
	{{NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table.vs_vars},
	 {0}},
	{{NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_vs_table.vs_dir},
	 {0}},
	{{CTL_NET, "net", NULL, 0, 0555, ipv4_vs_table.ipv4_dir},
	 {0}}
};
1455 
1456 
1457 /*
1458  *	Write the contents of the VS rule table to a PROCfs file.
1459  *	(It is kept just for backward compatibility)
1460  */
ip_vs_fwd_name(unsigned flags)1461 static inline char *ip_vs_fwd_name(unsigned flags)
1462 {
1463 	char *fwd;
1464 
1465 	switch (flags & IP_VS_CONN_F_FWD_MASK) {
1466 	case IP_VS_CONN_F_LOCALNODE:
1467 		fwd = "Local";
1468 		break;
1469 	case IP_VS_CONN_F_TUNNEL:
1470 		fwd = "Tunnel";
1471 		break;
1472 	case IP_VS_CONN_F_DROUTE:
1473 		fwd = "Route";
1474 		break;
1475 	default:
1476 		fwd = "Masq";
1477 	}
1478 	return fwd;
1479 }
1480 
/*
 *	Write the contents of the VS rule table to a /proc file, using the
 *	classic get_info pagination protocol: 'pos' counts virtual output
 *	bytes, records before 'offset' are skipped, and *start/len tell
 *	the caller which part of 'buf' holds the wanted window.
 *	(Kept just for backward compatibility.)
 */
static int ip_vs_get_info(char *buf, char **start, off_t offset, int length)
{
	int len=0;
	off_t pos=0;
	char temp[64], temp2[32];
	int idx;
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	struct list_head *l, *e, *p, *q;

	/*
	 * Note: since the length of the buffer is usually the multiple
	 * of 512, it is good to use fixed record of the divisor of 512,
	 * so that records won't be truncated at buffer boundary.
	 */
	pos = 192;	/* 3 x 64-byte header lines */
	if (pos > offset) {
		sprintf(temp,
			"IP Virtual Server version %d.%d.%d (size=%d)",
			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
		len += sprintf(buf+len, "%-63s\n", temp);
		len += sprintf(buf+len, "%-63s\n",
			       "Prot LocalAddress:Port Scheduler Flags");
		len += sprintf(buf+len, "%-63s\n",
			       "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn");
	}

	read_lock_bh(&__ip_vs_svc_lock);

	/* print the service table hashed by <protocol,addr,port> */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		l = &ip_vs_svc_table[idx];
		for (e=l->next; e!=l; e=e->next) {
			svc = list_entry(e, struct ip_vs_service, s_list);
			pos += 64;	/* every record is a fixed 64 bytes */
			if (pos > offset) {
				if (svc->flags & IP_VS_SVC_F_PERSISTENT)
					sprintf(temp2, "persistent %d %08X",
						svc->timeout,
						ntohl(svc->netmask));
				else
					temp2[0] = '\0';

				sprintf(temp, "%s  %08X:%04X %s %s",
					ip_vs_proto_name(svc->protocol),
					ntohl(svc->addr),
					ntohs(svc->port),
					svc->scheduler->name, temp2);
				len += sprintf(buf+len, "%-63s\n", temp);
				if (len >= length)
					goto done;
			}

			/* one record per destination of this service */
			p = &svc->destinations;
			for (q=p->next; q!=p; q=q->next) {
				dest = list_entry(q, struct ip_vs_dest, n_list);
				pos += 64;
				if (pos <= offset)
					continue;
				sprintf(temp,
					"  -> %08X:%04X      %-7s %-6d %-10d %-10d",
					ntohl(dest->addr),
					ntohs(dest->port),
					ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					atomic_read(&dest->weight),
					atomic_read(&dest->activeconns),
					atomic_read(&dest->inactconns));
				len += sprintf(buf+len, "%-63s\n", temp);
				if (len >= length)
					goto done;
			}
		}
	}

	/* print the service table hashed by fwmark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		l = &ip_vs_svc_fwm_table[idx];
		for (e=l->next; e!=l; e=e->next) {
			svc = list_entry(e, struct ip_vs_service, f_list);
			pos += 64;
			if (pos > offset) {
				if (svc->flags & IP_VS_SVC_F_PERSISTENT)
					sprintf(temp2, "persistent %d %08X",
						svc->timeout,
						ntohl(svc->netmask));
				else
					temp2[0] = '\0';

				sprintf(temp, "FWM  %08X %s %s",
					svc->fwmark,
					svc->scheduler->name, temp2);
				len += sprintf(buf+len, "%-63s\n", temp);
				if (len >= length)
					goto done;
			}

			p = &svc->destinations;
			for (q=p->next; q!=p; q=q->next) {
				dest = list_entry(q, struct ip_vs_dest, n_list);
				pos += 64;
				if (pos <= offset)
					continue;
				sprintf(temp,
					"  -> %08X:%04X      %-7s %-6d %-10d %-10d",
					ntohl(dest->addr),
					ntohs(dest->port),
					ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					atomic_read(&dest->weight),
					atomic_read(&dest->activeconns),
					atomic_read(&dest->inactconns));
				len += sprintf(buf+len, "%-63s\n", temp);
				if (len >= length)
					goto done;
			}
		}
	}

  done:
	read_unlock_bh(&__ip_vs_svc_lock);

	*start = buf+len-(pos-offset);          /* Start of wanted data */
	len = pos-offset;
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
1609 
1610 
/* Global IPVS statistics; its counters are read/updated under ip_vs_stats.lock */
struct ip_vs_stats ip_vs_stats;
1612 
1613 static int
ip_vs_stats_get_info(char * buf,char ** start,off_t offset,int length)1614 ip_vs_stats_get_info(char *buf, char **start, off_t offset, int length)
1615 {
1616 	int len=0;
1617 	off_t pos=0;
1618 	char temp[64];
1619 
1620 	pos += 320;
1621 	if (pos > offset) {
1622 		len += sprintf(buf+len, "%-63s\n%-63s\n",
1623 /*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
1624 			       "   Total Incoming Outgoing         Incoming         Outgoing",
1625 			       "   Conns  Packets  Packets            Bytes            Bytes");
1626 
1627 		spin_lock_bh(&ip_vs_stats.lock);
1628 		sprintf(temp, "%8X %8X %8X %8X%08X %8X%08X",
1629 			ip_vs_stats.conns,
1630 			ip_vs_stats.inpkts,
1631 			ip_vs_stats.outpkts,
1632 			(__u32)(ip_vs_stats.inbytes>>32),
1633 			(__u32)ip_vs_stats.inbytes,
1634 			(__u32)(ip_vs_stats.outbytes>>32),
1635 			(__u32)ip_vs_stats.outbytes);
1636 		len += sprintf(buf+len, "%-62s\n\n", temp);
1637 
1638 		len += sprintf(buf+len, "%-63s\n",
1639 /*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
1640 			       " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s");
1641 		sprintf(temp, "%8X %8X %8X %16X %16X",
1642 			ip_vs_stats.cps,
1643 			ip_vs_stats.inpps,
1644 			ip_vs_stats.outpps,
1645 			ip_vs_stats.inbps,
1646 			ip_vs_stats.outbps);
1647 		len += sprintf(buf+len, "%-63s\n", temp);
1648 
1649 		spin_unlock_bh(&ip_vs_stats.lock);
1650 	}
1651 
1652 	*start = buf+len-(pos-offset);          /* Start of wanted data */
1653 	len = pos-offset;
1654 	if (len > length)
1655 		len = length;
1656 	if (len < 0)
1657 		len = 0;
1658 	return len;
1659 }
1660 
1661 
1662 /*
1663  * Set timeout values for tcp tcpfin udp in the vs_timeout_table.
1664  */
ip_vs_set_timeouts(struct ip_vs_rule_user * u)1665 static int ip_vs_set_timeouts(struct ip_vs_rule_user *u)
1666 {
1667 	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1668 		  u->tcp_timeout,
1669 		  u->tcp_fin_timeout,
1670 		  u->udp_timeout);
1671 
1672 	if (u->tcp_timeout) {
1673 		vs_timeout_table.timeout[IP_VS_S_ESTABLISHED]
1674 			= u->tcp_timeout * HZ;
1675 	}
1676 
1677 	if (u->tcp_fin_timeout) {
1678 		vs_timeout_table.timeout[IP_VS_S_FIN_WAIT]
1679 			= u->tcp_fin_timeout * HZ;
1680 	}
1681 
1682 	if (u->udp_timeout) {
1683 		vs_timeout_table.timeout[IP_VS_S_UDP]
1684 			= u->udp_timeout * HZ;
1685 	}
1686 	return 0;
1687 }
1688 
1689 
1690 static int
do_ip_vs_set_ctl(struct sock * sk,int cmd,void * user,unsigned int len)1691 do_ip_vs_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1692 {
1693 	int ret;
1694 	struct ip_vs_rule_user *urule;
1695 	struct ip_vs_service *svc = NULL;
1696 
1697 	if (!capable(CAP_NET_ADMIN))
1698 		return -EPERM;
1699 
1700 	/*
1701 	 * Check the size of mm, no overflow...
1702 	 * len > 128000 is a sanity check.
1703 	 */
1704 	if (len < sizeof(struct ip_vs_rule_user)) {
1705 		IP_VS_ERR("set_ctl: len %u < %Zu\n",
1706 			  len, sizeof(struct ip_vs_rule_user));
1707 		return -EINVAL;
1708 	} else if (len > 128000) {
1709 		IP_VS_ERR("set_ctl: len %u > 128000\n", len);
1710 		return -EINVAL;
1711 	} else if ((urule = kmalloc(len, GFP_KERNEL)) == NULL) {
1712 		IP_VS_ERR("set_ctl: no mem for len %u\n", len);
1713 		return -ENOMEM;
1714 	} else if (copy_from_user(urule, user, len) != 0) {
1715 		ret = -EFAULT;
1716 		goto out_free;
1717 	}
1718 
1719 	MOD_INC_USE_COUNT;
1720 	if (down_interruptible(&__ip_vs_mutex)) {
1721 		ret = -ERESTARTSYS;
1722 		goto out_dec;
1723 	}
1724 
1725 	if (cmd == IP_VS_SO_SET_FLUSH) {
1726 		/* Flush the virtual service */
1727 		ret = ip_vs_flush();
1728 		goto out_unlock;
1729 	} else if (cmd == IP_VS_SO_SET_TIMEOUTS) {
1730 		/* Set timeout values for (tcp tcpfin udp) */
1731 		ret = ip_vs_set_timeouts(urule);
1732 		goto out_unlock;
1733 	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1734 		ret = start_sync_thread(urule->state, urule->mcast_ifn,
1735 					urule->syncid);
1736 		goto out_unlock;
1737 	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1738 		ret = stop_sync_thread(urule->state);
1739 		goto out_unlock;
1740 	} else if (cmd == IP_VS_SO_SET_ZERO) {
1741 		/* if no service address is set, zero counters in all */
1742 		if (!urule->vfwmark && !urule->vaddr && !urule->vport) {
1743 			ret = ip_vs_zero_all();
1744 			goto out_unlock;
1745 		}
1746 	}
1747 
1748 	/*
1749 	 * Check for valid protocol: TCP or UDP. Even for fwmark!=0
1750 	 */
1751 	if (urule->protocol!=IPPROTO_TCP && urule->protocol!=IPPROTO_UDP) {
1752 		IP_VS_ERR("set_ctl: invalid protocol %d %d.%d.%d.%d:%d %s\n",
1753 			  urule->protocol, NIPQUAD(urule->vaddr),
1754 			  ntohs(urule->vport), urule->sched_name);
1755 		ret = -EFAULT;
1756 		goto out_unlock;
1757 	}
1758 
1759 	/*
1760 	 * Lookup the exact service by <protocol, vaddr, vport> or fwmark
1761 	 */
1762 	if (urule->vfwmark == 0)
1763 		svc = __ip_vs_service_get(urule->protocol,
1764 					  urule->vaddr, urule->vport);
1765 	else
1766 		svc = __ip_vs_svc_fwm_get(urule->vfwmark);
1767 
1768 	if (cmd != IP_VS_SO_SET_ADD
1769 	    && (svc == NULL || svc->protocol != urule->protocol)) {
1770 		ret = -ESRCH;
1771 		goto out_unlock;
1772 	}
1773 
1774 	switch (cmd) {
1775 	case IP_VS_SO_SET_ADD:
1776 		if (svc != NULL)
1777 			ret = -EEXIST;
1778 		else
1779 			ret = ip_vs_add_service(urule, &svc);
1780 		break;
1781 	case IP_VS_SO_SET_EDIT:
1782 		ret = ip_vs_edit_service(svc, urule);
1783 		break;
1784 	case IP_VS_SO_SET_DEL:
1785 		ret = ip_vs_del_service(svc);
1786 		if (!ret)
1787 			goto out_unlock;
1788 		break;
1789 	case IP_VS_SO_SET_ADDDEST:
1790 		ret = ip_vs_add_dest(svc, urule);
1791 		break;
1792 	case IP_VS_SO_SET_EDITDEST:
1793 		ret = ip_vs_edit_dest(svc, urule);
1794 		break;
1795 	case IP_VS_SO_SET_DELDEST:
1796 		ret = ip_vs_del_dest(svc, urule);
1797 		break;
1798 	case IP_VS_SO_SET_ZERO:
1799 		ret = ip_vs_zero_service(svc);
1800 		break;
1801 	default:
1802 		ret = -EINVAL;
1803 	}
1804 
1805 	if (svc)
1806 		ip_vs_service_put(svc);
1807 
1808   out_unlock:
1809 	up(&__ip_vs_mutex);
1810   out_dec:
1811 	MOD_DEC_USE_COUNT;
1812   out_free:
1813 	kfree(urule);
1814 	return ret;
1815 }
1816 
1817 
1818 static inline void
__ip_vs_copy_stats(struct ip_vs_stats_user * dst,struct ip_vs_stats * src)1819 __ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
1820 {
1821 	spin_lock_bh(&src->lock);
1822 	memcpy(dst, src, (char*)&src->lock - (char*)src);
1823 	spin_unlock_bh(&src->lock);
1824 }
1825 
1826 static inline int
__ip_vs_get_service_entries(const struct ip_vs_get_services * get,struct ip_vs_get_services * uptr)1827 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
1828 			    struct ip_vs_get_services *uptr)
1829 {
1830 	int idx, count=0;
1831 	struct ip_vs_service *svc;
1832 	struct list_head *l;
1833 	struct ip_vs_service_user entry;
1834 	int ret = 0;
1835 
1836 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1837 		list_for_each (l, &ip_vs_svc_table[idx]) {
1838 			if (count >= get->num_services)
1839 				goto out;
1840 			svc = list_entry(l, struct ip_vs_service, s_list);
1841 			entry.protocol = svc->protocol;
1842 			entry.addr = svc->addr;
1843 			entry.port = svc->port;
1844 			entry.fwmark = svc->fwmark;
1845 			strncpy(entry.sched_name, svc->scheduler->name, sizeof(entry.sched_name));
1846 			entry.sched_name[sizeof(entry.sched_name) - 1] = 0;
1847 			entry.flags = svc->flags;
1848 			entry.timeout = svc->timeout / HZ;
1849 			entry.netmask = svc->netmask;
1850 			entry.num_dests = svc->num_dests;
1851 			__ip_vs_copy_stats(&entry.stats, &svc->stats);
1852 			if (copy_to_user(&uptr->entrytable[count],
1853 					 &entry, sizeof(entry))) {
1854 				ret = -EFAULT;
1855 				goto out;
1856 			}
1857 			count++;
1858 		}
1859 	}
1860 
1861 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1862 		list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1863 			if (count >= get->num_services)
1864 				goto out;
1865 			svc = list_entry(l, struct ip_vs_service, f_list);
1866 			entry.protocol = svc->protocol;
1867 			entry.addr = svc->addr;
1868 			entry.port = svc->port;
1869 			entry.fwmark = svc->fwmark;
1870 			strncpy(entry.sched_name, svc->scheduler->name, sizeof(entry.sched_name));
1871 			entry.sched_name[sizeof(entry.sched_name) - 1] = 0;
1872 			entry.flags = svc->flags;
1873 			entry.timeout = svc->timeout / HZ;
1874 			entry.netmask = svc->netmask;
1875 			entry.num_dests = svc->num_dests;
1876 			__ip_vs_copy_stats(&entry.stats, &svc->stats);
1877 			if (copy_to_user(&uptr->entrytable[count],
1878 					 &entry, sizeof(entry))) {
1879 				ret = -EFAULT;
1880 				goto out;
1881 			}
1882 			count++;
1883 		}
1884 	}
1885  out:
1886 	return ret;
1887 }
1888 
1889 static inline int
__ip_vs_get_dest_entries(const struct ip_vs_get_dests * get,struct ip_vs_get_dests * uptr)1890 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
1891 			 struct ip_vs_get_dests *uptr)
1892 {
1893 	struct ip_vs_service *svc;
1894 	int ret = 0;
1895 
1896 	if (get->fwmark)
1897 		svc = __ip_vs_svc_fwm_get(get->fwmark);
1898 	else
1899 		svc = __ip_vs_service_get(get->protocol,
1900 					  get->addr, get->port);
1901 	if (svc) {
1902 		int count = 0;
1903 		struct ip_vs_dest *dest;
1904 		struct list_head *l, *e;
1905 		struct ip_vs_dest_user entry;
1906 
1907 		l = &svc->destinations;
1908 		for (e=l->next; e!=l; e=e->next) {
1909 			if (count >= get->num_dests)
1910 				break;
1911 			dest = list_entry(e, struct ip_vs_dest, n_list);
1912 			entry.addr = dest->addr;
1913 			entry.port = dest->port;
1914 			entry.flags = atomic_read(&dest->conn_flags);
1915 			entry.weight = atomic_read(&dest->weight);
1916 			entry.activeconns = atomic_read(&dest->activeconns);
1917 			entry.inactconns = atomic_read(&dest->inactconns);
1918 			__ip_vs_copy_stats(&entry.stats, &dest->stats);
1919 			if (copy_to_user(&uptr->entrytable[count],
1920 					 &entry, sizeof(entry))) {
1921 				ret = -EFAULT;
1922 				break;
1923 			}
1924 			count++;
1925 		}
1926 		ip_vs_service_put(svc);
1927 	} else
1928 		ret = -ESRCH;
1929 	return ret;
1930 }
1931 
1932 static inline void
__ip_vs_get_timeouts(struct ip_vs_timeout_user * u)1933 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
1934 {
1935 	u->tcp_timeout = vs_timeout_table.timeout[IP_VS_S_ESTABLISHED] / HZ;
1936 	u->tcp_fin_timeout = vs_timeout_table.timeout[IP_VS_S_FIN_WAIT] / HZ;
1937 	u->udp_timeout = vs_timeout_table.timeout[IP_VS_S_UDP] / HZ;
1938 }
1939 
1940 static int
do_ip_vs_get_ctl(struct sock * sk,int cmd,void * user,int * len)1941 do_ip_vs_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1942 {
1943 	int ret = 0;
1944 
1945 	if (!capable(CAP_NET_ADMIN))
1946 		return -EPERM;
1947 
1948 	if (down_interruptible(&__ip_vs_mutex))
1949 		return -ERESTARTSYS;
1950 
1951 	switch (cmd) {
1952 	case IP_VS_SO_GET_VERSION:
1953 	{
1954 		char buf[64];
1955 
1956 		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
1957 			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1958 		if (*len < strlen(buf)+1) {
1959 			ret = -EINVAL;
1960 			goto out;
1961 		}
1962 		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
1963 			ret = -EFAULT;
1964 			goto out;
1965 		}
1966 		*len = strlen(buf)+1;
1967 	}
1968 	break;
1969 
1970 	case IP_VS_SO_GET_INFO:
1971 	{
1972 		struct ip_vs_getinfo info;
1973 		info.version = IP_VS_VERSION_CODE;
1974 		info.size = IP_VS_CONN_TAB_SIZE;
1975 		info.num_services = ip_vs_num_services;
1976 		if (copy_to_user(user, &info, sizeof(info)) != 0)
1977 			ret = -EFAULT;
1978 	}
1979 	break;
1980 
1981 	case IP_VS_SO_GET_SERVICES:
1982 	{
1983 		struct ip_vs_get_services get;
1984 
1985 		if (*len < sizeof(get)) {
1986 			IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
1987 			ret = -EINVAL;
1988 			goto out;
1989 		}
1990 		if (copy_from_user(&get, user, sizeof(get))) {
1991 			ret = -EFAULT;
1992 			goto out;
1993 		}
1994 		if (*len != (sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services)) {
1995 			IP_VS_ERR("length: %u != %Zu\n", *len,
1996 				  sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services);
1997 			ret = -EINVAL;
1998 			goto out;
1999 		}
2000 		ret = __ip_vs_get_service_entries(&get, user);
2001 	}
2002 	break;
2003 
2004 	case IP_VS_SO_GET_SERVICE:
2005 	{
2006 		struct ip_vs_service_user get;
2007 		struct ip_vs_service *svc;
2008 
2009 		if (*len != sizeof(get)) {
2010 			IP_VS_ERR("length: %u != %Zu\n", *len, sizeof(get));
2011 			ret = -EINVAL;
2012 			goto out;
2013 		}
2014 		if (copy_from_user(&get, user, sizeof(get))) {
2015 			ret = -EFAULT;
2016 			goto out;
2017 		}
2018 
2019 		if (get.fwmark)
2020 			svc = __ip_vs_svc_fwm_get(get.fwmark);
2021 		else
2022 			svc = __ip_vs_service_get(get.protocol,
2023 						  get.addr, get.port);
2024 		if (svc) {
2025 			strncpy(get.sched_name, svc->scheduler->name, sizeof(get.sched_name));
2026 			get.sched_name[sizeof(get.sched_name) - 1] = 0;
2027 			get.flags = svc->flags;
2028 			get.timeout = svc->timeout / HZ;
2029 			get.netmask = svc->netmask;
2030 			get.num_dests = svc->num_dests;
2031 			__ip_vs_copy_stats(&get.stats, &svc->stats);
2032 			if (copy_to_user(user, &get, *len) != 0)
2033 				ret = -EFAULT;
2034 			ip_vs_service_put(svc);
2035 		} else
2036 			ret = -ESRCH;
2037 	}
2038 	break;
2039 
2040 	case IP_VS_SO_GET_DESTS:
2041 	{
2042 		struct ip_vs_get_dests get;
2043 
2044 		if (*len < sizeof(get)) {
2045 			IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
2046 			ret = -EINVAL;
2047 			goto out;
2048 		}
2049 		if (copy_from_user(&get, user, sizeof(get))) {
2050 			ret = -EFAULT;
2051 			goto out;
2052 		}
2053 		if (*len != (sizeof(get) +
2054 			     sizeof(struct ip_vs_dest_user)*get.num_dests)) {
2055 			IP_VS_ERR("length: %u != %Zu\n", *len,
2056 				  sizeof(get)+sizeof(struct ip_vs_dest_user)*get.num_dests);
2057 			ret = -EINVAL;
2058 			goto out;
2059 		}
2060 		ret = __ip_vs_get_dest_entries(&get, user);
2061 	}
2062 	break;
2063 
2064 	case IP_VS_SO_GET_TIMEOUTS:
2065 	{
2066 		struct ip_vs_timeout_user u;
2067 
2068 		if (*len < sizeof(u)) {
2069 			IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2070 			ret = -EINVAL;
2071 			goto out;
2072 		}
2073 		__ip_vs_get_timeouts(&u);
2074 		if (copy_to_user(user, &u, sizeof(u)) != 0)
2075 			ret = -EFAULT;
2076 	}
2077 	break;
2078 
2079 	case IP_VS_SO_GET_DAEMON:
2080 	{
2081 		struct ip_vs_daemon_user u;
2082 
2083 		if (*len < sizeof(u)) {
2084 			IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2085 			ret = -EINVAL;
2086 			goto out;
2087 		}
2088 		u.state = ip_vs_sync_state;
2089 		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2090 			strncpy(u.mcast_master_ifn, ip_vs_mcast_master_ifn, sizeof(u.mcast_master_ifn));
2091 			u.mcast_master_ifn[sizeof(u.mcast_master_ifn) - 1] = 0;
2092 		}
2093 		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2094 			strncpy(u.mcast_backup_ifn, ip_vs_mcast_backup_ifn, sizeof(u.mcast_backup_ifn));
2095 			u.mcast_backup_ifn[sizeof(u.mcast_backup_ifn) - 1] = 0;
2096 		}
2097 		if (copy_to_user(user, &u, sizeof(u)) != 0)
2098 			ret = -EFAULT;
2099 	}
2100 	break;
2101 
2102 	default:
2103 		ret = -EINVAL;
2104 	}
2105 
2106   out:
2107 	up(&__ip_vs_mutex);
2108 	return ret;
2109 }
2110 
2111 
/*
 * Netfilter sockopt registration for IPVS: the set range dispatches to
 * do_ip_vs_set_ctl and the get range to do_ip_vs_get_ctl (positional
 * nf_sockopt_ops initializer of this kernel generation).
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
	{ NULL, NULL }, PF_INET,
	IP_VS_BASE_CTL, IP_VS_SO_SET_MAX+1, do_ip_vs_set_ctl,
	IP_VS_BASE_CTL, IP_VS_SO_GET_MAX+1, do_ip_vs_get_ctl
};
2117 
2118 
ip_vs_control_init(void)2119 int ip_vs_control_init(void)
2120 {
2121 	int ret;
2122 	int idx;
2123 
2124 	EnterFunction(2);
2125 
2126 	ret = nf_register_sockopt(&ip_vs_sockopts);
2127 	if (ret) {
2128 		IP_VS_ERR("cannot register sockopt.\n");
2129 		return ret;
2130 	}
2131 
2132 	proc_net_create("ip_vs", 0, ip_vs_get_info);
2133 	proc_net_create("ip_vs_stats", 0, ip_vs_stats_get_info);
2134 
2135 	ipv4_vs_table.sysctl_header =
2136 		register_sysctl_table(ipv4_vs_table.root_dir, 0);
2137 	/*
2138 	 * Initilize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable,
2139 	 * ip_vs_schedulers.
2140 	 */
2141 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
2142 		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
2143 		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
2144 	}
2145 	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
2146 		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
2147 	}
2148 
2149 	memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
2150 	ip_vs_stats.lock = SPIN_LOCK_UNLOCKED;
2151 	ip_vs_new_estimator(&ip_vs_stats);
2152 
2153 	/* Hook the defense timer */
2154 	init_timer(&defense_timer);
2155 	defense_timer.function = defense_timer_handler;
2156 	defense_timer.expires = jiffies + DEFENSE_TIMER_PERIOD;
2157 	add_timer(&defense_timer);
2158 
2159 	LeaveFunction(2);
2160 	return 0;
2161 }
2162 
ip_vs_control_cleanup(void)2163 void ip_vs_control_cleanup(void)
2164 {
2165 	EnterFunction(2);
2166 	ip_vs_trash_cleanup();
2167 	del_timer_sync(&defense_timer);
2168 	ip_vs_kill_estimator(&ip_vs_stats);
2169 	unregister_sysctl_table(ipv4_vs_table.sysctl_header);
2170 	proc_net_remove("ip_vs_stats");
2171 	proc_net_remove("ip_vs");
2172 	nf_unregister_sockopt(&ip_vs_sockopts);
2173 	LeaveFunction(2);
2174 }
2175