1 /*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
8 * Version: $Id: ip_vs_ctl.c,v 1.30.2.3 2003/07/29 14:37:12 wensong Exp $
9 *
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * Peter Kese <peter.kese@ijs.si>
12 * Julian Anastasov <ja@ssi.bg>
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 * Changes:
20 *
21 */
22
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/types.h>
26 #include <linux/fs.h>
27 #include <linux/sysctl.h>
28 #include <linux/proc_fs.h>
29 #include <linux/timer.h>
30 #include <linux/swap.h>
31 #include <linux/proc_fs.h>
32
33 #include <linux/netfilter.h>
34 #include <linux/netfilter_ipv4.h>
35
36 #include <net/ip.h>
37 #include <net/sock.h>
38
39 #include <asm/uaccess.h>
40
41 #include <net/ip_vs.h>
42
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DECLARE_MUTEX(__ip_vs_mutex);

/* lock for service table */
rwlock_t __ip_vs_svc_lock = RW_LOCK_UNLOCKED;

/* lock for table with the real services */
static rwlock_t __ip_vs_rs_lock = RW_LOCK_UNLOCKED;

/* lock for state and timeout tables */
static rwlock_t __ip_vs_securetcp_lock = RW_LOCK_UNLOCKED;

/* lock for drop entry handling */
static spinlock_t __ip_vs_dropentry_lock = SPIN_LOCK_UNLOCKED;

/* lock for drop packet handling */
static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED;

/* 1/rate drop and drop-entry variables (set by update_defense_level) */
int ip_vs_drop_rate = 0;
int ip_vs_drop_counter = 0;
/* non-zero while the drop-entry defense is engaged */
atomic_t ip_vs_dropentry = ATOMIC_INIT(0);

/* number of virtual services */
static int ip_vs_num_services = 0;

/* sysctl variables */
/* defense strategies: mode 0=off, 1=auto (on when low memory, may step
   to 2), 2=auto (engaged, may step back to 1), 3=always on */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
static int sysctl_ip_vs_secure_tcp = 0;
/* available-memory threshold (in pages, as counted by si_meminfo) */
static int sysctl_ip_vs_amemthresh = 2048;
/* fixed drop rate used when drop_packet mode is 3 */
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold = 3;
int sysctl_ip_vs_nat_icmp_send = 0;
80
#ifdef CONFIG_IP_VS_DEBUG
/* current debug verbosity; only compiled in with CONFIG_IP_VS_DEBUG */
static int sysctl_ip_vs_debug_level = 0;

/* Return the current IPVS debug level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
89
90 /*
91 * update_defense_level is called from timer bh and from sysctl.
92 */
update_defense_level(void)93 static void update_defense_level(void)
94 {
95 struct sysinfo i;
96 int availmem;
97 int nomem;
98
99 /* we only count free and buffered memory (in pages) */
100 si_meminfo(&i);
101 availmem = i.freeram + i.bufferram;
102
103 nomem = (availmem < sysctl_ip_vs_amemthresh);
104
105 /* drop_entry */
106 spin_lock(&__ip_vs_dropentry_lock);
107 switch (sysctl_ip_vs_drop_entry) {
108 case 0:
109 atomic_set(&ip_vs_dropentry, 0);
110 break;
111 case 1:
112 if (nomem) {
113 atomic_set(&ip_vs_dropentry, 1);
114 sysctl_ip_vs_drop_entry = 2;
115 } else {
116 atomic_set(&ip_vs_dropentry, 0);
117 }
118 break;
119 case 2:
120 if (nomem) {
121 atomic_set(&ip_vs_dropentry, 1);
122 } else {
123 atomic_set(&ip_vs_dropentry, 0);
124 sysctl_ip_vs_drop_entry = 1;
125 };
126 break;
127 case 3:
128 atomic_set(&ip_vs_dropentry, 1);
129 break;
130 }
131 spin_unlock(&__ip_vs_dropentry_lock);
132
133 /* drop_packet */
134 spin_lock(&__ip_vs_droppacket_lock);
135 switch (sysctl_ip_vs_drop_packet) {
136 case 0:
137 ip_vs_drop_rate = 0;
138 break;
139 case 1:
140 if (nomem) {
141 ip_vs_drop_rate = ip_vs_drop_counter
142 = sysctl_ip_vs_amemthresh /
143 (sysctl_ip_vs_amemthresh - availmem);
144 sysctl_ip_vs_drop_packet = 2;
145 } else {
146 ip_vs_drop_rate = 0;
147 }
148 break;
149 case 2:
150 if (nomem) {
151 ip_vs_drop_rate = ip_vs_drop_counter
152 = sysctl_ip_vs_amemthresh /
153 (sysctl_ip_vs_amemthresh - availmem);
154 } else {
155 ip_vs_drop_rate = 0;
156 sysctl_ip_vs_drop_packet = 1;
157 }
158 break;
159 case 3:
160 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
161 break;
162 }
163 spin_unlock(&__ip_vs_droppacket_lock);
164
165 /* secure_tcp */
166 write_lock(&__ip_vs_securetcp_lock);
167 switch (sysctl_ip_vs_secure_tcp) {
168 case 0:
169 ip_vs_secure_tcp_set(0);
170 break;
171 case 1:
172 if (nomem) {
173 ip_vs_secure_tcp_set(1);
174 sysctl_ip_vs_secure_tcp = 2;
175 } else {
176 ip_vs_secure_tcp_set(0);
177 }
178 break;
179 case 2:
180 if (nomem) {
181 ip_vs_secure_tcp_set(1);
182 } else {
183 ip_vs_secure_tcp_set(0);
184 sysctl_ip_vs_secure_tcp = 1;
185 }
186 break;
187 case 3:
188 ip_vs_secure_tcp_set(1);
189 break;
190 }
191 write_unlock(&__ip_vs_securetcp_lock);
192 }
193
194
/*
 * Timer for checking the defense
 */
static struct timer_list defense_timer;
#define DEFENSE_TIMER_PERIOD	1*HZ

/*
 * Periodic timer handler: re-evaluates the defense level and, while
 * the drop-entry defense is engaged, randomly drops connection
 * entries.  Re-arms itself to fire again one period from now.
 */
static void defense_timer_handler(unsigned long data)
{
	update_defense_level();
	if (atomic_read(&ip_vs_dropentry))
		ip_vs_random_dropentry();

	mod_timer(&defense_timer, jiffies + DEFENSE_TIMER_PERIOD);
}
209
210
/*
 * Hash table: for virtual service lookups
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 * Hash table: for real service lookups
 */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];

/*
 * Trash for destinations: removed dests that are still referenced
 * by connection entries are parked here (see ip_vs_trash_get_dest).
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 * FTP & NULL virtual service counters: non-zero when at least one
 * FTP-port or port-zero (catch-all) service exists, so the fallback
 * lookups in ip_vs_service_get() can be skipped cheaply.
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
242
243
244 /*
245 * Returns hash value for virtual service
246 */
247 static __inline__ unsigned
ip_vs_svc_hashkey(unsigned proto,__u32 addr,__u16 port)248 ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
249 {
250 register unsigned porth = ntohs(port);
251
252 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
253 & IP_VS_SVC_TAB_MASK;
254 }
255
256 /*
257 * Returns hash value of fwmark for virtual service lookup
258 */
ip_vs_svc_fwm_hashkey(__u32 fwmark)259 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
260 {
261 return fwmark & IP_VS_SVC_TAB_MASK;
262 }
263
264 /*
265 * Hashes ip_vs_service in the ip_vs_svc_table by <proto,addr,port>
266 * or in the ip_vs_svc_fwm_table by fwmark.
267 * Should be called with locked tables.
268 * Returns bool success.
269 */
ip_vs_svc_hash(struct ip_vs_service * svc)270 static int ip_vs_svc_hash(struct ip_vs_service *svc)
271 {
272 unsigned hash;
273
274 if (svc->flags & IP_VS_SVC_F_HASHED) {
275 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
276 "called from %p\n", __builtin_return_address(0));
277 return 0;
278 }
279
280 if (svc->fwmark == 0) {
281 /*
282 * Hash it by <protocol,addr,port> in ip_vs_svc_table
283 */
284 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
285 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
286 } else {
287 /*
288 * Hash it by fwmark in ip_vs_svc_fwm_table
289 */
290 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
291 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
292 }
293
294 svc->flags |= IP_VS_SVC_F_HASHED;
295 /* increase its refcnt because it is referenced by the svc table */
296 atomic_inc(&svc->refcnt);
297 return 1;
298 }
299
300
301 /*
302 * Unhashes ip_vs_service from ip_vs_svc_table/ip_vs_svc_fwm_table.
303 * Should be called with locked tables.
304 * Returns bool success.
305 */
ip_vs_svc_unhash(struct ip_vs_service * svc)306 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
307 {
308 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
309 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
310 "called from %p\n", __builtin_return_address(0));
311 return 0;
312 }
313
314 if (svc->fwmark == 0) {
315 /*
316 * Remove it from the ip_vs_svc_table table.
317 */
318 list_del(&svc->s_list);
319 } else {
320 /*
321 * Remove it from the ip_vs_svc_fwm_table table.
322 */
323 list_del(&svc->f_list);
324 }
325
326 svc->flags &= ~IP_VS_SVC_F_HASHED;
327 atomic_dec(&svc->refcnt);
328 return 1;
329 }
330
331
332 /*
333 * Get service by {proto,addr,port} in the service table.
334 */
335 static __inline__ struct ip_vs_service *
__ip_vs_service_get(__u16 protocol,__u32 vaddr,__u16 vport)336 __ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
337 {
338 unsigned hash;
339 struct ip_vs_service *svc;
340 struct list_head *l,*e;
341
342 /*
343 * Check for "full" addressed entries
344 */
345 hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
346
347 l = &ip_vs_svc_table[hash];
348 for (e=l->next; e!=l; e=e->next) {
349 svc = list_entry(e, struct ip_vs_service, s_list);
350 if ((svc->addr == vaddr)
351 && (svc->port == vport)
352 && (svc->protocol == protocol)) {
353 /* HIT */
354 atomic_inc(&svc->usecnt);
355 return svc;
356 }
357 }
358
359 return NULL;
360 }
361
362
363 /*
364 * Get service by {fwmark} in the service table.
365 */
__ip_vs_svc_fwm_get(__u32 fwmark)366 static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
367 {
368 unsigned hash;
369 struct ip_vs_service *svc;
370 struct list_head *l,*e;
371
372 /*
373 * Check for "full" addressed entries
374 */
375 hash = ip_vs_svc_fwm_hashkey(fwmark);
376
377 l = &ip_vs_svc_fwm_table[hash];
378 for (e=l->next; e!=l; e=e->next) {
379 svc = list_entry(e, struct ip_vs_service, f_list);
380 if (svc->fwmark == fwmark) {
381 /* HIT */
382 atomic_inc(&svc->usecnt);
383 return svc;
384 }
385 }
386
387 return NULL;
388 }
389
/*
 * Look up the virtual service a packet should be scheduled to.
 *
 * Lookup order:
 *   1. by fwmark, if one is given;
 *   2. by <protocol, vaddr, vport>;
 *   3. for TCP from FTPDATA or to a non-privileged port, by the FTP
 *      control port, so FTP data connections match the FTP service
 *      (skipped unless an FTP service exists);
 *   4. by the catch-all port-zero service, if any exists.
 *
 * On a hit the service's usecnt has been incremented by the __get
 * helper; the caller must drop that reference when done.
 */
struct ip_vs_service *
ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
{
	struct ip_vs_service *svc;

	read_lock(&__ip_vs_svc_lock);

	/*
	 * Check the table hashed by fwmark first
	 */
	if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
		goto out;

	/*
	 * Check the table hashed by <protocol,addr,port>
	 * for "full" addressed entries
	 */
	svc = __ip_vs_service_get(protocol, vaddr, vport);

	if (svc == NULL
	    && protocol == IPPROTO_TCP
	    && atomic_read(&ip_vs_ftpsvc_counter)
	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
		/*
		 * Check if ftp service entry exists, the packet
		 * might belong to FTP data connections.
		 */
		svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
	}

	if (svc == NULL
	    && atomic_read(&ip_vs_nullsvc_counter)) {
		/*
		 * Check if the catch-all port (port zero) exists
		 */
		svc = __ip_vs_service_get(protocol, vaddr, 0);
	}

  out:
	read_unlock(&__ip_vs_svc_lock);

	IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
		  fwmark, ip_vs_proto_name(protocol),
		  NIPQUAD(vaddr), ntohs(vport),
		  svc?"hit":"not hit");

	return svc;
}
438
439
/*
 * Bind a destination to its service: the dest holds a reference on
 * the service (dropped in __ip_vs_unbind_svc).
 */
static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	atomic_inc(&svc->refcnt);
	dest->svc = svc;
}
446
/*
 * Detach a destination from its service and drop the reference taken
 * by __ip_vs_bind_svc(); frees the service when the last reference
 * goes away.
 */
static inline void
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = dest->svc;

	dest->svc = NULL;
	if (atomic_dec_and_test(&svc->refcnt))
		kfree(svc);
}
456
457 /*
458 * Returns hash value for real service
459 */
ip_vs_rs_hashkey(__u32 addr,__u16 port)460 static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
461 {
462 register unsigned porth = ntohs(port);
463
464 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
465 & IP_VS_RTAB_MASK;
466 }
467
468 /*
469 * Hashes ip_vs_dest in ip_vs_rtable by proto,addr,port.
470 * should be called with locked tables.
471 * returns bool success.
472 */
ip_vs_rs_hash(struct ip_vs_dest * dest)473 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
474 {
475 unsigned hash;
476
477 if (!list_empty(&dest->d_list)) {
478 return 0;
479 }
480
481 /*
482 * Hash by proto,addr,port,
483 * which are the parameters of the real service.
484 */
485 hash = ip_vs_rs_hashkey(dest->addr, dest->port);
486 list_add(&dest->d_list, &ip_vs_rtable[hash]);
487
488 return 1;
489 }
490
491 /*
492 * UNhashes ip_vs_dest from ip_vs_rtable.
493 * should be called with locked tables.
494 * returns bool success.
495 */
ip_vs_rs_unhash(struct ip_vs_dest * dest)496 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
497 {
498 /*
499 * Remove it from the ip_vs_rtable table.
500 */
501 if (!list_empty(&dest->d_list)) {
502 list_del(&dest->d_list);
503 INIT_LIST_HEAD(&dest->d_list);
504 }
505
506 return 1;
507 }
508
509 /*
510 * Lookup real service by {proto,addr,port} in the real service table.
511 */
512 struct ip_vs_dest *
ip_vs_lookup_real_service(__u16 protocol,__u32 daddr,__u16 dport)513 ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
514 {
515 unsigned hash;
516 struct ip_vs_dest *dest;
517 struct list_head *l,*e;
518
519 /*
520 * Check for "full" addressed entries
521 * Return the first found entry
522 */
523 hash = ip_vs_rs_hashkey(daddr, dport);
524
525 l = &ip_vs_rtable[hash];
526
527 read_lock(&__ip_vs_rs_lock);
528 for (e=l->next; e!=l; e=e->next) {
529 dest = list_entry(e, struct ip_vs_dest, d_list);
530 if ((dest->addr == daddr)
531 && (dest->port == dport)
532 && ((dest->protocol == protocol) ||
533 dest->vfwmark)) {
534 /* HIT */
535 read_unlock(&__ip_vs_rs_lock);
536 return dest;
537 }
538 }
539 read_unlock(&__ip_vs_rs_lock);
540
541 return NULL;
542 }
543
544 /*
545 * Lookup destination by {addr,port} in the given service
546 */
547 static struct ip_vs_dest *
ip_vs_lookup_dest(struct ip_vs_service * svc,__u32 daddr,__u16 dport)548 ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
549 {
550 struct ip_vs_dest *dest;
551 struct list_head *l, *e;
552
553 /*
554 * Find the destination for the given service
555 */
556 l = &svc->destinations;
557 for (e=l->next; e!=l; e=e->next) {
558 dest = list_entry(e, struct ip_vs_dest, n_list);
559 if ((dest->addr == daddr) && (dest->port == dport)) {
560 /* HIT */
561 return dest;
562 }
563 }
564
565 return NULL;
566 }
567
568
/*
 * Lookup dest by {svc,addr,port} in the destination trash.
 * The destination trash is used to hold the destinations that are removed
 * from the service table but are still referenced by some conn entries.
 * The reason to add the destination trash is when the dest is temporary
 * down (either by administrator or by monitor program), the dest can be
 * picked back from the trash, the remaining connections to the dest can
 * continue, and the counting information of the dest is also useful for
 * scheduling.
 *
 * While scanning, any trashed dest whose refcnt has dropped to 1 (only
 * the trash itself references it) is freed as a side effect.
 */
static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
{
	struct ip_vs_dest *dest;
	struct list_head *l, *e;

	/*
	 * Find the destination in trash
	 */
	l = &ip_vs_dest_trash;

	for (e=l->next; e!=l; e=e->next) {
		dest = list_entry(e, struct ip_vs_dest, n_list);
		IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
			  "refcnt=%d\n",
			  dest->vfwmark,
			  NIPQUAD(dest->addr), ntohs(dest->port),
			  atomic_read(&dest->refcnt));
		/* must match the address/port AND come from this very
		 * service (by fwmark, or by virtual addr/port) */
		if (dest->addr == daddr &&
		    dest->port == dport &&
		    dest->vfwmark == svc->fwmark &&
		    dest->protocol == svc->protocol &&
		    (svc->fwmark ||
		     (dest->vaddr == svc->addr &&
		      dest->vport == svc->port))) {
			/* HIT */
			return dest;
		}

		/*
		 * Try to purge the destination from trash if not referenced
		 */
		if (atomic_read(&dest->refcnt) == 1) {
			IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
				  "from trash\n",
				  dest->vfwmark,
				  NIPQUAD(dest->addr), ntohs(dest->port));
			/* step back first so the loop's e->next skips
			 * over the entry being deleted */
			e = e->prev;
			list_del(&dest->n_list);
			__ip_vs_dst_reset(dest);
			__ip_vs_unbind_svc(dest);
			kfree(dest);
		}
	}

	return NULL;
}
626
627
628 /*
629 * Clean up all the destinations in the trash
630 * Called by the ip_vs_control_cleanup()
631 *
632 * When the ip_vs_control_clearup is activated by ipvs module exit,
633 * the service tables must have been flushed and all the connections
634 * are expired, and the refcnt of each destination in the trash must
635 * be 1, so we simply release them here.
636 */
ip_vs_trash_cleanup(void)637 static void ip_vs_trash_cleanup(void)
638 {
639 struct ip_vs_dest *dest;
640 struct list_head *l;
641
642 l = &ip_vs_dest_trash;
643
644 while (l->next != l) {
645 dest = list_entry(l->next, struct ip_vs_dest, n_list);
646 list_del(&dest->n_list);
647 __ip_vs_dst_reset(dest);
648 __ip_vs_unbind_svc(dest);
649 kfree(dest);
650 }
651 }
652
653
/*
 * Zero a stats block under its own lock.  The memset covers only the
 * bytes laid out before the `lock' member, so the lock itself (and any
 * members after it) survive; the attached rate estimator is reset
 * separately.
 */
static inline void
__ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);
	memset(stats, 0, (char *)&stats->lock - (char *)stats);
	spin_unlock_bh(&stats->lock);
	ip_vs_zero_estimator(stats);
}
662
/*
 * Update a destination in the given service from a user rule:
 * weight, connection flags, forwarding method, service binding and
 * availability.  Used both when adding/reviving a dest and when
 * editing one.
 */
static void __ip_vs_update_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				struct ip_vs_rule_user *ur)
{
	int conn_flags;

	/*
	 * Set the weight and the flags
	 */
	atomic_set(&dest->weight, ur->weight);

	/* connections inherit these flags; new conns start inactive */
	conn_flags = ur->conn_flags | IP_VS_CONN_F_INACTIVE;

	/*
	 * Check if local node and update the flags
	 */
	if (inet_addr_type(ur->daddr) == RTN_LOCAL) {
		/* force local-node forwarding for local addresses */
		conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
			| IP_VS_CONN_F_LOCALNODE;
	}

	/*
	 * Set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading
	 */
	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
	} else {
		/*
		 * Put the real service in ip_vs_rtable if not present.
		 * For now only for NAT!
		 */
		write_lock_bh(&__ip_vs_rs_lock);
		ip_vs_rs_hash(dest);
		write_unlock_bh(&__ip_vs_rs_lock);
	}
	atomic_set(&dest->conn_flags, conn_flags);

	/* bind the service; rebinding to a different service resets
	 * the dest's statistics */
	if (!dest->svc) {
		__ip_vs_bind_svc(dest, svc);
	} else {
		if (dest->svc != svc) {
			__ip_vs_unbind_svc(dest);
			__ip_vs_zero_stats(&dest->stats);
			__ip_vs_bind_svc(dest, svc);
		}
	}

	/* set the dest status flags */
	dest->flags |= IP_VS_DEST_F_AVAILABLE;
}
717
718
/*
 * Create a destination for the given service.
 *
 * Allocates and initializes a new ip_vs_dest from the user rule and
 * stores it in *destp.  Returns 0 on success, -EINVAL if the address
 * is neither local nor unicast, -ENOMEM on allocation failure.
 * The new dest starts with refcnt 0; the caller takes the first
 * reference when linking it into the service.
 */
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_rule_user *ur,
	       struct ip_vs_dest **destp)
{
	struct ip_vs_dest *dest;
	unsigned atype;

	EnterFunction(2);

	/* only local or unicast addresses make sense as real servers */
	atype = inet_addr_type(ur->daddr);
	if (atype != RTN_LOCAL && atype != RTN_UNICAST)
		return -EINVAL;

	*destp = dest = (struct ip_vs_dest*)
		kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
	if (dest == NULL) {
		IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
		return -ENOMEM;
	}
	memset(dest, 0, sizeof(struct ip_vs_dest));

	/* record both the virtual service identity and the real
	 * server address */
	dest->protocol = svc->protocol;
	dest->vaddr = svc->addr;
	dest->vport = svc->port;
	dest->vfwmark = svc->fwmark;
	dest->addr = ur->daddr;
	dest->port = ur->dport;

	atomic_set(&dest->activeconns, 0);
	atomic_set(&dest->inactconns, 0);
	atomic_set(&dest->refcnt, 0);

	INIT_LIST_HEAD(&dest->d_list);
	dest->dst_lock = SPIN_LOCK_UNLOCKED;
	dest->stats.lock = SPIN_LOCK_UNLOCKED;
	/* weight, flags and service binding come from the rule */
	__ip_vs_update_dest(svc, dest, ur);
	ip_vs_new_estimator(&dest->stats);

	LeaveFunction(2);
	return 0;
}
763
764
/*
 * Add a destination into an existing service.
 *
 * If the dest is found in the trash (it was recently removed but is
 * still referenced by connections), it is revived and re-linked
 * instead of allocating a new one.  Returns 0 on success, -ERANGE for
 * a negative weight, -EEXIST if already present, or the error from
 * ip_vs_new_dest().
 *
 * Linking is done under the service write lock; the busy-wait on
 * usecnt > 1 lets all other users of the service drain before the
 * destination list is modified (this caller holds one usecnt itself).
 */
static int ip_vs_add_dest(struct ip_vs_service *svc,
			  struct ip_vs_rule_user *ur)
{
	struct ip_vs_dest *dest;
	__u32 daddr = ur->daddr;
	__u16 dport = ur->dport;
	int ret;

	EnterFunction(2);

	if (ur->weight < 0) {
		IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
		return -ERANGE;
	}

	/*
	 * Check if the dest already exists in the list
	 */
	dest = ip_vs_lookup_dest(svc, daddr, dport);
	if (dest != NULL) {
		IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
		return -EEXIST;
	}

	/*
	 * Check if the dest already exists in the trash and
	 * is from the same service
	 */
	dest = ip_vs_trash_get_dest(svc, daddr, dport);
	if (dest != NULL) {
		IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
			  "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
			  NIPQUAD(daddr), ntohs(dport),
			  atomic_read(&dest->refcnt),
			  dest->vfwmark,
			  NIPQUAD(dest->vaddr),
			  ntohs(dest->vport));
		/* refresh weight/flags/binding from the new rule */
		__ip_vs_update_dest(svc, dest, ur);

		/*
		 * Get the destination from the trash
		 */
		list_del(&dest->n_list);

		ip_vs_new_estimator(&dest->stats);

		write_lock_bh(&__ip_vs_svc_lock);

		/*
		 * Wait until all other svc users go away.
		 */
		while (atomic_read(&svc->usecnt) > 1) {};

		list_add(&dest->n_list, &svc->destinations);
		svc->num_dests++;

		/* call the update_service function of its scheduler */
		svc->scheduler->update_service(svc);

		write_unlock_bh(&__ip_vs_svc_lock);
		return 0;
	}

	/*
	 * Allocate and initialize the dest structure
	 */
	ret = ip_vs_new_dest(svc, ur, &dest);
	if (ret) {
		return ret;
	}

	/*
	 * Add the dest entry into the list
	 */
	atomic_inc(&dest->refcnt);	/* reference held by the svc list */

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	while (atomic_read(&svc->usecnt) > 1) {};

	list_add(&dest->n_list, &svc->destinations);
	svc->num_dests++;

	/* call the update_service function of its scheduler */
	svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	LeaveFunction(2);

	return 0;
}
863
864
/*
 * Edit a destination in the given service.
 *
 * Updates weight/flags/binding from the rule, then notifies the
 * scheduler under the service write lock (weight changes may affect
 * scheduling state).  Returns 0 on success, -ERANGE for a negative
 * weight, -ENOENT if the dest is not in the service.
 */
static int ip_vs_edit_dest(struct ip_vs_service *svc,
			   struct ip_vs_rule_user *ur)
{
	struct ip_vs_dest *dest;
	__u32 daddr = ur->daddr;
	__u16 dport = ur->dport;

	EnterFunction(2);

	if (ur->weight < 0) {
		IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
		return -ERANGE;
	}

	/*
	 * Lookup the destination list
	 */
	dest = ip_vs_lookup_dest(svc, daddr, dport);
	if (dest == NULL) {
		IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
		return -ENOENT;
	}

	__ip_vs_update_dest(svc, dest, ur);

	write_lock_bh(&__ip_vs_svc_lock);

	/* Wait until all other svc users go away */
	while (atomic_read(&svc->usecnt) > 1) {};

	/* call the update_service, because server weight may be changed */
	svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	LeaveFunction(2);

	return 0;
}
907
908
/*
 * Delete a destination (must be already unlinked from the service).
 * Frees it immediately when nothing else references it; otherwise it
 * is parked in the trash so lingering connections can keep using it
 * (see ip_vs_trash_get_dest).
 */
static void __ip_vs_del_dest(struct ip_vs_dest *dest)
{
	ip_vs_kill_estimator(&dest->stats);

	/*
	 * Remove it from the d-linked list with the real services.
	 */
	write_lock_bh(&__ip_vs_rs_lock);
	ip_vs_rs_unhash(dest);
	write_unlock_bh(&__ip_vs_rs_lock);

	/*
	 * Decrease the refcnt of the dest, and free the dest
	 * if nobody refers to it (refcnt=0). Otherwise, throw
	 * the destination into the trash.
	 */
	if (atomic_dec_and_test(&dest->refcnt)) {
		__ip_vs_dst_reset(dest);
		/* simply decrease svc->refcnt here, let the caller check
		   and release the service if nobody refers to it.
		   Only user context can release destination and service,
		   and only one user context can update virtual service at a
		   time, so the operation here is OK */
		atomic_dec(&dest->svc->refcnt);
		kfree(dest);
	} else {
		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
			  NIPQUAD(dest->addr), ntohs(dest->port),
			  atomic_read(&dest->refcnt));
		/* the trash list itself holds one reference */
		list_add(&dest->n_list, &ip_vs_dest_trash);
		atomic_inc(&dest->refcnt);
	}
}
945
946
947 /*
948 * Unlink a destination from the given service
949 */
__ip_vs_unlink_dest(struct ip_vs_service * svc,struct ip_vs_dest * dest,int svcupd)950 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
951 struct ip_vs_dest *dest,
952 int svcupd)
953 {
954 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
955
956 /*
957 * Remove it from the d-linked destination list.
958 */
959 list_del(&dest->n_list);
960 svc->num_dests--;
961 if (svcupd) {
962 /*
963 * Call the update_service function of its scheduler
964 */
965 svc->scheduler->update_service(svc);
966 }
967 }
968
969
/*
 * Delete a destination server from the given service.
 *
 * Unlinks the dest under the service write lock (after waiting for
 * other service users to drain), then frees or trashes it via
 * __ip_vs_del_dest().  Returns 0 on success, -ENOENT if not found.
 */
static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_rule_user *ur)
{
	struct ip_vs_dest *dest;
	__u32 daddr = ur->daddr;
	__u16 dport = ur->dport;

	EnterFunction(2);

	dest = ip_vs_lookup_dest(svc, daddr, dport);
	if (dest == NULL) {
		IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
		return -ENOENT;
	}

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	while (atomic_read(&svc->usecnt) > 1) {};

	/*
	 * Unlink dest from the service
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	write_unlock_bh(&__ip_vs_svc_lock);

	/*
	 * Delete the destination
	 */
	__ip_vs_del_dest(dest);

	LeaveFunction(2);

	return 0;
}
1010
1011
/*
 * Add a service into the service hash table.
 *
 * Builds a new ip_vs_service from the user rule, binds the requested
 * scheduler, bumps the FTP/null-service counters where applicable,
 * and hashes the service into the tables.  On success *svc_p is set
 * and the module use count stays raised (dropped again in
 * __ip_vs_del_service).  Returns 0, -ENOENT (unknown scheduler) or
 * -ENOMEM.
 */
static int
ip_vs_add_service(struct ip_vs_rule_user *ur, struct ip_vs_service **svc_p)
{
	int ret = 0;
	struct ip_vs_scheduler *sched;
	struct ip_vs_service *svc = NULL;

	/* pin the module while the service exists */
	MOD_INC_USE_COUNT;

	/*
	 * Lookup the scheduler, by 'ur->sched_name'
	 */
	sched = ip_vs_scheduler_get(ur->sched_name);
	if (sched == NULL) {
		IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
			   ur->sched_name);
		ret = -ENOENT;
		goto out_mod_dec;
	}

	svc = (struct ip_vs_service*)
		kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
	if (svc == NULL) {
		IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
		ret = -ENOMEM;
		goto out_err;
	}
	memset(svc, 0, sizeof(struct ip_vs_service));

	/* copy the service identity and options from the rule */
	svc->protocol = ur->protocol;
	svc->addr = ur->vaddr;
	svc->port = ur->vport;
	svc->fwmark = ur->vfwmark;
	svc->flags = ur->vs_flags;
	svc->timeout = ur->timeout * HZ;
	svc->netmask = ur->netmask;

	INIT_LIST_HEAD(&svc->destinations);
	svc->sched_lock = RW_LOCK_UNLOCKED;
	svc->stats.lock = SPIN_LOCK_UNLOCKED;

	/*
	 * Bind the scheduler
	 */
	ret = ip_vs_bind_scheduler(svc, sched);
	if (ret) {
		goto out_err;
	}

	/*
	 * Update the virtual service counters
	 */
	if (svc->port == FTPPORT)
		atomic_inc(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_inc(&ip_vs_nullsvc_counter);

	/*
	 * I'm the first user of the service
	 */
	atomic_set(&svc->usecnt, 1);
	atomic_set(&svc->refcnt, 0);

	ip_vs_new_estimator(&svc->stats);
	ip_vs_num_services++;

	/*
	 * Hash the service into the service table
	 */
	write_lock_bh(&__ip_vs_svc_lock);
	ip_vs_svc_hash(svc);
	write_unlock_bh(&__ip_vs_svc_lock);

	*svc_p = svc;
	return 0;

  out_err:
	/* free the half-built service and drop the scheduler ref */
	if (svc)
		kfree(svc);
	ip_vs_scheduler_put(sched);
  out_mod_dec:
	MOD_DEC_USE_COUNT;
	return ret;
}
1099
1100
/*
 * Edit a service and bind it with a new scheduler.
 *
 * Updates flags/timeout/netmask and, if the scheduler name changed,
 * swaps schedulers under the service write lock.  If binding the new
 * scheduler fails, the old one is re-bound and the new one's
 * reference is dropped.  Returns 0 or a negative errno.
 */
static int ip_vs_edit_service(struct ip_vs_service *svc,
			      struct ip_vs_rule_user *ur)
{
	struct ip_vs_scheduler *sched, *old_sched;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'ur->sched_name'
	 */
	sched = ip_vs_scheduler_get(ur->sched_name);
	if (sched == NULL) {
		IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
			   ur->sched_name);
		return -ENOENT;
	}

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	while (atomic_read(&svc->usecnt) > 1) {};

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = ur->vs_flags | IP_VS_SVC_F_HASHED;
	svc->timeout = ur->timeout * HZ;
	svc->netmask = ur->netmask;

	/* old_sched ends up being whichever scheduler reference must
	 * be released at `out' */
	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			old_sched = sched;
			goto out;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
		}
	}

  out:
	write_unlock_bh(&__ip_vs_svc_lock);

	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	return ret;
}
1171
1172
/*
 * Delete a service from the service list
 * The service must be unlinked, unlocked and not referenced!
 *
 * Tears down estimator, scheduler binding and all destinations, fixes
 * the FTP/null-service counters, frees the service if its refcnt
 * reached zero, and releases the module use count taken by
 * ip_vs_add_service().
 */
static void __ip_vs_del_service(struct ip_vs_service *svc)
{
	struct list_head *l;
	struct ip_vs_dest *dest;
	struct ip_vs_scheduler *old_sched;

	ip_vs_num_services--;
	ip_vs_kill_estimator(&svc->stats);

	/*
	 * Unbind scheduler
	 */
	old_sched = svc->scheduler;
	ip_vs_unbind_scheduler(svc);
	if (old_sched && old_sched->module)
		__MOD_DEC_USE_COUNT(old_sched->module);

	/*
	 * Unlink the whole destination list
	 */
	l = &svc->destinations;
	while (l->next != l) {
		dest = list_entry(l->next, struct ip_vs_dest, n_list);
		/* no scheduler update: the service is going away */
		__ip_vs_unlink_dest(svc, dest, 0);
		__ip_vs_del_dest(dest);
	}

	/*
	 * Update the virtual service counters
	 */
	if (svc->port == FTPPORT)
		atomic_dec(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_dec(&ip_vs_nullsvc_counter);

	/*
	 * Free the service if nobody refers to it
	 */
	if (atomic_read(&svc->refcnt) == 0)
		kfree(svc);
	MOD_DEC_USE_COUNT;
}
1219
1220 /*
1221 * Delete a service from the service list
1222 */
ip_vs_del_service(struct ip_vs_service * svc)1223 static int ip_vs_del_service(struct ip_vs_service *svc)
1224 {
1225 if (svc == NULL)
1226 return -EEXIST;
1227
1228 /*
1229 * Unhash it from the service table
1230 */
1231 write_lock_bh(&__ip_vs_svc_lock);
1232
1233 ip_vs_svc_unhash(svc);
1234
1235 /*
1236 * Wait until all the svc users go away.
1237 */
1238 while (atomic_read(&svc->usecnt) > 1) {};
1239
1240 __ip_vs_del_service(svc);
1241
1242 write_unlock_bh(&__ip_vs_svc_lock);
1243
1244 return 0;
1245 }
1246
1247
1248 /*
1249 * Flush all the virtual services
1250 */
ip_vs_flush(void)1251 static int ip_vs_flush(void)
1252 {
1253 int idx;
1254 struct ip_vs_service *svc;
1255 struct list_head *l;
1256
1257 /*
1258 * Flush the service table hashed by <protocol,addr,port>
1259 */
1260 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1261 l = &ip_vs_svc_table[idx];
1262 while (l->next != l) {
1263 svc = list_entry(l->next,struct ip_vs_service,s_list);
1264 write_lock_bh(&__ip_vs_svc_lock);
1265 ip_vs_svc_unhash(svc);
1266 /*
1267 * Wait until all the svc users go away.
1268 */
1269 while (atomic_read(&svc->usecnt) > 0) {};
1270 __ip_vs_del_service(svc);
1271 write_unlock_bh(&__ip_vs_svc_lock);
1272 }
1273 }
1274
1275 /*
1276 * Flush the service table hashed by fwmark
1277 */
1278 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1279 l = &ip_vs_svc_fwm_table[idx];
1280 while (l->next != l) {
1281 svc = list_entry(l->next,struct ip_vs_service,f_list);
1282 write_lock_bh(&__ip_vs_svc_lock);
1283 ip_vs_svc_unhash(svc);
1284 /*
1285 * Wait until all the svc users go away.
1286 */
1287 while (atomic_read(&svc->usecnt) > 0) {};
1288 __ip_vs_del_service(svc);
1289 write_unlock_bh(&__ip_vs_svc_lock);
1290 }
1291 }
1292
1293 return 0;
1294 }
1295
1296
1297 /*
1298 * Zero counters in a service or all services
1299 */
ip_vs_zero_service(struct ip_vs_service * svc)1300 static int ip_vs_zero_service(struct ip_vs_service *svc)
1301 {
1302 struct list_head *l;
1303 struct ip_vs_dest *dest;
1304
1305 write_lock_bh(&__ip_vs_svc_lock);
1306 list_for_each (l, &svc->destinations) {
1307 dest = list_entry(l, struct ip_vs_dest, n_list);
1308 __ip_vs_zero_stats(&dest->stats);
1309 }
1310 __ip_vs_zero_stats(&svc->stats);
1311 write_unlock_bh(&__ip_vs_svc_lock);
1312 return 0;
1313 }
1314
ip_vs_zero_all(void)1315 static int ip_vs_zero_all(void)
1316 {
1317 int idx;
1318 struct list_head *l;
1319 struct ip_vs_service *svc;
1320
1321 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1322 list_for_each (l, &ip_vs_svc_table[idx]) {
1323 svc = list_entry(l, struct ip_vs_service, s_list);
1324 ip_vs_zero_service(svc);
1325 }
1326 }
1327
1328 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1329 list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1330 svc = list_entry(l, struct ip_vs_service, f_list);
1331 ip_vs_zero_service(svc);
1332 }
1333 }
1334
1335 __ip_vs_zero_stats(&ip_vs_stats);
1336 return 0;
1337 }
1338
1339
/*
 *	sysctl handler for the defense-mode variables (drop_entry,
 *	drop_packet, secure_tcp). Lets proc_dointvec do the parsing,
 *	then either rolls back an out-of-range value (valid range 0..3)
 *	or re-evaluates the defense level with BHs disabled.
 */
static int ip_vs_sysctl_defense_mode(ctl_table *ctl, int write,
	struct file * filp, void *buffer, size_t *lenp)
{
	int *valp = ctl->data;
	int old_val = *valp;
	int rc;

	rc = proc_dointvec(ctl, write, filp, buffer, lenp);
	if (!write || *valp == old_val)
		return rc;

	if (*valp < 0 || *valp > 3) {
		/* Restore the correct value */
		*valp = old_val;
	} else {
		local_bh_disable();
		update_defense_level();
		local_bh_enable();
	}
	return rc;
}
1360
1361
1362 /*
1363 * IPVS sysctl table
1364 */
1365 struct ip_vs_sysctl_table {
1366 struct ctl_table_header *sysctl_header;
1367 ctl_table vs_vars[NET_IPV4_VS_LAST];
1368 ctl_table vs_dir[2];
1369 ctl_table ipv4_dir[2];
1370 ctl_table root_dir[2];
1371 };
1372
1373
/*
 * Static definition of the net.ipv4.vs.* sysctl subtree.
 * Each ctl_table entry is initialized positionally as
 * { ctl_name, procname, data, maxlen, mode, child, proc_handler }.
 * NOTE(review): field order assumed from the 2.4-era ctl_table layout
 * — confirm against linux/sysctl.h if this is ever modernized.
 */
static struct ip_vs_sysctl_table ipv4_vs_table = {
	NULL,	/* sysctl_header: set by register_sysctl_table() */
	/* leaf variables under net.ipv4.vs */
	{{NET_IPV4_VS_AMEMTHRESH, "amemthresh",
	  &sysctl_ip_vs_amemthresh, sizeof(int), 0644, NULL,
	  &proc_dointvec},
#ifdef CONFIG_IP_VS_DEBUG
	{NET_IPV4_VS_DEBUG_LEVEL, "debug_level",
	 &sysctl_ip_vs_debug_level, sizeof(int), 0644, NULL,
	 &proc_dointvec},
#endif
	{NET_IPV4_VS_AMDROPRATE, "am_droprate",
	 &sysctl_ip_vs_am_droprate, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	/* the three defense-mode knobs share the validating handler */
	{NET_IPV4_VS_DROP_ENTRY, "drop_entry",
	 &sysctl_ip_vs_drop_entry, sizeof(int), 0644, NULL,
	 &ip_vs_sysctl_defense_mode},
	{NET_IPV4_VS_DROP_PACKET, "drop_packet",
	 &sysctl_ip_vs_drop_packet, sizeof(int), 0644, NULL,
	 &ip_vs_sysctl_defense_mode},
	{NET_IPV4_VS_SECURE_TCP, "secure_tcp",
	 &sysctl_ip_vs_secure_tcp, sizeof(int), 0644, NULL,
	 &ip_vs_sysctl_defense_mode},
	/* DoS-mode (secure_tcp) state timeouts, stored in jiffies */
	{NET_IPV4_VS_TO_ES, "timeout_established",
	 &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_TO_SS, "timeout_synsent",
	 &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_TO_SR, "timeout_synrecv",
	 &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_TO_FW, "timeout_finwait",
	 &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_TO_TW, "timeout_timewait",
	 &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_TO_CL, "timeout_close",
	 &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_TO_CW, "timeout_closewait",
	 &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_TO_LA, "timeout_lastack",
	 &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_TO_LI, "timeout_listen",
	 &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_TO_SA, "timeout_synack",
	 &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_TO_UDP, "timeout_udp",
	 &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_TO_ICMP, "timeout_icmp",
	 &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
	 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
	{NET_IPV4_VS_CACHE_BYPASS, "cache_bypass",
	 &sysctl_ip_vs_cache_bypass, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn",
	 &sysctl_ip_vs_expire_nodest_conn, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold",
	 &sysctl_ip_vs_sync_threshold, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send",
	 &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, "expire_quiescent_template",
	 &sysctl_ip_vs_expire_quiescent_template, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{0}},
	/* directory chain: net -> ipv4 -> vs -> vs_vars */
	{{NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table.vs_vars},
	 {0}},
	{{NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_vs_table.vs_dir},
	 {0}},
	{{CTL_NET, "net", NULL, 0, 0555, ipv4_vs_table.ipv4_dir},
	 {0}}
};
1455
1456
1457 /*
1458 * Write the contents of the VS rule table to a PROCfs file.
1459 * (It is kept just for backward compatibility)
1460 */
ip_vs_fwd_name(unsigned flags)1461 static inline char *ip_vs_fwd_name(unsigned flags)
1462 {
1463 char *fwd;
1464
1465 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1466 case IP_VS_CONN_F_LOCALNODE:
1467 fwd = "Local";
1468 break;
1469 case IP_VS_CONN_F_TUNNEL:
1470 fwd = "Tunnel";
1471 break;
1472 case IP_VS_CONN_F_DROUTE:
1473 fwd = "Route";
1474 break;
1475 default:
1476 fwd = "Masq";
1477 }
1478 return fwd;
1479 }
1480
/*
 *	/proc/net/ip_vs read handler (old-style get_info interface):
 *	renders the whole virtual-service table into buf, honoring the
 *	offset/length windowing protocol. Every record is padded to a
 *	fixed 64 bytes so records never straddle a 512-byte read.
 */
static int ip_vs_get_info(char *buf, char **start, off_t offset, int length)
{
	int len=0;	/* bytes actually written to buf */
	off_t pos=0;	/* virtual position in the full output */
	char temp[64], temp2[32];
	int idx;
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	struct list_head *l, *e, *p, *q;

	/*
	 * Note: since the length of the buffer is usually the multiple
	 * of 512, it is good to use fixed record of the divisor of 512,
	 * so that records won't be truncated at buffer boundary.
	 */
	pos = 192;	/* three 64-byte header lines */
	if (pos > offset) {
		sprintf(temp,
			"IP Virtual Server version %d.%d.%d (size=%d)",
			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
		len += sprintf(buf+len, "%-63s\n", temp);
		len += sprintf(buf+len, "%-63s\n",
			       "Prot LocalAddress:Port Scheduler Flags");
		len += sprintf(buf+len, "%-63s\n",
			       "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn");
	}

	read_lock_bh(&__ip_vs_svc_lock);

	/* print the service table hashed by <protocol,addr,port> */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		l = &ip_vs_svc_table[idx];
		for (e=l->next; e!=l; e=e->next) {
			svc = list_entry(e, struct ip_vs_service, s_list);
			pos += 64;
			/* skip records entirely before the read window */
			if (pos > offset) {
				if (svc->flags & IP_VS_SVC_F_PERSISTENT)
					sprintf(temp2, "persistent %d %08X",
						svc->timeout,
						ntohl(svc->netmask));
				else
					temp2[0] = '\0';

				sprintf(temp, "%s  %08X:%04X %s %s",
					ip_vs_proto_name(svc->protocol),
					ntohl(svc->addr),
					ntohs(svc->port),
					svc->scheduler->name, temp2);
				len += sprintf(buf+len, "%-63s\n", temp);
				if (len >= length)
					goto done;
			}

			p = &svc->destinations;
			for (q=p->next; q!=p; q=q->next) {
				dest = list_entry(q, struct ip_vs_dest, n_list);
				pos += 64;
				if (pos <= offset)
					continue;
				sprintf(temp,
					"  -> %08X:%04X      %-7s %-6d %-10d %-10d",
					ntohl(dest->addr),
					ntohs(dest->port),
					ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					atomic_read(&dest->weight),
					atomic_read(&dest->activeconns),
					atomic_read(&dest->inactconns));
				len += sprintf(buf+len, "%-63s\n", temp);
				if (len >= length)
					goto done;
			}
		}
	}

	/* print the service table hashed by fwmark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		l = &ip_vs_svc_fwm_table[idx];
		for (e=l->next; e!=l; e=e->next) {
			svc = list_entry(e, struct ip_vs_service, f_list);
			pos += 64;
			if (pos > offset) {
				if (svc->flags & IP_VS_SVC_F_PERSISTENT)
					sprintf(temp2, "persistent %d %08X",
						svc->timeout,
						ntohl(svc->netmask));
				else
					temp2[0] = '\0';

				sprintf(temp, "FWM  %08X %s %s",
					svc->fwmark,
					svc->scheduler->name, temp2);
				len += sprintf(buf+len, "%-63s\n", temp);
				if (len >= length)
					goto done;
			}

			p = &svc->destinations;
			for (q=p->next; q!=p; q=q->next) {
				dest = list_entry(q, struct ip_vs_dest, n_list);
				pos += 64;
				if (pos <= offset)
					continue;
				sprintf(temp,
					"  -> %08X:%04X      %-7s %-6d %-10d %-10d",
					ntohl(dest->addr),
					ntohs(dest->port),
					ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					atomic_read(&dest->weight),
					atomic_read(&dest->activeconns),
					atomic_read(&dest->inactconns));
				len += sprintf(buf+len, "%-63s\n", temp);
				if (len >= length)
					goto done;
			}
		}
	}

done:
	read_unlock_bh(&__ip_vs_svc_lock);

	*start = buf+len-(pos-offset);	/* Start of wanted data */
	len = pos-offset;
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
1609
1610
/* Global IPVS traffic statistics; zeroed and its spinlock initialized
 * in ip_vs_control_init(), read under its lock by the /proc handlers. */
struct ip_vs_stats ip_vs_stats;
1612
/*
 *	/proc/net/ip_vs_stats read handler: dumps the global totals and
 *	the current rates in hexadecimal, using a fixed 320-byte virtual
 *	record so the offset/length windowing stays simple.
 */
static int
ip_vs_stats_get_info(char *buf, char **start, off_t offset, int length)
{
	int len=0;
	off_t pos=0;
	char temp[64];

	pos += 320;	/* whole report treated as one fixed-size record */
	if (pos > offset) {
		len += sprintf(buf+len, "%-63s\n%-63s\n",
/*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
			       "   Total Incoming Outgoing         Incoming         Outgoing",
			       "   Conns  Packets  Packets            Bytes            Bytes");

		/* snapshot under the stats lock so all fields are coherent */
		spin_lock_bh(&ip_vs_stats.lock);
		/* 64-bit byte counters are printed as two 32-bit halves */
		sprintf(temp, "%8X %8X %8X %8X%08X %8X%08X",
			ip_vs_stats.conns,
			ip_vs_stats.inpkts,
			ip_vs_stats.outpkts,
			(__u32)(ip_vs_stats.inbytes>>32),
			(__u32)ip_vs_stats.inbytes,
			(__u32)(ip_vs_stats.outbytes>>32),
			(__u32)ip_vs_stats.outbytes);
		len += sprintf(buf+len, "%-62s\n\n", temp);

		len += sprintf(buf+len, "%-63s\n",
/*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
			       " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s");
		sprintf(temp, "%8X %8X %8X %16X %16X",
			ip_vs_stats.cps,
			ip_vs_stats.inpps,
			ip_vs_stats.outpps,
			ip_vs_stats.inbps,
			ip_vs_stats.outbps);
		len += sprintf(buf+len, "%-63s\n", temp);

		spin_unlock_bh(&ip_vs_stats.lock);
	}

	*start = buf+len-(pos-offset);	/* Start of wanted data */
	len = pos-offset;
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
1660
1661
1662 /*
1663 * Set timeout values for tcp tcpfin udp in the vs_timeout_table.
1664 */
ip_vs_set_timeouts(struct ip_vs_rule_user * u)1665 static int ip_vs_set_timeouts(struct ip_vs_rule_user *u)
1666 {
1667 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1668 u->tcp_timeout,
1669 u->tcp_fin_timeout,
1670 u->udp_timeout);
1671
1672 if (u->tcp_timeout) {
1673 vs_timeout_table.timeout[IP_VS_S_ESTABLISHED]
1674 = u->tcp_timeout * HZ;
1675 }
1676
1677 if (u->tcp_fin_timeout) {
1678 vs_timeout_table.timeout[IP_VS_S_FIN_WAIT]
1679 = u->tcp_fin_timeout * HZ;
1680 }
1681
1682 if (u->udp_timeout) {
1683 vs_timeout_table.timeout[IP_VS_S_UDP]
1684 = u->udp_timeout * HZ;
1685 }
1686 return 0;
1687 }
1688
1689
/*
 *	setsockopt() entry point for all IP_VS_SO_SET_* commands.
 *	Copies the user rule, serializes on __ip_vs_mutex, dispatches the
 *	command, and unwinds via the goto-cleanup chain below.
 *	Returns 0 or a negative errno.
 */
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
{
	int ret;
	struct ip_vs_rule_user *urule;
	struct ip_vs_service *svc = NULL;

	/* only privileged processes may reconfigure IPVS */
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/*
	 * Check the size of mm, no overflow...
	 * len > 128000 is a sanity check.
	 */
	if (len < sizeof(struct ip_vs_rule_user)) {
		IP_VS_ERR("set_ctl: len %u < %Zu\n",
			  len, sizeof(struct ip_vs_rule_user));
		return -EINVAL;
	} else if (len > 128000) {
		IP_VS_ERR("set_ctl: len %u > 128000\n", len);
		return -EINVAL;
	} else if ((urule = kmalloc(len, GFP_KERNEL)) == NULL) {
		IP_VS_ERR("set_ctl: no mem for len %u\n", len);
		return -ENOMEM;
	} else if (copy_from_user(urule, user, len) != 0) {
		ret = -EFAULT;
		goto out_free;
	}

	/* pin the module while we may sleep on the mutex / sync threads */
	MOD_INC_USE_COUNT;
	if (down_interruptible(&__ip_vs_mutex)) {
		ret = -ERESTARTSYS;
		goto out_dec;
	}

	/* commands that need no service lookup are handled first */
	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush();
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUTS) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeouts(urule);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
		ret = start_sync_thread(urule->state, urule->mcast_ifn,
					urule->syncid);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
		ret = stop_sync_thread(urule->state);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!urule->vfwmark && !urule->vaddr && !urule->vport) {
			ret = ip_vs_zero_all();
			goto out_unlock;
		}
	}

	/*
	 * Check for valid protocol: TCP or UDP. Even for fwmark!=0
	 * NOTE(review): -EFAULT for an invalid protocol is unusual
	 * (-EINVAL would be conventional) — preserved as-is.
	 */
	if (urule->protocol!=IPPROTO_TCP && urule->protocol!=IPPROTO_UDP) {
		IP_VS_ERR("set_ctl: invalid protocol %d %d.%d.%d.%d:%d %s\n",
			  urule->protocol, NIPQUAD(urule->vaddr),
			  ntohs(urule->vport), urule->sched_name);
		ret = -EFAULT;
		goto out_unlock;
	}

	/*
	 * Lookup the exact service by <protocol, vaddr, vport> or fwmark
	 * (the lookup takes a usecnt reference, dropped below)
	 */
	if (urule->vfwmark == 0)
		svc = __ip_vs_service_get(urule->protocol,
					  urule->vaddr, urule->vport);
	else
		svc = __ip_vs_svc_fwm_get(urule->vfwmark);

	/* every command except ADD requires an existing, matching service */
	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != urule->protocol)) {
		ret = -ESRCH;
		goto out_unlock;
	}

	switch (cmd) {
	case IP_VS_SO_SET_ADD:
		if (svc != NULL)
			ret = -EEXIST;
		else
			ret = ip_vs_add_service(urule, &svc);
		break;
	case IP_VS_SO_SET_EDIT:
		ret = ip_vs_edit_service(svc, urule);
		break;
	case IP_VS_SO_SET_DEL:
		/* on success the service is gone: skip the final put */
		ret = ip_vs_del_service(svc);
		if (!ret)
			goto out_unlock;
		break;
	case IP_VS_SO_SET_ADDDEST:
		ret = ip_vs_add_dest(svc, urule);
		break;
	case IP_VS_SO_SET_EDITDEST:
		ret = ip_vs_edit_dest(svc, urule);
		break;
	case IP_VS_SO_SET_DELDEST:
		ret = ip_vs_del_dest(svc, urule);
		break;
	case IP_VS_SO_SET_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	default:
		ret = -EINVAL;
	}

	if (svc)
		ip_vs_service_put(svc);

	/* cleanup chain: release in reverse order of acquisition */
  out_unlock:
	up(&__ip_vs_mutex);
  out_dec:
	MOD_DEC_USE_COUNT;
  out_free:
	kfree(urule);
	return ret;
}
1816
1817
/*
 *	Snapshot the counter portion of kernel stats into the user-visible
 *	structure, under the stats spinlock.
 *	NOTE(review): copies everything up to the embedded 'lock' member,
 *	so it relies on struct ip_vs_stats placing all counters before the
 *	lock and on struct ip_vs_stats_user matching that prefix layout —
 *	confirm against ip_vs.h if either struct changes.
 */
static inline void
__ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
	spin_lock_bh(&src->lock);
	memcpy(dst, src, (char*)&src->lock - (char*)src);
	spin_unlock_bh(&src->lock);
}
1825
1826 static inline int
__ip_vs_get_service_entries(const struct ip_vs_get_services * get,struct ip_vs_get_services * uptr)1827 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
1828 struct ip_vs_get_services *uptr)
1829 {
1830 int idx, count=0;
1831 struct ip_vs_service *svc;
1832 struct list_head *l;
1833 struct ip_vs_service_user entry;
1834 int ret = 0;
1835
1836 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1837 list_for_each (l, &ip_vs_svc_table[idx]) {
1838 if (count >= get->num_services)
1839 goto out;
1840 svc = list_entry(l, struct ip_vs_service, s_list);
1841 entry.protocol = svc->protocol;
1842 entry.addr = svc->addr;
1843 entry.port = svc->port;
1844 entry.fwmark = svc->fwmark;
1845 strncpy(entry.sched_name, svc->scheduler->name, sizeof(entry.sched_name));
1846 entry.sched_name[sizeof(entry.sched_name) - 1] = 0;
1847 entry.flags = svc->flags;
1848 entry.timeout = svc->timeout / HZ;
1849 entry.netmask = svc->netmask;
1850 entry.num_dests = svc->num_dests;
1851 __ip_vs_copy_stats(&entry.stats, &svc->stats);
1852 if (copy_to_user(&uptr->entrytable[count],
1853 &entry, sizeof(entry))) {
1854 ret = -EFAULT;
1855 goto out;
1856 }
1857 count++;
1858 }
1859 }
1860
1861 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1862 list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1863 if (count >= get->num_services)
1864 goto out;
1865 svc = list_entry(l, struct ip_vs_service, f_list);
1866 entry.protocol = svc->protocol;
1867 entry.addr = svc->addr;
1868 entry.port = svc->port;
1869 entry.fwmark = svc->fwmark;
1870 strncpy(entry.sched_name, svc->scheduler->name, sizeof(entry.sched_name));
1871 entry.sched_name[sizeof(entry.sched_name) - 1] = 0;
1872 entry.flags = svc->flags;
1873 entry.timeout = svc->timeout / HZ;
1874 entry.netmask = svc->netmask;
1875 entry.num_dests = svc->num_dests;
1876 __ip_vs_copy_stats(&entry.stats, &svc->stats);
1877 if (copy_to_user(&uptr->entrytable[count],
1878 &entry, sizeof(entry))) {
1879 ret = -EFAULT;
1880 goto out;
1881 }
1882 count++;
1883 }
1884 }
1885 out:
1886 return ret;
1887 }
1888
1889 static inline int
__ip_vs_get_dest_entries(const struct ip_vs_get_dests * get,struct ip_vs_get_dests * uptr)1890 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
1891 struct ip_vs_get_dests *uptr)
1892 {
1893 struct ip_vs_service *svc;
1894 int ret = 0;
1895
1896 if (get->fwmark)
1897 svc = __ip_vs_svc_fwm_get(get->fwmark);
1898 else
1899 svc = __ip_vs_service_get(get->protocol,
1900 get->addr, get->port);
1901 if (svc) {
1902 int count = 0;
1903 struct ip_vs_dest *dest;
1904 struct list_head *l, *e;
1905 struct ip_vs_dest_user entry;
1906
1907 l = &svc->destinations;
1908 for (e=l->next; e!=l; e=e->next) {
1909 if (count >= get->num_dests)
1910 break;
1911 dest = list_entry(e, struct ip_vs_dest, n_list);
1912 entry.addr = dest->addr;
1913 entry.port = dest->port;
1914 entry.flags = atomic_read(&dest->conn_flags);
1915 entry.weight = atomic_read(&dest->weight);
1916 entry.activeconns = atomic_read(&dest->activeconns);
1917 entry.inactconns = atomic_read(&dest->inactconns);
1918 __ip_vs_copy_stats(&entry.stats, &dest->stats);
1919 if (copy_to_user(&uptr->entrytable[count],
1920 &entry, sizeof(entry))) {
1921 ret = -EFAULT;
1922 break;
1923 }
1924 count++;
1925 }
1926 ip_vs_service_put(svc);
1927 } else
1928 ret = -ESRCH;
1929 return ret;
1930 }
1931
1932 static inline void
__ip_vs_get_timeouts(struct ip_vs_timeout_user * u)1933 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
1934 {
1935 u->tcp_timeout = vs_timeout_table.timeout[IP_VS_S_ESTABLISHED] / HZ;
1936 u->tcp_fin_timeout = vs_timeout_table.timeout[IP_VS_S_FIN_WAIT] / HZ;
1937 u->udp_timeout = vs_timeout_table.timeout[IP_VS_S_UDP] / HZ;
1938 }
1939
/*
 *	getsockopt() entry point for all IP_VS_SO_GET_* commands.
 *	Serializes on __ip_vs_mutex; *len is validated per command and
 *	(for GET_VERSION) updated to the bytes actually returned.
 *	Returns 0 or a negative errno.
 */
static int
do_ip_vs_get_ctl(struct sock *sk, int cmd, void *user, int *len)
{
	int ret = 0;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (down_interruptible(&__ip_vs_mutex))
		return -ERESTARTSYS;

	switch (cmd) {
	case IP_VS_SO_GET_VERSION:
	{
		/* version banner as a NUL-terminated string */
		char buf[64];

		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
		if (*len < strlen(buf)+1) {
			ret = -EINVAL;
			goto out;
		}
		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
			ret = -EFAULT;
			goto out;
		}
		*len = strlen(buf)+1;
	}
	break;

	case IP_VS_SO_GET_INFO:
	{
		/* NOTE(review): *len is not validated against
		 * sizeof(info) here, unlike the other cases — confirm
		 * whether that is intentional. */
		struct ip_vs_getinfo info;
		info.version = IP_VS_VERSION_CODE;
		info.size = IP_VS_CONN_TAB_SIZE;
		info.num_services = ip_vs_num_services;
		if (copy_to_user(user, &info, sizeof(info)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_SERVICES:
	{
		/* header + num_services entries; *len must match exactly */
		struct ip_vs_get_services get;

		if (*len < sizeof(get)) {
			IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
			ret = -EINVAL;
			goto out;
		}
		if (copy_from_user(&get, user, sizeof(get))) {
			ret = -EFAULT;
			goto out;
		}
		if (*len != (sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services)) {
			IP_VS_ERR("length: %u != %Zu\n", *len,
				  sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_service_entries(&get, user);
	}
	break;

	case IP_VS_SO_GET_SERVICE:
	{
		/* lookup one service and echo the request back filled in */
		struct ip_vs_service_user get;
		struct ip_vs_service *svc;

		if (*len != sizeof(get)) {
			IP_VS_ERR("length: %u != %Zu\n", *len, sizeof(get));
			ret = -EINVAL;
			goto out;
		}
		if (copy_from_user(&get, user, sizeof(get))) {
			ret = -EFAULT;
			goto out;
		}

		if (get.fwmark)
			svc = __ip_vs_svc_fwm_get(get.fwmark);
		else
			svc = __ip_vs_service_get(get.protocol,
						  get.addr, get.port);
		if (svc) {
			strncpy(get.sched_name, svc->scheduler->name, sizeof(get.sched_name));
			get.sched_name[sizeof(get.sched_name) - 1] = 0;
			get.flags = svc->flags;
			get.timeout = svc->timeout / HZ;
			get.netmask = svc->netmask;
			get.num_dests = svc->num_dests;
			__ip_vs_copy_stats(&get.stats, &svc->stats);
			if (copy_to_user(user, &get, *len) != 0)
				ret = -EFAULT;
			ip_vs_service_put(svc);
		} else
			ret = -ESRCH;
	}
	break;

	case IP_VS_SO_GET_DESTS:
	{
		/* header + num_dests entries; *len must match exactly */
		struct ip_vs_get_dests get;

		if (*len < sizeof(get)) {
			IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
			ret = -EINVAL;
			goto out;
		}
		if (copy_from_user(&get, user, sizeof(get))) {
			ret = -EFAULT;
			goto out;
		}
		if (*len != (sizeof(get) +
			     sizeof(struct ip_vs_dest_user)*get.num_dests)) {
			IP_VS_ERR("length: %u != %Zu\n", *len,
				  sizeof(get)+sizeof(struct ip_vs_dest_user)*get.num_dests);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_dest_entries(&get, user);
	}
	break;

	case IP_VS_SO_GET_TIMEOUTS:
	{
		struct ip_vs_timeout_user u;

		if (*len < sizeof(u)) {
			IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
			ret = -EINVAL;
			goto out;
		}
		__ip_vs_get_timeouts(&u);
		if (copy_to_user(user, &u, sizeof(u)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_DAEMON:
	{
		/* report sync-daemon state and interface names */
		struct ip_vs_daemon_user u;

		if (*len < sizeof(u)) {
			IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
			ret = -EINVAL;
			goto out;
		}
		u.state = ip_vs_sync_state;
		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
			strncpy(u.mcast_master_ifn, ip_vs_mcast_master_ifn, sizeof(u.mcast_master_ifn));
			u.mcast_master_ifn[sizeof(u.mcast_master_ifn) - 1] = 0;
		}
		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
			strncpy(u.mcast_backup_ifn, ip_vs_mcast_backup_ifn, sizeof(u.mcast_backup_ifn));
			u.mcast_backup_ifn[sizeof(u.mcast_backup_ifn) - 1] = 0;
		}
		if (copy_to_user(user, &u, sizeof(u)) != 0)
			ret = -EFAULT;
	}
	break;

	default:
		ret = -EINVAL;
	}

  out:
	up(&__ip_vs_mutex);
	return ret;
}
2110
2111
/* Netfilter sockopt registration: routes the IP_VS_BASE_CTL command
 * ranges on PF_INET sockets to do_ip_vs_set_ctl / do_ip_vs_get_ctl.
 * The leading { NULL, NULL } is the framework's list linkage. */
static struct nf_sockopt_ops ip_vs_sockopts = {
	{ NULL, NULL }, PF_INET,
	IP_VS_BASE_CTL, IP_VS_SO_SET_MAX+1, do_ip_vs_set_ctl,
	IP_VS_BASE_CTL, IP_VS_SO_GET_MAX+1, do_ip_vs_get_ctl
};
2117
2118
/*
 *	Initialize the IPVS control interface: sockopt handlers, /proc
 *	entries, sysctl subtree, hash tables, global stats and the
 *	periodic defense timer. Returns 0 or the nf_register_sockopt error.
 */
int ip_vs_control_init(void)
{
	int ret;
	int idx;

	EnterFunction(2);

	ret = nf_register_sockopt(&ip_vs_sockopts);
	if (ret) {
		IP_VS_ERR("cannot register sockopt.\n");
		return ret;
	}

	proc_net_create("ip_vs", 0, ip_vs_get_info);
	proc_net_create("ip_vs_stats", 0, ip_vs_stats_get_info);

	/* NOTE(review): the return of register_sysctl_table is stored but
	 * not checked for NULL — unregister in cleanup assumes success. */
	ipv4_vs_table.sysctl_header =
		register_sysctl_table(ipv4_vs_table.root_dir, 0);
	/*
	 * Initilize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable,
	 * ip_vs_schedulers.
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
	}
	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
	}

	/* zero global stats and start their rate estimator */
	memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
	ip_vs_stats.lock = SPIN_LOCK_UNLOCKED;
	ip_vs_new_estimator(&ip_vs_stats);

	/* Hook the defense timer */
	init_timer(&defense_timer);
	defense_timer.function = defense_timer_handler;
	defense_timer.expires = jiffies + DEFENSE_TIMER_PERIOD;
	add_timer(&defense_timer);

	LeaveFunction(2);
	return 0;
}
2162
/*
 *	Tear down everything set up by ip_vs_control_init(), in roughly
 *	reverse order: trash destinations, defense timer, stats estimator,
 *	sysctl subtree, /proc entries, and the sockopt registration.
 */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	ip_vs_trash_cleanup();
	/* del_timer_sync waits for a running handler to finish */
	del_timer_sync(&defense_timer);
	ip_vs_kill_estimator(&ip_vs_stats);
	unregister_sysctl_table(ipv4_vs_table.sysctl_header);
	proc_net_remove("ip_vs_stats");
	proc_net_remove("ip_vs");
	nf_unregister_sockopt(&ip_vs_sockopts);
	LeaveFunction(2);
}
2175