1 /*
2  * IPVS         Application module
3  *
4  * Version:     $Id: ip_vs_app.c,v 1.14 2001/11/23 14:34:10 wensong Exp $
5  *
6  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
7  *
8  *              This program is free software; you can redistribute it and/or
9  *              modify it under the terms of the GNU General Public License
10  *              as published by the Free Software Foundation; either version
11  *              2 of the License, or (at your option) any later version.
12  *
13  * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
14  * is that ip_vs_app module handles the reverse direction (incoming requests
15  * and outgoing responses). The ip_vs_app modules are only used for VS/NAT.
16  *
17  *		IP_MASQ_APP application masquerading module
18  *
19  * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
20  *
21  */
22 
23 #include <linux/module.h>
24 #include <linux/kernel.h>
25 #include <linux/skbuff.h>
26 #include <linux/in.h>
27 #include <linux/ip.h>
28 #include <net/protocol.h>
29 #include <asm/system.h>
30 #include <linux/stat.h>
31 #include <linux/proc_fs.h>
32 
33 #include <net/ip_vs.h>
34 
35 #define IP_VS_APP_TAB_SIZE  16          /* must be power of 2 */
36 
37 #define IP_VS_APP_HASH(proto, port) ((port^proto) & (IP_VS_APP_TAB_SIZE-1))
38 #define IP_VS_APP_TYPE(proto, port) (proto<<16 | port)
39 #define IP_VS_APP_PORT(type)        (type & 0xffff)
40 #define IP_VS_APP_PROTO(type)       ((type>>16) & 0x00ff)
41 
42 
43 EXPORT_SYMBOL(register_ip_vs_app);
44 EXPORT_SYMBOL(unregister_ip_vs_app);
45 
46 
47 /*
48  *	will hold ipvs app. hashed list heads
49  */
50 static struct list_head ip_vs_app_base[IP_VS_APP_TAB_SIZE];
51 
52 /* lock for ip_vs_app table */
53 static rwlock_t __ip_vs_app_lock = RW_LOCK_UNLOCKED;
54 
55 
56 /*
57  *	ip_vs_app registration routine
58  *	port: host byte order.
59  */
register_ip_vs_app(struct ip_vs_app * vapp,unsigned short proto,__u16 port)60 int register_ip_vs_app(struct ip_vs_app *vapp,
61 		       unsigned short proto, __u16 port)
62 {
63 	unsigned hash;
64 
65 	if (!vapp) {
66 		IP_VS_ERR("register_ip_vs_app(): NULL arg\n");
67 		return -EINVAL;
68 	}
69 
70 	MOD_INC_USE_COUNT;
71 
72 	vapp->type = IP_VS_APP_TYPE(proto, port);
73 	hash = IP_VS_APP_HASH(proto, port);
74 
75 	write_lock_bh(&__ip_vs_app_lock);
76 	list_add(&vapp->n_list, &ip_vs_app_base[hash]);
77 	write_unlock_bh(&__ip_vs_app_lock);
78 
79 	return 0;
80 }
81 
82 
83 /*
84  *	ip_vs_app unregistration routine.
85  */
unregister_ip_vs_app(struct ip_vs_app * vapp)86 int unregister_ip_vs_app(struct ip_vs_app *vapp)
87 {
88 	if (!vapp) {
89 		IP_VS_ERR("unregister_ip_vs_app(): NULL arg\n");
90 		return -EINVAL;
91 	}
92 
93 	write_lock_bh(&__ip_vs_app_lock);
94 	list_del(&vapp->n_list);
95 	write_unlock_bh(&__ip_vs_app_lock);
96 
97 	MOD_DEC_USE_COUNT;
98 
99 	return 0;
100 }
101 
102 
103 /*
104  *	get ip_vs_app object by its proto and port (net byte order).
105  */
ip_vs_app_get(unsigned short proto,__u16 port)106 static struct ip_vs_app * ip_vs_app_get(unsigned short proto, __u16 port)
107 {
108 	struct list_head *e;
109 	struct ip_vs_app *vapp;
110 	unsigned hash;
111 	unsigned type;
112 
113 	port = ntohs(port);
114 	type = IP_VS_APP_TYPE(proto, port);
115 	hash = IP_VS_APP_HASH(proto, port);
116 
117 	read_lock_bh(&__ip_vs_app_lock);
118 
119 	list_for_each(e, &ip_vs_app_base[hash]) {
120 		vapp = list_entry(e, struct ip_vs_app, n_list);
121 
122 		/*
123 		 * Test and MOD_INC_USE_COUNT atomically
124 		 */
125 		if (vapp->module && !try_inc_mod_count(vapp->module)) {
126 			/*
127 			 * This application module is just deleted
128 			 */
129 			continue;
130 		}
131 		if (type == vapp->type) {
132 			read_unlock_bh(&__ip_vs_app_lock);
133 			return vapp;
134 		}
135 
136 		if (vapp->module)
137 			__MOD_DEC_USE_COUNT(vapp->module);
138 	}
139 
140 	read_unlock_bh(&__ip_vs_app_lock);
141 	return NULL;
142 }
143 
144 
145 /*
146  *	Bind ip_vs_conn to its ip_vs_app based on proto and dport,
147  *	and call the ip_vs_app constructor.
148  */
ip_vs_bind_app(struct ip_vs_conn * cp)149 struct ip_vs_app * ip_vs_bind_app(struct ip_vs_conn *cp)
150 {
151 	struct ip_vs_app *vapp;
152 
153 	/* no need to bind app if its forwarding method is not NAT */
154 	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
155 		return NULL;
156 
157 	if (cp->protocol != IPPROTO_TCP && cp->protocol != IPPROTO_UDP)
158 		return NULL;
159 
160 	/*
161 	 *	don't allow binding if already bound
162 	 */
163 	if (cp->app != NULL) {
164 		IP_VS_ERR("ip_vs_bind_app(): "
165 			  "called for already bound object.\n");
166 		return cp->app;
167 	}
168 
169 	vapp = ip_vs_app_get(cp->protocol, cp->vport);
170 
171 	if (vapp != NULL) {
172 		cp->app = vapp;
173 
174 		if (vapp->init_conn)
175 			vapp->init_conn(vapp, cp);
176 	}
177 	return vapp;
178 }
179 
180 
181 /*
182  *	Unbind cp from type object and call cp destructor (does not kfree()).
183  */
ip_vs_unbind_app(struct ip_vs_conn * cp)184 int ip_vs_unbind_app(struct ip_vs_conn *cp)
185 {
186 	struct ip_vs_app *vapp = cp->app;
187 
188 	if (cp->protocol != IPPROTO_TCP && cp->protocol != IPPROTO_UDP)
189 		return 0;
190 
191 	if (vapp != NULL) {
192 		if (vapp->done_conn)
193 			vapp->done_conn(vapp, cp);
194 		cp->app = NULL;
195 		if (vapp->module)
196 			__MOD_DEC_USE_COUNT(vapp->module);
197 	}
198 	return (vapp != NULL);
199 }
200 
201 
202 /*
203  *	Fixes th->seq based on ip_vs_seq info.
204  */
vs_fix_seq(const struct ip_vs_seq * vseq,struct tcphdr * th)205 static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
206 {
207 	__u32 seq = ntohl(th->seq);
208 
209 	/*
210 	 *	Adjust seq with delta-offset for all packets after
211 	 *	the most recent resized pkt seq and with previous_delta offset
212 	 *	for all packets	before most recent resized pkt seq.
213 	 */
214 	if (vseq->delta || vseq->previous_delta) {
215 		if(after(seq, vseq->init_seq)) {
216 			th->seq = htonl(seq + vseq->delta);
217 			IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n",
218 				  vseq->delta);
219 		} else {
220 			th->seq = htonl(seq + vseq->previous_delta);
221 			IP_VS_DBG(9, "vs_fix_seq(): added previous_delta "
222 				  "(%d) to seq\n", vseq->previous_delta);
223 		}
224 	}
225 }
226 
227 
228 /*
229  *	Fixes th->ack_seq based on ip_vs_seq info.
230  */
231 static inline void
vs_fix_ack_seq(const struct ip_vs_seq * vseq,struct tcphdr * th)232 vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
233 {
234 	__u32 ack_seq = ntohl(th->ack_seq);
235 
236 	/*
237 	 * Adjust ack_seq with delta-offset for
238 	 * the packets AFTER most recent resized pkt has caused a shift
239 	 * for packets before most recent resized pkt, use previous_delta
240 	 */
241 	if (vseq->delta || vseq->previous_delta) {
242 		/* since ack_seq is the number of octet that is expected
243 		   to receive next, so compare it with init_seq+delta */
244 		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
245 			th->ack_seq = htonl(ack_seq - vseq->delta);
246 			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta "
247 				  "(%d) from ack_seq\n", vseq->delta);
248 
249 		} else {
250 			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
251 			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted "
252 				  "previous_delta (%d) from ack_seq\n",
253 				  vseq->previous_delta);
254 		}
255 	}
256 }
257 
258 
259 /*
260  *	Updates ip_vs_seq if pkt has been resized
261  *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
262  */
vs_seq_update(struct ip_vs_conn * cp,struct ip_vs_seq * vseq,unsigned flag,__u32 seq,int diff)263 static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
264 				 unsigned flag, __u32 seq, int diff)
265 {
266 	/* spinlock is to keep updating cp->flags atomic */
267 	spin_lock(&cp->lock);
268 	if ( !(cp->flags & flag) || after(seq, vseq->init_seq)) {
269 		vseq->previous_delta = vseq->delta;
270 		vseq->delta += diff;
271 		vseq->init_seq = seq;
272 		cp->flags |= flag;
273 	}
274 	spin_unlock(&cp->lock);
275 }
276 
277 
278 /*
279  *	Output pkt hook. Will call bound ip_vs_app specific function
280  *	called by ip_vs_out(), assumes previously checked cp!=NULL
281  *	returns (new - old) skb->len diff.
282  */
ip_vs_app_pkt_out(struct ip_vs_conn * cp,struct sk_buff * skb)283 int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
284 {
285 	struct ip_vs_app *vapp;
286 	int diff;
287 	struct iphdr *iph;
288 	struct tcphdr *th;
289 	__u32 seq;
290 
291 	/*
292 	 *	check if application module is bound to
293 	 *	this ip_vs_conn.
294 	 */
295 	if ((vapp = cp->app) == NULL)
296 		return 0;
297 
298 	iph = skb->nh.iph;
299 	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
300 
301 	/*
302 	 *	Remember seq number in case this pkt gets resized
303 	 */
304 	seq = ntohl(th->seq);
305 
306 	/*
307 	 *	Fix seq stuff if flagged as so.
308 	 */
309 	if (cp->protocol == IPPROTO_TCP) {
310 		if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
311 			vs_fix_seq(&cp->out_seq, th);
312 		if (cp->flags & IP_VS_CONN_F_IN_SEQ)
313 			vs_fix_ack_seq(&cp->in_seq, th);
314 	}
315 
316 	/*
317 	 *	Call private output hook function
318 	 */
319 	if (vapp->pkt_out == NULL)
320 		return 0;
321 
322 	diff = vapp->pkt_out(vapp, cp, skb);
323 
324 	/*
325 	 *	Update ip_vs seq stuff if len has changed.
326 	 */
327 	if (diff != 0 && cp->protocol == IPPROTO_TCP)
328 		vs_seq_update(cp, &cp->out_seq,
329 			      IP_VS_CONN_F_OUT_SEQ, seq, diff);
330 
331 	return diff;
332 }
333 
334 
335 /*
336  *	Input pkt hook. Will call bound ip_vs_app specific function
337  *	called by ip_fw_demasquerade(), assumes previously checked cp!=NULL.
338  *	returns (new - old) skb->len diff.
339  */
ip_vs_app_pkt_in(struct ip_vs_conn * cp,struct sk_buff * skb)340 int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
341 {
342 	struct ip_vs_app *vapp;
343 	int diff;
344 	struct iphdr *iph;
345 	struct tcphdr *th;
346 	__u32 seq;
347 
348 	/*
349 	 *	check if application module is bound to
350 	 *	this ip_vs_conn.
351 	 */
352 	if ((vapp = cp->app) == NULL)
353 		return 0;
354 
355 	iph = skb->nh.iph;
356 	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
357 
358 	/*
359 	 *	Remember seq number in case this pkt gets resized
360 	 */
361 	seq = ntohl(th->seq);
362 
363 	/*
364 	 *	Fix seq stuff if flagged as so.
365 	 */
366 	if (cp->protocol == IPPROTO_TCP) {
367 		if (cp->flags & IP_VS_CONN_F_IN_SEQ)
368 			vs_fix_seq(&cp->in_seq, th);
369 		if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
370 			vs_fix_ack_seq(&cp->out_seq, th);
371 	}
372 
373 	/*
374 	 *	Call private input hook function
375 	 */
376 	if (vapp->pkt_in == NULL)
377 		return 0;
378 
379 	diff = vapp->pkt_in(vapp, cp, skb);
380 
381 	/*
382 	 *	Update ip_vs seq stuff if len has changed.
383 	 */
384 	if (diff != 0 && cp->protocol == IPPROTO_TCP)
385 		vs_seq_update(cp, &cp->in_seq,
386 			      IP_VS_CONN_F_IN_SEQ, seq, diff);
387 
388 	return diff;
389 }
390 
391 
392 /*
393  *	/proc/net/ip_vs_app entry function
394  */
ip_vs_app_getinfo(char * buffer,char ** start,off_t offset,int length)395 static int ip_vs_app_getinfo(char *buffer, char **start, off_t offset,
396 			     int length)
397 {
398 	off_t pos=0;
399 	int len=0;
400 	char temp[64];
401 	int idx;
402 	struct ip_vs_app *vapp;
403 	struct list_head *e;
404 
405 	pos = 64;
406 	if (pos > offset) {
407 		len += sprintf(buffer+len, "%-63s\n",
408 			       "prot port    usecnt name");
409 	}
410 
411 	read_lock_bh(&__ip_vs_app_lock);
412 	for (idx=0 ; idx < IP_VS_APP_TAB_SIZE; idx++) {
413 		list_for_each (e, &ip_vs_app_base[idx]) {
414 			vapp = list_entry(e, struct ip_vs_app, n_list);
415 
416 			pos += 64;
417 			if (pos <= offset)
418 				continue;
419 			sprintf(temp, "%-3s  %-7u %-6d %-17s",
420 				ip_vs_proto_name(IP_VS_APP_PROTO(vapp->type)),
421 				IP_VS_APP_PORT(vapp->type),
422 				vapp->module?GET_USE_COUNT(vapp->module):0,
423 				vapp->name);
424 			len += sprintf(buffer+len, "%-63s\n", temp);
425 			if (pos >= offset+length)
426 				goto done;
427 		}
428 	}
429   done:
430 	read_unlock_bh(&__ip_vs_app_lock);
431 
432 	*start = buffer+len-(pos-offset);       /* Start of wanted data */
433 	len = pos-offset;
434 	if (len > length)
435 		len = length;
436 	if (len < 0)
437 		len = 0;
438 	return len;
439 }
440 
441 
442 /*
443  *	Replace a segment of data with a new segment
444  */
ip_vs_skb_replace(struct sk_buff * skb,int pri,char * o_buf,int o_len,char * n_buf,int n_len)445 int ip_vs_skb_replace(struct sk_buff *skb, int pri,
446 		      char *o_buf, int o_len, char *n_buf, int n_len)
447 {
448 	struct iphdr *iph;
449 	int diff;
450 	int o_offset;
451 	int o_left;
452 
453 	EnterFunction(9);
454 
455 	diff = n_len - o_len;
456 	o_offset = o_buf - (char *)skb->data;
457 	/* The length of left data after o_buf+o_len in the skb data */
458 	o_left = skb->len - (o_offset + o_len);
459 
460 	if (diff <= 0) {
461 		memmove(o_buf + n_len, o_buf + o_len, o_left);
462 		memcpy(o_buf, n_buf, n_len);
463 		skb_trim(skb, skb->len + diff);
464 	} else if (diff <= skb_tailroom(skb)) {
465 		skb_put(skb, diff);
466 		memmove(o_buf + n_len, o_buf + o_len, o_left);
467 		memcpy(o_buf, n_buf, n_len);
468 	} else {
469 		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
470 			return -ENOMEM;
471 		skb_put(skb, diff);
472 		memmove(skb->data + o_offset + n_len,
473 			skb->data + o_offset + o_len, o_left);
474 		memcpy(skb->data + o_offset, n_buf, n_len);
475 	}
476 
477 	/* must update the iph total length here */
478 	iph = skb->nh.iph;
479 	iph->tot_len = htons(skb->len);
480 
481 	LeaveFunction(9);
482 	return 0;
483 }
484 
485 
ip_vs_app_init(void)486 int ip_vs_app_init(void)
487 {
488 	int idx;
489 
490 	for (idx=0 ; idx < IP_VS_APP_TAB_SIZE; idx++) {
491 		INIT_LIST_HEAD(&ip_vs_app_base[idx]);
492 	}
493 
494 	/* we will replace it with proc_net_ipvs_create() soon */
495 	proc_net_create("ip_vs_app", 0, ip_vs_app_getinfo);
496 	return 0;
497 }
498 
/*
 *	Teardown: remove the /proc/net/ip_vs_app entry.
 */
void ip_vs_app_cleanup(void)
{
	proc_net_remove("ip_vs_app");
}
503