1 /*
2 * IPVS Application module
3 *
4 * Version: $Id: ip_vs_app.c,v 1.14 2001/11/23 14:34:10 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
14 * is that ip_vs_app module handles the reverse direction (incoming requests
15 * and outgoing responses). The ip_vs_app modules are only used for VS/NAT.
16 *
17 * IP_MASQ_APP application masquerading module
18 *
19 * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
20 *
21 */
22
23 #include <linux/module.h>
24 #include <linux/kernel.h>
25 #include <linux/skbuff.h>
26 #include <linux/in.h>
27 #include <linux/ip.h>
28 #include <net/protocol.h>
29 #include <asm/system.h>
30 #include <linux/stat.h>
31 #include <linux/proc_fs.h>
32
33 #include <net/ip_vs.h>
34
35 #define IP_VS_APP_TAB_SIZE 16 /* must be power of 2 */
36
37 #define IP_VS_APP_HASH(proto, port) ((port^proto) & (IP_VS_APP_TAB_SIZE-1))
38 #define IP_VS_APP_TYPE(proto, port) (proto<<16 | port)
39 #define IP_VS_APP_PORT(type) (type & 0xffff)
40 #define IP_VS_APP_PROTO(type) ((type>>16) & 0x00ff)
41
42
43 EXPORT_SYMBOL(register_ip_vs_app);
44 EXPORT_SYMBOL(unregister_ip_vs_app);
45
46
47 /*
48 * will hold ipvs app. hashed list heads
49 */
50 static struct list_head ip_vs_app_base[IP_VS_APP_TAB_SIZE];
51
52 /* lock for ip_vs_app table */
53 static rwlock_t __ip_vs_app_lock = RW_LOCK_UNLOCKED;
54
55
56 /*
57 * ip_vs_app registration routine
58 * port: host byte order.
59 */
register_ip_vs_app(struct ip_vs_app * vapp,unsigned short proto,__u16 port)60 int register_ip_vs_app(struct ip_vs_app *vapp,
61 unsigned short proto, __u16 port)
62 {
63 unsigned hash;
64
65 if (!vapp) {
66 IP_VS_ERR("register_ip_vs_app(): NULL arg\n");
67 return -EINVAL;
68 }
69
70 MOD_INC_USE_COUNT;
71
72 vapp->type = IP_VS_APP_TYPE(proto, port);
73 hash = IP_VS_APP_HASH(proto, port);
74
75 write_lock_bh(&__ip_vs_app_lock);
76 list_add(&vapp->n_list, &ip_vs_app_base[hash]);
77 write_unlock_bh(&__ip_vs_app_lock);
78
79 return 0;
80 }
81
82
83 /*
84 * ip_vs_app unregistration routine.
85 */
unregister_ip_vs_app(struct ip_vs_app * vapp)86 int unregister_ip_vs_app(struct ip_vs_app *vapp)
87 {
88 if (!vapp) {
89 IP_VS_ERR("unregister_ip_vs_app(): NULL arg\n");
90 return -EINVAL;
91 }
92
93 write_lock_bh(&__ip_vs_app_lock);
94 list_del(&vapp->n_list);
95 write_unlock_bh(&__ip_vs_app_lock);
96
97 MOD_DEC_USE_COUNT;
98
99 return 0;
100 }
101
102
/*
 * get ip_vs_app object by its proto and port (net byte order).
 *
 * On a successful match the owning module's use count has already been
 * incremented (via try_inc_mod_count below), so the caller owns that
 * reference and must release it with __MOD_DEC_USE_COUNT() when done —
 * ip_vs_unbind_app() is where that normally happens.
 */
static struct ip_vs_app * ip_vs_app_get(unsigned short proto, __u16 port)
{
	struct list_head *e;
	struct ip_vs_app *vapp;
	unsigned hash;
	unsigned type;

	port = ntohs(port);
	type = IP_VS_APP_TYPE(proto, port);
	hash = IP_VS_APP_HASH(proto, port);

	read_lock_bh(&__ip_vs_app_lock);

	list_for_each(e, &ip_vs_app_base[hash]) {
		vapp = list_entry(e, struct ip_vs_app, n_list);

		/*
		 * Test and MOD_INC_USE_COUNT atomically
		 */
		if (vapp->module && !try_inc_mod_count(vapp->module)) {
			/*
			 * This application module is just deleted
			 */
			continue;
		}
		if (type == vapp->type) {
			/* match: return with the module reference held */
			read_unlock_bh(&__ip_vs_app_lock);
			return vapp;
		}

		/* no match: drop the reference taken just above */
		if (vapp->module)
			__MOD_DEC_USE_COUNT(vapp->module);
	}

	read_unlock_bh(&__ip_vs_app_lock);
	return NULL;
}
143
144
145 /*
146 * Bind ip_vs_conn to its ip_vs_app based on proto and dport,
147 * and call the ip_vs_app constructor.
148 */
ip_vs_bind_app(struct ip_vs_conn * cp)149 struct ip_vs_app * ip_vs_bind_app(struct ip_vs_conn *cp)
150 {
151 struct ip_vs_app *vapp;
152
153 /* no need to bind app if its forwarding method is not NAT */
154 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
155 return NULL;
156
157 if (cp->protocol != IPPROTO_TCP && cp->protocol != IPPROTO_UDP)
158 return NULL;
159
160 /*
161 * don't allow binding if already bound
162 */
163 if (cp->app != NULL) {
164 IP_VS_ERR("ip_vs_bind_app(): "
165 "called for already bound object.\n");
166 return cp->app;
167 }
168
169 vapp = ip_vs_app_get(cp->protocol, cp->vport);
170
171 if (vapp != NULL) {
172 cp->app = vapp;
173
174 if (vapp->init_conn)
175 vapp->init_conn(vapp, cp);
176 }
177 return vapp;
178 }
179
180
181 /*
182 * Unbind cp from type object and call cp destructor (does not kfree()).
183 */
ip_vs_unbind_app(struct ip_vs_conn * cp)184 int ip_vs_unbind_app(struct ip_vs_conn *cp)
185 {
186 struct ip_vs_app *vapp = cp->app;
187
188 if (cp->protocol != IPPROTO_TCP && cp->protocol != IPPROTO_UDP)
189 return 0;
190
191 if (vapp != NULL) {
192 if (vapp->done_conn)
193 vapp->done_conn(vapp, cp);
194 cp->app = NULL;
195 if (vapp->module)
196 __MOD_DEC_USE_COUNT(vapp->module);
197 }
198 return (vapp != NULL);
199 }
200
201
202 /*
203 * Fixes th->seq based on ip_vs_seq info.
204 */
vs_fix_seq(const struct ip_vs_seq * vseq,struct tcphdr * th)205 static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
206 {
207 __u32 seq = ntohl(th->seq);
208
209 /*
210 * Adjust seq with delta-offset for all packets after
211 * the most recent resized pkt seq and with previous_delta offset
212 * for all packets before most recent resized pkt seq.
213 */
214 if (vseq->delta || vseq->previous_delta) {
215 if(after(seq, vseq->init_seq)) {
216 th->seq = htonl(seq + vseq->delta);
217 IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n",
218 vseq->delta);
219 } else {
220 th->seq = htonl(seq + vseq->previous_delta);
221 IP_VS_DBG(9, "vs_fix_seq(): added previous_delta "
222 "(%d) to seq\n", vseq->previous_delta);
223 }
224 }
225 }
226
227
228 /*
229 * Fixes th->ack_seq based on ip_vs_seq info.
230 */
231 static inline void
vs_fix_ack_seq(const struct ip_vs_seq * vseq,struct tcphdr * th)232 vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
233 {
234 __u32 ack_seq = ntohl(th->ack_seq);
235
236 /*
237 * Adjust ack_seq with delta-offset for
238 * the packets AFTER most recent resized pkt has caused a shift
239 * for packets before most recent resized pkt, use previous_delta
240 */
241 if (vseq->delta || vseq->previous_delta) {
242 /* since ack_seq is the number of octet that is expected
243 to receive next, so compare it with init_seq+delta */
244 if(after(ack_seq, vseq->init_seq+vseq->delta)) {
245 th->ack_seq = htonl(ack_seq - vseq->delta);
246 IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta "
247 "(%d) from ack_seq\n", vseq->delta);
248
249 } else {
250 th->ack_seq = htonl(ack_seq - vseq->previous_delta);
251 IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted "
252 "previous_delta (%d) from ack_seq\n",
253 vseq->previous_delta);
254 }
255 }
256 }
257
258
/*
 * Updates ip_vs_seq if pkt has been resized
 * Assumes already checked proto==IPPROTO_TCP and diff!=0.
 */
static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
				 unsigned flag, __u32 seq, int diff)
{
	/* spinlock is to keep updating cp->flags atomic */
	spin_lock(&cp->lock);
	/*
	 * Record the new delta only for the first resized packet, or for
	 * one newer than the last recorded resize; an older
	 * (retransmitted) packet must not roll the sequence state back.
	 */
	if ( !(cp->flags & flag) || after(seq, vseq->init_seq)) {
		vseq->previous_delta = vseq->delta;
		vseq->delta += diff;
		vseq->init_seq = seq;
		cp->flags |= flag;
	}
	spin_unlock(&cp->lock);
}
276
277
278 /*
279 * Output pkt hook. Will call bound ip_vs_app specific function
280 * called by ip_vs_out(), assumes previously checked cp!=NULL
281 * returns (new - old) skb->len diff.
282 */
ip_vs_app_pkt_out(struct ip_vs_conn * cp,struct sk_buff * skb)283 int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
284 {
285 struct ip_vs_app *vapp;
286 int diff;
287 struct iphdr *iph;
288 struct tcphdr *th;
289 __u32 seq;
290
291 /*
292 * check if application module is bound to
293 * this ip_vs_conn.
294 */
295 if ((vapp = cp->app) == NULL)
296 return 0;
297
298 iph = skb->nh.iph;
299 th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
300
301 /*
302 * Remember seq number in case this pkt gets resized
303 */
304 seq = ntohl(th->seq);
305
306 /*
307 * Fix seq stuff if flagged as so.
308 */
309 if (cp->protocol == IPPROTO_TCP) {
310 if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
311 vs_fix_seq(&cp->out_seq, th);
312 if (cp->flags & IP_VS_CONN_F_IN_SEQ)
313 vs_fix_ack_seq(&cp->in_seq, th);
314 }
315
316 /*
317 * Call private output hook function
318 */
319 if (vapp->pkt_out == NULL)
320 return 0;
321
322 diff = vapp->pkt_out(vapp, cp, skb);
323
324 /*
325 * Update ip_vs seq stuff if len has changed.
326 */
327 if (diff != 0 && cp->protocol == IPPROTO_TCP)
328 vs_seq_update(cp, &cp->out_seq,
329 IP_VS_CONN_F_OUT_SEQ, seq, diff);
330
331 return diff;
332 }
333
334
335 /*
336 * Input pkt hook. Will call bound ip_vs_app specific function
337 * called by ip_fw_demasquerade(), assumes previously checked cp!=NULL.
338 * returns (new - old) skb->len diff.
339 */
ip_vs_app_pkt_in(struct ip_vs_conn * cp,struct sk_buff * skb)340 int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
341 {
342 struct ip_vs_app *vapp;
343 int diff;
344 struct iphdr *iph;
345 struct tcphdr *th;
346 __u32 seq;
347
348 /*
349 * check if application module is bound to
350 * this ip_vs_conn.
351 */
352 if ((vapp = cp->app) == NULL)
353 return 0;
354
355 iph = skb->nh.iph;
356 th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
357
358 /*
359 * Remember seq number in case this pkt gets resized
360 */
361 seq = ntohl(th->seq);
362
363 /*
364 * Fix seq stuff if flagged as so.
365 */
366 if (cp->protocol == IPPROTO_TCP) {
367 if (cp->flags & IP_VS_CONN_F_IN_SEQ)
368 vs_fix_seq(&cp->in_seq, th);
369 if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
370 vs_fix_ack_seq(&cp->out_seq, th);
371 }
372
373 /*
374 * Call private input hook function
375 */
376 if (vapp->pkt_in == NULL)
377 return 0;
378
379 diff = vapp->pkt_in(vapp, cp, skb);
380
381 /*
382 * Update ip_vs seq stuff if len has changed.
383 */
384 if (diff != 0 && cp->protocol == IPPROTO_TCP)
385 vs_seq_update(cp, &cp->in_seq,
386 IP_VS_CONN_F_IN_SEQ, seq, diff);
387
388 return diff;
389 }
390
391
/*
 * /proc/net/ip_vs_app entry function
 *
 * Old-style get_info proc handler: emits one fixed-width 64-byte line
 * per registered application, honouring the caller's offset/length
 * window via the `pos` bookkeeping below.
 */
static int ip_vs_app_getinfo(char *buffer, char **start, off_t offset,
			     int length)
{
	off_t pos=0;
	int len=0;
	char temp[64];
	int idx;
	struct ip_vs_app *vapp;
	struct list_head *e;

	/* the header line occupies the first 64 bytes of the output */
	pos = 64;
	if (pos > offset) {
		len += sprintf(buffer+len, "%-63s\n",
			       "prot port usecnt name");
	}

	read_lock_bh(&__ip_vs_app_lock);
	for (idx=0 ; idx < IP_VS_APP_TAB_SIZE; idx++) {
		list_for_each (e, &ip_vs_app_base[idx]) {
			vapp = list_entry(e, struct ip_vs_app, n_list);

			/* skip entries lying entirely before the window */
			pos += 64;
			if (pos <= offset)
				continue;
			sprintf(temp, "%-3s %-7u %-6d %-17s",
				ip_vs_proto_name(IP_VS_APP_PROTO(vapp->type)),
				IP_VS_APP_PORT(vapp->type),
				vapp->module?GET_USE_COUNT(vapp->module):0,
				vapp->name);
			len += sprintf(buffer+len, "%-63s\n", temp);
			/* stop once the requested window is filled */
			if (pos >= offset+length)
				goto done;
		}
	}
  done:
	read_unlock_bh(&__ip_vs_app_lock);

	*start = buffer+len-(pos-offset); /* Start of wanted data */
	len = pos-offset;
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
440
441
/*
 * Replace a segment of data with a new segment
 *
 * o_buf points at o_len bytes inside skb->data to be replaced by the
 * n_len bytes at n_buf; pri is the allocation priority used if the skb
 * head must be expanded.  Returns 0 on success, -ENOMEM if expansion
 * fails.  The IP total length is updated here; checksum fixing is left
 * to the caller.
 */
int ip_vs_skb_replace(struct sk_buff *skb, int pri,
		      char *o_buf, int o_len, char *n_buf, int n_len)
{
	struct iphdr *iph;
	int diff;
	int o_offset;
	int o_left;

	EnterFunction(9);

	diff = n_len - o_len;
	o_offset = o_buf - (char *)skb->data;
	/* The length of left data after o_buf+o_len in the skb data */
	o_left = skb->len - (o_offset + o_len);

	if (diff <= 0) {
		/* shrinking (or same size): shift the tail, then trim */
		memmove(o_buf + n_len, o_buf + o_len, o_left);
		memcpy(o_buf, n_buf, n_len);
		skb_trim(skb, skb->len + diff);
	} else if (diff <= skb_tailroom(skb)) {
		/* growing, but the extra bytes fit in the tailroom */
		skb_put(skb, diff);
		memmove(o_buf + n_len, o_buf + o_len, o_left);
		memcpy(o_buf, n_buf, n_len);
	} else {
		/*
		 * Not enough room: reallocate the skb head.  This moves
		 * skb->data, so o_buf is stale afterwards -- only the
		 * saved o_offset may be used from here on.
		 */
		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
			return -ENOMEM;
		skb_put(skb, diff);
		memmove(skb->data + o_offset + n_len,
			skb->data + o_offset + o_len, o_left);
		memcpy(skb->data + o_offset, n_buf, n_len);
	}

	/* must update the iph total length here */
	iph = skb->nh.iph;
	iph->tot_len = htons(skb->len);

	LeaveFunction(9);
	return 0;
}
484
485
ip_vs_app_init(void)486 int ip_vs_app_init(void)
487 {
488 int idx;
489
490 for (idx=0 ; idx < IP_VS_APP_TAB_SIZE; idx++) {
491 INIT_LIST_HEAD(&ip_vs_app_base[idx]);
492 }
493
494 /* we will replace it with proc_net_ipvs_create() soon */
495 proc_net_create("ip_vs_app", 0, ip_vs_app_getinfo);
496 return 0;
497 }
498
/*
 * Remove the proc entry created by ip_vs_app_init().
 */
void ip_vs_app_cleanup(void)
{
	proc_net_remove("ip_vs_app");
}
503