1 /*
2  * ip_vs_app.c: Application module support for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
12  * is that ip_vs_app module handles the reverse direction (incoming requests
13  * and outgoing responses).
14  *
15  *		IP_MASQ_APP application masquerading module
16  *
17  * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
18  *
19  */
20 
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23 
24 #include <linux/module.h>
25 #include <linux/kernel.h>
26 #include <linux/skbuff.h>
27 #include <linux/in.h>
28 #include <linux/ip.h>
29 #include <linux/netfilter.h>
30 #include <linux/slab.h>
31 #include <net/net_namespace.h>
32 #include <net/protocol.h>
33 #include <net/tcp.h>
34 #include <linux/stat.h>
35 #include <linux/proc_fs.h>
36 #include <linux/seq_file.h>
37 #include <linux/mutex.h>
38 
39 #include <net/ip_vs.h>
40 
/*
 *	Taken around the app/incarnation list updates and the /proc
 *	iteration in this file.
 */
static DEFINE_MUTEX(__ip_vs_app_mutex);

/* Registration entry points exported from this file. */
EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);
46 
47 /*
48  *	Get an ip_vs_app object
49  */
ip_vs_app_get(struct ip_vs_app * app)50 static inline int ip_vs_app_get(struct ip_vs_app *app)
51 {
52 	return try_module_get(app->module);
53 }
54 
55 
ip_vs_app_put(struct ip_vs_app * app)56 static inline void ip_vs_app_put(struct ip_vs_app *app)
57 {
58 	module_put(app->module);
59 }
60 
61 
62 /*
63  *	Allocate/initialize app incarnation and register it in proto apps.
64  */
65 static int
ip_vs_app_inc_new(struct net * net,struct ip_vs_app * app,__u16 proto,__u16 port)66 ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
67 		  __u16 port)
68 {
69 	struct ip_vs_protocol *pp;
70 	struct ip_vs_app *inc;
71 	int ret;
72 
73 	if (!(pp = ip_vs_proto_get(proto)))
74 		return -EPROTONOSUPPORT;
75 
76 	if (!pp->unregister_app)
77 		return -EOPNOTSUPP;
78 
79 	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
80 	if (!inc)
81 		return -ENOMEM;
82 	INIT_LIST_HEAD(&inc->p_list);
83 	INIT_LIST_HEAD(&inc->incs_list);
84 	inc->app = app;
85 	inc->port = htons(port);
86 	atomic_set(&inc->usecnt, 0);
87 
88 	if (app->timeouts) {
89 		inc->timeout_table =
90 			ip_vs_create_timeout_table(app->timeouts,
91 						   app->timeouts_size);
92 		if (!inc->timeout_table) {
93 			ret = -ENOMEM;
94 			goto out;
95 		}
96 	}
97 
98 	ret = pp->register_app(net, inc);
99 	if (ret)
100 		goto out;
101 
102 	list_add(&inc->a_list, &app->incs_list);
103 	IP_VS_DBG(9, "%s App %s:%u registered\n",
104 		  pp->name, inc->name, ntohs(inc->port));
105 
106 	return 0;
107 
108   out:
109 	kfree(inc->timeout_table);
110 	kfree(inc);
111 	return ret;
112 }
113 
114 
115 /*
116  *	Release app incarnation
117  */
118 static void
ip_vs_app_inc_release(struct net * net,struct ip_vs_app * inc)119 ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
120 {
121 	struct ip_vs_protocol *pp;
122 
123 	if (!(pp = ip_vs_proto_get(inc->protocol)))
124 		return;
125 
126 	if (pp->unregister_app)
127 		pp->unregister_app(net, inc);
128 
129 	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
130 		  pp->name, inc->name, ntohs(inc->port));
131 
132 	list_del(&inc->a_list);
133 
134 	kfree(inc->timeout_table);
135 	kfree(inc);
136 }
137 
138 
139 /*
140  *	Get reference to app inc (only called from softirq)
141  *
142  */
ip_vs_app_inc_get(struct ip_vs_app * inc)143 int ip_vs_app_inc_get(struct ip_vs_app *inc)
144 {
145 	int result;
146 
147 	atomic_inc(&inc->usecnt);
148 	if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
149 		atomic_dec(&inc->usecnt);
150 	return result;
151 }
152 
153 
154 /*
155  *	Put the app inc (only called from timer or net softirq)
156  */
ip_vs_app_inc_put(struct ip_vs_app * inc)157 void ip_vs_app_inc_put(struct ip_vs_app *inc)
158 {
159 	ip_vs_app_put(inc->app);
160 	atomic_dec(&inc->usecnt);
161 }
162 
163 
164 /*
165  *	Register an application incarnation in protocol applications
166  */
167 int
register_ip_vs_app_inc(struct net * net,struct ip_vs_app * app,__u16 proto,__u16 port)168 register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
169 		       __u16 port)
170 {
171 	int result;
172 
173 	mutex_lock(&__ip_vs_app_mutex);
174 
175 	result = ip_vs_app_inc_new(net, app, proto, port);
176 
177 	mutex_unlock(&__ip_vs_app_mutex);
178 
179 	return result;
180 }
181 
182 
183 /*
184  *	ip_vs_app registration routine
185  */
register_ip_vs_app(struct net * net,struct ip_vs_app * app)186 int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
187 {
188 	struct netns_ipvs *ipvs = net_ipvs(net);
189 	/* increase the module use count */
190 	ip_vs_use_count_inc();
191 
192 	mutex_lock(&__ip_vs_app_mutex);
193 
194 	list_add(&app->a_list, &ipvs->app_list);
195 
196 	mutex_unlock(&__ip_vs_app_mutex);
197 
198 	return 0;
199 }
200 
201 
202 /*
203  *	ip_vs_app unregistration routine
204  *	We are sure there are no app incarnations attached to services
205  */
unregister_ip_vs_app(struct net * net,struct ip_vs_app * app)206 void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
207 {
208 	struct ip_vs_app *inc, *nxt;
209 
210 	mutex_lock(&__ip_vs_app_mutex);
211 
212 	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
213 		ip_vs_app_inc_release(net, inc);
214 	}
215 
216 	list_del(&app->a_list);
217 
218 	mutex_unlock(&__ip_vs_app_mutex);
219 
220 	/* decrease the module use count */
221 	ip_vs_use_count_dec();
222 }
223 
224 
225 /*
226  *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
227  */
ip_vs_bind_app(struct ip_vs_conn * cp,struct ip_vs_protocol * pp)228 int ip_vs_bind_app(struct ip_vs_conn *cp,
229 		   struct ip_vs_protocol *pp)
230 {
231 	return pp->app_conn_bind(cp);
232 }
233 
234 
235 /*
236  *	Unbind cp from application incarnation (called by cp destructor)
237  */
ip_vs_unbind_app(struct ip_vs_conn * cp)238 void ip_vs_unbind_app(struct ip_vs_conn *cp)
239 {
240 	struct ip_vs_app *inc = cp->app;
241 
242 	if (!inc)
243 		return;
244 
245 	if (inc->unbind_conn)
246 		inc->unbind_conn(inc, cp);
247 	if (inc->done_conn)
248 		inc->done_conn(inc, cp);
249 	ip_vs_app_inc_put(inc);
250 	cp->app = NULL;
251 }
252 
253 
254 /*
255  *	Fixes th->seq based on ip_vs_seq info.
256  */
vs_fix_seq(const struct ip_vs_seq * vseq,struct tcphdr * th)257 static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
258 {
259 	__u32 seq = ntohl(th->seq);
260 
261 	/*
262 	 *	Adjust seq with delta-offset for all packets after
263 	 *	the most recent resized pkt seq and with previous_delta offset
264 	 *	for all packets	before most recent resized pkt seq.
265 	 */
266 	if (vseq->delta || vseq->previous_delta) {
267 		if(after(seq, vseq->init_seq)) {
268 			th->seq = htonl(seq + vseq->delta);
269 			IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
270 				  __func__, vseq->delta);
271 		} else {
272 			th->seq = htonl(seq + vseq->previous_delta);
273 			IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
274 				  __func__, vseq->previous_delta);
275 		}
276 	}
277 }
278 
279 
280 /*
281  *	Fixes th->ack_seq based on ip_vs_seq info.
282  */
283 static inline void
vs_fix_ack_seq(const struct ip_vs_seq * vseq,struct tcphdr * th)284 vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
285 {
286 	__u32 ack_seq = ntohl(th->ack_seq);
287 
288 	/*
289 	 * Adjust ack_seq with delta-offset for
290 	 * the packets AFTER most recent resized pkt has caused a shift
291 	 * for packets before most recent resized pkt, use previous_delta
292 	 */
293 	if (vseq->delta || vseq->previous_delta) {
294 		/* since ack_seq is the number of octet that is expected
295 		   to receive next, so compare it with init_seq+delta */
296 		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
297 			th->ack_seq = htonl(ack_seq - vseq->delta);
298 			IP_VS_DBG(9, "%s(): subtracted delta "
299 				  "(%d) from ack_seq\n", __func__, vseq->delta);
300 
301 		} else {
302 			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
303 			IP_VS_DBG(9, "%s(): subtracted "
304 				  "previous_delta (%d) from ack_seq\n",
305 				  __func__, vseq->previous_delta);
306 		}
307 	}
308 }
309 
310 
311 /*
312  *	Updates ip_vs_seq if pkt has been resized
313  *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
314  */
vs_seq_update(struct ip_vs_conn * cp,struct ip_vs_seq * vseq,unsigned flag,__u32 seq,int diff)315 static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
316 				 unsigned flag, __u32 seq, int diff)
317 {
318 	/* spinlock is to keep updating cp->flags atomic */
319 	spin_lock(&cp->lock);
320 	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
321 		vseq->previous_delta = vseq->delta;
322 		vseq->delta += diff;
323 		vseq->init_seq = seq;
324 		cp->flags |= flag;
325 	}
326 	spin_unlock(&cp->lock);
327 }
328 
app_tcp_pkt_out(struct ip_vs_conn * cp,struct sk_buff * skb,struct ip_vs_app * app)329 static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
330 				  struct ip_vs_app *app)
331 {
332 	int diff;
333 	const unsigned int tcp_offset = ip_hdrlen(skb);
334 	struct tcphdr *th;
335 	__u32 seq;
336 
337 	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
338 		return 0;
339 
340 	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
341 
342 	/*
343 	 *	Remember seq number in case this pkt gets resized
344 	 */
345 	seq = ntohl(th->seq);
346 
347 	/*
348 	 *	Fix seq stuff if flagged as so.
349 	 */
350 	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
351 		vs_fix_seq(&cp->out_seq, th);
352 	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
353 		vs_fix_ack_seq(&cp->in_seq, th);
354 
355 	/*
356 	 *	Call private output hook function
357 	 */
358 	if (app->pkt_out == NULL)
359 		return 1;
360 
361 	if (!app->pkt_out(app, cp, skb, &diff))
362 		return 0;
363 
364 	/*
365 	 *	Update ip_vs seq stuff if len has changed.
366 	 */
367 	if (diff != 0)
368 		vs_seq_update(cp, &cp->out_seq,
369 			      IP_VS_CONN_F_OUT_SEQ, seq, diff);
370 
371 	return 1;
372 }
373 
374 /*
375  *	Output pkt hook. Will call bound ip_vs_app specific function
376  *	called by ipvs packet handler, assumes previously checked cp!=NULL
377  *	returns false if it can't handle packet (oom)
378  */
ip_vs_app_pkt_out(struct ip_vs_conn * cp,struct sk_buff * skb)379 int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
380 {
381 	struct ip_vs_app *app;
382 
383 	/*
384 	 *	check if application module is bound to
385 	 *	this ip_vs_conn.
386 	 */
387 	if ((app = cp->app) == NULL)
388 		return 1;
389 
390 	/* TCP is complicated */
391 	if (cp->protocol == IPPROTO_TCP)
392 		return app_tcp_pkt_out(cp, skb, app);
393 
394 	/*
395 	 *	Call private output hook function
396 	 */
397 	if (app->pkt_out == NULL)
398 		return 1;
399 
400 	return app->pkt_out(app, cp, skb, NULL);
401 }
402 
403 
app_tcp_pkt_in(struct ip_vs_conn * cp,struct sk_buff * skb,struct ip_vs_app * app)404 static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
405 				 struct ip_vs_app *app)
406 {
407 	int diff;
408 	const unsigned int tcp_offset = ip_hdrlen(skb);
409 	struct tcphdr *th;
410 	__u32 seq;
411 
412 	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
413 		return 0;
414 
415 	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
416 
417 	/*
418 	 *	Remember seq number in case this pkt gets resized
419 	 */
420 	seq = ntohl(th->seq);
421 
422 	/*
423 	 *	Fix seq stuff if flagged as so.
424 	 */
425 	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
426 		vs_fix_seq(&cp->in_seq, th);
427 	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
428 		vs_fix_ack_seq(&cp->out_seq, th);
429 
430 	/*
431 	 *	Call private input hook function
432 	 */
433 	if (app->pkt_in == NULL)
434 		return 1;
435 
436 	if (!app->pkt_in(app, cp, skb, &diff))
437 		return 0;
438 
439 	/*
440 	 *	Update ip_vs seq stuff if len has changed.
441 	 */
442 	if (diff != 0)
443 		vs_seq_update(cp, &cp->in_seq,
444 			      IP_VS_CONN_F_IN_SEQ, seq, diff);
445 
446 	return 1;
447 }
448 
449 /*
450  *	Input pkt hook. Will call bound ip_vs_app specific function
451  *	called by ipvs packet handler, assumes previously checked cp!=NULL.
452  *	returns false if can't handle packet (oom).
453  */
ip_vs_app_pkt_in(struct ip_vs_conn * cp,struct sk_buff * skb)454 int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
455 {
456 	struct ip_vs_app *app;
457 
458 	/*
459 	 *	check if application module is bound to
460 	 *	this ip_vs_conn.
461 	 */
462 	if ((app = cp->app) == NULL)
463 		return 1;
464 
465 	/* TCP is complicated */
466 	if (cp->protocol == IPPROTO_TCP)
467 		return app_tcp_pkt_in(cp, skb, app);
468 
469 	/*
470 	 *	Call private input hook function
471 	 */
472 	if (app->pkt_in == NULL)
473 		return 1;
474 
475 	return app->pkt_in(app, cp, skb, NULL);
476 }
477 
478 
479 #ifdef CONFIG_PROC_FS
480 /*
481  *	/proc/net/ip_vs_app entry function
482  */
483 
/*
 *	Return the incarnation at zero-based position @pos, counting across
 *	the incarnation lists of every app in @ipvs->app_list, or NULL when
 *	@pos is past the end.
 */
static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
{
	struct ip_vs_app *owner, *cur;

	list_for_each_entry(owner, &ipvs->app_list, a_list) {
		list_for_each_entry(cur, &owner->incs_list, a_list) {
			if (pos == 0)
				return cur;
			pos--;
		}
	}

	return NULL;
}
497 
/*
 *	seq_file start: takes __ip_vs_app_mutex (dropped again in the stop
 *	callback). Position 0 yields the header token, position N > 0 the
 *	incarnation with index N - 1.
 */
static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&__ip_vs_app_mutex);

	if (*pos == 0)
		return SEQ_START_TOKEN;

	return ip_vs_app_idx(ipvs, *pos - 1);
}
507 
/*
 *	seq_file next: advances *pos and returns the incarnation following
 *	@v, moving on to the first incarnation of a later app once the
 *	current app's list is exhausted. Returns NULL at the end.
 */
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip_vs_app *cur, *owner;
	struct list_head *node;
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_app_idx(ipvs, 0);

	cur = v;
	owner = cur->app;

	/* another incarnation of the same application? */
	node = cur->a_list.next;
	if (node != &owner->incs_list)
		return list_entry(node, struct ip_vs_app, a_list);

	/* go on to next application that has at least one incarnation */
	node = owner->a_list.next;
	while (node != &ipvs->app_list) {
		owner = list_entry(node, struct ip_vs_app, a_list);
		if (!list_empty(&owner->incs_list))
			return list_first_entry(&owner->incs_list,
						struct ip_vs_app, a_list);
		node = node->next;
	}

	return NULL;
}
534 
ip_vs_app_seq_stop(struct seq_file * seq,void * v)535 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
536 {
537 	mutex_unlock(&__ip_vs_app_mutex);
538 }
539 
ip_vs_app_seq_show(struct seq_file * seq,void * v)540 static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
541 {
542 	if (v == SEQ_START_TOKEN)
543 		seq_puts(seq, "prot port    usecnt name\n");
544 	else {
545 		const struct ip_vs_app *inc = v;
546 
547 		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
548 			   ip_vs_proto_name(inc->protocol),
549 			   ntohs(inc->port),
550 			   atomic_read(&inc->usecnt),
551 			   inc->name);
552 	}
553 	return 0;
554 }
555 
556 static const struct seq_operations ip_vs_app_seq_ops = {
557 	.start = ip_vs_app_seq_start,
558 	.next  = ip_vs_app_seq_next,
559 	.stop  = ip_vs_app_seq_stop,
560 	.show  = ip_vs_app_seq_show,
561 };
562 
ip_vs_app_open(struct inode * inode,struct file * file)563 static int ip_vs_app_open(struct inode *inode, struct file *file)
564 {
565 	return seq_open_net(inode, file, &ip_vs_app_seq_ops,
566 			    sizeof(struct seq_net_private));
567 }
568 
569 static const struct file_operations ip_vs_app_fops = {
570 	.owner	 = THIS_MODULE,
571 	.open	 = ip_vs_app_open,
572 	.read	 = seq_read,
573 	.llseek  = seq_lseek,
574 	.release = seq_release_net,
575 };
576 #endif
577 
ip_vs_app_net_init(struct net * net)578 int __net_init ip_vs_app_net_init(struct net *net)
579 {
580 	struct netns_ipvs *ipvs = net_ipvs(net);
581 
582 	INIT_LIST_HEAD(&ipvs->app_list);
583 	proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
584 	return 0;
585 }
586 
ip_vs_app_net_cleanup(struct net * net)587 void __net_exit ip_vs_app_net_cleanup(struct net *net)
588 {
589 	proc_net_remove(net, "ip_vs_app");
590 }
591