/* * IPVS Application module * * Version: $Id: ip_vs_app.c,v 1.14 2001/11/23 14:34:10 wensong Exp $ * * Authors: Wensong Zhang * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference * is that ip_vs_app module handles the reverse direction (incoming requests * and outgoing responses). The ip_vs_app modules are only used for VS/NAT. * * IP_MASQ_APP application masquerading module * * Author: Juan Jose Ciarlante, * */ #include #include #include #include #include #include #include #include #include #include #define IP_VS_APP_TAB_SIZE 16 /* must be power of 2 */ #define IP_VS_APP_HASH(proto, port) ((port^proto) & (IP_VS_APP_TAB_SIZE-1)) #define IP_VS_APP_TYPE(proto, port) (proto<<16 | port) #define IP_VS_APP_PORT(type) (type & 0xffff) #define IP_VS_APP_PROTO(type) ((type>>16) & 0x00ff) EXPORT_SYMBOL(register_ip_vs_app); EXPORT_SYMBOL(unregister_ip_vs_app); /* * will hold ipvs app. hashed list heads */ static struct list_head ip_vs_app_base[IP_VS_APP_TAB_SIZE]; /* lock for ip_vs_app table */ static rwlock_t __ip_vs_app_lock = RW_LOCK_UNLOCKED; /* * ip_vs_app registration routine * port: host byte order. */ int register_ip_vs_app(struct ip_vs_app *vapp, unsigned short proto, __u16 port) { unsigned hash; if (!vapp) { IP_VS_ERR("register_ip_vs_app(): NULL arg\n"); return -EINVAL; } MOD_INC_USE_COUNT; vapp->type = IP_VS_APP_TYPE(proto, port); hash = IP_VS_APP_HASH(proto, port); write_lock_bh(&__ip_vs_app_lock); list_add(&vapp->n_list, &ip_vs_app_base[hash]); write_unlock_bh(&__ip_vs_app_lock); return 0; } /* * ip_vs_app unregistration routine. */ int unregister_ip_vs_app(struct ip_vs_app *vapp) { if (!vapp) { IP_VS_ERR("unregister_ip_vs_app(): NULL arg\n"); return -EINVAL; } write_lock_bh(&__ip_vs_app_lock); list_del(&vapp->n_list); write_unlock_bh(&__ip_vs_app_lock); MOD_DEC_USE_COUNT; return 0; } /* * get ip_vs_app object by its proto and port (net byte order). */ static struct ip_vs_app * ip_vs_app_get(unsigned short proto, __u16 port) { struct list_head *e; struct ip_vs_app *vapp; unsigned hash; unsigned type; port = ntohs(port); type = IP_VS_APP_TYPE(proto, port); hash = IP_VS_APP_HASH(proto, port); read_lock_bh(&__ip_vs_app_lock); list_for_each(e, &ip_vs_app_base[hash]) { vapp = list_entry(e, struct ip_vs_app, n_list); /* * Test and MOD_INC_USE_COUNT atomically */ if (vapp->module && !try_inc_mod_count(vapp->module)) { /* * This application module is just deleted */ continue; } if (type == vapp->type) { read_unlock_bh(&__ip_vs_app_lock); return vapp; } if (vapp->module) __MOD_DEC_USE_COUNT(vapp->module); } read_unlock_bh(&__ip_vs_app_lock); return NULL; } /* * Bind ip_vs_conn to its ip_vs_app based on proto and dport, * and call the ip_vs_app constructor. */ struct ip_vs_app * ip_vs_bind_app(struct ip_vs_conn *cp) { struct ip_vs_app *vapp; /* no need to bind app if its forwarding method is not NAT */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) return NULL; if (cp->protocol != IPPROTO_TCP && cp->protocol != IPPROTO_UDP) return NULL; /* * don't allow binding if already bound */ if (cp->app != NULL) { IP_VS_ERR("ip_vs_bind_app(): " "called for already bound object.\n"); return cp->app; } vapp = ip_vs_app_get(cp->protocol, cp->vport); if (vapp != NULL) { cp->app = vapp; if (vapp->init_conn) vapp->init_conn(vapp, cp); } return vapp; } /* * Unbind cp from type object and call cp destructor (does not kfree()). */ int ip_vs_unbind_app(struct ip_vs_conn *cp) { struct ip_vs_app *vapp = cp->app; if (cp->protocol != IPPROTO_TCP && cp->protocol != IPPROTO_UDP) return 0; if (vapp != NULL) { if (vapp->done_conn) vapp->done_conn(vapp, cp); cp->app = NULL; if (vapp->module) __MOD_DEC_USE_COUNT(vapp->module); } return (vapp != NULL); } /* * Fixes th->seq based on ip_vs_seq info. */ static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) { __u32 seq = ntohl(th->seq); /* * Adjust seq with delta-offset for all packets after * the most recent resized pkt seq and with previous_delta offset * for all packets before most recent resized pkt seq. */ if (vseq->delta || vseq->previous_delta) { if(after(seq, vseq->init_seq)) { th->seq = htonl(seq + vseq->delta); IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n", vseq->delta); } else { th->seq = htonl(seq + vseq->previous_delta); IP_VS_DBG(9, "vs_fix_seq(): added previous_delta " "(%d) to seq\n", vseq->previous_delta); } } } /* * Fixes th->ack_seq based on ip_vs_seq info. */ static inline void vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) { __u32 ack_seq = ntohl(th->ack_seq); /* * Adjust ack_seq with delta-offset for * the packets AFTER most recent resized pkt has caused a shift * for packets before most recent resized pkt, use previous_delta */ if (vseq->delta || vseq->previous_delta) { /* since ack_seq is the number of octet that is expected to receive next, so compare it with init_seq+delta */ if(after(ack_seq, vseq->init_seq+vseq->delta)) { th->ack_seq = htonl(ack_seq - vseq->delta); IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta " "(%d) from ack_seq\n", vseq->delta); } else { th->ack_seq = htonl(ack_seq - vseq->previous_delta); IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted " "previous_delta (%d) from ack_seq\n", vseq->previous_delta); } } } /* * Updates ip_vs_seq if pkt has been resized * Assumes already checked proto==IPPROTO_TCP and diff!=0. */ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, unsigned flag, __u32 seq, int diff) { /* spinlock is to keep updating cp->flags atomic */ spin_lock(&cp->lock); if ( !(cp->flags & flag) || after(seq, vseq->init_seq)) { vseq->previous_delta = vseq->delta; vseq->delta += diff; vseq->init_seq = seq; cp->flags |= flag; } spin_unlock(&cp->lock); } /* * Output pkt hook. Will call bound ip_vs_app specific function * called by ip_vs_out(), assumes previously checked cp!=NULL * returns (new - old) skb->len diff. */ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_app *vapp; int diff; struct iphdr *iph; struct tcphdr *th; __u32 seq; /* * check if application module is bound to * this ip_vs_conn. */ if ((vapp = cp->app) == NULL) return 0; iph = skb->nh.iph; th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); /* * Remember seq number in case this pkt gets resized */ seq = ntohl(th->seq); /* * Fix seq stuff if flagged as so. */ if (cp->protocol == IPPROTO_TCP) { if (cp->flags & IP_VS_CONN_F_OUT_SEQ) vs_fix_seq(&cp->out_seq, th); if (cp->flags & IP_VS_CONN_F_IN_SEQ) vs_fix_ack_seq(&cp->in_seq, th); } /* * Call private output hook function */ if (vapp->pkt_out == NULL) return 0; diff = vapp->pkt_out(vapp, cp, skb); /* * Update ip_vs seq stuff if len has changed. */ if (diff != 0 && cp->protocol == IPPROTO_TCP) vs_seq_update(cp, &cp->out_seq, IP_VS_CONN_F_OUT_SEQ, seq, diff); return diff; } /* * Input pkt hook. Will call bound ip_vs_app specific function * called by ip_fw_demasquerade(), assumes previously checked cp!=NULL. * returns (new - old) skb->len diff. */ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_app *vapp; int diff; struct iphdr *iph; struct tcphdr *th; __u32 seq; /* * check if application module is bound to * this ip_vs_conn. */ if ((vapp = cp->app) == NULL) return 0; iph = skb->nh.iph; th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); /* * Remember seq number in case this pkt gets resized */ seq = ntohl(th->seq); /* * Fix seq stuff if flagged as so. */ if (cp->protocol == IPPROTO_TCP) { if (cp->flags & IP_VS_CONN_F_IN_SEQ) vs_fix_seq(&cp->in_seq, th); if (cp->flags & IP_VS_CONN_F_OUT_SEQ) vs_fix_ack_seq(&cp->out_seq, th); } /* * Call private input hook function */ if (vapp->pkt_in == NULL) return 0; diff = vapp->pkt_in(vapp, cp, skb); /* * Update ip_vs seq stuff if len has changed. */ if (diff != 0 && cp->protocol == IPPROTO_TCP) vs_seq_update(cp, &cp->in_seq, IP_VS_CONN_F_IN_SEQ, seq, diff); return diff; } /* * /proc/net/ip_vs_app entry function */ static int ip_vs_app_getinfo(char *buffer, char **start, off_t offset, int length) { off_t pos=0; int len=0; char temp[64]; int idx; struct ip_vs_app *vapp; struct list_head *e; pos = 64; if (pos > offset) { len += sprintf(buffer+len, "%-63s\n", "prot port usecnt name"); } read_lock_bh(&__ip_vs_app_lock); for (idx=0 ; idx < IP_VS_APP_TAB_SIZE; idx++) { list_for_each (e, &ip_vs_app_base[idx]) { vapp = list_entry(e, struct ip_vs_app, n_list); pos += 64; if (pos <= offset) continue; sprintf(temp, "%-3s %-7u %-6d %-17s", ip_vs_proto_name(IP_VS_APP_PROTO(vapp->type)), IP_VS_APP_PORT(vapp->type), vapp->module?GET_USE_COUNT(vapp->module):0, vapp->name); len += sprintf(buffer+len, "%-63s\n", temp); if (pos >= offset+length) goto done; } } done: read_unlock_bh(&__ip_vs_app_lock); *start = buffer+len-(pos-offset); /* Start of wanted data */ len = pos-offset; if (len > length) len = length; if (len < 0) len = 0; return len; } /* * Replace a segment of data with a new segment */ int ip_vs_skb_replace(struct sk_buff *skb, int pri, char *o_buf, int o_len, char *n_buf, int n_len) { struct iphdr *iph; int diff; int o_offset; int o_left; EnterFunction(9); diff = n_len - o_len; o_offset = o_buf - (char *)skb->data; /* The length of left data after o_buf+o_len in the skb data */ o_left = skb->len - (o_offset + o_len); if (diff <= 0) { memmove(o_buf + n_len, o_buf + o_len, o_left); memcpy(o_buf, n_buf, n_len); skb_trim(skb, skb->len + diff); } else if (diff <= skb_tailroom(skb)) { skb_put(skb, diff); memmove(o_buf + n_len, o_buf + o_len, o_left); memcpy(o_buf, n_buf, n_len); } else { if (pskb_expand_head(skb, skb_headroom(skb), diff, pri)) return -ENOMEM; skb_put(skb, diff); memmove(skb->data + o_offset + n_len, skb->data + o_offset + o_len, o_left); memcpy(skb->data + o_offset, n_buf, n_len); } /* must update the iph total length here */ iph = skb->nh.iph; iph->tot_len = htons(skb->len); LeaveFunction(9); return 0; } int ip_vs_app_init(void) { int idx; for (idx=0 ; idx < IP_VS_APP_TAB_SIZE; idx++) { INIT_LIST_HEAD(&ip_vs_app_base[idx]); } /* we will replace it with proc_net_ipvs_create() soon */ proc_net_create("ip_vs_app", 0, ip_vs_app_getinfo); return 0; } void ip_vs_app_cleanup(void) { proc_net_remove("ip_vs_app"); }