1 /* This file contains all the functions required for the standalone
2    ip_nat module.
3 
4    These are not required by the compatibility layer.
5 */
6 
7 /* (c) 1999 Paul `Rusty' Russell.  Licenced under the GNU General
8  * Public Licence.
9  *
10  * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
11  * 	- new API and handling of conntrack/nat helpers
12  * 	- now capable of multiple expectations for one master
13  * */
14 
15 #include <linux/config.h>
16 #include <linux/types.h>
17 #include <linux/ip.h>
18 #include <linux/netfilter.h>
19 #include <linux/netfilter_ipv4.h>
20 #include <linux/module.h>
21 #include <linux/skbuff.h>
22 #include <linux/proc_fs.h>
23 #include <net/checksum.h>
24 #include <linux/spinlock.h>
25 #include <linux/version.h>
26 #include <linux/brlock.h>
27 
/* Lock-assertion hooks, defined before the netfilter headers are pulled in.
   The macro argument is ignored: both always check ip_nat_lock.
   NOTE(review): presumably consumed by the listhelp.h helpers -- confirm. */
#define ASSERT_READ_LOCK(unused) MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(unused) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
30 
31 #include <linux/netfilter_ipv4/ip_nat.h>
32 #include <linux/netfilter_ipv4/ip_nat_rule.h>
33 #include <linux/netfilter_ipv4/ip_nat_protocol.h>
34 #include <linux/netfilter_ipv4/ip_nat_core.h>
35 #include <linux/netfilter_ipv4/ip_nat_helper.h>
36 #include <linux/netfilter_ipv4/ip_tables.h>
37 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
38 #include <linux/netfilter_ipv4/listhelp.h>
39 
/* Debug tracing.  Change "#if 0" to "#if 1" to make DEBUGP() an alias for
   printk; as shipped, DEBUGP() swallows its arguments and expands to
   nothing. */
#if 0
#define DEBUGP printk
#else
#define DEBUGP(fmt, args...)
#endif
45 
/* Printable name for an IPv4 netfilter hook number.  Any value other than
   the four hooks listed here yields "*ERROR*". */
#define HOOKNAME(hooknum)						\
	((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" :		\
	 (hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" :		\
	 (hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" :			\
	 (hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" :			\
	 "*ERROR*")
51 
call_expect(struct ip_conntrack * master,struct sk_buff ** pskb,unsigned int hooknum,struct ip_conntrack * ct,struct ip_nat_info * info)52 static inline int call_expect(struct ip_conntrack *master,
53 			      struct sk_buff **pskb,
54 			      unsigned int hooknum,
55 			      struct ip_conntrack *ct,
56 			      struct ip_nat_info *info)
57 {
58 	return master->nat.info.helper->expect(pskb, hooknum, ct, info);
59 }
60 
61 static unsigned int
ip_nat_fn(unsigned int hooknum,struct sk_buff ** pskb,const struct net_device * in,const struct net_device * out,int (* okfn)(struct sk_buff *))62 ip_nat_fn(unsigned int hooknum,
63 	  struct sk_buff **pskb,
64 	  const struct net_device *in,
65 	  const struct net_device *out,
66 	  int (*okfn)(struct sk_buff *))
67 {
68 	struct ip_conntrack *ct;
69 	enum ip_conntrack_info ctinfo;
70 	struct ip_nat_info *info;
71 	/* maniptype == SRC for postrouting. */
72 	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
73 
74 	/* We never see fragments: conntrack defrags on pre-routing
75 	   and local-out, and ip_nat_out protects post-routing. */
76 	IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
77 		       & htons(IP_MF|IP_OFFSET)));
78 
79 	(*pskb)->nfcache |= NFC_UNKNOWN;
80 
81 	/* If we had a hardware checksum before, it's now invalid */
82 	if ((*pskb)->ip_summed == CHECKSUM_HW)
83 		(*pskb)->ip_summed = CHECKSUM_NONE;
84 
85 	ct = ip_conntrack_get(*pskb, &ctinfo);
86 	/* Can't track?  It's not due to stress, or conntrack would
87 	   have dropped it.  Hence it's the user's responsibilty to
88 	   packet filter it out, or implement conntrack/NAT for that
89 	   protocol. 8) --RR */
90 	if (!ct) {
91 		/* Exception: ICMP redirect to new connection (not in
92                    hash table yet).  We must not let this through, in
93                    case we're doing NAT to the same network. */
94 		struct iphdr *iph = (*pskb)->nh.iph;
95 		struct icmphdr *hdr = (struct icmphdr *)
96 			((u_int32_t *)iph + iph->ihl);
97 		if (iph->protocol == IPPROTO_ICMP
98 		    && hdr->type == ICMP_REDIRECT)
99 			return NF_DROP;
100 		return NF_ACCEPT;
101 	}
102 
103 	switch (ctinfo) {
104 	case IP_CT_RELATED:
105 	case IP_CT_RELATED+IP_CT_IS_REPLY:
106 		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
107 			return icmp_reply_translation(*pskb, ct, hooknum,
108 						      CTINFO2DIR(ctinfo));
109 		}
110 		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
111 	case IP_CT_NEW:
112 		info = &ct->nat.info;
113 
114 		WRITE_LOCK(&ip_nat_lock);
115 		/* Seen it before?  This can happen for loopback, retrans,
116 		   or local packets.. */
117 		if (!(info->initialized & (1 << maniptype))) {
118 			unsigned int ret;
119 
120 			if (ct->master
121 			    && master_ct(ct)->nat.info.helper
122 			    && master_ct(ct)->nat.info.helper->expect) {
123 				ret = call_expect(master_ct(ct), pskb,
124 						  hooknum, ct, info);
125 			} else {
126 				if (unlikely(is_confirmed(ct)))
127 					/* NAT module was loaded late */
128 					ret = alloc_null_binding_confirmed(ct, info,
129 		        		                                   hooknum);
130 				else if (hooknum == NF_IP_LOCAL_IN)
131 					/* LOCAL_IN hook doesn't have a chain */
132 					ret = alloc_null_binding(ct, info,
133 								 hooknum);
134 				else
135 					ret = ip_nat_rule_find(pskb, hooknum,
136 					                       in, out,
137 					                       ct, info);
138 			}
139 
140 			if (ret != NF_ACCEPT) {
141 				WRITE_UNLOCK(&ip_nat_lock);
142 				return ret;
143 			}
144 		} else
145 			DEBUGP("Already setup manip %s for ct %p\n",
146 			       maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
147 			       ct);
148 		WRITE_UNLOCK(&ip_nat_lock);
149 		break;
150 
151 	default:
152 		/* ESTABLISHED */
153 		IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
154 			     || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
155 		info = &ct->nat.info;
156 	}
157 
158 	IP_NF_ASSERT(info);
159 	return do_bindings(ct, ctinfo, info, hooknum, pskb);
160 }
161 
162 static unsigned int
ip_nat_in(unsigned int hooknum,struct sk_buff ** pskb,const struct net_device * in,const struct net_device * out,int (* okfn)(struct sk_buff *))163 ip_nat_in(unsigned int hooknum,
164           struct sk_buff **pskb,
165           const struct net_device *in,
166           const struct net_device *out,
167           int (*okfn)(struct sk_buff *))
168 {
169 	u_int32_t saddr, daddr;
170 	unsigned int ret;
171 
172 	saddr = (*pskb)->nh.iph->saddr;
173 	daddr = (*pskb)->nh.iph->daddr;
174 
175 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
176 	if (ret != NF_DROP && ret != NF_STOLEN
177 	    && ((*pskb)->nh.iph->saddr != saddr
178 	        || (*pskb)->nh.iph->daddr != daddr)) {
179 		dst_release((*pskb)->dst);
180 		(*pskb)->dst = NULL;
181 	}
182 	return ret;
183 }
184 
185 static unsigned int
ip_nat_out(unsigned int hooknum,struct sk_buff ** pskb,const struct net_device * in,const struct net_device * out,int (* okfn)(struct sk_buff *))186 ip_nat_out(unsigned int hooknum,
187 	   struct sk_buff **pskb,
188 	   const struct net_device *in,
189 	   const struct net_device *out,
190 	   int (*okfn)(struct sk_buff *))
191 {
192 	/* root is playing with raw sockets. */
193 	if ((*pskb)->len < sizeof(struct iphdr)
194 	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
195 		return NF_ACCEPT;
196 
197 	/* We can hit fragment here; forwarded packets get
198 	   defragmented by connection tracking coming in, then
199 	   fragmented (grr) by the forward code.
200 
201 	   In future: If we have nfct != NULL, AND we have NAT
202 	   initialized, AND there is no helper, then we can do full
203 	   NAPT on the head, and IP-address-only NAT on the rest.
204 
205 	   I'm starting to have nightmares about fragments.  */
206 
207 	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
208 		*pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT);
209 
210 		if (!*pskb)
211 			return NF_STOLEN;
212 	}
213 
214 	return ip_nat_fn(hooknum, pskb, in, out, okfn);
215 }
216 
217 static unsigned int
ip_nat_local_fn(unsigned int hooknum,struct sk_buff ** pskb,const struct net_device * in,const struct net_device * out,int (* okfn)(struct sk_buff *))218 ip_nat_local_fn(unsigned int hooknum,
219 		struct sk_buff **pskb,
220 		const struct net_device *in,
221 		const struct net_device *out,
222 		int (*okfn)(struct sk_buff *))
223 {
224 	u_int32_t saddr, daddr;
225 	unsigned int ret;
226 
227 	/* root is playing with raw sockets. */
228 	if ((*pskb)->len < sizeof(struct iphdr)
229 	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
230 		return NF_ACCEPT;
231 
232 	saddr = (*pskb)->nh.iph->saddr;
233 	daddr = (*pskb)->nh.iph->daddr;
234 
235 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
236 	if (ret != NF_DROP && ret != NF_STOLEN
237 	    && ((*pskb)->nh.iph->saddr != saddr
238 		|| (*pskb)->nh.iph->daddr != daddr))
239 		return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
240 	return ret;
241 }
242 
243 /* We must be after connection tracking and before packet filtering. */
244 
245 /* Before packet filtering, change destination */
246 static struct nf_hook_ops ip_nat_in_ops
247 = { { NULL, NULL }, ip_nat_in, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_NAT_DST };
248 /* After packet filtering, change source */
249 static struct nf_hook_ops ip_nat_out_ops
250 = { { NULL, NULL }, ip_nat_out, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_NAT_SRC};
251 /* Before packet filtering, change destination */
252 static struct nf_hook_ops ip_nat_local_out_ops
253 = { { NULL, NULL }, ip_nat_local_fn, PF_INET, NF_IP_LOCAL_OUT, NF_IP_PRI_NAT_DST };
254 /* After packet filtering, change source for reply packets of LOCAL_OUT DNAT */
255 static struct nf_hook_ops ip_nat_local_in_ops
256 = { { NULL, NULL }, ip_nat_fn, PF_INET, NF_IP_LOCAL_IN, NF_IP_PRI_NAT_SRC };
257 
258 /* Protocol registration. */
ip_nat_protocol_register(struct ip_nat_protocol * proto)259 int ip_nat_protocol_register(struct ip_nat_protocol *proto)
260 {
261 	int ret = 0;
262 	struct list_head *i;
263 
264 	WRITE_LOCK(&ip_nat_lock);
265 	for (i = protos.next; i != &protos; i = i->next) {
266 		if (((struct ip_nat_protocol *)i)->protonum
267 		    == proto->protonum) {
268 			ret = -EBUSY;
269 			goto out;
270 		}
271 	}
272 
273 	list_prepend(&protos, proto);
274 	MOD_INC_USE_COUNT;
275 
276  out:
277 	WRITE_UNLOCK(&ip_nat_lock);
278 	return ret;
279 }
280 
281 /* Noone stores the protocol anywhere; simply delete it. */
ip_nat_protocol_unregister(struct ip_nat_protocol * proto)282 void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
283 {
284 	WRITE_LOCK(&ip_nat_lock);
285 	LIST_DELETE(&protos, proto);
286 	WRITE_UNLOCK(&ip_nat_lock);
287 
288 	/* Someone could be still looking at the proto in a bh. */
289 	br_write_lock_bh(BR_NETPROTO_LOCK);
290 	br_write_unlock_bh(BR_NETPROTO_LOCK);
291 
292 	MOD_DEC_USE_COUNT;
293 }
294 
init_or_cleanup(int init)295 static int init_or_cleanup(int init)
296 {
297 	int ret = 0;
298 
299 	if (!init) goto cleanup;
300 
301 	ret = ip_nat_rule_init();
302 	if (ret < 0) {
303 		printk("ip_nat_init: can't setup rules.\n");
304 		goto cleanup_nothing;
305 	}
306 	ret = ip_nat_init();
307 	if (ret < 0) {
308 		printk("ip_nat_init: can't setup rules.\n");
309 		goto cleanup_rule_init;
310 	}
311 	ret = nf_register_hook(&ip_nat_in_ops);
312 	if (ret < 0) {
313 		printk("ip_nat_init: can't register in hook.\n");
314 		goto cleanup_nat;
315 	}
316 	ret = nf_register_hook(&ip_nat_out_ops);
317 	if (ret < 0) {
318 		printk("ip_nat_init: can't register out hook.\n");
319 		goto cleanup_inops;
320 	}
321 	ret = nf_register_hook(&ip_nat_local_out_ops);
322 	if (ret < 0) {
323 		printk("ip_nat_init: can't register local out hook.\n");
324 		goto cleanup_outops;
325 	}
326 	ret = nf_register_hook(&ip_nat_local_in_ops);
327 	if (ret < 0) {
328 		printk("ip_nat_init: can't register local in hook.\n");
329 		goto cleanup_localoutops;
330 	}
331 	return ret;
332 
333  cleanup:
334 	nf_unregister_hook(&ip_nat_local_in_ops);
335  cleanup_localoutops:
336 	nf_unregister_hook(&ip_nat_local_out_ops);
337  cleanup_outops:
338 	nf_unregister_hook(&ip_nat_out_ops);
339  cleanup_inops:
340 	nf_unregister_hook(&ip_nat_in_ops);
341  cleanup_nat:
342 	ip_nat_cleanup();
343  cleanup_rule_init:
344 	ip_nat_rule_cleanup();
345  cleanup_nothing:
346 	MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock);
347 	return ret;
348 }
349 
init(void)350 static int __init init(void)
351 {
352 	return init_or_cleanup(1);
353 }
354 
fini(void)355 static void __exit fini(void)
356 {
357 	init_or_cleanup(0);
358 }
359 
360 module_init(init);
361 module_exit(fini);
362 
363 EXPORT_SYMBOL(ip_nat_setup_info);
364 EXPORT_SYMBOL(ip_nat_protocol_register);
365 EXPORT_SYMBOL(ip_nat_protocol_unregister);
366 EXPORT_SYMBOL(ip_nat_helper_register);
367 EXPORT_SYMBOL(ip_nat_helper_unregister);
368 EXPORT_SYMBOL(ip_nat_cheat_check);
369 EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
370 EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
371 EXPORT_SYMBOL(ip_nat_used_tuple);
372 MODULE_LICENSE("GPL");
373