// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

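/* Hash the tunnel key XORed with the remote address into an
 * IP_TNL_HASH_BITS-wide bucket index for the per-netns tunnel hash table.
 */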
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}

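/* A keyed tunnel matches only packets carrying the same key; a keyless
 * tunnel matches only packets that carry no key at all.
 */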
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for an input packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;
	struct net_device *ndev;
	unsigned int hash;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

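	/* Pass 1: exact match on both local and remote addresses. */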
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

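	/* Pass 2: match on the remote address only (wildcard source). */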
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

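	/* Pass 3: tunnels bound to the local address with no remote, or
	 * multicast tunnels whose group address equals the (multicast)
	 * destination.
	 */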
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

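	/* Pass 4: fully wildcarded tunnels (no local, no remote address),
	 * matched on the key alone unless TUNNEL_NO_KEY is set.
	 */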
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

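	/* No exact link match: fall back to the best candidate, then to the
	 * collect_md tunnel, and finally to the per-netns fallback device.
	 */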
	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	ndev = READ_ONCE(itn->fb_tunnel_dev);
	if (ndev && ndev->flags & IFF_UP)
		return netdev_priv(ndev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

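/* Select the hash bucket for a set of tunnel parameters.  Multicast and
 * unset destinations hash as remote 0; VTI tunnels without TUNNEL_KEY
 * hash with key 0.
 */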
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

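/* Link a tunnel into its hash bucket and, for collect_md tunnels, publish
 * it as the per-netns metadata tunnel.
 */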
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

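/* Reverse of ip_tunnel_add(): unpublish the collect_md tunnel and unlink
 * the entry from its hash bucket.
 */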
static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

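/* Exact-match lookup by addresses, link, device type and key, used by the
 * management paths (ioctl/netlink) rather than the packet receive path.
 */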
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strscpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

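/* Bind the tunnel to its underlying device and derive needed_headroom from
 * it.  Returns the MTU to use, clamped to at least IPV4_MIN_MTU.
 */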
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), dev_net(dev),
				    tunnel->parms.link, tunnel->fwmark, 0, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - t_hlen;
	if (dev->type == ARPHRD_ETHER)
		dev->max_mtu -= dev->hard_header_len;

	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

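/* Common receive path: validate checksum and sequence flags against the
 * tunnel configuration, decapsulate ECN, update stats and hand the skb to
 * the GRO cells.  Consumes the skb (and tun_dst) in all cases.
 */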
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

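/* Compute the path MTU seen by the inner packet and, for oversized non-GSO
 * packets with DF set (IPv4) or above IPV6_MIN_MTU (IPv6), emit the
 * corresponding "fragmentation needed" / "packet too big" ICMP error and
 * return -E2BIG.
 */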
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph,
			    int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen;
	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;

	if (df) {
		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
	} else {
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
	}

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

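/* Transmit path for metadata-based (collect_md) tunnels: all per-packet
 * parameters (addresses, key, TOS, TTL, DF) come from the skb's tunnel
 * metadata rather than from the device configuration.
 */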
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    dev_net(dev), 0, skb->mark, skb_get_hash(skb),
			    key->flow_flags);
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

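/* Main transmit path for classically configured tunnels: encapsulate the
 * packet using the device's parameters, resolving the destination of NBMA
 * (daddr == 0) tunnels from the inner headers or tunnel metadata.
 */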
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;	/* The extra header space needed */
	struct rtable *rt = NULL;		/* Route to the other host */
	__be16 payload_protocol;
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);
	payload_protocol = skb_protocol(skb, true);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		} else if (payload_protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (payload_protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (payload_protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (payload_protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos),
			    dev_net(dev), tunnel->parms.link,
			    tunnel->fwmark, skb_get_hash(skb), 0);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						&fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tnl_params->frag_off;
	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off & htons(IP_DF));

	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (payload_protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (payload_protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

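/* Apply new parameters to an existing tunnel: rehash it under the new
 * addresses/keys and rebind the underlying device when the link or fwmark
 * changed.  Caller holds RTNL.
 */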
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		__dev_addr_set(dev, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);

int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
			     void __user *data, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, data, sizeof(p)))
		return -EFAULT;
	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
	if (!err && copy_to_user(data, &p, sizeof(p)))
		return -EFAULT;
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);

int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - t_hlen;

	if (dev->type == ARPHRD_ETHER)
		max_mtu -= dev->hard_header_len;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strscpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			max -= dev->hard_header_len;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	ip_tunnel_del(itn, netdev_priv(dev));
	if (itn->fb_tunnel_dev == dev)
		WRITE_ONCE(itn->fb_tunnel_dev, NULL);

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the
 * tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");