// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

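/* Hash a tunnel by its key and remote endpoint to pick a bucket in the
 * per-netns tunnel table; keyed and keyless tunnels toward the same
 * destination therefore land in different buckets.
 */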
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}

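/* A tunnel configured with TUNNEL_KEY accepts only packets carrying the
 * same key; a keyless tunnel accepts only packets without a key.
 */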
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched by a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for the input packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;
	struct net_device *ndev;
	unsigned int hash;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	ndev = READ_ONCE(itn->fb_tunnel_dev);
	if (ndev && ndev->flags & IFF_UP)
		return netdev_priv(ndev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

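/* Publish a tunnel in its hash bucket (and as the netns collect_md
 * tunnel where applicable); insertion and removal use RCU primitives so
 * they are safe against concurrent lookups.
 */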
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

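/* Exact-match lookup on saddr/daddr/link/type/key, used by the control
 * paths; unlike ip_tunnel_lookup() it applies no wildcarding and no
 * fallback.
 */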
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strscpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

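/* Route towards the configured endpoint to guess the underlay device,
 * size needed_headroom from it, and return a suitable device MTU
 * (never below IPV4_MIN_MTU).
 */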
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), dev_net(dev),
				    tunnel->parms.link, tunnel->fwmark, 0, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - t_hlen;
	if (dev->type == ARPHRD_ETHER)
		dev->max_mtu -= dev->hard_header_len;

	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

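/* If the received outer packet is UDP-encapsulated, record the outer
 * source/destination ports in the tunnel metadata's encap info.
 */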
void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
{
	const struct iphdr *iph = ip_hdr(skb);
	const struct udphdr *udph;

	if (iph->protocol != IPPROTO_UDP)
		return;

	udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
	info->encap.sport = udph->source;
	info->encap.dport = udph->dest;
}
EXPORT_SYMBOL(ip_tunnel_md_udp_encap);

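/* Common receive path: validate checksum/sequence flags against the
 * tunnel configuration, decapsulate ECN, update stats, and hand the
 * inner packet to GRO. Consumes the skb (and tun_dst) in all cases and
 * always returns 0.
 */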
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		DEV_STATS_INC(tunnel->dev, multicast);
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
		DEV_STATS_INC(tunnel->dev, rx_errors);
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
			DEV_STATS_INC(tunnel->dev, rx_errors);
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
			DEV_STATS_INC(tunnel->dev, rx_errors);
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

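/* Check whether the (non-GSO) packet fits the path MTU of the chosen
 * route; update the route's PMTU and send an ICMP "fragmentation
 * needed" or ICMPv6 "packet too big" error when it does not, returning
 * -E2BIG in that case.
 */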
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph,
			    int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen;
	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;

	if (df) {
		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
	} else {
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
	}

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
{
	/* we must cap headroom to some upper limit, else pskb_expand_head
	 * will overflow header offsets in skb_headers_offset_update().
	 */
	static const unsigned int max_allowed = 512;

	if (headroom > max_allowed)
		headroom = max_allowed;

	if (headroom > READ_ONCE(dev->needed_headroom))
		WRITE_ONCE(dev->needed_headroom, headroom);
}

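/* Transmit path for metadata-based (collect_md) tunnels: all outer
 * header parameters come from the skb's tunnel info rather than from
 * the device configuration.
 */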
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    dev_net(dev), 0, skb->mark, skb_get_hash(skb),
			    key->flow_flags);

	if (!tunnel_hlen)
		tunnel_hlen = ip_encap_hlen(&tun_info->encap);

	if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			DEV_STATS_INC(dev, tx_carrier_errors);
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		DEV_STATS_INC(dev, collisions);
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (skb_cow_head(skb, headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}

	ip_tunnel_adj_headroom(dev, headroom);

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	DEV_STATS_INC(dev, tx_errors);
	goto kfree;
tx_dropped:
	DEV_STATS_INC(dev, tx_dropped);
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

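/* Transmit path for classical, device-configured tunnels. For NBMA
 * tunnels (no configured destination), the outer destination is taken
 * from tunnel metadata or derived from the inner IPv4/IPv6 header.
 */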
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;	/* The extra header space needed */
	struct rtable *rt = NULL;		/* Route to the other host */
	__be16 payload_protocol;
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);
	payload_protocol = skb_protocol(skb, true);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			DEV_STATS_INC(dev, tx_fifo_errors);
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		} else if (payload_protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (payload_protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (payload_protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (payload_protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos),
			    dev_net(dev), tunnel->parms.link,
			    tunnel->fwmark, skb_get_hash(skb), 0);

	if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						&fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			DEV_STATS_INC(dev, tx_carrier_errors);
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		DEV_STATS_INC(dev, collisions);
		goto tx_error;
	}

	df = tnl_params->frag_off;
	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off & htons(IP_DF));

	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (payload_protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (payload_protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);

	if (skb_cow_head(skb, max_headroom)) {
		ip_rt_put(rt);
		DEV_STATS_INC(dev, tx_dropped);
		kfree_skb(skb);
		return;
	}

	ip_tunnel_adj_headroom(dev, max_headroom);

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	DEV_STATS_INC(dev, tx_errors);
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

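/* Re-key a live tunnel: unhash it, apply the new parameters, and hash
 * it back in so it lands in the bucket matching its new identity.
 */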
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		__dev_addr_set(dev, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

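/* Legacy ioctl backend (SIOC{GET,ADD,CHG,DEL}TUNNEL); add, change and
 * delete require CAP_NET_ADMIN in the tunnel's user namespace.
 */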
int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);

int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
			     void __user *data, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, data, sizeof(p)))
		return -EFAULT;
	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
	if (!err && copy_to_user(data, &p, sizeof(p)))
		return -EFAULT;
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);

int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - t_hlen;

	if (dev->type == ARPHRD_ETHER)
		max_mtu -= dev->hard_header_len;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

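/* Per-netns initialization: set up the hash table and, when the
 * namespace allows fallback tunnels, create and register the fallback
 * device under RTNL.
 */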
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strscpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

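/* Netlink link-creation helper: rejects duplicate tunnels (only one
 * collect_md tunnel is allowed per netns), registers the device, and
 * clamps any requested MTU to what the headers leave room for.
 */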
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			max -= dev->hard_header_len;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	ip_tunnel_del(itn, netdev_priv(dev));
	if (itn->fb_tunnel_dev == dev)
		WRITE_ONCE(itn->fb_tunnel_dev, NULL);

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");