1 /*
2  * NETLINK      Kernel-user communication protocol.
3  *
4  * 		Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
5  * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6  *
7  *		This program is free software; you can redistribute it and/or
8  *		modify it under the terms of the GNU General Public License
9  *		as published by the Free Software Foundation; either version
10  *		2 of the License, or (at your option) any later version.
11  *
12  * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
13  *                               added netlink_proto_exit
14  * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
15  * 				 use nlk_sk, as sk->protinfo is on a diet 8)
16  * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
17  * 				 - inc module use count of module that owns
18  * 				   the kernel socket in case userspace opens
19  * 				   socket of same protocol
20  * 				 - remove all module support, since netlink is
21  * 				   mandatory if CONFIG_NET=y these days
22  */
23 
24 #include <linux/module.h>
25 
26 #include <linux/capability.h>
27 #include <linux/kernel.h>
28 #include <linux/init.h>
29 #include <linux/signal.h>
30 #include <linux/sched.h>
31 #include <linux/errno.h>
32 #include <linux/string.h>
33 #include <linux/stat.h>
34 #include <linux/socket.h>
35 #include <linux/un.h>
36 #include <linux/fcntl.h>
37 #include <linux/termios.h>
38 #include <linux/sockios.h>
39 #include <linux/net.h>
40 #include <linux/fs.h>
41 #include <linux/slab.h>
42 #include <asm/uaccess.h>
43 #include <linux/skbuff.h>
44 #include <linux/netdevice.h>
45 #include <linux/rtnetlink.h>
46 #include <linux/proc_fs.h>
47 #include <linux/seq_file.h>
48 #include <linux/notifier.h>
49 #include <linux/security.h>
50 #include <linux/jhash.h>
51 #include <linux/jiffies.h>
52 #include <linux/random.h>
53 #include <linux/bitops.h>
54 #include <linux/mm.h>
55 #include <linux/types.h>
56 #include <linux/audit.h>
57 #include <linux/mutex.h>
58 
59 #include <net/net_namespace.h>
60 #include <net/sock.h>
61 #include <net/scm.h>
62 #include <net/netlink.h>
63 
64 #define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
65 #define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long))
66 
67 struct netlink_sock {
68 	/* struct sock has to be the first member of netlink_sock */
69 	struct sock		sk;
70 	u32			pid;
71 	u32			dst_pid;
72 	u32			dst_group;
73 	u32			flags;
74 	u32			subscriptions;
75 	u32			ngroups;
76 	unsigned long		*groups;
77 	unsigned long		state;
78 	wait_queue_head_t	wait;
79 	struct netlink_callback	*cb;
80 	struct mutex		*cb_mutex;
81 	struct mutex		cb_def_mutex;
82 	void			(*netlink_rcv)(struct sk_buff *skb);
83 	struct module		*module;
84 };
85 
86 struct listeners {
87 	struct rcu_head		rcu;
88 	unsigned long		masks[0];
89 };
90 
91 #define NETLINK_KERNEL_SOCKET	0x1
92 #define NETLINK_RECV_PKTINFO	0x2
93 #define NETLINK_BROADCAST_SEND_ERROR	0x4
94 #define NETLINK_RECV_NO_ENOBUFS	0x8
95 
96 static inline struct netlink_sock *nlk_sk(struct sock *sk)
97 {
98 	return container_of(sk, struct netlink_sock, sk);
99 }
100 
101 static inline int netlink_is_kernel(struct sock *sk)
102 {
103 	return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
104 }
105 
106 struct nl_pid_hash {
107 	struct hlist_head *table;
108 	unsigned long rehash_time;
109 
110 	unsigned int mask;
111 	unsigned int shift;
112 
113 	unsigned int entries;
114 	unsigned int max_shift;
115 
116 	u32 rnd;
117 };
118 
119 struct netlink_table {
120 	struct nl_pid_hash hash;
121 	struct hlist_head mc_list;
122 	struct listeners __rcu *listeners;
123 	unsigned int nl_nonroot;
124 	unsigned int groups;
125 	struct mutex *cb_mutex;
126 	struct module *module;
127 	int registered;
128 };
129 
130 static struct netlink_table *nl_table;
131 
132 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
133 
134 static int netlink_dump(struct sock *sk);
135 static void netlink_destroy_callback(struct netlink_callback *cb);
136 
137 static DEFINE_RWLOCK(nl_table_lock);
138 static atomic_t nl_table_users = ATOMIC_INIT(0);
139 
140 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock))
141 
142 static ATOMIC_NOTIFIER_HEAD(netlink_chain);
143 
144 static inline u32 netlink_group_mask(u32 group)
145 {
146 	return group ? 1 << (group - 1) : 0;
147 }
148 
149 static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
150 {
151 	return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
152 }
153 
154 static void netlink_sock_destruct(struct sock *sk)
155 {
156 	struct netlink_sock *nlk = nlk_sk(sk);
157 
158 	if (nlk->cb) {
159 		if (nlk->cb->done)
160 			nlk->cb->done(nlk->cb);
161 
162 		module_put(nlk->cb->module);
163 		netlink_destroy_callback(nlk->cb);
164 	}
165 
166 	skb_queue_purge(&sk->sk_receive_queue);
167 
168 	if (!sock_flag(sk, SOCK_DEAD)) {
169 		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
170 		return;
171 	}
172 
173 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
174 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
175 	WARN_ON(nlk_sk(sk)->groups);
176 }
177 
178 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
179  * SMP. Look: when several writers sleep and a reader wakes them up, all but
180  * one immediately hit the write lock and grab all the CPUs. Exclusive sleep
181  * solves this, _but_ remember, it adds useless work on UP machines.
182  */
183 
184 void netlink_table_grab(void)
185 	__acquires(nl_table_lock)
186 {
187 	might_sleep();
188 
189 	write_lock_irq(&nl_table_lock);
190 
191 	if (atomic_read(&nl_table_users)) {
192 		DECLARE_WAITQUEUE(wait, current);
193 
194 		add_wait_queue_exclusive(&nl_table_wait, &wait);
195 		for (;;) {
196 			set_current_state(TASK_UNINTERRUPTIBLE);
197 			if (atomic_read(&nl_table_users) == 0)
198 				break;
199 			write_unlock_irq(&nl_table_lock);
200 			schedule();
201 			write_lock_irq(&nl_table_lock);
202 		}
203 
204 		__set_current_state(TASK_RUNNING);
205 		remove_wait_queue(&nl_table_wait, &wait);
206 	}
207 }
208 
209 void netlink_table_ungrab(void)
210 	__releases(nl_table_lock)
211 {
212 	write_unlock_irq(&nl_table_lock);
213 	wake_up(&nl_table_wait);
214 }
215 
216 static inline void
217 netlink_lock_table(void)
218 {
219 	/* read_lock() synchronizes us with netlink_table_grab() */
220 
221 	read_lock(&nl_table_lock);
222 	atomic_inc(&nl_table_users);
223 	read_unlock(&nl_table_lock);
224 }
225 
226 static inline void
227 netlink_unlock_table(void)
228 {
229 	if (atomic_dec_and_test(&nl_table_users))
230 		wake_up(&nl_table_wait);
231 }
232 
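/*
 * Look up a bound socket by protocol and pid. The lookup runs under
 * nl_table_lock and takes a reference on the socket found, which the
 * caller must drop with sock_put().
 */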
233 static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
234 {
235 	struct nl_pid_hash *hash = &nl_table[protocol].hash;
236 	struct hlist_head *head;
237 	struct sock *sk;
238 	struct hlist_node *node;
239 
240 	read_lock(&nl_table_lock);
241 	head = nl_pid_hashfn(hash, pid);
242 	sk_for_each(sk, node, head) {
243 		if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) {
244 			sock_hold(sk);
245 			goto found;
246 		}
247 	}
248 	sk = NULL;
249 found:
250 	read_unlock(&nl_table_lock);
251 	return sk;
252 }
253 
254 static struct hlist_head *nl_pid_hash_zalloc(size_t size)
255 {
256 	if (size <= PAGE_SIZE)
257 		return kzalloc(size, GFP_ATOMIC);
258 	else
259 		return (struct hlist_head *)
260 			__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
261 					 get_order(size));
262 }
263 
264 static void nl_pid_hash_free(struct hlist_head *table, size_t size)
265 {
266 	if (size <= PAGE_SIZE)
267 		kfree(table);
268 	else
269 		free_pages((unsigned long)table, get_order(size));
270 }
271 
272 static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
273 {
274 	unsigned int omask, mask, shift;
275 	size_t osize, size;
276 	struct hlist_head *otable, *table;
277 	int i;
278 
279 	omask = mask = hash->mask;
280 	osize = size = (mask + 1) * sizeof(*table);
281 	shift = hash->shift;
282 
283 	if (grow) {
284 		if (++shift > hash->max_shift)
285 			return 0;
286 		mask = mask * 2 + 1;
287 		size *= 2;
288 	}
289 
290 	table = nl_pid_hash_zalloc(size);
291 	if (!table)
292 		return 0;
293 
294 	otable = hash->table;
295 	hash->table = table;
296 	hash->mask = mask;
297 	hash->shift = shift;
298 	get_random_bytes(&hash->rnd, sizeof(hash->rnd));
299 
300 	for (i = 0; i <= omask; i++) {
301 		struct sock *sk;
302 		struct hlist_node *node, *tmp;
303 
304 		sk_for_each_safe(sk, node, tmp, &otable[i])
305 			__sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
306 	}
307 
308 	nl_pid_hash_free(otable, osize);
309 	hash->rehash_time = jiffies + 10 * 60 * HZ;
310 	return 1;
311 }
312 
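/*
 * Grow the hash table when the average chain length exceeds one entry,
 * or redistribute entries over a same-sized table with a fresh hash seed
 * when a single chain got long and the last rehash was long enough ago.
 */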
313 static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
314 {
315 	int avg = hash->entries >> hash->shift;
316 
317 	if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
318 		return 1;
319 
320 	if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
321 		nl_pid_hash_rehash(hash, 0);
322 		return 1;
323 	}
324 
325 	return 0;
326 }
327 
328 static const struct proto_ops netlink_ops;
329 
330 static void
331 netlink_update_listeners(struct sock *sk)
332 {
333 	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
334 	struct hlist_node *node;
335 	unsigned long mask;
336 	unsigned int i;
337 	struct listeners *listeners;
338 
339 	listeners = nl_deref_protected(tbl->listeners);
340 	if (!listeners)
341 		return;
342 
343 	for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
344 		mask = 0;
345 		sk_for_each_bound(sk, node, &tbl->mc_list) {
346 			if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
347 				mask |= nlk_sk(sk)->groups[i];
348 		}
349 		listeners->masks[i] = mask;
350 	}
351 	/* this function is only called with the netlink table "grabbed", which
352 	 * makes sure updates are visible before bind or setsockopt return. */
353 }
354 
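/*
 * Bind a socket to a pid in the per-protocol hash. Fails with -EADDRINUSE
 * if the pid is already taken in this namespace, and with -EBUSY if the
 * socket is already bound to a pid.
 */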
355 static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
356 {
357 	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
358 	struct hlist_head *head;
359 	int err = -EADDRINUSE;
360 	struct sock *osk;
361 	struct hlist_node *node;
362 	int len;
363 
364 	netlink_table_grab();
365 	head = nl_pid_hashfn(hash, pid);
366 	len = 0;
367 	sk_for_each(osk, node, head) {
368 		if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid))
369 			break;
370 		len++;
371 	}
372 	if (node)
373 		goto err;
374 
375 	err = -EBUSY;
376 	if (nlk_sk(sk)->pid)
377 		goto err;
378 
379 	err = -ENOMEM;
380 	if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
381 		goto err;
382 
383 	if (len && nl_pid_hash_dilute(hash, len))
384 		head = nl_pid_hashfn(hash, pid);
385 	hash->entries++;
386 	nlk_sk(sk)->pid = pid;
387 	sk_add_node(sk, head);
388 	err = 0;
389 
390 err:
391 	netlink_table_ungrab();
392 	return err;
393 }
394 
395 static void netlink_remove(struct sock *sk)
396 {
397 	netlink_table_grab();
398 	if (sk_del_node_init(sk))
399 		nl_table[sk->sk_protocol].hash.entries--;
400 	if (nlk_sk(sk)->subscriptions)
401 		__sk_del_bind_node(sk);
402 	netlink_table_ungrab();
403 }
404 
405 static struct proto netlink_proto = {
406 	.name	  = "NETLINK",
407 	.owner	  = THIS_MODULE,
408 	.obj_size = sizeof(struct netlink_sock),
409 };
410 
411 static int __netlink_create(struct net *net, struct socket *sock,
412 			    struct mutex *cb_mutex, int protocol)
413 {
414 	struct sock *sk;
415 	struct netlink_sock *nlk;
416 
417 	sock->ops = &netlink_ops;
418 
419 	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
420 	if (!sk)
421 		return -ENOMEM;
422 
423 	sock_init_data(sock, sk);
424 
425 	nlk = nlk_sk(sk);
426 	if (cb_mutex)
427 		nlk->cb_mutex = cb_mutex;
428 	else {
429 		nlk->cb_mutex = &nlk->cb_def_mutex;
430 		mutex_init(nlk->cb_mutex);
431 	}
432 	init_waitqueue_head(&nlk->wait);
433 
434 	sk->sk_destruct = netlink_sock_destruct;
435 	sk->sk_protocol = protocol;
436 	return 0;
437 }
438 
439 static int netlink_create(struct net *net, struct socket *sock, int protocol,
440 			  int kern)
441 {
442 	struct module *module = NULL;
443 	struct mutex *cb_mutex;
444 	struct netlink_sock *nlk;
445 	int err = 0;
446 
447 	sock->state = SS_UNCONNECTED;
448 
449 	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
450 		return -ESOCKTNOSUPPORT;
451 
452 	if (protocol < 0 || protocol >= MAX_LINKS)
453 		return -EPROTONOSUPPORT;
454 
455 	netlink_lock_table();
456 #ifdef CONFIG_MODULES
457 	if (!nl_table[protocol].registered) {
458 		netlink_unlock_table();
459 		request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
460 		netlink_lock_table();
461 	}
462 #endif
463 	if (nl_table[protocol].registered &&
464 	    try_module_get(nl_table[protocol].module))
465 		module = nl_table[protocol].module;
466 	else
467 		err = -EPROTONOSUPPORT;
468 	cb_mutex = nl_table[protocol].cb_mutex;
469 	netlink_unlock_table();
470 
471 	if (err < 0)
472 		goto out;
473 
474 	err = __netlink_create(net, sock, cb_mutex, protocol);
475 	if (err < 0)
476 		goto out_module;
477 
478 	local_bh_disable();
479 	sock_prot_inuse_add(net, &netlink_proto, 1);
480 	local_bh_enable();
481 
482 	nlk = nlk_sk(sock->sk);
483 	nlk->module = module;
484 out:
485 	return err;
486 
487 out_module:
488 	module_put(module);
489 	goto out;
490 }
491 
492 static int netlink_release(struct socket *sock)
493 {
494 	struct sock *sk = sock->sk;
495 	struct netlink_sock *nlk;
496 
497 	if (!sk)
498 		return 0;
499 
500 	netlink_remove(sk);
501 	sock_orphan(sk);
502 	nlk = nlk_sk(sk);
503 
504 	/*
505 	 * OK. Socket is unlinked, any packets that arrive now
506 	 * will be purged.
507 	 */
508 
509 	sock->sk = NULL;
510 	wake_up_interruptible_all(&nlk->wait);
511 
512 	skb_queue_purge(&sk->sk_write_queue);
513 
514 	if (nlk->pid) {
515 		struct netlink_notify n = {
516 						.net = sock_net(sk),
517 						.protocol = sk->sk_protocol,
518 						.pid = nlk->pid,
519 					  };
520 		atomic_notifier_call_chain(&netlink_chain,
521 				NETLINK_URELEASE, &n);
522 	}
523 
524 	module_put(nlk->module);
525 
526 	netlink_table_grab();
527 	if (netlink_is_kernel(sk)) {
528 		BUG_ON(nl_table[sk->sk_protocol].registered == 0);
529 		if (--nl_table[sk->sk_protocol].registered == 0) {
530 			struct listeners *old;
531 
532 			old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
533 			RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
534 			kfree_rcu(old, rcu);
535 			nl_table[sk->sk_protocol].module = NULL;
536 			nl_table[sk->sk_protocol].registered = 0;
537 		}
538 	} else if (nlk->subscriptions)
539 		netlink_update_listeners(sk);
540 	netlink_table_ungrab();
541 
542 	kfree(nlk->groups);
543 	nlk->groups = NULL;
544 
545 	local_bh_disable();
546 	sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
547 	local_bh_enable();
548 	sock_put(sk);
549 	return 0;
550 }
551 
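/*
 * Pick a pid automatically: try the current thread group id first and
 * fall back to negative "rover" values on collision, retrying until
 * netlink_insert() succeeds.
 */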
552 static int netlink_autobind(struct socket *sock)
553 {
554 	struct sock *sk = sock->sk;
555 	struct net *net = sock_net(sk);
556 	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
557 	struct hlist_head *head;
558 	struct sock *osk;
559 	struct hlist_node *node;
560 	s32 pid = task_tgid_vnr(current);
561 	int err;
562 	static s32 rover = -4097;
563 
564 retry:
565 	cond_resched();
566 	netlink_table_grab();
567 	head = nl_pid_hashfn(hash, pid);
568 	sk_for_each(osk, node, head) {
569 		if (!net_eq(sock_net(osk), net))
570 			continue;
571 		if (nlk_sk(osk)->pid == pid) {
572 			/* Bind collision, search negative pid values. */
573 			pid = rover--;
574 			if (rover > -4097)
575 				rover = -4097;
576 			netlink_table_ungrab();
577 			goto retry;
578 		}
579 	}
580 	netlink_table_ungrab();
581 
582 	err = netlink_insert(sk, net, pid);
583 	if (err == -EADDRINUSE)
584 		goto retry;
585 
586 	/* If 2 threads race to autobind, that is fine.  */
587 	if (err == -EBUSY)
588 		err = 0;
589 
590 	return err;
591 }
592 
593 static inline int netlink_capable(const struct socket *sock, unsigned int flag)
594 {
595 	return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
596 	       capable(CAP_NET_ADMIN);
597 }
598 
599 static void
600 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
601 {
602 	struct netlink_sock *nlk = nlk_sk(sk);
603 
604 	if (nlk->subscriptions && !subscriptions)
605 		__sk_del_bind_node(sk);
606 	else if (!nlk->subscriptions && subscriptions)
607 		sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
608 	nlk->subscriptions = subscriptions;
609 }
610 
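/*
 * Make sure nlk->groups is large enough to hold the full group bitmap of
 * this protocol; called before the socket subscribes to multicast groups.
 */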
611 static int netlink_realloc_groups(struct sock *sk)
612 {
613 	struct netlink_sock *nlk = nlk_sk(sk);
614 	unsigned int groups;
615 	unsigned long *new_groups;
616 	int err = 0;
617 
618 	netlink_table_grab();
619 
620 	groups = nl_table[sk->sk_protocol].groups;
621 	if (!nl_table[sk->sk_protocol].registered) {
622 		err = -ENOENT;
623 		goto out_unlock;
624 	}
625 
626 	if (nlk->ngroups >= groups)
627 		goto out_unlock;
628 
629 	new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
630 	if (new_groups == NULL) {
631 		err = -ENOMEM;
632 		goto out_unlock;
633 	}
634 	memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
635 	       NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
636 
637 	nlk->groups = new_groups;
638 	nlk->ngroups = groups;
639  out_unlock:
640 	netlink_table_ungrab();
641 	return err;
642 }
643 
644 static int netlink_bind(struct socket *sock, struct sockaddr *addr,
645 			int addr_len)
646 {
647 	struct sock *sk = sock->sk;
648 	struct net *net = sock_net(sk);
649 	struct netlink_sock *nlk = nlk_sk(sk);
650 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
651 	int err;
652 
653 	if (nladdr->nl_family != AF_NETLINK)
654 		return -EINVAL;
655 
656 	/* Only the superuser is allowed to listen to multicasts */
657 	if (nladdr->nl_groups) {
658 		if (!netlink_capable(sock, NL_NONROOT_RECV))
659 			return -EPERM;
660 		err = netlink_realloc_groups(sk);
661 		if (err)
662 			return err;
663 	}
664 
665 	if (nlk->pid) {
666 		if (nladdr->nl_pid != nlk->pid)
667 			return -EINVAL;
668 	} else {
669 		err = nladdr->nl_pid ?
670 			netlink_insert(sk, net, nladdr->nl_pid) :
671 			netlink_autobind(sock);
672 		if (err)
673 			return err;
674 	}
675 
676 	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
677 		return 0;
678 
679 	netlink_table_grab();
680 	netlink_update_subscriptions(sk, nlk->subscriptions +
681 					 hweight32(nladdr->nl_groups) -
682 					 hweight32(nlk->groups[0]));
683 	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
684 	netlink_update_listeners(sk);
685 	netlink_table_ungrab();
686 
687 	return 0;
688 }
689 
690 static int netlink_connect(struct socket *sock, struct sockaddr *addr,
691 			   int alen, int flags)
692 {
693 	int err = 0;
694 	struct sock *sk = sock->sk;
695 	struct netlink_sock *nlk = nlk_sk(sk);
696 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
697 
698 	if (alen < sizeof(addr->sa_family))
699 		return -EINVAL;
700 
701 	if (addr->sa_family == AF_UNSPEC) {
702 		sk->sk_state	= NETLINK_UNCONNECTED;
703 		nlk->dst_pid	= 0;
704 		nlk->dst_group  = 0;
705 		return 0;
706 	}
707 	if (addr->sa_family != AF_NETLINK)
708 		return -EINVAL;
709 
710 	/* Only the superuser is allowed to send multicasts */
711 	if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
712 		return -EPERM;
713 
714 	if (!nlk->pid)
715 		err = netlink_autobind(sock);
716 
717 	if (err == 0) {
718 		sk->sk_state	= NETLINK_CONNECTED;
719 		nlk->dst_pid 	= nladdr->nl_pid;
720 		nlk->dst_group  = ffs(nladdr->nl_groups);
721 	}
722 
723 	return err;
724 }
725 
726 static int netlink_getname(struct socket *sock, struct sockaddr *addr,
727 			   int *addr_len, int peer)
728 {
729 	struct sock *sk = sock->sk;
730 	struct netlink_sock *nlk = nlk_sk(sk);
731 	DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);
732 
733 	nladdr->nl_family = AF_NETLINK;
734 	nladdr->nl_pad = 0;
735 	*addr_len = sizeof(*nladdr);
736 
737 	if (peer) {
738 		nladdr->nl_pid = nlk->dst_pid;
739 		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
740 	} else {
741 		nladdr->nl_pid = nlk->pid;
742 		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
743 	}
744 	return 0;
745 }
746 
747 static void netlink_overrun(struct sock *sk)
748 {
749 	struct netlink_sock *nlk = nlk_sk(sk);
750 
751 	if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
752 		if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
753 			sk->sk_err = ENOBUFS;
754 			sk->sk_error_report(sk);
755 		}
756 	}
757 	atomic_inc(&sk->sk_drops);
758 }
759 
760 static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
761 {
762 	struct sock *sock;
763 	struct netlink_sock *nlk;
764 
765 	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid);
766 	if (!sock)
767 		return ERR_PTR(-ECONNREFUSED);
768 
769 	/* Don't bother queuing skb if kernel socket has no input function */
770 	nlk = nlk_sk(sock);
771 	if (sock->sk_state == NETLINK_CONNECTED &&
772 	    nlk->dst_pid != nlk_sk(ssk)->pid) {
773 		sock_put(sock);
774 		return ERR_PTR(-ECONNREFUSED);
775 	}
776 	return sock;
777 }
778 
779 struct sock *netlink_getsockbyfilp(struct file *filp)
780 {
781 	struct inode *inode = filp->f_path.dentry->d_inode;
782 	struct sock *sock;
783 
784 	if (!S_ISSOCK(inode->i_mode))
785 		return ERR_PTR(-ENOTSOCK);
786 
787 	sock = SOCKET_I(inode)->sk;
788 	if (sock->sk_family != AF_NETLINK)
789 		return ERR_PTR(-EINVAL);
790 
791 	sock_hold(sock);
792 	return sock;
793 }
794 
795 /*
796  * Attach a skb to a netlink socket.
797  * The caller must hold a reference to the destination socket. On error, the
798  * reference is dropped. The skb is not sent to the destination; all
799  * error checks are performed and memory in the receive queue is reserved.
800  * Return values:
801  * < 0: error. skb freed, reference to sock dropped.
802  * 0: continue
803  * 1: repeat lookup - reference dropped while waiting for socket memory.
804  */
805 int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
806 		      long *timeo, struct sock *ssk)
807 {
808 	struct netlink_sock *nlk;
809 
810 	nlk = nlk_sk(sk);
811 
812 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
813 	    test_bit(0, &nlk->state)) {
814 		DECLARE_WAITQUEUE(wait, current);
815 		if (!*timeo) {
816 			if (!ssk || netlink_is_kernel(ssk))
817 				netlink_overrun(sk);
818 			sock_put(sk);
819 			kfree_skb(skb);
820 			return -EAGAIN;
821 		}
822 
823 		__set_current_state(TASK_INTERRUPTIBLE);
824 		add_wait_queue(&nlk->wait, &wait);
825 
826 		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
827 		     test_bit(0, &nlk->state)) &&
828 		    !sock_flag(sk, SOCK_DEAD))
829 			*timeo = schedule_timeout(*timeo);
830 
831 		__set_current_state(TASK_RUNNING);
832 		remove_wait_queue(&nlk->wait, &wait);
833 		sock_put(sk);
834 
835 		if (signal_pending(current)) {
836 			kfree_skb(skb);
837 			return sock_intr_errno(*timeo);
838 		}
839 		return 1;
840 	}
841 	skb_set_owner_r(skb, sk);
842 	return 0;
843 }
844 
845 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
846 {
847 	int len = skb->len;
848 
849 	skb_queue_tail(&sk->sk_receive_queue, skb);
850 	sk->sk_data_ready(sk, len);
851 	return len;
852 }
853 
854 int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
855 {
856 	int len = __netlink_sendskb(sk, skb);
857 
858 	sock_put(sk);
859 	return len;
860 }
861 
862 void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
863 {
864 	kfree_skb(skb);
865 	sock_put(sk);
866 }
867 
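/*
 * Shrink an over-allocated skb so its truesize better matches the actual
 * data length before it is charged against receiver socket buffers.
 */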
868 static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
869 {
870 	int delta;
871 
872 	skb_orphan(skb);
873 
874 	delta = skb->end - skb->tail;
875 	if (delta * 2 < skb->truesize)
876 		return skb;
877 
878 	if (skb_shared(skb)) {
879 		struct sk_buff *nskb = skb_clone(skb, allocation);
880 		if (!nskb)
881 			return skb;
882 		kfree_skb(skb);
883 		skb = nskb;
884 	}
885 
886 	if (!pskb_expand_head(skb, 0, -delta, allocation))
887 		skb->truesize -= delta;
888 
889 	return skb;
890 }
891 
892 static void netlink_rcv_wake(struct sock *sk)
893 {
894 	struct netlink_sock *nlk = nlk_sk(sk);
895 
896 	if (skb_queue_empty(&sk->sk_receive_queue))
897 		clear_bit(0, &nlk->state);
898 	if (!test_bit(0, &nlk->state))
899 		wake_up_interruptible(&nlk->wait);
900 }
901 
902 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
903 {
904 	int ret;
905 	struct netlink_sock *nlk = nlk_sk(sk);
906 
907 	ret = -ECONNREFUSED;
908 	if (nlk->netlink_rcv != NULL) {
909 		ret = skb->len;
910 		skb_set_owner_r(skb, sk);
911 		nlk->netlink_rcv(skb);
912 	}
913 	kfree_skb(skb);
914 	sock_put(sk);
915 	return ret;
916 }
917 
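/*
 * Deliver an skb to the socket bound to the given pid, waiting for
 * receive buffer space when nonblock is zero. Consumes the skb and
 * returns its length on success or a negative error code.
 */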
918 int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
919 		    u32 pid, int nonblock)
920 {
921 	struct sock *sk;
922 	int err;
923 	long timeo;
924 
925 	skb = netlink_trim(skb, gfp_any());
926 
927 	timeo = sock_sndtimeo(ssk, nonblock);
928 retry:
929 	sk = netlink_getsockbypid(ssk, pid);
930 	if (IS_ERR(sk)) {
931 		kfree_skb(skb);
932 		return PTR_ERR(sk);
933 	}
934 	if (netlink_is_kernel(sk))
935 		return netlink_unicast_kernel(sk, skb);
936 
937 	if (sk_filter(sk, skb)) {
938 		err = skb->len;
939 		kfree_skb(skb);
940 		sock_put(sk);
941 		return err;
942 	}
943 
944 	err = netlink_attachskb(sk, skb, &timeo, ssk);
945 	if (err == 1)
946 		goto retry;
947 	if (err)
948 		return err;
949 
950 	return netlink_sendskb(sk, skb);
951 }
952 EXPORT_SYMBOL(netlink_unicast);
953 
954 int netlink_has_listeners(struct sock *sk, unsigned int group)
955 {
956 	int res = 0;
957 	struct listeners *listeners;
958 
959 	BUG_ON(!netlink_is_kernel(sk));
960 
961 	rcu_read_lock();
962 	listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
963 
964 	if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
965 		res = test_bit(group - 1, listeners->masks);
966 
967 	rcu_read_unlock();
968 
969 	return res;
970 }
971 EXPORT_SYMBOL_GPL(netlink_has_listeners);
972 
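/*
 * Queue one copy of a broadcast skb on a receiving socket. Returns -1 if
 * the receive buffer is full or the socket is marked congested, otherwise
 * a hint whether the receiver is nearing its rcvbuf limit.
 */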
973 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
974 {
975 	struct netlink_sock *nlk = nlk_sk(sk);
976 
977 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
978 	    !test_bit(0, &nlk->state)) {
979 		skb_set_owner_r(skb, sk);
980 		__netlink_sendskb(sk, skb);
981 		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
982 	}
983 	return -1;
984 }
985 
986 struct netlink_broadcast_data {
987 	struct sock *exclude_sk;
988 	struct net *net;
989 	u32 pid;
990 	u32 group;
991 	int failure;
992 	int delivery_failure;
993 	int congested;
994 	int delivered;
995 	gfp_t allocation;
996 	struct sk_buff *skb, *skb2;
997 	int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
998 	void *tx_data;
999 };
1000 
1001 static int do_one_broadcast(struct sock *sk,
1002 				   struct netlink_broadcast_data *p)
1003 {
1004 	struct netlink_sock *nlk = nlk_sk(sk);
1005 	int val;
1006 
1007 	if (p->exclude_sk == sk)
1008 		goto out;
1009 
1010 	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
1011 	    !test_bit(p->group - 1, nlk->groups))
1012 		goto out;
1013 
1014 	if (!net_eq(sock_net(sk), p->net))
1015 		goto out;
1016 
1017 	if (p->failure) {
1018 		netlink_overrun(sk);
1019 		goto out;
1020 	}
1021 
1022 	sock_hold(sk);
1023 	if (p->skb2 == NULL) {
1024 		if (skb_shared(p->skb)) {
1025 			p->skb2 = skb_clone(p->skb, p->allocation);
1026 		} else {
1027 			p->skb2 = skb_get(p->skb);
1028 			/*
1029 			 * skb ownership may have been set when
1030 			 * delivered to a previous socket.
1031 			 */
1032 			skb_orphan(p->skb2);
1033 		}
1034 	}
1035 	if (p->skb2 == NULL) {
1036 		netlink_overrun(sk);
1037 		/* Clone failed. Notify ALL listeners. */
1038 		p->failure = 1;
1039 		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1040 			p->delivery_failure = 1;
1041 	} else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
1042 		kfree_skb(p->skb2);
1043 		p->skb2 = NULL;
1044 	} else if (sk_filter(sk, p->skb2)) {
1045 		kfree_skb(p->skb2);
1046 		p->skb2 = NULL;
1047 	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
1048 		netlink_overrun(sk);
1049 		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1050 			p->delivery_failure = 1;
1051 	} else {
1052 		p->congested |= val;
1053 		p->delivered = 1;
1054 		p->skb2 = NULL;
1055 	}
1056 	sock_put(sk);
1057 
1058 out:
1059 	return 0;
1060 }
1061 
1062 int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
1063 	u32 group, gfp_t allocation,
1064 	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
1065 	void *filter_data)
1066 {
1067 	struct net *net = sock_net(ssk);
1068 	struct netlink_broadcast_data info;
1069 	struct hlist_node *node;
1070 	struct sock *sk;
1071 
1072 	skb = netlink_trim(skb, allocation);
1073 
1074 	info.exclude_sk = ssk;
1075 	info.net = net;
1076 	info.pid = pid;
1077 	info.group = group;
1078 	info.failure = 0;
1079 	info.delivery_failure = 0;
1080 	info.congested = 0;
1081 	info.delivered = 0;
1082 	info.allocation = allocation;
1083 	info.skb = skb;
1084 	info.skb2 = NULL;
1085 	info.tx_filter = filter;
1086 	info.tx_data = filter_data;
1087 
1088 	/* While we sleep in clone, do not allow the socket list to change */
1089 
1090 	netlink_lock_table();
1091 
1092 	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
1093 		do_one_broadcast(sk, &info);
1094 
1095 	consume_skb(skb);
1096 
1097 	netlink_unlock_table();
1098 
1099 	if (info.delivery_failure) {
1100 		kfree_skb(info.skb2);
1101 		return -ENOBUFS;
1102 	} else
1103 		consume_skb(info.skb2);
1104 
1105 	if (info.delivered) {
1106 		if (info.congested && (allocation & __GFP_WAIT))
1107 			yield();
1108 		return 0;
1109 	}
1110 	return -ESRCH;
1111 }
1112 EXPORT_SYMBOL(netlink_broadcast_filtered);
1113 
1114 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1115 		      u32 group, gfp_t allocation)
1116 {
1117 	return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
1118 		NULL, NULL);
1119 }
1120 EXPORT_SYMBOL(netlink_broadcast);
1121 
1122 struct netlink_set_err_data {
1123 	struct sock *exclude_sk;
1124 	u32 pid;
1125 	u32 group;
1126 	int code;
1127 };
1128 
1129 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
1130 {
1131 	struct netlink_sock *nlk = nlk_sk(sk);
1132 	int ret = 0;
1133 
1134 	if (sk == p->exclude_sk)
1135 		goto out;
1136 
1137 	if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
1138 		goto out;
1139 
1140 	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
1141 	    !test_bit(p->group - 1, nlk->groups))
1142 		goto out;
1143 
1144 	if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
1145 		ret = 1;
1146 		goto out;
1147 	}
1148 
1149 	sk->sk_err = p->code;
1150 	sk->sk_error_report(sk);
1151 out:
1152 	return ret;
1153 }
1154 
1155 /**
1156  * netlink_set_err - report error to broadcast listeners
1157  * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
1158  * @pid: the PID of a process that we want to skip (if any)
1159  * @group: the broadcast group that will notice the error
1160  * @code: error code, must be negative (as usual in kernelspace)
1161  *
1162  * This function returns the number of broadcast listeners that have set the
1163  * NETLINK_RECV_NO_ENOBUFS socket option.
1164  */
1165 int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
1166 {
1167 	struct netlink_set_err_data info;
1168 	struct hlist_node *node;
1169 	struct sock *sk;
1170 	int ret = 0;
1171 
1172 	info.exclude_sk = ssk;
1173 	info.pid = pid;
1174 	info.group = group;
1175 	/* sk->sk_err wants a positive error value */
1176 	info.code = -code;
1177 
1178 	read_lock(&nl_table_lock);
1179 
1180 	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
1181 		ret += do_one_set_err(sk, &info);
1182 
1183 	read_unlock(&nl_table_lock);
1184 	return ret;
1185 }
1186 EXPORT_SYMBOL(netlink_set_err);
1187 
1188 /* must be called with netlink table grabbed */
1189 static void netlink_update_socket_mc(struct netlink_sock *nlk,
1190 				     unsigned int group,
1191 				     int is_new)
1192 {
1193 	int old, new = !!is_new, subscriptions;
1194 
1195 	old = test_bit(group - 1, nlk->groups);
1196 	subscriptions = nlk->subscriptions - old + new;
1197 	if (new)
1198 		__set_bit(group - 1, nlk->groups);
1199 	else
1200 		__clear_bit(group - 1, nlk->groups);
1201 	netlink_update_subscriptions(&nlk->sk, subscriptions);
1202 	netlink_update_listeners(&nlk->sk);
1203 }
1204 
1205 static int netlink_setsockopt(struct socket *sock, int level, int optname,
1206 			      char __user *optval, unsigned int optlen)
1207 {
1208 	struct sock *sk = sock->sk;
1209 	struct netlink_sock *nlk = nlk_sk(sk);
1210 	unsigned int val = 0;
1211 	int err;
1212 
1213 	if (level != SOL_NETLINK)
1214 		return -ENOPROTOOPT;
1215 
1216 	if (optlen >= sizeof(int) &&
1217 	    get_user(val, (unsigned int __user *)optval))
1218 		return -EFAULT;
1219 
1220 	switch (optname) {
1221 	case NETLINK_PKTINFO:
1222 		if (val)
1223 			nlk->flags |= NETLINK_RECV_PKTINFO;
1224 		else
1225 			nlk->flags &= ~NETLINK_RECV_PKTINFO;
1226 		err = 0;
1227 		break;
1228 	case NETLINK_ADD_MEMBERSHIP:
1229 	case NETLINK_DROP_MEMBERSHIP: {
1230 		if (!netlink_capable(sock, NL_NONROOT_RECV))
1231 			return -EPERM;
1232 		err = netlink_realloc_groups(sk);
1233 		if (err)
1234 			return err;
1235 		if (!val || val - 1 >= nlk->ngroups)
1236 			return -EINVAL;
1237 		netlink_table_grab();
1238 		netlink_update_socket_mc(nlk, val,
1239 					 optname == NETLINK_ADD_MEMBERSHIP);
1240 		netlink_table_ungrab();
1241 		err = 0;
1242 		break;
1243 	}
1244 	case NETLINK_BROADCAST_ERROR:
1245 		if (val)
1246 			nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
1247 		else
1248 			nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
1249 		err = 0;
1250 		break;
1251 	case NETLINK_NO_ENOBUFS:
1252 		if (val) {
1253 			nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
1254 			clear_bit(0, &nlk->state);
1255 			wake_up_interruptible(&nlk->wait);
1256 		} else
1257 			nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
1258 		err = 0;
1259 		break;
1260 	default:
1261 		err = -ENOPROTOOPT;
1262 	}
1263 	return err;
1264 }
1265 
1266 static int netlink_getsockopt(struct socket *sock, int level, int optname,
1267 			      char __user *optval, int __user *optlen)
1268 {
1269 	struct sock *sk = sock->sk;
1270 	struct netlink_sock *nlk = nlk_sk(sk);
1271 	int len, val, err;
1272 
1273 	if (level != SOL_NETLINK)
1274 		return -ENOPROTOOPT;
1275 
1276 	if (get_user(len, optlen))
1277 		return -EFAULT;
1278 	if (len < 0)
1279 		return -EINVAL;
1280 
1281 	switch (optname) {
1282 	case NETLINK_PKTINFO:
1283 		if (len < sizeof(int))
1284 			return -EINVAL;
1285 		len = sizeof(int);
1286 		val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
1287 		if (put_user(len, optlen) ||
1288 		    put_user(val, optval))
1289 			return -EFAULT;
1290 		err = 0;
1291 		break;
1292 	case NETLINK_BROADCAST_ERROR:
1293 		if (len < sizeof(int))
1294 			return -EINVAL;
1295 		len = sizeof(int);
1296 		val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
1297 		if (put_user(len, optlen) ||
1298 		    put_user(val, optval))
1299 			return -EFAULT;
1300 		err = 0;
1301 		break;
1302 	case NETLINK_NO_ENOBUFS:
1303 		if (len < sizeof(int))
1304 			return -EINVAL;
1305 		len = sizeof(int);
1306 		val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
1307 		if (put_user(len, optlen) ||
1308 		    put_user(val, optval))
1309 			return -EFAULT;
1310 		err = 0;
1311 		break;
1312 	default:
1313 		err = -ENOPROTOOPT;
1314 	}
1315 	return err;
1316 }
1317 
1318 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1319 {
1320 	struct nl_pktinfo info;
1321 
1322 	info.group = NETLINK_CB(skb).dst_group;
1323 	put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1324 }
1325 
1326 static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1327 			   struct msghdr *msg, size_t len)
1328 {
1329 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1330 	struct sock *sk = sock->sk;
1331 	struct netlink_sock *nlk = nlk_sk(sk);
1332 	struct sockaddr_nl *addr = msg->msg_name;
1333 	u32 dst_pid;
1334 	u32 dst_group;
1335 	struct sk_buff *skb;
1336 	int err;
1337 	struct scm_cookie scm;
1338 
1339 	if (msg->msg_flags&MSG_OOB)
1340 		return -EOPNOTSUPP;
1341 
1342 	if (NULL == siocb->scm)
1343 		siocb->scm = &scm;
1344 
1345 	err = scm_send(sock, msg, siocb->scm, true);
1346 	if (err < 0)
1347 		return err;
1348 
1349 	if (msg->msg_namelen) {
1350 		err = -EINVAL;
1351 		if (addr->nl_family != AF_NETLINK)
1352 			goto out;
1353 		dst_pid = addr->nl_pid;
1354 		dst_group = ffs(addr->nl_groups);
1355 		err =  -EPERM;
1356 		if ((dst_group || dst_pid) &&
1357 		    !netlink_capable(sock, NL_NONROOT_SEND))
1358 			goto out;
1359 	} else {
1360 		dst_pid = nlk->dst_pid;
1361 		dst_group = nlk->dst_group;
1362 	}
1363 
1364 	if (!nlk->pid) {
1365 		err = netlink_autobind(sock);
1366 		if (err)
1367 			goto out;
1368 	}
1369 
1370 	err = -EMSGSIZE;
1371 	if (len > sk->sk_sndbuf - 32)
1372 		goto out;
1373 	err = -ENOBUFS;
1374 	skb = alloc_skb(len, GFP_KERNEL);
1375 	if (skb == NULL)
1376 		goto out;
1377 
1378 	NETLINK_CB(skb).pid	= nlk->pid;
1379 	NETLINK_CB(skb).dst_group = dst_group;
1380 	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1381 
1382 	err = -EFAULT;
1383 	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
1384 		kfree_skb(skb);
1385 		goto out;
1386 	}
1387 
1388 	err = security_netlink_send(sk, skb);
1389 	if (err) {
1390 		kfree_skb(skb);
1391 		goto out;
1392 	}
1393 
1394 	if (dst_group) {
1395 		atomic_inc(&skb->users);
1396 		netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
1397 	}
1398 	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
1399 
1400 out:
1401 	scm_destroy(siocb->scm);
1402 	return err;
1403 }
1404 
1405 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1406 			   struct msghdr *msg, size_t len,
1407 			   int flags)
1408 {
1409 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1410 	struct scm_cookie scm;
1411 	struct sock *sk = sock->sk;
1412 	struct netlink_sock *nlk = nlk_sk(sk);
1413 	int noblock = flags&MSG_DONTWAIT;
1414 	size_t copied;
1415 	struct sk_buff *skb, *data_skb;
1416 	int err, ret;
1417 
1418 	if (flags&MSG_OOB)
1419 		return -EOPNOTSUPP;
1420 
1421 	copied = 0;
1422 
1423 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1424 	if (skb == NULL)
1425 		goto out;
1426 
1427 	data_skb = skb;
1428 
1429 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES
1430 	if (unlikely(skb_shinfo(skb)->frag_list)) {
1431 		/*
1432 		 * If this skb has a frag_list, it means that we will have to
1433 		 * use the frag_list skb's data for compat tasks and the
1434 		 * regular skb's data for normal (non-compat) tasks.
1435 		 *
1436 		 * If we need to send the compat skb, assign it to the
1437 		 * 'data_skb' variable so that it will be used below for data
1438 		 * copying. We keep 'skb' for everything else, including
1439 		 * freeing both later.
1440 		 */
1441 		if (flags & MSG_CMSG_COMPAT)
1442 			data_skb = skb_shinfo(skb)->frag_list;
1443 	}
1444 #endif
1445 
1446 	copied = data_skb->len;
1447 	if (len < copied) {
1448 		msg->msg_flags |= MSG_TRUNC;
1449 		copied = len;
1450 	}
1451 
1452 	skb_reset_transport_header(data_skb);
1453 	err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
1454 
1455 	if (msg->msg_name) {
1456 		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
1457 		addr->nl_family = AF_NETLINK;
1458 		addr->nl_pad    = 0;
1459 		addr->nl_pid	= NETLINK_CB(skb).pid;
1460 		addr->nl_groups	= netlink_group_mask(NETLINK_CB(skb).dst_group);
1461 		msg->msg_namelen = sizeof(*addr);
1462 	}
1463 
1464 	if (nlk->flags & NETLINK_RECV_PKTINFO)
1465 		netlink_cmsg_recv_pktinfo(msg, skb);
1466 
1467 	if (NULL == siocb->scm) {
1468 		memset(&scm, 0, sizeof(scm));
1469 		siocb->scm = &scm;
1470 	}
1471 	siocb->scm->creds = *NETLINK_CREDS(skb);
1472 	if (flags & MSG_TRUNC)
1473 		copied = data_skb->len;
1474 
1475 	skb_free_datagram(sk, skb);
1476 
1477 	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1478 		ret = netlink_dump(sk);
1479 		if (ret) {
1480 			sk->sk_err = ret;
1481 			sk->sk_error_report(sk);
1482 		}
1483 	}
1484 
1485 	scm_recv(sock, msg, siocb->scm, flags);
1486 out:
1487 	netlink_rcv_wake(sk);
1488 	return err ? : copied;
1489 }
1490 
1491 static void netlink_data_ready(struct sock *sk, int len)
1492 {
1493 	BUG();
1494 }
1495 
1496 /*
1497  *	We export these functions to other modules. They provide a
1498  *	complete set of kernel non-blocking support for message
1499  *	queueing.
1500  */
1501 
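/*
 * A minimal usage sketch for netlink_kernel_create(): the protocol number
 * NETLINK_EXAMPLE and the example_rcv()/example_handle_msg() handlers below
 * are hypothetical names, shown only to illustrate the calling convention.
 *
 *	static void example_rcv(struct sk_buff *skb)
 *	{
 *		netlink_rcv_skb(skb, &example_handle_msg);
 *	}
 *
 *	sk = netlink_kernel_create(&init_net, NETLINK_EXAMPLE, 0,
 *				   example_rcv, NULL, THIS_MODULE);
 *	if (sk == NULL)
 *		return -ENOMEM;
 */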
1502 struct sock *
1503 netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1504 		      void (*input)(struct sk_buff *skb),
1505 		      struct mutex *cb_mutex, struct module *module)
1506 {
1507 	struct socket *sock;
1508 	struct sock *sk;
1509 	struct netlink_sock *nlk;
1510 	struct listeners *listeners = NULL;
1511 
1512 	BUG_ON(!nl_table);
1513 
1514 	if (unit < 0 || unit >= MAX_LINKS)
1515 		return NULL;
1516 
1517 	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1518 		return NULL;
1519 
1520 	/*
1521 	 * We only need a reference on the net from sk, but we must not
1522 	 * get_net() it. Besides, we cannot get and then put the net here.
1523 	 * So we create the socket inside init_net and then move it to net.
1524 	 */
1525 
1526 	if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
1527 		goto out_sock_release_nosk;
1528 
1529 	sk = sock->sk;
1530 	sk_change_net(sk, net);
1531 
1532 	if (groups < 32)
1533 		groups = 32;
1534 
1535 	listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
1536 	if (!listeners)
1537 		goto out_sock_release;
1538 
1539 	sk->sk_data_ready = netlink_data_ready;
1540 	if (input)
1541 		nlk_sk(sk)->netlink_rcv = input;
1542 
1543 	if (netlink_insert(sk, net, 0))
1544 		goto out_sock_release;
1545 
1546 	nlk = nlk_sk(sk);
1547 	nlk->flags |= NETLINK_KERNEL_SOCKET;
1548 
1549 	netlink_table_grab();
1550 	if (!nl_table[unit].registered) {
1551 		nl_table[unit].groups = groups;
1552 		rcu_assign_pointer(nl_table[unit].listeners, listeners);
1553 		nl_table[unit].cb_mutex = cb_mutex;
1554 		nl_table[unit].module = module;
1555 		nl_table[unit].registered = 1;
1556 	} else {
1557 		kfree(listeners);
1558 		nl_table[unit].registered++;
1559 	}
1560 	netlink_table_ungrab();
1561 	return sk;
1562 
1563 out_sock_release:
1564 	kfree(listeners);
1565 	netlink_kernel_release(sk);
1566 	return NULL;
1567 
1568 out_sock_release_nosk:
1569 	sock_release(sock);
1570 	return NULL;
1571 }
1572 EXPORT_SYMBOL(netlink_kernel_create);
1573 
1574 
1575 void
1576 netlink_kernel_release(struct sock *sk)
1577 {
1578 	sk_release_kernel(sk);
1579 }
1580 EXPORT_SYMBOL(netlink_kernel_release);
1581 
1582 int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1583 {
1584 	struct listeners *new, *old;
1585 	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1586 
1587 	if (groups < 32)
1588 		groups = 32;
1589 
1590 	if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
1591 		new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1592 		if (!new)
1593 			return -ENOMEM;
1594 		old = nl_deref_protected(tbl->listeners);
1595 		memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1596 		rcu_assign_pointer(tbl->listeners, new);
1597 
1598 		kfree_rcu(old, rcu);
1599 	}
1600 	tbl->groups = groups;
1601 
1602 	return 0;
1603 }
1604 
1605 /**
1606  * netlink_change_ngroups - change number of multicast groups
1607  *
1608  * This changes the number of multicast groups that are available
1609  * on a certain netlink family. Note that it is not possible to
1610  * change the number of groups to below 32. Also note that it does
1611  * not implicitly call netlink_clear_multicast_users() when the
1612  * number of groups is reduced.
1613  *
1614  * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
1615  * @groups: The new number of groups.
1616  */
1617 int netlink_change_ngroups(struct sock *sk, unsigned int groups)
1618 {
1619 	int err;
1620 
1621 	netlink_table_grab();
1622 	err = __netlink_change_ngroups(sk, groups);
1623 	netlink_table_ungrab();
1624 
1625 	return err;
1626 }
1627 
1628 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1629 {
1630 	struct sock *sk;
1631 	struct hlist_node *node;
1632 	struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
1633 
1634 	sk_for_each_bound(sk, node, &tbl->mc_list)
1635 		netlink_update_socket_mc(nlk_sk(sk), group, 0);
1636 }
1637 
1638 /**
1639  * netlink_clear_multicast_users - kick all listeners off a multicast group
1640  *
1641  * This function removes all listeners from the given group.
1642  * @ksk: The kernel netlink socket, as returned by
1643  *	netlink_kernel_create().
1644  * @group: The multicast group to clear.
1645  */
1646 void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1647 {
1648 	netlink_table_grab();
1649 	__netlink_clear_multicast_users(ksk, group);
1650 	netlink_table_ungrab();
1651 }
1652 
1653 void netlink_set_nonroot(int protocol, unsigned int flags)
1654 {
1655 	if ((unsigned int)protocol < MAX_LINKS)
1656 		nl_table[protocol].nl_nonroot = flags;
1657 }
1658 EXPORT_SYMBOL(netlink_set_nonroot);
1659 
1660 static void netlink_destroy_callback(struct netlink_callback *cb)
1661 {
1662 	kfree_skb(cb->skb);
1663 	kfree(cb);
1664 }
1665 
1666 struct nlmsghdr *
1667 __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
1668 {
1669 	struct nlmsghdr *nlh;
1670 	int size = NLMSG_LENGTH(len);
1671 
1672 	nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
1673 	nlh->nlmsg_type = type;
1674 	nlh->nlmsg_len = size;
1675 	nlh->nlmsg_flags = flags;
1676 	nlh->nlmsg_pid = pid;
1677 	nlh->nlmsg_seq = seq;
1678 	if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
1679 		memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
1680 	return nlh;
1681 }
1682 EXPORT_SYMBOL(__nlmsg_put);
1683 
1684 /*
1685  * It looks a bit ugly.
1686  * It would be better to create a kernel thread.
1687  */
1688 
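/*
 * Emit the next chunk of an in-progress dump: allocate an skb, let the
 * dump callback fill it, and queue it to the receiver. When the callback
 * reports it is done (returns <= 0), append the NLMSG_DONE message and
 * tear the callback down.
 */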
1689 static int netlink_dump(struct sock *sk)
1690 {
1691 	struct netlink_sock *nlk = nlk_sk(sk);
1692 	struct netlink_callback *cb;
1693 	struct sk_buff *skb = NULL;
1694 	struct nlmsghdr *nlh;
1695 	int len, err = -ENOBUFS;
1696 	int alloc_size;
1697 
1698 	mutex_lock(nlk->cb_mutex);
1699 
1700 	cb = nlk->cb;
1701 	if (cb == NULL) {
1702 		err = -EINVAL;
1703 		goto errout_skb;
1704 	}
1705 
1706 	alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
1707 
1708 	skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
1709 	if (!skb)
1710 		goto errout_skb;
1711 
1712 	len = cb->dump(skb, cb);
1713 
1714 	if (len > 0) {
1715 		mutex_unlock(nlk->cb_mutex);
1716 
1717 		if (sk_filter(sk, skb))
1718 			kfree_skb(skb);
1719 		else
1720 			__netlink_sendskb(sk, skb);
1721 		return 0;
1722 	}
1723 
1724 	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1725 	if (!nlh)
1726 		goto errout_skb;
1727 
1728 	nl_dump_check_consistent(cb, nlh);
1729 
1730 	memcpy(nlmsg_data(nlh), &len, sizeof(len));
1731 
1732 	if (sk_filter(sk, skb))
1733 		kfree_skb(skb);
1734 	else
1735 		__netlink_sendskb(sk, skb);
1736 
1737 	if (cb->done)
1738 		cb->done(cb);
1739 	nlk->cb = NULL;
1740 	mutex_unlock(nlk->cb_mutex);
1741 
1742 	module_put(cb->module);
1743 	netlink_destroy_callback(cb);
1744 	return 0;
1745 
1746 errout_skb:
1747 	mutex_unlock(nlk->cb_mutex);
1748 	kfree_skb(skb);
1749 	return err;
1750 }
1751 
1752 int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1753 			 const struct nlmsghdr *nlh,
1754 			 struct netlink_dump_control *control)
1755 {
1756 	struct netlink_callback *cb;
1757 	struct sock *sk;
1758 	struct netlink_sock *nlk;
1759 	int ret;
1760 
1761 	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
1762 	if (cb == NULL)
1763 		return -ENOBUFS;
1764 
1765 	cb->dump = control->dump;
1766 	cb->done = control->done;
1767 	cb->nlh = nlh;
1768 	cb->data = control->data;
1769 	cb->module = control->module;
1770 	cb->min_dump_alloc = control->min_dump_alloc;
1771 	atomic_inc(&skb->users);
1772 	cb->skb = skb;
1773 
1774 	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid);
1775 	if (sk == NULL) {
1776 		netlink_destroy_callback(cb);
1777 		return -ECONNREFUSED;
1778 	}
1779 	nlk = nlk_sk(sk);
1780 
1781 	mutex_lock(nlk->cb_mutex);
1782 	/* A dump is in progress... */
1783 	if (nlk->cb) {
1784 		mutex_unlock(nlk->cb_mutex);
1785 		netlink_destroy_callback(cb);
1786 		ret = -EBUSY;
1787 		goto out;
1788 	}
1789 	/* take a reference on the module that cb->dump belongs to */
1790 	if (!try_module_get(cb->module)) {
1791 		mutex_unlock(nlk->cb_mutex);
1792 		netlink_destroy_callback(cb);
1793 		ret = -EPROTONOSUPPORT;
1794 		goto out;
1795 	}
1796 
1797 	nlk->cb = cb;
1798 	mutex_unlock(nlk->cb_mutex);
1799 
1800 	ret = netlink_dump(sk);
1801 out:
1802 	sock_put(sk);
1803 
1804 	if (ret)
1805 		return ret;
1806 
1807 	/* We successfully started a dump; by returning -EINTR we signal
1808 	 * the caller not to send an ACK even if one was requested.
1809 	 */
1810 	return -EINTR;
1811 }
1812 EXPORT_SYMBOL(__netlink_dump_start);
1813 
1814 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1815 {
1816 	struct sk_buff *skb;
1817 	struct nlmsghdr *rep;
1818 	struct nlmsgerr *errmsg;
1819 	size_t payload = sizeof(*errmsg);
1820 
1821 	/* error messages get the original request appended */
1822 	if (err)
1823 		payload += nlmsg_len(nlh);
1824 
1825 	skb = nlmsg_new(payload, GFP_KERNEL);
1826 	if (!skb) {
1827 		struct sock *sk;
1828 
1829 		sk = netlink_lookup(sock_net(in_skb->sk),
1830 				    in_skb->sk->sk_protocol,
1831 				    NETLINK_CB(in_skb).pid);
1832 		if (sk) {
1833 			sk->sk_err = ENOBUFS;
1834 			sk->sk_error_report(sk);
1835 			sock_put(sk);
1836 		}
1837 		return;
1838 	}
1839 
1840 	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
1841 			  NLMSG_ERROR, payload, 0);
1842 	errmsg = nlmsg_data(rep);
1843 	errmsg->error = err;
1844 	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
1845 	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1846 }
1847 EXPORT_SYMBOL(netlink_ack);
1848 
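/*
 * Walk all netlink messages in an skb and hand each request to the
 * callback, acking it when NLM_F_ACK is set or the callback failed.
 * Control messages and non-requests are acked (if requested) but not
 * processed; a return of -EINTR from the callback suppresses the ack,
 * since a dump was started.
 */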
1849 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1850 						     struct nlmsghdr *))
1851 {
1852 	struct nlmsghdr *nlh;
1853 	int err;
1854 
1855 	while (skb->len >= nlmsg_total_size(0)) {
1856 		int msglen;
1857 
1858 		nlh = nlmsg_hdr(skb);
1859 		err = 0;
1860 
1861 		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
1862 			return 0;
1863 
1864 		/* Only requests are handled by the kernel */
1865 		if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
1866 			goto ack;
1867 
1868 		/* Skip control messages */
1869 		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
1870 			goto ack;
1871 
1872 		err = cb(skb, nlh);
1873 		if (err == -EINTR)
1874 			goto skip;
1875 
1876 ack:
1877 		if (nlh->nlmsg_flags & NLM_F_ACK || err)
1878 			netlink_ack(skb, nlh, err);
1879 
1880 skip:
1881 		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1882 		if (msglen > skb->len)
1883 			msglen = skb->len;
1884 		skb_pull(skb, msglen);
1885 	}
1886 
1887 	return 0;
1888 }
1889 EXPORT_SYMBOL(netlink_rcv_skb);
1890 
1891 /**
1892  * nlmsg_notify - send a notification netlink message
1893  * @sk: netlink socket to use
1894  * @skb: notification message
1895  * @pid: destination netlink pid for reports or 0
1896  * @group: destination multicast group or 0
1897  * @report: 1 to report back, 0 to disable
1898  * @flags: allocation flags
1899  */
1900 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
1901 		 unsigned int group, int report, gfp_t flags)
1902 {
1903 	int err = 0;
1904 
1905 	if (group) {
1906 		int exclude_pid = 0;
1907 
1908 		if (report) {
1909 			atomic_inc(&skb->users);
1910 			exclude_pid = pid;
1911 		}
1912 
1913 		/* errors are reported via the destination sk->sk_err, but
1914 		 * delivery errors are propagated if NETLINK_BROADCAST_ERROR is set */
1915 		err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
1916 	}
1917 
1918 	if (report) {
1919 		int err2;
1920 
1921 		err2 = nlmsg_unicast(sk, skb, pid);
1922 		if (!err || err == -ESRCH)
1923 			err = err2;
1924 	}
1925 
1926 	return err;
1927 }
1928 EXPORT_SYMBOL(nlmsg_notify);
1929 
1930 #ifdef CONFIG_PROC_FS
1931 struct nl_seq_iter {
1932 	struct seq_net_private p;
1933 	int link;
1934 	int hash_idx;
1935 };
1936 
1937 static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1938 {
1939 	struct nl_seq_iter *iter = seq->private;
1940 	int i, j;
1941 	struct sock *s;
1942 	struct hlist_node *node;
1943 	loff_t off = 0;
1944 
1945 	for (i = 0; i < MAX_LINKS; i++) {
1946 		struct nl_pid_hash *hash = &nl_table[i].hash;
1947 
1948 		for (j = 0; j <= hash->mask; j++) {
1949 			sk_for_each(s, node, &hash->table[j]) {
1950 				if (sock_net(s) != seq_file_net(seq))
1951 					continue;
1952 				if (off == pos) {
1953 					iter->link = i;
1954 					iter->hash_idx = j;
1955 					return s;
1956 				}
1957 				++off;
1958 			}
1959 		}
1960 	}
1961 	return NULL;
1962 }
1963 
1964 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
1965 	__acquires(nl_table_lock)
1966 {
1967 	read_lock(&nl_table_lock);
1968 	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1969 }
1970 
1971 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1972 {
1973 	struct sock *s;
1974 	struct nl_seq_iter *iter;
1975 	int i, j;
1976 
1977 	++*pos;
1978 
1979 	if (v == SEQ_START_TOKEN)
1980 		return netlink_seq_socket_idx(seq, 0);
1981 
1982 	iter = seq->private;
1983 	s = v;
1984 	do {
1985 		s = sk_next(s);
1986 	} while (s && sock_net(s) != seq_file_net(seq));
1987 	if (s)
1988 		return s;
1989 
1990 	i = iter->link;
1991 	j = iter->hash_idx + 1;
1992 
1993 	do {
1994 		struct nl_pid_hash *hash = &nl_table[i].hash;
1995 
1996 		for (; j <= hash->mask; j++) {
1997 			s = sk_head(&hash->table[j]);
1998 			while (s && sock_net(s) != seq_file_net(seq))
1999 				s = sk_next(s);
2000 			if (s) {
2001 				iter->link = i;
2002 				iter->hash_idx = j;
2003 				return s;
2004 			}
2005 		}
2006 
2007 		j = 0;
2008 	} while (++i < MAX_LINKS);
2009 
2010 	return NULL;
2011 }
2012 
2013 static void netlink_seq_stop(struct seq_file *seq, void *v)
2014 	__releases(nl_table_lock)
2015 {
2016 	read_unlock(&nl_table_lock);
2017 }
2018 
2019 
2020 static int netlink_seq_show(struct seq_file *seq, void *v)
2021 {
2022 	if (v == SEQ_START_TOKEN)
2023 		seq_puts(seq,
2024 			 "sk       Eth Pid    Groups   "
2025 			 "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
2026 	else {
2027 		struct sock *s = v;
2028 		struct netlink_sock *nlk = nlk_sk(s);
2029 
2030 		seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
2031 			   s,
2032 			   s->sk_protocol,
2033 			   nlk->pid,
2034 			   nlk->groups ? (u32)nlk->groups[0] : 0,
2035 			   sk_rmem_alloc_get(s),
2036 			   sk_wmem_alloc_get(s),
2037 			   nlk->cb,
2038 			   atomic_read(&s->sk_refcnt),
2039 			   atomic_read(&s->sk_drops),
2040 			   sock_i_ino(s)
2041 			);
2042 
2043 	}
2044 	return 0;
2045 }
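/*
 * Illustrative only (not part of the original file): each data row emitted
 * above into /proc/net/netlink maps to
 *   sk     - socket address (printed with %pK)
 *   Eth    - netlink protocol number (sk_protocol)
 *   Pid    - the socket's netlink pid (nlk->pid)
 *   Groups - bitmask of the first 32 subscribed multicast groups
 *   Rmem / Wmem - receive / send queue byte counts
 *   Dump   - active dump callback pointer, if any
 *   Locks  - sk_refcnt; Drops - sk_drops; Inode - socket inode number
 */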
2046 
2047 static const struct seq_operations netlink_seq_ops = {
2048 	.start  = netlink_seq_start,
2049 	.next   = netlink_seq_next,
2050 	.stop   = netlink_seq_stop,
2051 	.show   = netlink_seq_show,
2052 };
2053 
2054 
2055 static int netlink_seq_open(struct inode *inode, struct file *file)
2056 {
2057 	return seq_open_net(inode, file, &netlink_seq_ops,
2058 				sizeof(struct nl_seq_iter));
2059 }
2060 
2061 static const struct file_operations netlink_seq_fops = {
2062 	.owner		= THIS_MODULE,
2063 	.open		= netlink_seq_open,
2064 	.read		= seq_read,
2065 	.llseek		= seq_lseek,
2066 	.release	= seq_release_net,
2067 };
2068 
2069 #endif
2070 
2071 int netlink_register_notifier(struct notifier_block *nb)
2072 {
2073 	return atomic_notifier_chain_register(&netlink_chain, nb);
2074 }
2075 EXPORT_SYMBOL(netlink_register_notifier);
2076 
2077 int netlink_unregister_notifier(struct notifier_block *nb)
2078 {
2079 	return atomic_notifier_chain_unregister(&netlink_chain, nb);
2080 }
2081 EXPORT_SYMBOL(netlink_unregister_notifier);
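/*
 * Illustrative only (not part of the original file): a hypothetical
 * subscriber to the netlink notifier chain.  The chain fires
 * NETLINK_URELEASE when a bound user socket is released, passing a
 * struct netlink_notify with the namespace, pid and protocol.
 * "MY_NETLINK_PROTO" and "my_cleanup_peer" are made-up names.
 *
 *	static int my_netlink_event(struct notifier_block *nb,
 *				    unsigned long event, void *ptr)
 *	{
 *		struct netlink_notify *n = ptr;
 *
 *		if (event == NETLINK_URELEASE &&
 *		    n->protocol == MY_NETLINK_PROTO)
 *			my_cleanup_peer(n->net, n->pid);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call	= my_netlink_event,
 *	};
 *
 * Registered and removed with netlink_register_notifier(&my_nb) and
 * netlink_unregister_notifier(&my_nb).
 */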
2082 
2083 static const struct proto_ops netlink_ops = {
2084 	.family =	PF_NETLINK,
2085 	.owner =	THIS_MODULE,
2086 	.release =	netlink_release,
2087 	.bind =		netlink_bind,
2088 	.connect =	netlink_connect,
2089 	.socketpair =	sock_no_socketpair,
2090 	.accept =	sock_no_accept,
2091 	.getname =	netlink_getname,
2092 	.poll =		datagram_poll,
2093 	.ioctl =	sock_no_ioctl,
2094 	.listen =	sock_no_listen,
2095 	.shutdown =	sock_no_shutdown,
2096 	.setsockopt =	netlink_setsockopt,
2097 	.getsockopt =	netlink_getsockopt,
2098 	.sendmsg =	netlink_sendmsg,
2099 	.recvmsg =	netlink_recvmsg,
2100 	.mmap =		sock_no_mmap,
2101 	.sendpage =	sock_no_sendpage,
2102 };
2103 
2104 static const struct net_proto_family netlink_family_ops = {
2105 	.family = PF_NETLINK,
2106 	.create = netlink_create,
2107 	.owner	= THIS_MODULE,	/* for consistency 8) */
2108 };
2109 
2110 static int __net_init netlink_net_init(struct net *net)
2111 {
2112 #ifdef CONFIG_PROC_FS
2113 	if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
2114 		return -ENOMEM;
2115 #endif
2116 	return 0;
2117 }
2118 
2119 static void __net_exit netlink_net_exit(struct net *net)
2120 {
2121 #ifdef CONFIG_PROC_FS
2122 	proc_net_remove(net, "netlink");
2123 #endif
2124 }
2125 
2126 static void __init netlink_add_usersock_entry(void)
2127 {
2128 	struct listeners *listeners;
2129 	int groups = 32;
2130 
2131 	listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2132 	if (!listeners)
2133 		panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2134 
2135 	netlink_table_grab();
2136 
2137 	nl_table[NETLINK_USERSOCK].groups = groups;
2138 	rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2139 	nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2140 	nl_table[NETLINK_USERSOCK].registered = 1;
2141 	nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND;
2142 
2143 	netlink_table_ungrab();
2144 }
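/*
 * Illustrative only (not part of the original file): NL_NONROOT_SEND set
 * above lets unprivileged processes transmit on NETLINK_USERSOCK, so a
 * hypothetical userspace peer could simply do (error handling omitted):
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_USERSOCK);
 *	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
 *
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 */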
2145 
2146 static struct pernet_operations __net_initdata netlink_net_ops = {
2147 	.init = netlink_net_init,
2148 	.exit = netlink_net_exit,
2149 };
2150 
2151 static int __init netlink_proto_init(void)
2152 {
2153 	struct sk_buff *dummy_skb;
2154 	int i;
2155 	unsigned long limit;
2156 	unsigned int order;
2157 	int err = proto_register(&netlink_proto, 0);
2158 
2159 	if (err != 0)
2160 		goto out;
2161 
2162 	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
2163 
2164 	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2165 	if (!nl_table)
2166 		goto panic;
2167 
	/* Cap the per-protocol pid hash tables: with 4KiB pages the
	 * 128 * 1024 page threshold is 512MB, above which the table may
	 * grow to roughly 1/512th of RAM (1/2048th below it).  max_shift
	 * ends up as the log2 of the resulting maximum bucket count,
	 * e.g. about 2^18 buckets on a 1GiB machine with 8-byte
	 * hlist heads. */
2168 	if (totalram_pages >= (128 * 1024))
2169 		limit = totalram_pages >> (21 - PAGE_SHIFT);
2170 	else
2171 		limit = totalram_pages >> (23 - PAGE_SHIFT);
2172 
2173 	order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
2174 	limit = (1UL << order) / sizeof(struct hlist_head);
2175 	order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
2176 
2177 	for (i = 0; i < MAX_LINKS; i++) {
2178 		struct nl_pid_hash *hash = &nl_table[i].hash;
2179 
2180 		hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
2181 		if (!hash->table) {
2182 			while (i-- > 0)
2183 				nl_pid_hash_free(nl_table[i].hash.table,
2184 						 1 * sizeof(*hash->table));
2185 			kfree(nl_table);
2186 			goto panic;
2187 		}
2188 		hash->max_shift = order;
2189 		hash->shift = 0;
2190 		hash->mask = 0;
2191 		hash->rehash_time = jiffies;
2192 	}
2193 
2194 	netlink_add_usersock_entry();
2195 
2196 	sock_register(&netlink_family_ops);
2197 	register_pernet_subsys(&netlink_net_ops);
2198 	/* The netlink device handler may be needed early. */
2199 	rtnetlink_init();
2200 out:
2201 	return err;
2202 panic:
2203 	panic("netlink_init: Cannot allocate nl_table\n");
2204 }
2205 
2206 core_initcall(netlink_proto_init);
2207