/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>

#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans and updates of the hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     That would result in deadlocks if the backend/driver wants to use the
     neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
   - timer
   - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
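/*
 * Illustrative sketch (not from the original file) of the rule above:
 * to do anything non-trivial with an entry found in a bucket, take a
 * reference first and drop the table lock before calling out:
 *
 *	write_lock_bh(&tbl->lock);
 *	n = <entry found while scanning a hash bucket>;
 *	neigh_hold(n);
 *	write_unlock_bh(&tbl->lock);
 *	... now it is safe to call into protocol/driver code with n ...
 *	neigh_release(n);
 */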

static DEFINE_RWLOCK(neigh_tbl_lock);

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}

/*
 * Returns a value randomly distributed in the interval
 * (1/2)*base ... (3/2)*base.  It corresponds to the default IPv6
 * settings and is not overridable, because it is a really reasonable
 * choice.
 */
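/*
 * Worked example (illustrative only): with base = 30 * HZ the result
 * is uniformly distributed in [15 * HZ, 45 * HZ), i.e. a ReachableTime
 * between 15 and 45 seconds around the 30 second default.
 */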
121
neigh_rand_reach_time(unsigned long base)122 unsigned long neigh_rand_reach_time(unsigned long base)
123 {
124 return base ? (net_random() % base) + (base >> 1) : 0;
125 }
126 EXPORT_SYMBOL(neigh_rand_reach_time);


static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy the neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	if (tbl->entry_size)
		n = kzalloc(tbl->entry_size, GFP_ATOMIC);
	else {
		int sz = sizeof(*n) + tbl->key_len;

		sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
		sz += dev->neigh_priv_len;
		n = kzalloc(sz, GFP_ATOMIC);
	}
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	/* keep the value odd: the hash functions use it as a multiplier */
	*x |= 1;
}


static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
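/*
 * Example of the size split above (illustrative, assuming 8-byte
 * pointers and a 4 KiB PAGE_SIZE): shift = 10 gives 1024 * 8 = 8192
 * bytes, which exceeds a page, so the buckets come from
 * __get_free_pages() rather than kzalloc().  neigh_hash_free_rcu()
 * below mirrors the same split when freeing.
 */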

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			/* the top hash_shift bits select the bucket */
			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
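/*
 * Usage sketch (illustrative only): neigh_lookup() returns a referenced
 * entry or NULL, so a caller is expected to pair it with neigh_release():
 *
 *	struct neighbour *n = neigh_lookup(tbl, &addr, dev);
 *	if (n) {
 *		... inspect n->nud_state / n->ha under n->lock as needed ...
 *		neigh_release(n);
 *	}
 */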

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);

struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(neigh_create);
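/*
 * Usage sketch (illustrative only): unlike neigh_lookup(), neigh_create()
 * never returns NULL; failures come back as ERR_PTR() values:
 *
 *	struct neighbour *n = neigh_create(tbl, &addr, dev);
 *	if (IS_ERR(n))
 *		return PTR_ERR(n);
 *	... n carries a reference; drop it with neigh_release(n) ...
 */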

static u32 pneigh_hash(const void *pkey, int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
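/*
 * Usage sketch (illustrative only): with creat == 1 the caller must hold
 * RTNL (see the ASSERT_RTNL() above), as the netlink handler neigh_add()
 * later in this file does:
 *
 *	struct pneigh_entry *pn = pneigh_lookup(tbl, net, &addr, dev, 1);
 *	if (!pn)
 *		return -ENOBUFS;
 */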


int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	The neighbour must already be out of the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable the fast path.

   Called with the neighbour write-locked.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable the fast path.

   Called with the neighbour write-locked.
 */
static void neigh_connect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return (n->nud_state & NUD_PROBE) ?
		p->ucast_probes :
		p->ucast_probes + p->app_probes + p->mcast_probes;
}

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* This is a delicate place. error_report is a very complicated
	   routine; in particular, it can hit this same neighbour entry!

	   So we try to be accurate here and avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
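/*
 * Note (illustrative): most callers reach this via the neigh_event_send()
 * wrapper in <net/neighbour.h>, which in this kernel updates neigh->used
 * and returns 0 without taking the lock when the entry is already in
 * NUD_CONNECTED, NUD_DELAY or NUD_PROBE; only the slow path lands here.
 * See neigh_resolve_output() below for a caller.
 */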

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}



/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows retaining the current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known as
				a router.

   Caller MUST hold a reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
				n1 = n2;
			n1->output(n1, skb);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
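/*
 * Example (illustrative): an administrative removal, as done by the
 * netlink handler neigh_delete() later in this file, marks the entry
 * failed while bypassing the NUD_NOARP/NUD_PERMANENT check:
 *
 *	neigh_update(neigh, NULL, NUD_FAILED,
 *		     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
 */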

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	__be16 prot = dst->ops->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* This function can be used in contexts where only the old dev_queue_xmit
 * worked, e.g. if you want to override the normal output path (eql, shaper),
 * but resolution is not made yet.
 */

int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	__skb_pull(skb, skb_network_offset(skb));

	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
			    skb->len) < 0 &&
	    dev->header_ops->rebuild(skb))
		return 0;

	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_compat_output);

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
		      dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
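/*
 * The read_seqbegin()/read_seqretry() loop above (and the identical one
 * in neigh_connected_output() below) re-builds the link-layer header if
 * neigh->ha changed under us mid-copy; the write side updates ha under
 * write_seqlock(&neigh->ha_lock) in neigh_update().
 */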

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

	for (p = &tbl->parms; p; p = p->next) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			release_net(net);
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	rwlock_init(&tbl->lock);
	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}
EXPORT_SYMBOL(neigh_table_init_no_netlink);

void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		printk(KERN_ERR "NEIGH: Registering multiple tables for "
		       "family %d\n", tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);
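/*
 * Registration sketch (illustrative): a protocol declares a static
 * struct neigh_table with .family, .key_len, .hash, .constructor etc.
 * filled in, then registers it once at init time; e.g. ARP calls
 * neigh_table_init(&arp_tbl) from its own init path.
 */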

int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		printk(KERN_CRIT "neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}

static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if (parms->dev)
		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);

	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
	NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
	/* approximate value for deprecated QUEUE_LEN (in packets) */
	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
		    DIV_ROUND_UP(parms->queue_len_bytes,
				 SKB_TRUESIZE(ETH_FRAME_LEN)));
	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
		      parms->base_reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
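/*
 * Note: the NLA_PUT_*() macros used above expand to a "goto
 * nla_put_failure" when the skb lacks tailroom, which is why that label
 * is reachable even though no explicit goto appears in the function body.
 */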
1802
neightbl_fill_info(struct sk_buff * skb,struct neigh_table * tbl,u32 pid,u32 seq,int type,int flags)1803 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1804 u32 pid, u32 seq, int type, int flags)
1805 {
1806 struct nlmsghdr *nlh;
1807 struct ndtmsg *ndtmsg;
1808
1809 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1810 if (nlh == NULL)
1811 return -EMSGSIZE;
1812
1813 ndtmsg = nlmsg_data(nlh);
1814
1815 read_lock_bh(&tbl->lock);
1816 ndtmsg->ndtm_family = tbl->family;
1817 ndtmsg->ndtm_pad1 = 0;
1818 ndtmsg->ndtm_pad2 = 0;
1819
1820 NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1821 NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1822 NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1823 NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1824 NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1825
1826 {
1827 unsigned long now = jiffies;
1828 unsigned int flush_delta = now - tbl->last_flush;
1829 unsigned int rand_delta = now - tbl->last_rand;
1830 struct neigh_hash_table *nht;
1831 struct ndt_config ndc = {
1832 .ndtc_key_len = tbl->key_len,
1833 .ndtc_entry_size = tbl->entry_size,
1834 .ndtc_entries = atomic_read(&tbl->entries),
1835 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
1836 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
1837 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
1838 };
1839
1840 rcu_read_lock_bh();
1841 nht = rcu_dereference_bh(tbl->nht);
1842 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1843 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1844 rcu_read_unlock_bh();
1845
1846 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1847 }
1848
1849 {
1850 int cpu;
1851 struct ndt_stats ndst;
1852
1853 memset(&ndst, 0, sizeof(ndst));
1854
1855 for_each_possible_cpu(cpu) {
1856 struct neigh_statistics *st;
1857
1858 st = per_cpu_ptr(tbl->stats, cpu);
1859 ndst.ndts_allocs += st->allocs;
1860 ndst.ndts_destroys += st->destroys;
1861 ndst.ndts_hash_grows += st->hash_grows;
1862 ndst.ndts_res_failed += st->res_failed;
1863 ndst.ndts_lookups += st->lookups;
1864 ndst.ndts_hits += st->hits;
1865 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
1866 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
1867 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
1868 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
1869 }
1870
1871 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1872 }
1873
1874 BUG_ON(tbl->parms.dev);
1875 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1876 goto nla_put_failure;
1877
1878 read_unlock_bh(&tbl->lock);
1879 return nlmsg_end(skb, nlh);
1880
1881 nla_put_failure:
1882 read_unlock_bh(&tbl->lock);
1883 nlmsg_cancel(skb, nlh);
1884 return -EMSGSIZE;
1885 }
1886
neightbl_fill_param_info(struct sk_buff * skb,struct neigh_table * tbl,struct neigh_parms * parms,u32 pid,u32 seq,int type,unsigned int flags)1887 static int neightbl_fill_param_info(struct sk_buff *skb,
1888 struct neigh_table *tbl,
1889 struct neigh_parms *parms,
1890 u32 pid, u32 seq, int type,
1891 unsigned int flags)
1892 {
1893 struct ndtmsg *ndtmsg;
1894 struct nlmsghdr *nlh;
1895
1896 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1897 if (nlh == NULL)
1898 return -EMSGSIZE;
1899
1900 ndtmsg = nlmsg_data(nlh);
1901
1902 read_lock_bh(&tbl->lock);
1903 ndtmsg->ndtm_family = tbl->family;
1904 ndtmsg->ndtm_pad1 = 0;
1905 ndtmsg->ndtm_pad2 = 0;
1906
1907 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1908 neightbl_fill_parms(skb, parms) < 0)
1909 goto errout;
1910
1911 read_unlock_bh(&tbl->lock);
1912 return nlmsg_end(skb, nlh);
1913 errout:
1914 read_unlock_bh(&tbl->lock);
1915 nlmsg_cancel(skb, nlh);
1916 return -EMSGSIZE;
1917 }
1918
1919 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1920 [NDTA_NAME] = { .type = NLA_STRING },
1921 [NDTA_THRESH1] = { .type = NLA_U32 },
1922 [NDTA_THRESH2] = { .type = NLA_U32 },
1923 [NDTA_THRESH3] = { .type = NLA_U32 },
1924 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
1925 [NDTA_PARMS] = { .type = NLA_NESTED },
1926 };
1927
1928 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1929 [NDTPA_IFINDEX] = { .type = NLA_U32 },
1930 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
1931 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
1932 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
1933 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
1934 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
1935 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
1936 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
1937 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
1938 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
1939 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
1940 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
1941 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
1942 };
1943
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Legacy packet-count attribute: store it
				 * as bytes, assuming one worst-case
				 * Ethernet frame per queued packet.
				 */
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
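
/* Dump all tables and their parms sets. cb->args[0] remembers how many
 * tables were already dumped and cb->args[1] how many parms entries within
 * the current table, so a multi-part dump can resume where it left off.
 */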
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
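
/* Emit one RTM_NEWNEIGH message: an ndmsg header followed by NDA_DST,
 * NDA_LLADDR (only while the entry is NUD_VALID), NDA_CACHEINFO and
 * NDA_PROBES. The lladdr snapshot and timestamps are taken under
 * neigh->lock.
 */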
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = neigh->ops->family;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = neigh->flags;
	ndm->ndm_type = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);

	read_lock_bh(&neigh->lock);
	ndm->ndm_state = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
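
/* Proxy entries carry no link-layer state; they are reported with
 * NTF_PROXY set, state NUD_NONE and just the NDA_DST key.
 */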
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = tbl->family;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = pn->flags | NTF_PROXY;
	ndm->ndm_type = NDA_DST;
	ndm->ndm_ifindex = pn->dev->ifindex;
	ndm->ndm_state = NUD_NONE;

	NLA_PUT(skb, NDA_DST, tbl->key_len, pn->key);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
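
/* Walk one table's hash under rcu_read_lock_bh(), resuming from the
 * bucket/index cookies in cb->args[1] and cb->args[2]. Entries from
 * foreign network namespaces are skipped.
 */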
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
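
/* Same as neigh_dump_table() but for the proxy hash, which is protected
 * by tbl->lock rather than RCU; resume state lives in cb->args[3] and
 * cb->args[4].
 */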
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (dev_net(n->dev) != net)
				continue;
			if (idx < s_idx)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWNEIGH,
					     NLM_F_MULTI, tbl) <= 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;
}

static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl;
	     tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}

void neigh_for_each(struct neigh_table *tbl,
		    void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
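
/* A minimal, hypothetical neigh_for_each() user, e.g. counting entries
 * that are currently reachable; "count_reachable" and the counter are
 * illustrative only:
 *
 *	static void count_reachable(struct neighbour *n, void *cookie)
 *	{
 *		if (n->nud_state & NUD_REACHABLE)
 *			(*(int *)cookie)++;
 *	}
 *
 *	int count = 0;
 *	neigh_for_each(&arp_tbl, count_reachable, &count);
 */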

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

#ifdef CONFIG_PROC_FS
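
/* seq_file iteration over a table: plain neighbour entries are walked
 * first, then (unless NEIGH_SEQ_NEIGH_ONLY) the proxy entries, with
 * NEIGH_SEQ_IS_PNEIGH in state->flags recording which phase we are in
 * and state->bucket the current hash chain.
 */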
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		--(*pos);
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		--(*pos);
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;
	loff_t idxpos = *pos;

	rc = neigh_get_idx(seq, &idxpos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, &idxpos);

	return rc;
}
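
/* Callers hold rcu_read_lock_bh() from here until neigh_seq_stop();
 * *pos == 0 yields SEQ_START_TOKEN so ->show() can print a header.
 */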
void *neigh_seq_start(struct seq_file *seq, loff_t *pos,
		      struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);

void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);

/* statistics via seq_file */
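
/* For the stats files, *pos encodes the iterator position: 0 is the
 * header (SEQ_START_TOKEN) and n stands for possible CPU n - 1, so each
 * record shown is one CPU's neigh_statistics.
 */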
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
			"%08lx %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards
		   );

	return 0;
}

static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};

static int neigh_stat_seq_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &neigh_stat_seq_ops);

	if (!ret) {
		struct seq_file *sf = file->private_data;
		sf->private = PDE(inode)->data;
	}
	return ret;
}

static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#endif /* CONFIG_PROC_FS */
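
/* Worst-case payload estimate used to size notification skbs. */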
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}

static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}

#ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */

#ifdef CONFIG_SYSCTL
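
/* The legacy unres_qlen sysctl is expressed in packets while the kernel
 * accounts the queue in bytes; translate on the fly, rounding up with a
 * per-packet cost of SKB_TRUESIZE(ETH_FRAME_LEN).
 */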
static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	int size, ret;
	ctl_table tmp = *ctl;

	tmp.data = &size;
	size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}

enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};

static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
	char *dev_name;
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		[NEIGH_VAR_MCAST_PROBE] = {
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_UCAST_PROBE] = {
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_APP_PROBE] = {
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_RETRANS_TIME] = {
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_STALETIME] = {
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_QUEUE_LEN] = {
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_PROXY_QLEN] = {
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_ANYCAST_DELAY] = {
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_PROXY_DELAY] = {
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_LOCKTIME] = {
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_RETRANS_TIME_MS] = {
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};
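
/* Register the per-parms sysctl tree under net/<proto>/neigh/<dev|default>/;
 * for the "default" set the table-wide gc_* knobs are exposed as well,
 * while per-device tables are terminated before NEIGH_VAR_GC_INTERVAL.
 */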
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;

#define NEIGH_CTL_PATH_ROOT	0
#define NEIGH_CTL_PATH_PROTO	1
#define NEIGH_CTL_PATH_NEIGH	2
#define NEIGH_CTL_PATH_DEV	3

	struct ctl_path neigh_path[] = {
		{ .procname = "net",	 },
		{ .procname = "proto",	 },
		{ .procname = "neigh",	 },
		{ .procname = "default", },
		{ },
	};

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
	if (!t->dev_name)
		goto free;

	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;

	t->sysctl_header =
		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free_procname;

	p->sysctl_table = t;
	return 0;

free_procname:
	kfree(t->dev_name);
free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
		unregister_sysctl_table(t->sysctl_header);
		kfree(t->dev_name);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);

#endif	/* CONFIG_SYSCTL */

static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);