// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS fileserver probing
 *
 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
7
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_afs.h"
13 #include "protocol_yfs.h"
14
15 static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
16 static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
17
18 /*
19 * Start the probe polling timer. We have to supply it with an inc on the
20 * outstanding server count.
21 */
afs_schedule_fs_probe(struct afs_net * net,struct afs_server * server,bool fast)22 static void afs_schedule_fs_probe(struct afs_net *net,
23 struct afs_server *server, bool fast)
24 {
25 unsigned long atj;
26
27 if (!net->live)
28 return;
29
30 atj = server->probed_at;
31 atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
32
33 afs_inc_servers_outstanding(net);
34 if (timer_reduce(&net->fs_probe_timer, atj))
35 afs_dec_servers_outstanding(net);
36 }
37
38 /*
39 * Handle the completion of a set of probes.
40 */
afs_finished_fs_probe(struct afs_net * net,struct afs_server * server)41 static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
42 {
43 bool responded = server->probe.responded;
44
45 write_seqlock(&net->fs_lock);
46 if (responded) {
47 list_add_tail(&server->probe_link, &net->fs_probe_slow);
48 } else {
49 server->rtt = UINT_MAX;
50 clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
51 list_add_tail(&server->probe_link, &net->fs_probe_fast);
52 }
53 write_sequnlock(&net->fs_lock);
54
55 afs_schedule_fs_probe(net, server, !responded);
56 }
57
58 /*
59 * Handle the completion of a probe.
60 */
afs_done_one_fs_probe(struct afs_net * net,struct afs_server * server)61 static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
62 {
63 _enter("");
64
65 if (atomic_dec_and_test(&server->probe_outstanding))
66 afs_finished_fs_probe(net, server);
67
68 wake_up_all(&server->probe_wq);
69 }
70
71 /*
72 * Handle inability to send a probe due to ENOMEM when trying to allocate a
73 * call struct.
74 */
afs_fs_probe_not_done(struct afs_net * net,struct afs_server * server,struct afs_addr_cursor * ac)75 static void afs_fs_probe_not_done(struct afs_net *net,
76 struct afs_server *server,
77 struct afs_addr_cursor *ac)
78 {
79 struct afs_addr_list *alist = ac->alist;
80 unsigned int index = ac->index;
81
82 _enter("");
83
84 trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
85 spin_lock(&server->probe_lock);
86
87 server->probe.local_failure = true;
88 if (server->probe.error == 0)
89 server->probe.error = -ENOMEM;
90
91 set_bit(index, &alist->failed);
92
93 spin_unlock(&server->probe_lock);
94 return afs_done_one_fs_probe(net, server);
95 }
96
/*
 * Process the result of probing a fileserver.  This is called after successful
 * or failed delivery of an FS.GetCapabilities operation.
 *
 * @call: The completed call; supplies the server, the address list and index
 *        probed, the error, the abort code and the service ID that answered.
 *
 * Under server->probe_lock the call's error is sorted into one of three
 * outcomes:
 *  - responded (success or a remote abort): the address is marked as
 *    responding, the YFS/AFS service and capability flags are updated and the
 *    best RTT seen is tracked;
 *  - local failure (-ENOMEM, -ENONET): noted without marking the address as
 *    failed;
 *  - anything else: the address is marked as failed and the error recorded
 *    if nothing more significant has been recorded yet.
 * In all cases the probe is then counted as done.
 */
void afs_fileserver_probe_result(struct afs_call *call)
{
	struct afs_addr_list *alist = call->alist;
	struct afs_server *server = call->server;
	unsigned int index = call->addr_ix;
	unsigned int rtt_us = 0, cap0;
	int ret = call->error;

	_enter("%pU,%u", &server->uuid, index);

	spin_lock(&server->probe_lock);

	switch (ret) {
	case 0:
		server->probe.error = 0;
		break;
	case -ECONNABORTED:
		/* An abort still shows the server is there; only keep the
		 * abort details if nothing has responded so far.
		 */
		if (!server->probe.responded) {
			server->probe.abort_code = call->abort_code;
			server->probe.error = ret;
		}
		break;
	case -ENOMEM:
	case -ENONET:
		clear_bit(index, &alist->responded);
		server->probe.local_failure = true;
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		goto out;
	case -ECONNRESET: /* Responded, but call expired. */
	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
	default:
		clear_bit(index, &alist->responded);
		set_bit(index, &alist->failed);
		/* Only replace "no error" or a timeout-class error. */
		if (!server->probe.responded &&
		    (server->probe.error == 0 ||
		     server->probe.error == -ETIMEDOUT ||
		     server->probe.error == -ETIME))
			server->probe.error = ret;
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		goto out;
	}

	/* The server responded on this address. */
	clear_bit(index, &alist->failed);

	if (call->service_id != YFS_FS_SERVICE) {
		server->probe.not_yfs = true;
		/* Don't downgrade if a YFS answer was already seen this round. */
		if (!server->probe.is_yfs) {
			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
			alist->addrs[index].srx_service = call->service_id;
		}
		cap0 = ntohl(call->tmp);
		if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
			set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
		else
			clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
	} else {
		server->probe.is_yfs = true;
		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
		alist->addrs[index].srx_service = call->service_id;
	}

	/* Prefer whichever address has shown the lowest RTT this round. */
	if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
	    rtt_us < server->probe.rtt) {
		server->probe.rtt = rtt_us;
		server->rtt = rtt_us;
		alist->preferred = index;
	}

	smp_wmb(); /* Set rtt before responded. */
	server->probe.responded = true;
	set_bit(index, &alist->responded);
	set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
out:
	spin_unlock(&server->probe_lock);

	_debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
	       &server->uuid, index, &alist->addrs[index].transport,
	       rtt_us, ret);

	afs_done_one_fs_probe(call->net, server);
}
190
191 /*
192 * Probe one or all of a fileserver's addresses to find out the best route and
193 * to query its capabilities.
194 */
afs_fs_probe_fileserver(struct afs_net * net,struct afs_server * server,struct key * key,bool all)195 void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
196 struct key *key, bool all)
197 {
198 struct afs_addr_cursor ac = {
199 .index = 0,
200 };
201
202 _enter("%pU", &server->uuid);
203
204 read_lock(&server->fs_lock);
205 ac.alist = rcu_dereference_protected(server->addresses,
206 lockdep_is_held(&server->fs_lock));
207 afs_get_addrlist(ac.alist);
208 read_unlock(&server->fs_lock);
209
210 server->probed_at = jiffies;
211 atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
212 memset(&server->probe, 0, sizeof(server->probe));
213 server->probe.rtt = UINT_MAX;
214
215 ac.index = ac.alist->preferred;
216 if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
217 all = true;
218
219 if (all) {
220 for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
221 if (!afs_fs_get_capabilities(net, server, &ac, key))
222 afs_fs_probe_not_done(net, server, &ac);
223 } else {
224 if (!afs_fs_get_capabilities(net, server, &ac, key))
225 afs_fs_probe_not_done(net, server, &ac);
226 }
227
228 afs_put_addrlist(ac.alist);
229 }
230
231 /*
232 * Wait for the first as-yet untried fileserver to respond.
233 */
afs_wait_for_fs_probes(struct afs_server_list * slist,unsigned long untried)234 int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
235 {
236 struct wait_queue_entry *waits;
237 struct afs_server *server;
238 unsigned int rtt = UINT_MAX, rtt_s;
239 bool have_responders = false;
240 int pref = -1, i;
241
242 _enter("%u,%lx", slist->nr_servers, untried);
243
244 /* Only wait for servers that have a probe outstanding. */
245 for (i = 0; i < slist->nr_servers; i++) {
246 if (test_bit(i, &untried)) {
247 server = slist->servers[i].server;
248 if (!atomic_read(&server->probe_outstanding))
249 __clear_bit(i, &untried);
250 if (server->probe.responded)
251 have_responders = true;
252 }
253 }
254 if (have_responders || !untried)
255 return 0;
256
257 waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
258 if (!waits)
259 return -ENOMEM;
260
261 for (i = 0; i < slist->nr_servers; i++) {
262 if (test_bit(i, &untried)) {
263 server = slist->servers[i].server;
264 init_waitqueue_entry(&waits[i], current);
265 add_wait_queue(&server->probe_wq, &waits[i]);
266 }
267 }
268
269 for (;;) {
270 bool still_probing = false;
271
272 set_current_state(TASK_INTERRUPTIBLE);
273 for (i = 0; i < slist->nr_servers; i++) {
274 if (test_bit(i, &untried)) {
275 server = slist->servers[i].server;
276 if (server->probe.responded)
277 goto stop;
278 if (atomic_read(&server->probe_outstanding))
279 still_probing = true;
280 }
281 }
282
283 if (!still_probing || signal_pending(current))
284 goto stop;
285 schedule();
286 }
287
288 stop:
289 set_current_state(TASK_RUNNING);
290
291 for (i = 0; i < slist->nr_servers; i++) {
292 if (test_bit(i, &untried)) {
293 server = slist->servers[i].server;
294 rtt_s = READ_ONCE(server->rtt);
295 if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
296 rtt_s < rtt) {
297 pref = i;
298 rtt = rtt_s;
299 }
300
301 remove_wait_queue(&server->probe_wq, &waits[i]);
302 }
303 }
304
305 kfree(waits);
306
307 if (pref == -1 && signal_pending(current))
308 return -ERESTARTSYS;
309
310 if (pref >= 0)
311 slist->preferred = pref;
312 return 0;
313 }
314
315 /*
316 * Probe timer. We have an increment on fs_outstanding that we need to pass
317 * along to the work item.
318 */
afs_fs_probe_timer(struct timer_list * timer)319 void afs_fs_probe_timer(struct timer_list *timer)
320 {
321 struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
322
323 if (!net->live || !queue_work(afs_wq, &net->fs_prober))
324 afs_dec_servers_outstanding(net);
325 }
326
327 /*
328 * Dispatch a probe to a server.
329 */
afs_dispatch_fs_probe(struct afs_net * net,struct afs_server * server,bool all)330 static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
331 __releases(&net->fs_lock)
332 {
333 struct key *key = NULL;
334
335 /* We remove it from the queues here - it will be added back to
336 * one of the queues on the completion of the probe.
337 */
338 list_del_init(&server->probe_link);
339
340 afs_get_server(server, afs_server_trace_get_probe);
341 write_sequnlock(&net->fs_lock);
342
343 afs_fs_probe_fileserver(net, server, key, all);
344 afs_put_server(net, server, afs_server_trace_put_probe);
345 }
346
347 /*
348 * Probe a server immediately without waiting for its due time to come
349 * round. This is used when all of the addresses have been tried.
350 */
afs_probe_fileserver(struct afs_net * net,struct afs_server * server)351 void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
352 {
353 write_seqlock(&net->fs_lock);
354 if (!list_empty(&server->probe_link))
355 return afs_dispatch_fs_probe(net, server, true);
356 write_sequnlock(&net->fs_lock);
357 }
358
359 /*
360 * Probe dispatcher to regularly dispatch probes to keep NAT alive.
361 */
afs_fs_probe_dispatcher(struct work_struct * work)362 void afs_fs_probe_dispatcher(struct work_struct *work)
363 {
364 struct afs_net *net = container_of(work, struct afs_net, fs_prober);
365 struct afs_server *fast, *slow, *server;
366 unsigned long nowj, timer_at, poll_at;
367 bool first_pass = true, set_timer = false;
368
369 if (!net->live)
370 return;
371
372 _enter("");
373
374 if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
375 _leave(" [none]");
376 return;
377 }
378
379 again:
380 write_seqlock(&net->fs_lock);
381
382 fast = slow = server = NULL;
383 nowj = jiffies;
384 timer_at = nowj + MAX_JIFFY_OFFSET;
385
386 if (!list_empty(&net->fs_probe_fast)) {
387 fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
388 poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
389 if (time_before(nowj, poll_at)) {
390 timer_at = poll_at;
391 set_timer = true;
392 fast = NULL;
393 }
394 }
395
396 if (!list_empty(&net->fs_probe_slow)) {
397 slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
398 poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
399 if (time_before(nowj, poll_at)) {
400 if (time_before(poll_at, timer_at))
401 timer_at = poll_at;
402 set_timer = true;
403 slow = NULL;
404 }
405 }
406
407 server = fast ?: slow;
408 if (server)
409 _debug("probe %pU", &server->uuid);
410
411 if (server && (first_pass || !need_resched())) {
412 afs_dispatch_fs_probe(net, server, server == fast);
413 first_pass = false;
414 goto again;
415 }
416
417 write_sequnlock(&net->fs_lock);
418
419 if (server) {
420 if (!queue_work(afs_wq, &net->fs_prober))
421 afs_dec_servers_outstanding(net);
422 _leave(" [requeue]");
423 } else if (set_timer) {
424 if (timer_reduce(&net->fs_probe_timer, timer_at))
425 afs_dec_servers_outstanding(net);
426 _leave(" [timer]");
427 } else {
428 afs_dec_servers_outstanding(net);
429 _leave(" [quiesce]");
430 }
431 }
432
433 /*
434 * Wait for a probe on a particular fileserver to complete for 2s.
435 */
afs_wait_for_one_fs_probe(struct afs_server * server,bool is_intr)436 int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
437 {
438 struct wait_queue_entry wait;
439 unsigned long timo = 2 * HZ;
440
441 if (atomic_read(&server->probe_outstanding) == 0)
442 goto dont_wait;
443
444 init_wait_entry(&wait, 0);
445 for (;;) {
446 prepare_to_wait_event(&server->probe_wq, &wait,
447 is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
448 if (timo == 0 ||
449 server->probe.responded ||
450 atomic_read(&server->probe_outstanding) == 0 ||
451 (is_intr && signal_pending(current)))
452 break;
453 timo = schedule_timeout(timo);
454 }
455
456 finish_wait(&server->probe_wq, &wait);
457
458 dont_wait:
459 if (server->probe.responded)
460 return 0;
461 if (is_intr && signal_pending(current))
462 return -ERESTARTSYS;
463 if (timo == 0)
464 return -ETIME;
465 return -EDESTADDRREQ;
466 }
467
468 /*
469 * Clean up the probing when the namespace is killed off.
470 */
afs_fs_probe_cleanup(struct afs_net * net)471 void afs_fs_probe_cleanup(struct afs_net *net)
472 {
473 if (del_timer_sync(&net->fs_probe_timer))
474 afs_dec_servers_outstanding(net);
475 }
476