1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * libata-eh.c - libata error handling
4 *
5 * Copyright 2006 Tejun Heo <htejun@gmail.com>
6 *
7 * libata documentation is available via 'make {ps|pdf}docs',
8 * as Documentation/driver-api/libata.rst
9 *
10 * Hardware documentation available from http://www.t13.org/ and
11 * http://www.sata-io.org/
12 */
13
14 #include <linux/kernel.h>
15 #include <linux/blkdev.h>
16 #include <linux/export.h>
17 #include <linux/pci.h>
18 #include <scsi/scsi.h>
19 #include <scsi/scsi_host.h>
20 #include <scsi/scsi_eh.h>
21 #include <scsi/scsi_device.h>
22 #include <scsi/scsi_cmnd.h>
23 #include <scsi/scsi_dbg.h>
24 #include "../scsi/scsi_transport_api.h"
25
26 #include <linux/libata.h>
27
28 #include <trace/events/libata.h>
29 #include "libata.h"
30
enum {
	/* speed down verdicts, returned by the speed-down decision logic */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags recorded per ata_ering entry */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),
	ATA_EFLAG_OLD_ER		= (1 << 31),

	/* error categories, used to index per-category error counts */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	/* default timeout for EH internal commands, in milliseconds */
	ATA_EH_CMD_DFL_TIMEOUT		= 5000,

	/* always put at least this amount of time (ms) between resets */
	ATA_EH_RESET_COOL_DOWN		= 5000,

	/* Waiting in ->prereset can never be reliable.  It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting.  Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	ATA_EH_FASTDRAIN_INTERVAL	= 3000,

	/* retries for UNIT ATTENTION handling */
	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000, /* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};
73
/* The following table determines how we sequence resets.  Each entry
 * represents timeout for that try.  The first try can be soft or
 * hardreset.  All others are hardreset if available.  In most cases
 * the first reset w/ 10sec timeout should succeed.  Following entries
 * are mostly for error handling, hotplug and those outlier devices that
 * take an exceptionally long time to recover from reset.
 *
 * All values are in milliseconds; ULONG_MAX terminates the sequence.
 */
static const unsigned long ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for outlier devices */
	 5000,	/* and sweet one last chance */
	ULONG_MAX, /* > 1 min has elapsed, give up */
};
88
/* IDENTIFY (DEVICE/PACKET) retry timeouts in ms; UINT_MAX terminates */
static const unsigned int ata_eh_identify_timeouts[] = {
	 5000,	/* covers > 99% of successes and not too boring on failures */
	10000,  /* combined time till here is enough even for media access */
	30000,	/* for exceptionally slow devices */
	UINT_MAX,
};
95
/* revalidation (READ LOG) retry timeouts in ms; UINT_MAX terminates */
static const unsigned int ata_eh_revalidate_timeouts[] = {
	15000,	/* Some drives are slow to read log pages when waking-up */
	15000,  /* combined time till here is enough even for media access */
	UINT_MAX,
};
101
/* FLUSH CACHE retry timeouts in ms; UINT_MAX terminates */
static const unsigned int ata_eh_flush_timeouts[] = {
	15000,	/* be generous with flush */
	15000,  /* ditto */
	30000,	/* and even more generous */
	UINT_MAX,
};
108
/* catch-all retry timeouts (ms) for the remaining EH internal commands */
static const unsigned int ata_eh_other_timeouts[] = {
	 5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	UINT_MAX,
};
115
/* one command class: the commands it covers and the timeouts to use */
struct ata_eh_cmd_timeout_ent {
	const u8		*commands;	/* 0-terminated command list */
	const unsigned int	*timeouts;	/* UINT_MAX-terminated, in ms */
};
120
/* The following table determines timeouts to use for EH internal
 * commands.  Each table entry is a command class and matches the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used.  If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
/* CMDS() appends a 0 terminator so ata_lookup_timeout_table() can scan */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT),
	  .timeouts = ata_eh_revalidate_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
	  .timeouts = ata_eh_flush_timeouts },
};
#undef CMDS
152
static void __ata_port_freeze(struct ata_port *ap);
static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
			  struct ata_device **r_failed_dev);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
/* no-op stubs: ports are never suspended/resumed without power management */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */
166
__ata_ehi_pushv_desc(struct ata_eh_info * ehi,const char * fmt,va_list args)167 static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi,
168 const char *fmt, va_list args)
169 {
170 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
171 ATA_EH_DESC_LEN - ehi->desc_len,
172 fmt, args);
173 }
174
/**
 * __ata_ehi_push_desc - push error description without adding separator
 * @ehi: target EHI
 * @fmt: printf format string
 *
 * Format string according to @fmt and append it to @ehi->desc.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(__ata_ehi_push_desc);
194
/**
 * ata_ehi_push_desc - push error description with separator
 * @ehi: target EHI
 * @fmt: printf format string
 *
 * Format string according to @fmt and append it to @ehi->desc.
 * If @ehi->desc is not empty, ", " is added in-between.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	/* separate from any previously pushed description */
	if (ehi->desc_len)
		__ata_ehi_push_desc(ehi, ", ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(ata_ehi_push_desc);
218
219 /**
220 * ata_ehi_clear_desc - clean error description
221 * @ehi: target EHI
222 *
223 * Clear @ehi->desc.
224 *
225 * LOCKING:
226 * spin_lock_irqsave(host lock)
227 */
ata_ehi_clear_desc(struct ata_eh_info * ehi)228 void ata_ehi_clear_desc(struct ata_eh_info *ehi)
229 {
230 ehi->desc[0] = '\0';
231 ehi->desc_len = 0;
232 }
233 EXPORT_SYMBOL_GPL(ata_ehi_clear_desc);
234
/**
 * ata_port_desc - append port description
 * @ap: target ATA port
 * @fmt: printf format string
 *
 * Format string according to @fmt and append it to port
 * description.  If port description is not empty, " " is added
 * in-between.  This function is to be used while initializing
 * ata_host.  The description is printed on host registration.
 *
 * LOCKING:
 * None.
 */
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
{
	va_list args;

	/* only legal during host initialization */
	WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));

	if (ap->link.eh_info.desc_len)
		__ata_ehi_push_desc(&ap->link.eh_info, " ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(ata_port_desc);
262
263 #ifdef CONFIG_PCI
264 /**
265 * ata_port_pbar_desc - append PCI BAR description
266 * @ap: target ATA port
267 * @bar: target PCI BAR
268 * @offset: offset into PCI BAR
269 * @name: name of the area
270 *
271 * If @offset is negative, this function formats a string which
272 * contains the name, address, size and type of the BAR and
273 * appends it to the port description. If @offset is zero or
274 * positive, only name and offsetted address is appended.
275 *
276 * LOCKING:
277 * None.
278 */
ata_port_pbar_desc(struct ata_port * ap,int bar,ssize_t offset,const char * name)279 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
280 const char *name)
281 {
282 struct pci_dev *pdev = to_pci_dev(ap->host->dev);
283 char *type = "";
284 unsigned long long start, len;
285
286 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
287 type = "m";
288 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
289 type = "i";
290
291 start = (unsigned long long)pci_resource_start(pdev, bar);
292 len = (unsigned long long)pci_resource_len(pdev, bar);
293
294 if (offset < 0)
295 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
296 else
297 ata_port_desc(ap, "%s 0x%llx", name,
298 start + (unsigned long long)offset);
299 }
300 EXPORT_SYMBOL_GPL(ata_port_pbar_desc);
301 #endif /* CONFIG_PCI */
302
/*
 * Find the command class of @cmd in ata_eh_cmd_timeout_table.
 * Returns the table index on match, -1 if @cmd has no class.
 */
static int ata_lookup_timeout_table(u8 cmd)
{
	int i;

	for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
		const u8 *p = ata_eh_cmd_timeout_table[i].commands;

		/* command lists are 0-terminated, see CMDS() */
		while (*p) {
			if (*p == cmd)
				return i;
			p++;
		}
	}

	return -1;
}
317
/**
 * ata_internal_cmd_timeout - determine timeout for an internal command
 * @dev: target device
 * @cmd: internal command to be issued
 *
 * Determine timeout for internal command @cmd for @dev.
 *
 * LOCKING:
 * EH context.
 *
 * RETURNS:
 * Determined timeout in milliseconds.
 */
unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	/* commands without a class get the default timeout */
	if (ent < 0)
		return ATA_EH_CMD_DFL_TIMEOUT;

	/* idx advances on each timeout, see ata_internal_cmd_timed_out() */
	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	return ata_eh_cmd_timeout_table[ent].timeouts[idx];
}
343
/**
 * ata_internal_cmd_timed_out - notification for internal command timeout
 * @dev: target device
 * @cmd: internal command which timed out
 *
 * Notify EH that internal command @cmd for @dev timed out.  This
 * function should be called only for commands whose timeouts are
 * determined using ata_internal_cmd_timeout().
 *
 * LOCKING:
 * EH context.
 */
void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return;

	/* advance to the next timeout unless already at the last
	 * (UINT_MAX-terminated) entry of the class's timeout table
	 */
	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != UINT_MAX)
		ehc->cmd_timeout_idx[dev->devno][ent]++;
}
369
ata_ering_record(struct ata_ering * ering,unsigned int eflags,unsigned int err_mask)370 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
371 unsigned int err_mask)
372 {
373 struct ata_ering_entry *ent;
374
375 WARN_ON(!err_mask);
376
377 ering->cursor++;
378 ering->cursor %= ATA_ERING_SIZE;
379
380 ent = &ering->ring[ering->cursor];
381 ent->eflags = eflags;
382 ent->err_mask = err_mask;
383 ent->timestamp = get_jiffies_64();
384 }
385
ata_ering_top(struct ata_ering * ering)386 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
387 {
388 struct ata_ering_entry *ent = &ering->ring[ering->cursor];
389
390 if (ent->err_mask)
391 return ent;
392 return NULL;
393 }
394
/*
 * Walk @ering from the most recent entry backwards, invoking @map_fn
 * on each recorded entry.  Iteration stops at the first empty slot or
 * when @map_fn returns non-zero, whose value is then returned.
 * Returns 0 if all entries were visited.
 */
int ata_ering_map(struct ata_ering *ering,
		  int (*map_fn)(struct ata_ering_entry *, void *),
		  void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		/* empty slot terminates the recorded history */
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		/* step backwards with wrap-around */
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}
415
/* ata_ering_map() callback: mark one entry as belonging to old history */
static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg)
{
	ent->eflags |= ATA_EFLAG_OLD_ER;
	return 0;
}
421
/* mark all recorded entries old; entries are kept for later inspection */
static void ata_ering_clear(struct ata_ering *ering)
{
	ata_ering_map(ering, ata_ering_clear_cb, NULL);
}
426
ata_eh_dev_action(struct ata_device * dev)427 static unsigned int ata_eh_dev_action(struct ata_device *dev)
428 {
429 struct ata_eh_context *ehc = &dev->link->eh_context;
430
431 return ehc->i.action | ehc->i.dev_action[dev->devno];
432 }
433
ata_eh_clear_action(struct ata_link * link,struct ata_device * dev,struct ata_eh_info * ehi,unsigned int action)434 static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
435 struct ata_eh_info *ehi, unsigned int action)
436 {
437 struct ata_device *tdev;
438
439 if (!dev) {
440 ehi->action &= ~action;
441 ata_for_each_dev(tdev, link, ALL)
442 ehi->dev_action[tdev->devno] &= ~action;
443 } else {
444 /* doesn't make sense for port-wide EH actions */
445 WARN_ON(!(action & ATA_EH_PERDEV_MASK));
446
447 /* break ehi->action into ehi->dev_action */
448 if (ehi->action & action) {
449 ata_for_each_dev(tdev, link, ALL)
450 ehi->dev_action[tdev->devno] |=
451 ehi->action & action;
452 ehi->action &= ~action;
453 }
454
455 /* turn off the specified per-dev action */
456 ehi->dev_action[dev->devno] &= ~action;
457 }
458 }
459
/**
 * ata_eh_acquire - acquire EH ownership
 * @ap: ATA port to acquire EH ownership for
 *
 * Acquire EH ownership for @ap.  This is the basic exclusion
 * mechanism for ports sharing a host.  Only one port hanging off
 * the same host can claim the ownership of EH.
 *
 * LOCKING:
 * EH context.
 */
void ata_eh_acquire(struct ata_port *ap)
{
	mutex_lock(&ap->host->eh_mutex);
	/* ownership must have been released before it can be re-taken */
	WARN_ON_ONCE(ap->host->eh_owner);
	ap->host->eh_owner = current;
}
477
/**
 * ata_eh_release - release EH ownership
 * @ap: ATA port to release EH ownership for
 *
 * Release EH ownership for @ap.  The caller must have acquired
 * EH ownership using ata_eh_acquire() previously.
 *
 * LOCKING:
 * EH context.
 */
void ata_eh_release(struct ata_port *ap)
{
	/* only the current owner may release */
	WARN_ON_ONCE(ap->host->eh_owner != current);
	ap->host->eh_owner = NULL;
	mutex_unlock(&ap->host->eh_mutex);
}
494
/*
 * Prepare @ap for driver unload: restore link settings, disable all
 * attached devices and permanently freeze the port.
 */
static void ata_eh_unload(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* Restore SControl IPM and SPD for the next driver and
	 * disable attached devices.
	 */
	ata_for_each_link(link, ap, PMP_FIRST) {
		/* keep SPD/IPM fields, zero DET to leave the link alone */
		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
		ata_for_each_dev(dev, link, ALL)
			ata_dev_disable(dev);
	}

	/* freeze and set UNLOADED */
	spin_lock_irqsave(ap->lock, flags);

	ata_port_freeze(ap);			/* won't be thawed */
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
	ap->pflags |= ATA_PFLAG_UNLOADED;

	spin_unlock_irqrestore(ap->lock, flags);
}
519
/**
 * ata_scsi_error - SCSI layer error handler callback
 * @host: SCSI host on which error occurred
 *
 * Handles SCSI-layer-thrown error events.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 *
 * RETURNS:
 * Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	LIST_HEAD(eh_work_q);

	/* take over the failed commands queued by the SCSI midlayer */
	spin_lock_irqsave(host->host_lock, flags);
	list_splice_init(&host->eh_cmd_q, &eh_work_q);
	spin_unlock_irqrestore(host->host_lock, flags);

	ata_scsi_cmd_error_handler(host, ap, &eh_work_q);

	/* If we timed out racing normal completion and there is nothing to
	 * recover (nr_timedout == 0), this still runs port recovery.
	 */
	ata_scsi_port_error_handler(host, ap);

	/* finish or retry handled scmd's and clean up */
	WARN_ON(!list_empty(&eh_work_q));

}
552
/**
 * ata_scsi_cmd_error_handler - error callback for a list of commands
 * @host: scsi host containing the port
 * @ap: ATA port within the host
 * @eh_work_q: list of commands to process
 *
 * Process the given list of commands and return those finished to the
 * ap->eh_done_q.  This function is the first part of the libata error
 * handler which processes a given list of failed commands.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 */
void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
				struct list_head *eh_work_q)
{
	int i;
	unsigned long flags;

	/* make sure sff pio task is not running */
	ata_sff_flush_pio_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	spin_lock_irqsave(ap->lock, flags);
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		/* This must occur under the ap->lock as we don't want
		 * a polled recovery to race the real interrupt handler.
		 *
		 * The lost_interrupt handler checks for any completed but
		 * non-notified command and completes much like an IRQ handler.
		 *
		 * We then fall into the error recovery code which will treat
		 * this as if normal completion won the race.
		 */
		if (ap->ops->lost_interrupt)
			ap->ops->lost_interrupt(ap);

		list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
			struct ata_queued_cmd *qc;

			/* find the active qc associated with this scmd */
			ata_qc_for_each_raw(ap, qc, i) {
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting ATA_QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);


		/* initialize eh_tries */
		ap->eh_tries = ATA_EH_MAX_TRIES;
	}
	spin_unlock_irqrestore(ap->lock, flags);

}
EXPORT_SYMBOL(ata_scsi_cmd_error_handler);
647
/**
 * ata_scsi_port_error_handler - recover the port after the commands
 * @host: SCSI host containing the port
 * @ap: the ATA port
 *
 * Handle the recovery of the port @ap after all the commands
 * have been recovered.  This is the second part of the libata
 * error handler; it repeats recovery while new exceptions keep
 * arriving, then wakes up any waiters.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 */
void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
{
	unsigned long flags;

	/* invoke error handler */
	if (ap->ops->error_handler) {
		struct ata_link *link;

		/* acquire EH ownership */
		ata_eh_acquire(ap);
 repeat:
		/* kill fast drain timer */
		del_timer_sync(&ap->fastdrain_timer);

		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		ata_for_each_link(link, ap, HOST_FIRST) {
			struct ata_eh_context *ehc = &link->eh_context;
			struct ata_device *dev;

			/* move eh_info into eh_context for this EH round */
			memset(&link->eh_context, 0, sizeof(link->eh_context));
			link->eh_context.i = link->eh_info;
			memset(&link->eh_info, 0, sizeof(link->eh_info));

			/* snapshot transfer mode / NCQ state for recovery */
			ata_for_each_dev(dev, link, ENABLED) {
				int devno = dev->devno;

				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
				if (ata_ncq_enabled(dev))
					ehc->saved_ncq_enabled |= 1 << devno;
			}
		}

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		ap->excl_link = NULL;	/* don't maintain exclusion over EH */

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else {
			/* if unloading, commence suicide */
			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
			    !(ap->pflags & ATA_PFLAG_UNLOADED))
				ata_eh_unload(ap);
			ata_eh_finish(ap);
		}

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--ap->eh_tries) {
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_err(ap,
				     "EH pending after %d tries, giving up\n",
				     ATA_EH_MAX_TRIES);
			ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		}

		/* this run is complete, make sure EH info is clear */
		ata_for_each_link(link, ap, HOST_FIRST)
			memset(&link->eh_info, 0, sizeof(link->eh_info));

		/* end eh (clear host_eh_scheduled) while holding
		 * ap->lock such that if exception occurs after this
		 * point but before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		ap->ops->end_eh(ap);

		spin_unlock_irqrestore(ap->lock, flags);
		ata_eh_release(ap);
	} else {
		/* legacy (old) EH path */
		WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
		 !(ap->flags & ATA_FLAG_SAS_HOST))
		schedule_delayed_work(&ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_info(ap, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);
771
/**
 * ata_port_wait_eh - Wait for the currently pending EH to complete
 * @ap: Port to wait EH for
 *
 * Wait until the currently pending EH is complete.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	/* sleep until neither pending nor in-progress EH remains */
	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		ata_msleep(ap, 10);
		goto retry;
	}
}
EXPORT_SYMBOL_GPL(ata_port_wait_eh);
806
ata_eh_nr_in_flight(struct ata_port * ap)807 static unsigned int ata_eh_nr_in_flight(struct ata_port *ap)
808 {
809 struct ata_queued_cmd *qc;
810 unsigned int tag;
811 unsigned int nr = 0;
812
813 /* count only non-internal commands */
814 ata_qc_for_each(ap, qc, tag) {
815 if (qc)
816 nr++;
817 }
818
819 return nr;
820 }
821
/*
 * Fast drain timer callback.  If the number of in-flight qcs hasn't
 * changed since the timer was armed, no forward progress is being made:
 * time all of them out and freeze the port so EH can take over.
 * Otherwise re-arm the timer for another interval.
 */
void ata_eh_fastdrain_timerfn(struct timer_list *t)
{
	struct ata_port *ap = from_timer(ap, t, fastdrain_timer);
	unsigned long flags;
	unsigned int cnt;

	spin_lock_irqsave(ap->lock, flags);

	cnt = ata_eh_nr_in_flight(ap);

	/* are we done? */
	if (!cnt)
		goto out_unlock;

	if (cnt == ap->fastdrain_cnt) {
		struct ata_queued_cmd *qc;
		unsigned int tag;

		/* No progress during the last interval, tag all
		 * in-flight qcs as timed out and freeze the port.
		 */
		ata_qc_for_each(ap, qc, tag) {
			if (qc)
				qc->err_mask |= AC_ERR_TIMEOUT;
		}

		ata_port_freeze(ap);
	} else {
		/* some qcs have finished, give it another chance */
		ap->fastdrain_cnt = cnt;
		ap->fastdrain_timer.expires =
			ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
		add_timer(&ap->fastdrain_timer);
	}

 out_unlock:
	spin_unlock_irqrestore(ap->lock, flags);
}
860
/**
 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
 * @ap: target ATA port
 * @fastdrain: activate fast drain
 *
 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
 * is non-zero and EH wasn't pending before.  Fast drain ensures
 * that EH kicks in in timely manner.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
{
	unsigned int cnt;

	/* already scheduled? */
	if (ap->pflags & ATA_PFLAG_EH_PENDING)
		return;

	ap->pflags |= ATA_PFLAG_EH_PENDING;

	if (!fastdrain)
		return;

	/* do we have in-flight qcs? */
	cnt = ata_eh_nr_in_flight(ap);
	if (!cnt)
		return;

	/* activate fast drain: timerfn freezes the port if no progress
	 * is made within ATA_EH_FASTDRAIN_INTERVAL
	 */
	ap->fastdrain_cnt = cnt;
	ap->fastdrain_timer.expires =
		ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
	add_timer(&ap->fastdrain_timer);
}
897
/**
 * ata_qc_schedule_eh - schedule qc for error handling
 * @qc: command to schedule error handling for
 *
 * Schedule error handling for @qc.  EH will kick in as soon as
 * other commands are drained.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	ata_eh_set_pending(ap, 1);

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	blk_abort_request(scsi_cmd_to_rq(qc->scsicmd));
}
924
/**
 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine
 * @ap: ATA port to schedule EH for
 *
 * LOCKING: inherited from ata_port_schedule_eh
 * spin_lock_irqsave(host lock)
 */
void ata_std_sched_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	/* during initialization EH is not yet operational */
	if (ap->pflags & ATA_PFLAG_INITIALIZING)
		return;

	ata_eh_set_pending(ap, 1);
	scsi_schedule_eh(ap->scsi_host);

	trace_ata_std_sched_eh(ap);
}
EXPORT_SYMBOL_GPL(ata_std_sched_eh);
945
946 /**
947 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine
948 * @ap: ATA port to end EH for
949 *
950 * In the libata object model there is a 1:1 mapping of ata_port to
951 * shost, so host fields can be directly manipulated under ap->lock, in
952 * the libsas case we need to hold a lock at the ha->level to coordinate
953 * these events.
954 *
955 * LOCKING:
956 * spin_lock_irqsave(host lock)
957 */
ata_std_end_eh(struct ata_port * ap)958 void ata_std_end_eh(struct ata_port *ap)
959 {
960 struct Scsi_Host *host = ap->scsi_host;
961
962 host->host_eh_scheduled = 0;
963 }
964 EXPORT_SYMBOL(ata_std_end_eh);
965
966
/**
 * ata_port_schedule_eh - schedule error handling without a qc
 * @ap: ATA port to schedule EH for
 *
 * Schedule error handling for @ap.  EH will kick in as soon as
 * all commands are drained.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	/* see: ata_std_sched_eh, unless you know better */
	ap->ops->sched_eh(ap);
}
EXPORT_SYMBOL_GPL(ata_port_schedule_eh);
983
/*
 * Abort all active qcs on @ap, or only those on @link if @link is
 * non-NULL.  If nothing was aborted, EH is scheduled explicitly so a
 * recovery pass still runs.  Returns the number of aborted qcs.
 * Caller holds the host lock.
 */
static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
	struct ata_queued_cmd *qc;
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	/* we're gonna abort all commands, no need for fast drain */
	ata_eh_set_pending(ap, 0);

	/* include internal tag in iteration */
	ata_qc_for_each_with_internal(ap, qc, tag) {
		if (qc && (!link || qc->dev->link == link)) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}
1008
/**
 * ata_link_abort - abort all qc's on the link
 * @link: ATA link to abort qc's for
 *
 * Abort all active qc's active on @link and schedule EH.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted qc's.
 */
int ata_link_abort(struct ata_link *link)
{
	return ata_do_link_abort(link->ap, link);
}
EXPORT_SYMBOL_GPL(ata_link_abort);
1026
/**
 * ata_port_abort - abort all qc's on the port
 * @ap: ATA port to abort qc's for
 *
 * Abort all active qc's of @ap and schedule EH.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	/* NULL link means all links */
	return ata_do_link_abort(ap, NULL);
}
EXPORT_SYMBOL_GPL(ata_port_abort);
1044
/**
 * __ata_port_freeze - freeze port
 * @ap: ATA port to freeze
 *
 * This function is called when HSM violation or some other
 * condition disrupts normal operation of the port.  Frozen port
 * is not allowed to perform any operation until the port is
 * thawed, which usually follows a successful reset.
 *
 * ap->ops->freeze() callback can be used for freezing the port
 * hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 * port cannot be frozen hardware-wise, the interrupt handler
 * must ack and clear interrupts unconditionally while the port
 * is frozen.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	/* hardware-level freeze is optional */
	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	trace_ata_port_freeze(ap);
}
1074
1075 /**
1076 * ata_port_freeze - abort & freeze port
1077 * @ap: ATA port to freeze
1078 *
1079 * Abort and freeze @ap. The freeze operation must be called
1080 * first, because some hardware requires special operations
1081 * before the taskfile registers are accessible.
1082 *
1083 * LOCKING:
1084 * spin_lock_irqsave(host lock)
1085 *
1086 * RETURNS:
1087 * Number of aborted commands.
1088 */
ata_port_freeze(struct ata_port * ap)1089 int ata_port_freeze(struct ata_port *ap)
1090 {
1091 WARN_ON(!ap->ops->error_handler);
1092
1093 __ata_port_freeze(ap);
1094
1095 return ata_port_abort(ap);
1096 }
1097 EXPORT_SYMBOL_GPL(ata_port_freeze);
1098
1099 /**
1100 * ata_eh_freeze_port - EH helper to freeze port
1101 * @ap: ATA port to freeze
1102 *
1103 * Freeze @ap.
1104 *
1105 * LOCKING:
1106 * None.
1107 */
ata_eh_freeze_port(struct ata_port * ap)1108 void ata_eh_freeze_port(struct ata_port *ap)
1109 {
1110 unsigned long flags;
1111
1112 if (!ap->ops->error_handler)
1113 return;
1114
1115 spin_lock_irqsave(ap->lock, flags);
1116 __ata_port_freeze(ap);
1117 spin_unlock_irqrestore(ap->lock, flags);
1118 }
1119 EXPORT_SYMBOL_GPL(ata_eh_freeze_port);
1120
1121 /**
1122 * ata_eh_thaw_port - EH helper to thaw port
1123 * @ap: ATA port to thaw
1124 *
1125 * Thaw frozen port @ap.
1126 *
1127 * LOCKING:
1128 * None.
1129 */
ata_eh_thaw_port(struct ata_port * ap)1130 void ata_eh_thaw_port(struct ata_port *ap)
1131 {
1132 unsigned long flags;
1133
1134 if (!ap->ops->error_handler)
1135 return;
1136
1137 spin_lock_irqsave(ap->lock, flags);
1138
1139 ap->pflags &= ~ATA_PFLAG_FROZEN;
1140
1141 if (ap->ops->thaw)
1142 ap->ops->thaw(ap);
1143
1144 spin_unlock_irqrestore(ap->lock, flags);
1145
1146 trace_ata_port_thaw(ap);
1147 }
1148
/* no-op scsidone callback used while EH owns a command's completion */
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}
1153
/* Complete @qc from EH: detach the scsi command from the qc under the
 * host lock (scsidone is replaced with a no-op so the normal completion
 * path is suppressed), then hand the command to the SCSI EH done queue.
 */
static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	/* the qc's tag must have been released by __ata_qc_complete() */
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}
1168
1169 /**
1170 * ata_eh_qc_complete - Complete an active ATA command from EH
1171 * @qc: Command to complete
1172 *
1173 * Indicate to the mid and upper layers that an ATA command has
1174 * completed. To be used from EH.
1175 */
ata_eh_qc_complete(struct ata_queued_cmd * qc)1176 void ata_eh_qc_complete(struct ata_queued_cmd *qc)
1177 {
1178 struct scsi_cmnd *scmd = qc->scsicmd;
1179 scmd->retries = scmd->allowed;
1180 __ata_eh_qc_complete(qc);
1181 }
1182
1183 /**
1184 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
1185 * @qc: Command to retry
1186 *
1187 * Indicate to the mid and upper layers that an ATA command
1188 * should be retried. To be used from EH.
1189 *
1190 * SCSI midlayer limits the number of retries to scmd->allowed.
1191 * scmd->allowed is incremented for commands which get retried
1192 * due to unrelated failures (qc->err_mask is zero).
1193 */
ata_eh_qc_retry(struct ata_queued_cmd * qc)1194 void ata_eh_qc_retry(struct ata_queued_cmd *qc)
1195 {
1196 struct scsi_cmnd *scmd = qc->scsicmd;
1197 if (!qc->err_mask)
1198 scmd->allowed++;
1199 __ata_eh_qc_complete(qc);
1200 }
1201
1202 /**
1203 * ata_dev_disable - disable ATA device
1204 * @dev: ATA device to disable
1205 *
1206 * Disable @dev.
1207 *
1208 * Locking:
1209 * EH context.
1210 */
ata_dev_disable(struct ata_device * dev)1211 void ata_dev_disable(struct ata_device *dev)
1212 {
1213 if (!ata_dev_enabled(dev))
1214 return;
1215
1216 ata_dev_warn(dev, "disable device\n");
1217 ata_acpi_on_disable(dev);
1218 ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
1219 dev->class++;
1220
1221 /* From now till the next successful probe, ering is used to
1222 * track probe failures. Clear accumulated device error info.
1223 */
1224 ata_ering_clear(&dev->ering);
1225 }
1226 EXPORT_SYMBOL_GPL(ata_dev_disable);
1227
1228 /**
1229 * ata_eh_detach_dev - detach ATA device
1230 * @dev: ATA device to detach
1231 *
1232 * Detach @dev.
1233 *
1234 * LOCKING:
1235 * None.
1236 */
ata_eh_detach_dev(struct ata_device * dev)1237 void ata_eh_detach_dev(struct ata_device *dev)
1238 {
1239 struct ata_link *link = dev->link;
1240 struct ata_port *ap = link->ap;
1241 struct ata_eh_context *ehc = &link->eh_context;
1242 unsigned long flags;
1243
1244 ata_dev_disable(dev);
1245
1246 spin_lock_irqsave(ap->lock, flags);
1247
1248 dev->flags &= ~ATA_DFLAG_DETACH;
1249
1250 if (ata_scsi_offline_dev(dev)) {
1251 dev->flags |= ATA_DFLAG_DETACHED;
1252 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
1253 }
1254
1255 /* clear per-dev EH info */
1256 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
1257 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
1258 ehc->saved_xfer_mode[dev->devno] = 0;
1259 ehc->saved_ncq_enabled &= ~(1 << dev->devno);
1260
1261 spin_unlock_irqrestore(ap->lock, flags);
1262 }
1263
1264 /**
1265 * ata_eh_about_to_do - about to perform eh_action
1266 * @link: target ATA link
1267 * @dev: target ATA dev for per-dev action (can be NULL)
1268 * @action: action about to be performed
1269 *
1270 * Called just before performing EH actions to clear related bits
1271 * in @link->eh_info such that eh actions are not unnecessarily
1272 * repeated.
1273 *
1274 * LOCKING:
1275 * None.
1276 */
ata_eh_about_to_do(struct ata_link * link,struct ata_device * dev,unsigned int action)1277 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
1278 unsigned int action)
1279 {
1280 struct ata_port *ap = link->ap;
1281 struct ata_eh_info *ehi = &link->eh_info;
1282 struct ata_eh_context *ehc = &link->eh_context;
1283 unsigned long flags;
1284
1285 trace_ata_eh_about_to_do(link, dev ? dev->devno : 0, action);
1286
1287 spin_lock_irqsave(ap->lock, flags);
1288
1289 ata_eh_clear_action(link, dev, ehi, action);
1290
1291 /* About to take EH action, set RECOVERED. Ignore actions on
1292 * slave links as master will do them again.
1293 */
1294 if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
1295 ap->pflags |= ATA_PFLAG_RECOVERED;
1296
1297 spin_unlock_irqrestore(ap->lock, flags);
1298 }
1299
1300 /**
1301 * ata_eh_done - EH action complete
1302 * @link: ATA link for which EH actions are complete
1303 * @dev: target ATA dev for per-dev action (can be NULL)
1304 * @action: action just completed
1305 *
1306 * Called right after performing EH actions to clear related bits
1307 * in @link->eh_context.
1308 *
1309 * LOCKING:
1310 * None.
1311 */
ata_eh_done(struct ata_link * link,struct ata_device * dev,unsigned int action)1312 void ata_eh_done(struct ata_link *link, struct ata_device *dev,
1313 unsigned int action)
1314 {
1315 struct ata_eh_context *ehc = &link->eh_context;
1316
1317 trace_ata_eh_done(link, dev ? dev->devno : 0, action);
1318
1319 ata_eh_clear_action(link, dev, &ehc->i, action);
1320 }
1321
1322 /**
1323 * ata_err_string - convert err_mask to descriptive string
1324 * @err_mask: error mask to convert to string
1325 *
1326 * Convert @err_mask to descriptive string. Errors are
1327 * prioritized according to severity and only the most severe
1328 * error is reported.
1329 *
1330 * LOCKING:
1331 * None.
1332 *
1333 * RETURNS:
1334 * Descriptive string for @err_mask
1335 */
ata_err_string(unsigned int err_mask)1336 static const char *ata_err_string(unsigned int err_mask)
1337 {
1338 if (err_mask & AC_ERR_HOST_BUS)
1339 return "host bus error";
1340 if (err_mask & AC_ERR_ATA_BUS)
1341 return "ATA bus error";
1342 if (err_mask & AC_ERR_TIMEOUT)
1343 return "timeout";
1344 if (err_mask & AC_ERR_HSM)
1345 return "HSM violation";
1346 if (err_mask & AC_ERR_SYSTEM)
1347 return "internal error";
1348 if (err_mask & AC_ERR_MEDIA)
1349 return "media error";
1350 if (err_mask & AC_ERR_INVALID)
1351 return "invalid argument";
1352 if (err_mask & AC_ERR_DEV)
1353 return "device error";
1354 if (err_mask & AC_ERR_NCQ)
1355 return "NCQ error";
1356 if (err_mask & AC_ERR_NODEV_HINT)
1357 return "Polling detection error";
1358 return "unknown error";
1359 }
1360
1361 /**
1362 * atapi_eh_tur - perform ATAPI TEST_UNIT_READY
1363 * @dev: target ATAPI device
1364 * @r_sense_key: out parameter for sense_key
1365 *
1366 * Perform ATAPI TEST_UNIT_READY.
1367 *
1368 * LOCKING:
1369 * EH context (may sleep).
1370 *
1371 * RETURNS:
1372 * 0 on success, AC_ERR_* mask on failure.
1373 */
atapi_eh_tur(struct ata_device * dev,u8 * r_sense_key)1374 unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
1375 {
1376 u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
1377 struct ata_taskfile tf;
1378 unsigned int err_mask;
1379
1380 ata_tf_init(dev, &tf);
1381
1382 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
1383 tf.command = ATA_CMD_PACKET;
1384 tf.protocol = ATAPI_PROT_NODATA;
1385
1386 err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
1387 if (err_mask == AC_ERR_DEV)
1388 *r_sense_key = tf.error >> 4;
1389 return err_mask;
1390 }
1391
1392 /**
1393 * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT
1394 * @qc: qc to perform REQUEST_SENSE_SENSE_DATA_EXT to
1395 *
1396 * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK
1397 * SENSE. This function is an EH helper.
1398 *
1399 * LOCKING:
1400 * Kernel thread context (may sleep).
1401 */
ata_eh_request_sense(struct ata_queued_cmd * qc)1402 static void ata_eh_request_sense(struct ata_queued_cmd *qc)
1403 {
1404 struct scsi_cmnd *cmd = qc->scsicmd;
1405 struct ata_device *dev = qc->dev;
1406 struct ata_taskfile tf;
1407 unsigned int err_mask;
1408
1409 if (qc->ap->pflags & ATA_PFLAG_FROZEN) {
1410 ata_dev_warn(dev, "sense data available but port frozen\n");
1411 return;
1412 }
1413
1414 if (!cmd || qc->flags & ATA_QCFLAG_SENSE_VALID)
1415 return;
1416
1417 if (!ata_id_sense_reporting_enabled(dev->id)) {
1418 ata_dev_warn(qc->dev, "sense data reporting disabled\n");
1419 return;
1420 }
1421
1422 ata_tf_init(dev, &tf);
1423 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
1424 tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
1425 tf.command = ATA_CMD_REQ_SENSE_DATA;
1426 tf.protocol = ATA_PROT_NODATA;
1427
1428 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
1429 /* Ignore err_mask; ATA_ERR might be set */
1430 if (tf.status & ATA_SENSE) {
1431 ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal);
1432 qc->flags |= ATA_QCFLAG_SENSE_VALID;
1433 } else {
1434 ata_dev_warn(dev, "request sense failed stat %02x emask %x\n",
1435 tf.status, err_mask);
1436 }
1437 }
1438
1439 /**
1440 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
1441 * @dev: device to perform REQUEST_SENSE to
1442 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
1443 * @dfl_sense_key: default sense key to use
1444 *
1445 * Perform ATAPI REQUEST_SENSE after the device reported CHECK
1446 * SENSE. This function is EH helper.
1447 *
1448 * LOCKING:
1449 * Kernel thread context (may sleep).
1450 *
1451 * RETURNS:
1452 * 0 on success, AC_ERR_* mask on failure
1453 */
atapi_eh_request_sense(struct ata_device * dev,u8 * sense_buf,u8 dfl_sense_key)1454 unsigned int atapi_eh_request_sense(struct ata_device *dev,
1455 u8 *sense_buf, u8 dfl_sense_key)
1456 {
1457 u8 cdb[ATAPI_CDB_LEN] =
1458 { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
1459 struct ata_port *ap = dev->link->ap;
1460 struct ata_taskfile tf;
1461
1462 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
1463
1464 /* initialize sense_buf with the error register,
1465 * for the case where they are -not- overwritten
1466 */
1467 sense_buf[0] = 0x70;
1468 sense_buf[2] = dfl_sense_key;
1469
1470 /* some devices time out if garbage left in tf */
1471 ata_tf_init(dev, &tf);
1472
1473 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
1474 tf.command = ATA_CMD_PACKET;
1475
1476 /* is it pointless to prefer PIO for "safety reasons"? */
1477 if (ap->flags & ATA_FLAG_PIO_DMA) {
1478 tf.protocol = ATAPI_PROT_DMA;
1479 tf.feature |= ATAPI_PKT_DMA;
1480 } else {
1481 tf.protocol = ATAPI_PROT_PIO;
1482 tf.lbam = SCSI_SENSE_BUFFERSIZE;
1483 tf.lbah = 0;
1484 }
1485
1486 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
1487 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
1488 }
1489
1490 /**
1491 * ata_eh_analyze_serror - analyze SError for a failed port
1492 * @link: ATA link to analyze SError for
1493 *
1494 * Analyze SError if available and further determine cause of
1495 * failure.
1496 *
1497 * LOCKING:
1498 * None.
1499 */
ata_eh_analyze_serror(struct ata_link * link)1500 static void ata_eh_analyze_serror(struct ata_link *link)
1501 {
1502 struct ata_eh_context *ehc = &link->eh_context;
1503 u32 serror = ehc->i.serror;
1504 unsigned int err_mask = 0, action = 0;
1505 u32 hotplug_mask;
1506
1507 if (serror & (SERR_PERSISTENT | SERR_DATA)) {
1508 err_mask |= AC_ERR_ATA_BUS;
1509 action |= ATA_EH_RESET;
1510 }
1511 if (serror & SERR_PROTOCOL) {
1512 err_mask |= AC_ERR_HSM;
1513 action |= ATA_EH_RESET;
1514 }
1515 if (serror & SERR_INTERNAL) {
1516 err_mask |= AC_ERR_SYSTEM;
1517 action |= ATA_EH_RESET;
1518 }
1519
1520 /* Determine whether a hotplug event has occurred. Both
1521 * SError.N/X are considered hotplug events for enabled or
1522 * host links. For disabled PMP links, only N bit is
1523 * considered as X bit is left at 1 for link plugging.
1524 */
1525 if (link->lpm_policy > ATA_LPM_MAX_POWER)
1526 hotplug_mask = 0; /* hotplug doesn't work w/ LPM */
1527 else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
1528 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
1529 else
1530 hotplug_mask = SERR_PHYRDY_CHG;
1531
1532 if (serror & hotplug_mask)
1533 ata_ehi_hotplugged(&ehc->i);
1534
1535 ehc->i.err_mask |= err_mask;
1536 ehc->i.action |= action;
1537 }
1538
1539 /**
1540 * ata_eh_analyze_tf - analyze taskfile of a failed qc
1541 * @qc: qc to analyze
1542 *
1543 * Analyze taskfile of @qc and further determine cause of
1544 * failure. This function also requests ATAPI sense data if
1545 * available.
1546 *
1547 * LOCKING:
1548 * Kernel thread context (may sleep).
1549 *
1550 * RETURNS:
1551 * Determined recovery action
1552 */
ata_eh_analyze_tf(struct ata_queued_cmd * qc)1553 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc)
1554 {
1555 const struct ata_taskfile *tf = &qc->result_tf;
1556 unsigned int tmp, action = 0;
1557 u8 stat = tf->status, err = tf->error;
1558
1559 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
1560 qc->err_mask |= AC_ERR_HSM;
1561 return ATA_EH_RESET;
1562 }
1563
1564 if (stat & (ATA_ERR | ATA_DF)) {
1565 qc->err_mask |= AC_ERR_DEV;
1566 /*
1567 * Sense data reporting does not work if the
1568 * device fault bit is set.
1569 */
1570 if (stat & ATA_DF)
1571 stat &= ~ATA_SENSE;
1572 } else {
1573 return 0;
1574 }
1575
1576 switch (qc->dev->class) {
1577 case ATA_DEV_ZAC:
1578 if (stat & ATA_SENSE)
1579 ata_eh_request_sense(qc);
1580 fallthrough;
1581 case ATA_DEV_ATA:
1582 if (err & ATA_ICRC)
1583 qc->err_mask |= AC_ERR_ATA_BUS;
1584 if (err & (ATA_UNC | ATA_AMNF))
1585 qc->err_mask |= AC_ERR_MEDIA;
1586 if (err & ATA_IDNF)
1587 qc->err_mask |= AC_ERR_INVALID;
1588 break;
1589
1590 case ATA_DEV_ATAPI:
1591 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
1592 tmp = atapi_eh_request_sense(qc->dev,
1593 qc->scsicmd->sense_buffer,
1594 qc->result_tf.error >> 4);
1595 if (!tmp)
1596 qc->flags |= ATA_QCFLAG_SENSE_VALID;
1597 else
1598 qc->err_mask |= tmp;
1599 }
1600 }
1601
1602 if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
1603 enum scsi_disposition ret = scsi_check_sense(qc->scsicmd);
1604 /*
1605 * SUCCESS here means that the sense code could be
1606 * evaluated and should be passed to the upper layers
1607 * for correct evaluation.
1608 * FAILED means the sense code could not be interpreted
1609 * and the device would need to be reset.
1610 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the
1611 * command would need to be retried.
1612 */
1613 if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) {
1614 qc->flags |= ATA_QCFLAG_RETRY;
1615 qc->err_mask |= AC_ERR_OTHER;
1616 } else if (ret != SUCCESS) {
1617 qc->err_mask |= AC_ERR_HSM;
1618 }
1619 }
1620 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
1621 action |= ATA_EH_RESET;
1622
1623 return action;
1624 }
1625
ata_eh_categorize_error(unsigned int eflags,unsigned int err_mask,int * xfer_ok)1626 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
1627 int *xfer_ok)
1628 {
1629 int base = 0;
1630
1631 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
1632 *xfer_ok = 1;
1633
1634 if (!*xfer_ok)
1635 base = ATA_ECAT_DUBIOUS_NONE;
1636
1637 if (err_mask & AC_ERR_ATA_BUS)
1638 return base + ATA_ECAT_ATA_BUS;
1639
1640 if (err_mask & AC_ERR_TIMEOUT)
1641 return base + ATA_ECAT_TOUT_HSM;
1642
1643 if (eflags & ATA_EFLAG_IS_IO) {
1644 if (err_mask & AC_ERR_HSM)
1645 return base + ATA_ECAT_TOUT_HSM;
1646 if ((err_mask &
1647 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
1648 return base + ATA_ECAT_UNK_DEV;
1649 }
1650
1651 return 0;
1652 }
1653
/* accumulator passed to speed_down_verdict_cb() while walking the ering */
struct speed_down_verdict_arg {
	u64 since;		/* entries older than this stop the walk */
	int xfer_ok;		/* sticky flag: non-DUBIOUS xfer seen */
	int nr_errors[ATA_ECAT_NR];	/* per-category error counts */
};
1659
speed_down_verdict_cb(struct ata_ering_entry * ent,void * void_arg)1660 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1661 {
1662 struct speed_down_verdict_arg *arg = void_arg;
1663 int cat;
1664
1665 if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since))
1666 return -1;
1667
1668 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
1669 &arg->xfer_ok);
1670 arg->nr_errors[cat]++;
1671
1672 return 0;
1673 }
1674
1675 /**
1676 * ata_eh_speed_down_verdict - Determine speed down verdict
1677 * @dev: Device of interest
1678 *
1679 * This function examines error ring of @dev and determines
1680 * whether NCQ needs to be turned off, transfer speed should be
1681 * stepped down, or falling back to PIO is necessary.
1682 *
1683 * ECAT_ATA_BUS : ATA_BUS error for any command
1684 *
1685 * ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for
1686 * IO commands
1687 *
1688 * ECAT_UNK_DEV : Unknown DEV error for IO commands
1689 *
1690 * ECAT_DUBIOUS_* : Identical to above three but occurred while
1691 * data transfer hasn't been verified.
1692 *
1693 * Verdicts are
1694 *
1695 * NCQ_OFF : Turn off NCQ.
1696 *
1697 * SPEED_DOWN : Speed down transfer speed but don't fall back
1698 * to PIO.
1699 *
1700 * FALLBACK_TO_PIO : Fall back to PIO.
1701 *
1702 * Even if multiple verdicts are returned, only one action is
1703 * taken per error. An action triggered by non-DUBIOUS errors
1704 * clears ering, while one triggered by DUBIOUS_* errors doesn't.
1705 * This is to expedite speed down decisions right after device is
1706 * initially configured.
1707 *
1708 * The following are speed down rules. #1 and #2 deal with
1709 * DUBIOUS errors.
1710 *
1711 * 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
1712 * occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
1713 *
1714 * 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
1715 * occurred during last 5 mins, NCQ_OFF.
1716 *
1717 * 3. If more than 8 ATA_BUS, TOUT_HSM or UNK_DEV errors
1718 * occurred during last 5 mins, FALLBACK_TO_PIO
1719 *
1720 * 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
1721 * during last 10 mins, NCQ_OFF.
1722 *
1723 * 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
1724 * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
1725 *
1726 * LOCKING:
1727 * Inherited from caller.
1728 *
1729 * RETURNS:
1730 * OR of ATA_EH_SPDN_* flags.
1731 */
ata_eh_speed_down_verdict(struct ata_device * dev)1732 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
1733 {
1734 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
1735 u64 j64 = get_jiffies_64();
1736 struct speed_down_verdict_arg arg;
1737 unsigned int verdict = 0;
1738
1739 /* scan past 5 mins of error history */
1740 memset(&arg, 0, sizeof(arg));
1741 arg.since = j64 - min(j64, j5mins);
1742 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1743
1744 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
1745 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
1746 verdict |= ATA_EH_SPDN_SPEED_DOWN |
1747 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;
1748
1749 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
1750 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
1751 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;
1752
1753 if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
1754 arg.nr_errors[ATA_ECAT_TOUT_HSM] +
1755 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
1756 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;
1757
1758 /* scan past 10 mins of error history */
1759 memset(&arg, 0, sizeof(arg));
1760 arg.since = j64 - min(j64, j10mins);
1761 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1762
1763 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
1764 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
1765 verdict |= ATA_EH_SPDN_NCQ_OFF;
1766
1767 if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
1768 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
1769 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
1770 verdict |= ATA_EH_SPDN_SPEED_DOWN;
1771
1772 return verdict;
1773 }
1774
1775 /**
1776 * ata_eh_speed_down - record error and speed down if necessary
1777 * @dev: Failed device
1778 * @eflags: mask of ATA_EFLAG_* flags
1779 * @err_mask: err_mask of the error
1780 *
1781 * Record error and examine error history to determine whether
1782 * adjusting transmission speed is necessary. It also sets
1783 * transmission limits appropriately if such adjustment is
1784 * necessary.
1785 *
1786 * LOCKING:
1787 * Kernel thread context (may sleep).
1788 *
1789 * RETURNS:
1790 * Determined recovery action.
1791 */
ata_eh_speed_down(struct ata_device * dev,unsigned int eflags,unsigned int err_mask)1792 static unsigned int ata_eh_speed_down(struct ata_device *dev,
1793 unsigned int eflags, unsigned int err_mask)
1794 {
1795 struct ata_link *link = ata_dev_phys_link(dev);
1796 int xfer_ok = 0;
1797 unsigned int verdict;
1798 unsigned int action = 0;
1799
1800 /* don't bother if Cat-0 error */
1801 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
1802 return 0;
1803
1804 /* record error and determine whether speed down is necessary */
1805 ata_ering_record(&dev->ering, eflags, err_mask);
1806 verdict = ata_eh_speed_down_verdict(dev);
1807
1808 /* turn off NCQ? */
1809 if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
1810 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
1811 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
1812 dev->flags |= ATA_DFLAG_NCQ_OFF;
1813 ata_dev_warn(dev, "NCQ disabled due to excessive errors\n");
1814 goto done;
1815 }
1816
1817 /* speed down? */
1818 if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
1819 /* speed down SATA link speed if possible */
1820 if (sata_down_spd_limit(link, 0) == 0) {
1821 action |= ATA_EH_RESET;
1822 goto done;
1823 }
1824
1825 /* lower transfer mode */
1826 if (dev->spdn_cnt < 2) {
1827 static const int dma_dnxfer_sel[] =
1828 { ATA_DNXFER_DMA, ATA_DNXFER_40C };
1829 static const int pio_dnxfer_sel[] =
1830 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
1831 int sel;
1832
1833 if (dev->xfer_shift != ATA_SHIFT_PIO)
1834 sel = dma_dnxfer_sel[dev->spdn_cnt];
1835 else
1836 sel = pio_dnxfer_sel[dev->spdn_cnt];
1837
1838 dev->spdn_cnt++;
1839
1840 if (ata_down_xfermask_limit(dev, sel) == 0) {
1841 action |= ATA_EH_RESET;
1842 goto done;
1843 }
1844 }
1845 }
1846
1847 /* Fall back to PIO? Slowing down to PIO is meaningless for
1848 * SATA ATA devices. Consider it only for PATA and SATAPI.
1849 */
1850 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
1851 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
1852 (dev->xfer_shift != ATA_SHIFT_PIO)) {
1853 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
1854 dev->spdn_cnt = 0;
1855 action |= ATA_EH_RESET;
1856 goto done;
1857 }
1858 }
1859
1860 return 0;
1861 done:
1862 /* device has been slowed down, blow error history */
1863 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
1864 ata_ering_clear(&dev->ering);
1865 return action;
1866 }
1867
1868 /**
1869 * ata_eh_worth_retry - analyze error and decide whether to retry
1870 * @qc: qc to possibly retry
1871 *
1872 * Look at the cause of the error and decide if a retry
1873 * might be useful or not. We don't want to retry media errors
1874 * because the drive itself has probably already taken 10-30 seconds
1875 * doing its own internal retries before reporting the failure.
1876 */
ata_eh_worth_retry(struct ata_queued_cmd * qc)1877 static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc)
1878 {
1879 if (qc->err_mask & AC_ERR_MEDIA)
1880 return 0; /* don't retry media errors */
1881 if (qc->flags & ATA_QCFLAG_IO)
1882 return 1; /* otherwise retry anything from fs stack */
1883 if (qc->err_mask & AC_ERR_INVALID)
1884 return 0; /* don't retry these */
1885 return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */
1886 }
1887
1888 /**
1889 * ata_eh_quiet - check if we need to be quiet about a command error
1890 * @qc: qc to check
1891 *
1892 * Look at the qc flags anbd its scsi command request flags to determine
1893 * if we need to be quiet about the command failure.
1894 */
ata_eh_quiet(struct ata_queued_cmd * qc)1895 static inline bool ata_eh_quiet(struct ata_queued_cmd *qc)
1896 {
1897 if (qc->scsicmd && scsi_cmd_to_rq(qc->scsicmd)->rq_flags & RQF_QUIET)
1898 qc->flags |= ATA_QCFLAG_QUIET;
1899 return qc->flags & ATA_QCFLAG_QUIET;
1900 }
1901
1902 /**
1903 * ata_eh_link_autopsy - analyze error and determine recovery action
1904 * @link: host link to perform autopsy on
1905 *
1906 * Analyze why @link failed and determine which recovery actions
1907 * are needed. This function also sets more detailed AC_ERR_*
1908 * values and fills sense data for ATAPI CHECK SENSE.
1909 *
1910 * LOCKING:
1911 * Kernel thread context (may sleep).
1912 */
ata_eh_link_autopsy(struct ata_link * link)1913 static void ata_eh_link_autopsy(struct ata_link *link)
1914 {
1915 struct ata_port *ap = link->ap;
1916 struct ata_eh_context *ehc = &link->eh_context;
1917 struct ata_queued_cmd *qc;
1918 struct ata_device *dev;
1919 unsigned int all_err_mask = 0, eflags = 0;
1920 int tag, nr_failed = 0, nr_quiet = 0;
1921 u32 serror;
1922 int rc;
1923
1924 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
1925 return;
1926
1927 /* obtain and analyze SError */
1928 rc = sata_scr_read(link, SCR_ERROR, &serror);
1929 if (rc == 0) {
1930 ehc->i.serror |= serror;
1931 ata_eh_analyze_serror(link);
1932 } else if (rc != -EOPNOTSUPP) {
1933 /* SError read failed, force reset and probing */
1934 ehc->i.probe_mask |= ATA_ALL_DEVICES;
1935 ehc->i.action |= ATA_EH_RESET;
1936 ehc->i.err_mask |= AC_ERR_OTHER;
1937 }
1938
1939 /* analyze NCQ failure */
1940 ata_eh_analyze_ncq_error(link);
1941
1942 /* any real error trumps AC_ERR_OTHER */
1943 if (ehc->i.err_mask & ~AC_ERR_OTHER)
1944 ehc->i.err_mask &= ~AC_ERR_OTHER;
1945
1946 all_err_mask |= ehc->i.err_mask;
1947
1948 ata_qc_for_each_raw(ap, qc, tag) {
1949 if (!(qc->flags & ATA_QCFLAG_FAILED) ||
1950 ata_dev_phys_link(qc->dev) != link)
1951 continue;
1952
1953 /* inherit upper level err_mask */
1954 qc->err_mask |= ehc->i.err_mask;
1955
1956 /* analyze TF */
1957 ehc->i.action |= ata_eh_analyze_tf(qc);
1958
1959 /* DEV errors are probably spurious in case of ATA_BUS error */
1960 if (qc->err_mask & AC_ERR_ATA_BUS)
1961 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
1962 AC_ERR_INVALID);
1963
1964 /* any real error trumps unknown error */
1965 if (qc->err_mask & ~AC_ERR_OTHER)
1966 qc->err_mask &= ~AC_ERR_OTHER;
1967
1968 /*
1969 * SENSE_VALID trumps dev/unknown error and revalidation. Upper
1970 * layers will determine whether the command is worth retrying
1971 * based on the sense data and device class/type. Otherwise,
1972 * determine directly if the command is worth retrying using its
1973 * error mask and flags.
1974 */
1975 if (qc->flags & ATA_QCFLAG_SENSE_VALID)
1976 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
1977 else if (ata_eh_worth_retry(qc))
1978 qc->flags |= ATA_QCFLAG_RETRY;
1979
1980 /* accumulate error info */
1981 ehc->i.dev = qc->dev;
1982 all_err_mask |= qc->err_mask;
1983 if (qc->flags & ATA_QCFLAG_IO)
1984 eflags |= ATA_EFLAG_IS_IO;
1985 trace_ata_eh_link_autopsy_qc(qc);
1986
1987 /* Count quiet errors */
1988 if (ata_eh_quiet(qc))
1989 nr_quiet++;
1990 nr_failed++;
1991 }
1992
1993 /* If all failed commands requested silence, then be quiet */
1994 if (nr_quiet == nr_failed)
1995 ehc->i.flags |= ATA_EHI_QUIET;
1996
1997 /* enforce default EH actions */
1998 if (ap->pflags & ATA_PFLAG_FROZEN ||
1999 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
2000 ehc->i.action |= ATA_EH_RESET;
2001 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) ||
2002 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV)))
2003 ehc->i.action |= ATA_EH_REVALIDATE;
2004
2005 /* If we have offending qcs and the associated failed device,
2006 * perform per-dev EH action only on the offending device.
2007 */
2008 if (ehc->i.dev) {
2009 ehc->i.dev_action[ehc->i.dev->devno] |=
2010 ehc->i.action & ATA_EH_PERDEV_MASK;
2011 ehc->i.action &= ~ATA_EH_PERDEV_MASK;
2012 }
2013
2014 /* propagate timeout to host link */
2015 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link))
2016 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT;
2017
2018 /* record error and consider speeding down */
2019 dev = ehc->i.dev;
2020 if (!dev && ((ata_link_max_devices(link) == 1 &&
2021 ata_dev_enabled(link->device))))
2022 dev = link->device;
2023
2024 if (dev) {
2025 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
2026 eflags |= ATA_EFLAG_DUBIOUS_XFER;
2027 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
2028 trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask);
2029 }
2030 }
2031
2032 /**
2033 * ata_eh_autopsy - analyze error and determine recovery action
2034 * @ap: host port to perform autopsy on
2035 *
2036 * Analyze all links of @ap and determine why they failed and
2037 * which recovery actions are needed.
2038 *
2039 * LOCKING:
2040 * Kernel thread context (may sleep).
2041 */
ata_eh_autopsy(struct ata_port * ap)2042 void ata_eh_autopsy(struct ata_port *ap)
2043 {
2044 struct ata_link *link;
2045
2046 ata_for_each_link(link, ap, EDGE)
2047 ata_eh_link_autopsy(link);
2048
2049 /* Handle the frigging slave link. Autopsy is done similarly
2050 * but actions and flags are transferred over to the master
2051 * link and handled from there.
2052 */
2053 if (ap->slave_link) {
2054 struct ata_eh_context *mehc = &ap->link.eh_context;
2055 struct ata_eh_context *sehc = &ap->slave_link->eh_context;
2056
2057 /* transfer control flags from master to slave */
2058 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK;
2059
2060 /* perform autopsy on the slave link */
2061 ata_eh_link_autopsy(ap->slave_link);
2062
2063 /* transfer actions from slave to master and clear slave */
2064 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
2065 mehc->i.action |= sehc->i.action;
2066 mehc->i.dev_action[1] |= sehc->i.dev_action[1];
2067 mehc->i.flags |= sehc->i.flags;
2068 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
2069 }
2070
2071 /* Autopsy of fanout ports can affect host link autopsy.
2072 * Perform host link autopsy last.
2073 */
2074 if (sata_pmp_attached(ap))
2075 ata_eh_link_autopsy(&ap->link);
2076 }
2077
2078 /**
2079 * ata_get_cmd_name - get name for ATA command
2080 * @command: ATA command code to get name for
2081 *
2082 * Return a textual name of the given command or "unknown"
2083 *
2084 * LOCKING:
2085 * None
2086 */
ata_get_cmd_name(u8 command)2087 const char *ata_get_cmd_name(u8 command)
2088 {
2089 #ifdef CONFIG_ATA_VERBOSE_ERROR
2090 static const struct
2091 {
2092 u8 command;
2093 const char *text;
2094 } cmd_descr[] = {
2095 { ATA_CMD_DEV_RESET, "DEVICE RESET" },
2096 { ATA_CMD_CHK_POWER, "CHECK POWER MODE" },
2097 { ATA_CMD_STANDBY, "STANDBY" },
2098 { ATA_CMD_IDLE, "IDLE" },
2099 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" },
2100 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" },
2101 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" },
2102 { ATA_CMD_NOP, "NOP" },
2103 { ATA_CMD_FLUSH, "FLUSH CACHE" },
2104 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" },
2105 { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" },
2106 { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" },
2107 { ATA_CMD_SERVICE, "SERVICE" },
2108 { ATA_CMD_READ, "READ DMA" },
2109 { ATA_CMD_READ_EXT, "READ DMA EXT" },
2110 { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" },
2111 { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" },
2112 { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" },
2113 { ATA_CMD_WRITE, "WRITE DMA" },
2114 { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" },
2115 { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" },
2116 { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" },
2117 { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" },
2118 { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" },
2119 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" },
2120 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" },
2121 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" },
2122 { ATA_CMD_NCQ_NON_DATA, "NCQ NON-DATA" },
2123 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" },
2124 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" },
2125 { ATA_CMD_PIO_READ, "READ SECTOR(S)" },
2126 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" },
2127 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" },
2128 { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" },
2129 { ATA_CMD_READ_MULTI, "READ MULTIPLE" },
2130 { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" },
2131 { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" },
2132 { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" },
2133 { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" },
2134 { ATA_CMD_SET_FEATURES, "SET FEATURES" },
2135 { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" },
2136 { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" },
2137 { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" },
2138 { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" },
2139 { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" },
2140 { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" },
2141 { ATA_CMD_SLEEP, "SLEEP" },
2142 { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" },
2143 { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" },
2144 { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" },
2145 { ATA_CMD_SET_MAX, "SET MAX ADDRESS" },
2146 { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" },
2147 { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" },
2148 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" },
2149 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" },
2150 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" },
2151 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" },
2152 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" },
2153 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" },
2154 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" },
2155 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" },
2156 { ATA_CMD_PMP_READ, "READ BUFFER" },
2157 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" },
2158 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" },
2159 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" },
2160 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" },
2161 { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" },
2162 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" },
2163 { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" },
2164 { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" },
2165 { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" },
2166 { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" },
2167 { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" },
2168 { ATA_CMD_SMART, "SMART" },
2169 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" },
2170 { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" },
2171 { ATA_CMD_DSM, "DATA SET MANAGEMENT" },
2172 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" },
2173 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" },
2174 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" },
2175 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" },
2176 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" },
2177 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" },
2178 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" },
2179 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" },
2180 { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" },
2181 { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" },
2182 { ATA_CMD_READ_LONG, "READ LONG (with retries)" },
2183 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" },
2184 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" },
2185 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" },
2186 { ATA_CMD_RESTORE, "RECALIBRATE" },
2187 { 0, NULL } /* terminate list */
2188 };
2189
2190 unsigned int i;
2191 for (i = 0; cmd_descr[i].text; i++)
2192 if (cmd_descr[i].command == command)
2193 return cmd_descr[i].text;
2194 #endif
2195
2196 return "unknown";
2197 }
2198 EXPORT_SYMBOL_GPL(ata_get_cmd_name);
2199
/**
 *	ata_eh_link_report - report error handling to user
 *	@link: ATA link EH is going on
 *
 *	Report EH to user.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_link_report(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_queued_cmd *qc;
	const char *frozen, *desc;
	char tries_buf[6] = "";
	int tag, nr_failed = 0;

	/* say nothing if this EH session was marked quiet */
	if (ehc->i.flags & ATA_EHI_QUIET)
		return;

	/* use the recorded EH description string if there is one */
	desc = NULL;
	if (ehc->i.desc[0] != '\0')
		desc = ehc->i.desc;

	/* Count the failed commands worth reporting on this link.
	 * Commands marked quiet with a pure device error, and
	 * commands carrying valid sense data with no error mask,
	 * are skipped.
	 */
	ata_qc_for_each_raw(ap, qc, tag) {
		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
		    ata_dev_phys_link(qc->dev) != link ||
		    ((qc->flags & ATA_QCFLAG_QUIET) &&
		     qc->err_mask == AC_ERR_DEV))
			continue;
		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
			continue;

		nr_failed++;
	}

	/* nothing reportable at all? */
	if (!nr_failed && !ehc->i.err_mask)
		return;

	frozen = "";
	if (ap->pflags & ATA_PFLAG_FROZEN)
		frozen = " frozen";

	/* mention the remaining try count once some have been used up */
	if (ap->eh_tries < ATA_EH_MAX_TRIES)
		snprintf(tries_buf, sizeof(tries_buf), " t%d",
			 ap->eh_tries);

	/* attribute the exception to the failed device if known,
	 * otherwise to the link
	 */
	if (ehc->i.dev) {
		ata_dev_err(ehc->i.dev, "exception Emask 0x%x "
			    "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
			    ehc->i.err_mask, link->sactive, ehc->i.serror,
			    ehc->i.action, frozen, tries_buf);
		if (desc)
			ata_dev_err(ehc->i.dev, "%s\n", desc);
	} else {
		ata_link_err(link, "exception Emask 0x%x "
			     "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
			     ehc->i.err_mask, link->sactive, ehc->i.serror,
			     ehc->i.action, frozen, tries_buf);
		if (desc)
			ata_link_err(link, "%s\n", desc);
	}

#ifdef CONFIG_ATA_VERBOSE_ERROR
	/* decode SError bits into human readable form */
	if (ehc->i.serror)
		ata_link_err(link,
		  "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n",
		  ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "",
		  ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "",
		  ehc->i.serror & SERR_DATA ? "UnrecovData " : "",
		  ehc->i.serror & SERR_PERSISTENT ? "Persist " : "",
		  ehc->i.serror & SERR_PROTOCOL ? "Proto " : "",
		  ehc->i.serror & SERR_INTERNAL ? "HostInt " : "",
		  ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "",
		  ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "",
		  ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "",
		  ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "",
		  ehc->i.serror & SERR_DISPARITY ? "Dispar " : "",
		  ehc->i.serror & SERR_CRC ? "BadCRC " : "",
		  ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "",
		  ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "",
		  ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "",
		  ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "",
		  ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : "");
#endif

	/* report each failed command together with its taskfile,
	 * result taskfile, transfer description and CDB (for ATAPI)
	 */
	ata_qc_for_each_raw(ap, qc, tag) {
		struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
		char data_buf[20] = "";
		char cdb_buf[70] = "";

		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
		    ata_dev_phys_link(qc->dev) != link || !qc->err_mask)
			continue;

		/* describe protocol, byte count and DMA direction */
		if (qc->dma_dir != DMA_NONE) {
			static const char *dma_str[] = {
				[DMA_BIDIRECTIONAL]	= "bidi",
				[DMA_TO_DEVICE]		= "out",
				[DMA_FROM_DEVICE]	= "in",
			};
			const char *prot_str = NULL;

			switch (qc->tf.protocol) {
			case ATA_PROT_UNKNOWN:
				prot_str = "unknown";
				break;
			case ATA_PROT_NODATA:
				prot_str = "nodata";
				break;
			case ATA_PROT_PIO:
				prot_str = "pio";
				break;
			case ATA_PROT_DMA:
				prot_str = "dma";
				break;
			case ATA_PROT_NCQ:
				prot_str = "ncq dma";
				break;
			case ATA_PROT_NCQ_NODATA:
				prot_str = "ncq nodata";
				break;
			case ATAPI_PROT_NODATA:
				prot_str = "nodata";
				break;
			case ATAPI_PROT_PIO:
				prot_str = "pio";
				break;
			case ATAPI_PROT_DMA:
				prot_str = "dma";
				break;
			}
			snprintf(data_buf, sizeof(data_buf), " %s %u %s",
				 prot_str, qc->nbytes, dma_str[qc->dma_dir]);
		}

		/* for ATAPI, format the CDB; prefer the SCSI command's
		 * CDB when one is attached
		 */
		if (ata_is_atapi(qc->tf.protocol)) {
			const u8 *cdb = qc->cdb;
			size_t cdb_len = qc->dev->cdb_len;

			if (qc->scsicmd) {
				cdb = qc->scsicmd->cmnd;
				cdb_len = qc->scsicmd->cmd_len;
			}
			__scsi_format_command(cdb_buf, sizeof(cdb_buf),
					      cdb, cdb_len);
		} else
			ata_dev_err(qc->dev, "failed command: %s\n",
				    ata_get_cmd_name(cmd->command));

		ata_dev_err(qc->dev,
			"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"tag %d%s\n %s"
			"res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"Emask 0x%x (%s)%s\n",
			cmd->command, cmd->feature, cmd->nsect,
			cmd->lbal, cmd->lbam, cmd->lbah,
			cmd->hob_feature, cmd->hob_nsect,
			cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
			cmd->device, qc->tag, data_buf, cdb_buf,
			res->status, res->error, res->nsect,
			res->lbal, res->lbam, res->lbah,
			res->hob_feature, res->hob_nsect,
			res->hob_lbal, res->hob_lbam, res->hob_lbah,
			res->device, qc->err_mask, ata_err_string(qc->err_mask),
			qc->err_mask & AC_ERR_NCQ ? " <F>" : "");

#ifdef CONFIG_ATA_VERBOSE_ERROR
		/* decode the status register; BSY masks everything else */
		if (res->status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ |
				   ATA_SENSE | ATA_ERR)) {
			if (res->status & ATA_BUSY)
				ata_dev_err(qc->dev, "status: { Busy }\n");
			else
				ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n",
				  res->status & ATA_DRDY ? "DRDY " : "",
				  res->status & ATA_DF ? "DF " : "",
				  res->status & ATA_DRQ ? "DRQ " : "",
				  res->status & ATA_SENSE ? "SENSE " : "",
				  res->status & ATA_ERR ? "ERR " : "");
		}

		/* decode the error register for non-PACKET commands */
		if (cmd->command != ATA_CMD_PACKET &&
		    (res->error & (ATA_ICRC | ATA_UNC | ATA_AMNF | ATA_IDNF |
				   ATA_ABORTED)))
			ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n",
			  res->error & ATA_ICRC ? "ICRC " : "",
			  res->error & ATA_UNC ? "UNC " : "",
			  res->error & ATA_AMNF ? "AMNF " : "",
			  res->error & ATA_IDNF ? "IDNF " : "",
			  res->error & ATA_ABORTED ? "ABRT " : "");
#endif
	}
}
2394
2395 /**
2396 * ata_eh_report - report error handling to user
2397 * @ap: ATA port to report EH about
2398 *
2399 * Report EH to user.
2400 *
2401 * LOCKING:
2402 * None.
2403 */
ata_eh_report(struct ata_port * ap)2404 void ata_eh_report(struct ata_port *ap)
2405 {
2406 struct ata_link *link;
2407
2408 ata_for_each_link(link, ap, HOST_FIRST)
2409 ata_eh_link_report(link);
2410 }
2411
/* Invoke @reset on @link with @deadline.  When @clear_classes is set,
 * the stored classification of every device on the link is invalidated
 * to ATA_DEV_UNKNOWN first so the reset method reclassifies from scratch.
 */
static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
			unsigned int *classes, unsigned long deadline,
			bool clear_classes)
{
	if (clear_classes) {
		struct ata_device *adev;

		ata_for_each_dev(adev, link, ALL)
			classes[adev->devno] = ATA_DEV_UNKNOWN;
	}

	return reset(link, classes, deadline);
}
2424
ata_eh_followup_srst_needed(struct ata_link * link,int rc)2425 static int ata_eh_followup_srst_needed(struct ata_link *link, int rc)
2426 {
2427 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link))
2428 return 0;
2429 if (rc == -EAGAIN)
2430 return 1;
2431 if (sata_pmp_supported(link->ap) && ata_is_host_link(link))
2432 return 1;
2433 return 0;
2434 }
2435
/**
 *	ata_eh_reset - reset @link and classify attached devices
 *	@link: link to reset
 *	@classify: if non-zero, misclassified devices cause a retry
 *	@prereset: prereset method (can be NULL)
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	Reset @link (and, if present, the port's slave link), classify
 *	the attached devices and cross-check classification against
 *	link onlineness.  Retries with escalating timeouts from
 *	ata_eh_reset_timeouts[] and falls back between reset methods
 *	as needed.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
 */
int ata_eh_reset(struct ata_link *link, int classify,
		 ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
		 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	struct ata_port *ap = link->ap;
	struct ata_link *slave = ap->slave_link;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL;
	unsigned int *classes = ehc->classes;
	unsigned int lflags = link->flags;
	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
	int max_tries = 0, try = 0;
	struct ata_link *failed_link;
	struct ata_device *dev;
	unsigned long deadline, now;
	ata_reset_fn_t reset;
	unsigned long flags;
	u32 sstatus;
	int nr_unknown, rc;

	/*
	 * Prepare to reset
	 */
	/* count entries in the per-try timeout table (ULONG_MAX terminated) */
	while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
		max_tries++;
	if (link->flags & ATA_LFLAG_RST_ONCE)
		max_tries = 1;
	if (link->flags & ATA_LFLAG_NO_HRST)
		hardreset = NULL;
	if (link->flags & ATA_LFLAG_NO_SRST)
		softreset = NULL;

	/* make sure each reset attempt is at least COOL_DOWN apart */
	if (ehc->i.flags & ATA_EHI_DID_RESET) {
		now = jiffies;
		WARN_ON(time_after(ehc->last_reset, now));
		deadline = ata_deadline(ehc->last_reset,
					ATA_EH_RESET_COOL_DOWN);
		if (time_before(now, deadline))
			schedule_timeout_uninterruptible(deadline - now);
	}

	/* flag the port as mid-reset; cleared again at "out" below */
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags |= ATA_PFLAG_RESETTING;
	spin_unlock_irqrestore(ap->lock, flags);

	ata_eh_about_to_do(link, NULL, ATA_EH_RESET);

	ata_for_each_dev(dev, link, ALL) {
		/* If we issue an SRST then an ATA drive (not ATAPI)
		 * may change configuration and be in PIO0 timing. If
		 * we do a hard reset (or are coming from power on)
		 * this is true for ATA or ATAPI. Until we've set a
		 * suitable controller mode we should not touch the
		 * bus as we may be talking too fast.
		 */
		dev->pio_mode = XFER_PIO_0;
		dev->dma_mode = 0xff;

		/* If the controller has a pio mode setup function
		 * then use it to set the chipset to rights. Don't
		 * touch the DMA setup as that will be dealt with when
		 * configuring devices.
		 */
		if (ap->ops->set_piomode)
			ap->ops->set_piomode(ap, dev);
	}

	/* prefer hardreset */
	reset = NULL;
	ehc->i.action &= ~ATA_EH_RESET;
	if (hardreset) {
		reset = hardreset;
		ehc->i.action |= ATA_EH_HARDRESET;
	} else if (softreset) {
		reset = softreset;
		ehc->i.action |= ATA_EH_SOFTRESET;
	}

	if (prereset) {
		unsigned long deadline = ata_deadline(jiffies,
						      ATA_EH_PRERESET_TIMEOUT);

		/* mirror the chosen reset action onto the slave link */
		if (slave) {
			sehc->i.action &= ~ATA_EH_RESET;
			sehc->i.action |= ehc->i.action;
		}

		rc = prereset(link, deadline);

		/* If present, do prereset on slave link too. Reset
		 * is skipped iff both master and slave links report
		 * -ENOENT or clear ATA_EH_RESET.
		 */
		if (slave && (rc == 0 || rc == -ENOENT)) {
			int tmp;

			tmp = prereset(slave, deadline);
			if (tmp != -ENOENT)
				rc = tmp;

			ehc->i.action |= sehc->i.action;
		}

		if (rc) {
			if (rc == -ENOENT) {
				/* -ENOENT means the port is disabled:
				 * mark devices absent and succeed quietly
				 */
				ata_link_dbg(link, "port disabled--ignoring\n");
				ehc->i.action &= ~ATA_EH_RESET;

				ata_for_each_dev(dev, link, ALL)
					classes[dev->devno] = ATA_DEV_NONE;

				rc = 0;
			} else
				ata_link_err(link,
					     "prereset failed (errno=%d)\n",
					     rc);
			goto out;
		}

		/* prereset() might have cleared ATA_EH_RESET. If so,
		 * bang classes, thaw and return.
		 */
		if (reset && !(ehc->i.action & ATA_EH_RESET)) {
			ata_for_each_dev(dev, link, ALL)
				classes[dev->devno] = ATA_DEV_NONE;
			if ((ap->pflags & ATA_PFLAG_FROZEN) &&
			    ata_is_host_link(link))
				ata_eh_thaw_port(ap);
			rc = 0;
			goto out;
		}
	}

 retry:
	/*
	 * Perform reset
	 */
	if (ata_is_host_link(link))
		ata_eh_freeze_port(ap);

	/* each retry gets the next (longer) timeout from the table */
	deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);

	if (reset) {
		if (verbose)
			ata_link_info(link, "%s resetting link\n",
				      reset == softreset ? "soft" : "hard");

		/* mark that this EH session started with reset */
		ehc->last_reset = jiffies;
		if (reset == hardreset) {
			ehc->i.flags |= ATA_EHI_DID_HARDRESET;
			trace_ata_link_hardreset_begin(link, classes, deadline);
		} else {
			ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
			trace_ata_link_softreset_begin(link, classes, deadline);
		}

		rc = ata_do_reset(link, reset, classes, deadline, true);
		if (reset == hardreset)
			trace_ata_link_hardreset_end(link, classes, rc);
		else
			trace_ata_link_softreset_end(link, classes, rc);
		/* -EAGAIN is not fatal here; it requests a follow-up SRST */
		if (rc && rc != -EAGAIN) {
			failed_link = link;
			goto fail;
		}

		/* hardreset slave link if existent */
		if (slave && reset == hardreset) {
			int tmp;

			if (verbose)
				ata_link_info(slave, "hard resetting link\n");

			ata_eh_about_to_do(slave, NULL, ATA_EH_RESET);
			trace_ata_slave_hardreset_begin(slave, classes,
							deadline);
			/* don't clobber master's classification results */
			tmp = ata_do_reset(slave, reset, classes, deadline,
					   false);
			trace_ata_slave_hardreset_end(slave, classes, tmp);
			switch (tmp) {
			case -EAGAIN:
				rc = -EAGAIN;
				break;
			case 0:
				break;
			default:
				failed_link = slave;
				rc = tmp;
				goto fail;
			}
		}

		/* perform follow-up SRST if necessary */
		if (reset == hardreset &&
		    ata_eh_followup_srst_needed(link, rc)) {
			reset = softreset;

			if (!reset) {
				ata_link_err(link,
					     "follow-up softreset required but no softreset available\n");
				failed_link = link;
				rc = -EINVAL;
				goto fail;
			}

			ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
			trace_ata_link_softreset_begin(link, classes, deadline);
			rc = ata_do_reset(link, reset, classes, deadline, true);
			trace_ata_link_softreset_end(link, classes, rc);
			if (rc) {
				failed_link = link;
				goto fail;
			}
		}
	} else {
		if (verbose)
			ata_link_info(link,
				      "no reset method available, skipping reset\n");
		/* without a reset, fall back to assuming an ATA device
		 * unless a class was explicitly assumed for the link
		 */
		if (!(lflags & ATA_LFLAG_ASSUME_CLASS))
			lflags |= ATA_LFLAG_ASSUME_ATA;
	}

	/*
	 * Post-reset processing
	 */
	ata_for_each_dev(dev, link, ALL) {
		/* After the reset, the device state is PIO 0 and the
		 * controller state is undefined. Reset also wakes up
		 * drives from sleeping mode.
		 */
		dev->pio_mode = XFER_PIO_0;
		dev->flags &= ~ATA_DFLAG_SLEEPING;

		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
			continue;

		/* apply class override */
		if (lflags & ATA_LFLAG_ASSUME_ATA)
			classes[dev->devno] = ATA_DEV_ATA;
		else if (lflags & ATA_LFLAG_ASSUME_SEMB)
			classes[dev->devno] = ATA_DEV_SEMB_UNSUP;
	}

	/* record current link speed */
	if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
		link->sata_spd = (sstatus >> 4) & 0xf;
	if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0)
		slave->sata_spd = (sstatus >> 4) & 0xf;

	/* thaw the port */
	if (ata_is_host_link(link))
		ata_eh_thaw_port(ap);

	/* postreset() should clear hardware SError. Although SError
	 * is cleared during link resume, clearing SError here is
	 * necessary as some PHYs raise hotplug events after SRST.
	 * This introduces race condition where hotplug occurs between
	 * reset and here. This race is mediated by cross checking
	 * link onlineness and classification result later.
	 */
	if (postreset) {
		postreset(link, classes);
		trace_ata_link_postreset(link, classes, rc);
		if (slave) {
			postreset(slave, classes);
			trace_ata_slave_postreset(slave, classes, rc);
		}
	}

	/*
	 * Some controllers can't be frozen very well and may set spurious
	 * error conditions during reset. Clear accumulated error
	 * information and re-thaw the port if frozen. As reset is the
	 * final recovery action and we cross check link onlineness against
	 * device classification later, no hotplug event is lost by this.
	 */
	spin_lock_irqsave(link->ap->lock, flags);
	memset(&link->eh_info, 0, sizeof(link->eh_info));
	if (slave)
		/* link->eh_info and slave->eh_info are the same type,
		 * so the sizeof below is correct for either
		 */
		memset(&slave->eh_info, 0, sizeof(link->eh_info));
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;
	spin_unlock_irqrestore(link->ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_FROZEN)
		ata_eh_thaw_port(ap);

	/*
	 * Make sure onlineness and classification result correspond.
	 * Hotplug could have happened during reset and some
	 * controllers fail to wait while a drive is spinning up after
	 * being hotplugged causing misdetection. By cross checking
	 * link on/offlineness and classification result, those
	 * conditions can be reliably detected and retried.
	 */
	nr_unknown = 0;
	ata_for_each_dev(dev, link, ALL) {
		if (ata_phys_link_online(ata_dev_phys_link(dev))) {
			if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
				ata_dev_dbg(dev, "link online but device misclassified\n");
				classes[dev->devno] = ATA_DEV_NONE;
				nr_unknown++;
			}
		} else if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
			if (ata_class_enabled(classes[dev->devno]))
				ata_dev_dbg(dev,
					    "link offline, clearing class %d to NONE\n",
					    classes[dev->devno]);
			classes[dev->devno] = ATA_DEV_NONE;
		} else if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
			ata_dev_dbg(dev,
				    "link status unknown, clearing UNKNOWN to NONE\n");
			classes[dev->devno] = ATA_DEV_NONE;
		}
	}

	/* if classification matters and retries remain, retry on
	 * misclassified devices; otherwise warn and carry on
	 */
	if (classify && nr_unknown) {
		if (try < max_tries) {
			ata_link_warn(link,
				      "link online but %d devices misclassified, retrying\n",
				      nr_unknown);
			failed_link = link;
			rc = -EAGAIN;
			goto fail;
		}
		ata_link_warn(link,
			      "link online but %d devices misclassified, "
			      "device detection might fail\n", nr_unknown);
	}

	/* reset successful, schedule revalidation */
	ata_eh_done(link, NULL, ATA_EH_RESET);
	if (slave)
		ata_eh_done(slave, NULL, ATA_EH_RESET);
	ehc->last_reset = jiffies;	/* update to completion time */
	ehc->i.action |= ATA_EH_REVALIDATE;
	link->lpm_policy = ATA_LPM_UNKNOWN;	/* reset LPM state */

	rc = 0;
 out:
	/* clear hotplug flag */
	ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
	if (slave)
		sehc->i.flags &= ~ATA_EHI_HOTPLUGGED;

	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~ATA_PFLAG_RESETTING;
	spin_unlock_irqrestore(ap->lock, flags);

	return rc;

 fail:
	/* if SCR isn't accessible on a fan-out port, PMP needs to be reset */
	if (!ata_is_host_link(link) &&
	    sata_scr_read(link, SCR_STATUS, &sstatus))
		rc = -ERESTART;

	if (try >= max_tries) {
		/*
		 * Thaw host port even if reset failed, so that the port
		 * can be retried on the next phy event. This risks
		 * repeated EH runs but seems to be a better tradeoff than
		 * shutting down a port after a botched hotplug attempt.
		 */
		if (ata_is_host_link(link))
			ata_eh_thaw_port(ap);
		goto out;
	}

	/* wait out the remainder of this try's deadline before retrying,
	 * releasing the EH exclusion so other ports can make progress
	 */
	now = jiffies;
	if (time_before(now, deadline)) {
		unsigned long delta = deadline - now;

		ata_link_warn(failed_link,
			"reset failed (errno=%d), retrying in %u secs\n",
			rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));

		ata_eh_release(ap);
		while (delta)
			delta = schedule_timeout_uninterruptible(delta);
		ata_eh_acquire(ap);
	}

	/*
	 * While disks spinup behind PMP, some controllers fail sending SRST.
	 * They need to be reset - as well as the PMP - before retrying.
	 */
	if (rc == -ERESTART) {
		if (ata_is_host_link(link))
			ata_eh_thaw_port(ap);
		goto out;
	}

	/* lower SATA speed on the last retry, or immediately on -EPIPE */
	if (try == max_tries - 1) {
		sata_down_spd_limit(link, 0);
		if (slave)
			sata_down_spd_limit(slave, 0);
	} else if (rc == -EPIPE)
		sata_down_spd_limit(failed_link, 0);

	/* go back to hardreset for the retry if one is available */
	if (hardreset)
		reset = hardreset;
	goto retry;
}
2841
/* Pull pending ATA_EH_PARK actions from eh_info into eh_context for
 * every device on @ap, under the host lock.
 */
static inline void ata_eh_pull_park_action(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/*
	 * This function can be thought of as an extended version of
	 * ata_eh_about_to_do() specially crafted to accommodate the
	 * requirements of ATA_EH_PARK handling. Since the EH thread
	 * does not leave the do {} while () loop in ata_eh_recover as
	 * long as the timeout for a park request to *one* device on
	 * the port has not expired, and since we still want to pick
	 * up park requests to other devices on the same port or
	 * timeout updates for the same device, we have to pull
	 * ATA_EH_PARK actions from eh_info into eh_context.i
	 * ourselves at the beginning of each pass over the loop.
	 *
	 * Additionally, all write accesses to &ap->park_req_pending
	 * through reinit_completion() (see below) or complete_all()
	 * (see ata_scsi_park_store()) are protected by the host lock.
	 * As a result we have that park_req_pending.done is zero on
	 * exit from this function, i.e. when ATA_EH_PARK actions for
	 * *all* devices on port ap have been pulled into the
	 * respective eh_context structs. If, and only if,
	 * park_req_pending.done is non-zero by the time we reach
	 * wait_for_completion_timeout(), another ATA_EH_PARK action
	 * has been scheduled for at least one of the devices on port
	 * ap and we have to cycle over the do {} while () loop in
	 * ata_eh_recover() again.
	 */

	spin_lock_irqsave(ap->lock, flags);
	reinit_completion(&ap->park_req_pending);
	ata_for_each_link(link, ap, EDGE) {
		ata_for_each_dev(dev, link, ALL) {
			struct ata_eh_info *ehi = &link->eh_info;

			/* move the PARK bit from eh_info to eh_context */
			link->eh_context.i.dev_action[dev->devno] |=
				ehi->dev_action[dev->devno] & ATA_EH_PARK;
			ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK);
		}
	}
	spin_unlock_irqrestore(ap->lock, flags);
}
2887
/* Park (@park non-zero) or unpark the heads of @dev and track the
 * result in the link's unloaded_mask.
 */
static void ata_eh_park_issue_cmd(struct ata_device *dev, int park)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);
	if (park) {
		ehc->unloaded_mask |= 1 << dev->devno;
		/* IDLE IMMEDIATE with the head-unload signature in the
		 * feature/LBA registers (presumably the UNLOAD feature
		 * defined by the ATA spec — the device acknowledges by
		 * returning 0xc4 in LBA low, checked below)
		 */
		tf.command = ATA_CMD_IDLEIMMEDIATE;
		tf.feature = 0x44;
		tf.lbal = 0x4c;
		tf.lbam = 0x4e;
		tf.lbah = 0x55;
	} else {
		/* any command wakes the heads; CHECK POWER MODE is benign */
		ehc->unloaded_mask &= ~(1 << dev->devno);
		tf.command = ATA_CMD_CHK_POWER;
	}

	tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
	tf.protocol = ATA_PROT_NODATA;
	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	if (park && (err_mask || tf.lbal != 0xc4)) {
		/* command failed or the device did not confirm the unload */
		ata_dev_err(dev, "head unload failed!\n");
		ehc->unloaded_mask &= ~(1 << dev->devno);
	}
}
2915
/* Revalidate enabled devices and attach newly found ones on @link.
 * On failure the offending device is returned through @r_failed_dev
 * along with a negative errno.
 */
static int ata_eh_revalidate_and_attach(struct ata_link *link,
					struct ata_device **r_failed_dev)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev;
	unsigned int new_mask = 0;	/* devnos of newly attached devices */
	unsigned long flags;
	int rc = 0;

	/* For PATA drive side cable detection to work, IDENTIFY must
	 * be done backwards such that PDIAG- is released by the slave
	 * device before the master device is identified.
	 */
	ata_for_each_dev(dev, link, ALL_REVERSE) {
		unsigned int action = ata_eh_dev_action(dev);
		unsigned int readid_flags = 0;

		if (ehc->i.flags & ATA_EHI_DID_RESET)
			readid_flags |= ATA_READID_POSTRESET;

		if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
			/* PMPs are revalidated by the PMP-specific EH path */
			WARN_ON(dev->class == ATA_DEV_PMP);

			/*
			 * The link may be in a deep sleep, wake it up.
			 *
			 * If the link is in deep sleep, ata_phys_link_offline()
			 * will return true, causing the revalidation to fail,
			 * which leads to a (potentially) needless hard reset.
			 *
			 * ata_eh_recover() will later restore the link policy
			 * to ap->target_lpm_policy after revalidation is done.
			 */
			if (link->lpm_policy > ATA_LPM_MAX_POWER) {
				rc = ata_eh_set_lpm(link, ATA_LPM_MAX_POWER,
						    r_failed_dev);
				if (rc)
					goto err;
			}

			if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
				rc = -EIO;
				goto err;
			}

			ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
			rc = ata_dev_revalidate(dev, ehc->classes[dev->devno],
						readid_flags);
			if (rc)
				goto err;

			ata_eh_done(link, dev, ATA_EH_REVALIDATE);

			/* Configuration may have changed, reconfigure
			 * transfer mode.
			 */
			ehc->i.flags |= ATA_EHI_SETMODE;

			/* schedule the scsi_rescan_device() here */
			schedule_work(&(ap->scsi_rescan_task));
		} else if (dev->class == ATA_DEV_UNKNOWN &&
			   ehc->tries[dev->devno] &&
			   ata_class_enabled(ehc->classes[dev->devno])) {
			/* Temporarily set dev->class, it will be
			 * permanently set once all configurations are
			 * complete. This is necessary because new
			 * device configuration is done in two
			 * separate loops.
			 */
			dev->class = ehc->classes[dev->devno];

			if (dev->class == ATA_DEV_PMP)
				rc = sata_pmp_attach(dev);
			else
				rc = ata_dev_read_id(dev, &dev->class,
						     readid_flags, dev->id);

			/* read_id might have changed class, store and reset */
			ehc->classes[dev->devno] = dev->class;
			dev->class = ATA_DEV_UNKNOWN;

			switch (rc) {
			case 0:
				/* clear error info accumulated during probe */
				ata_ering_clear(&dev->ering);
				new_mask |= 1 << dev->devno;
				break;
			case -ENOENT:
				/* IDENTIFY was issued to non-existent
				 * device. No need to reset. Just
				 * thaw and ignore the device.
				 */
				ata_eh_thaw_port(ap);
				break;
			default:
				goto err;
			}
		}
	}

	/* PDIAG- should have been released, ask cable type if post-reset */
	if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) {
		if (ap->ops->cable_detect)
			ap->cbl = ap->ops->cable_detect(ap);
		ata_force_cbl(ap);
	}

	/* Configure new devices forward such that user doesn't see
	 * device detection messages backwards.
	 */
	ata_for_each_dev(dev, link, ALL) {
		if (!(new_mask & (1 << dev->devno)))
			continue;

		/* now make the temporary classification permanent */
		dev->class = ehc->classes[dev->devno];

		/* PMPs were fully attached by sata_pmp_attach() above */
		if (dev->class == ATA_DEV_PMP)
			continue;

		ehc->i.flags |= ATA_EHI_PRINTINFO;
		rc = ata_dev_configure(dev);
		ehc->i.flags &= ~ATA_EHI_PRINTINFO;
		if (rc) {
			dev->class = ATA_DEV_UNKNOWN;
			goto err;
		}

		/* tell SCSI EH a hotplug scan is needed */
		spin_lock_irqsave(ap->lock, flags);
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
		spin_unlock_irqrestore(ap->lock, flags);

		/* new device discovered, configure xfermode */
		ehc->i.flags |= ATA_EHI_SETMODE;
	}

	return 0;

 err:
	*r_failed_dev = dev;
	return rc;
}
3058
3059 /**
3060 * ata_set_mode - Program timings and issue SET FEATURES - XFER
3061 * @link: link on which timings will be programmed
3062 * @r_failed_dev: out parameter for failed device
3063 *
3064 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If
3065 * ata_set_mode() fails, pointer to the failing device is
3066 * returned in @r_failed_dev.
3067 *
3068 * LOCKING:
3069 * PCI/etc. bus probe sem.
3070 *
3071 * RETURNS:
3072 * 0 on success, negative errno otherwise
3073 */
ata_set_mode(struct ata_link * link,struct ata_device ** r_failed_dev)3074 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
3075 {
3076 struct ata_port *ap = link->ap;
3077 struct ata_device *dev;
3078 int rc;
3079
3080 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */
3081 ata_for_each_dev(dev, link, ENABLED) {
3082 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
3083 struct ata_ering_entry *ent;
3084
3085 ent = ata_ering_top(&dev->ering);
3086 if (ent)
3087 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER;
3088 }
3089 }
3090
3091 /* has private set_mode? */
3092 if (ap->ops->set_mode)
3093 rc = ap->ops->set_mode(link, r_failed_dev);
3094 else
3095 rc = ata_do_set_mode(link, r_failed_dev);
3096
3097 /* if transfer mode has changed, set DUBIOUS_XFER on device */
3098 ata_for_each_dev(dev, link, ENABLED) {
3099 struct ata_eh_context *ehc = &link->eh_context;
3100 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
3101 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));
3102
3103 if (dev->xfer_mode != saved_xfer_mode ||
3104 ata_ncq_enabled(dev) != saved_ncq)
3105 dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
3106 }
3107
3108 return rc;
3109 }
3110
3111 /**
3112 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset
3113 * @dev: ATAPI device to clear UA for
3114 *
3115 * Resets and other operations can make an ATAPI device raise
3116 * UNIT ATTENTION which causes the next operation to fail. This
3117 * function clears UA.
3118 *
3119 * LOCKING:
3120 * EH context (may sleep).
3121 *
3122 * RETURNS:
3123 * 0 on success, -errno on failure.
3124 */
atapi_eh_clear_ua(struct ata_device * dev)3125 static int atapi_eh_clear_ua(struct ata_device *dev)
3126 {
3127 int i;
3128
3129 for (i = 0; i < ATA_EH_UA_TRIES; i++) {
3130 u8 *sense_buffer = dev->link->ap->sector_buf;
3131 u8 sense_key = 0;
3132 unsigned int err_mask;
3133
3134 err_mask = atapi_eh_tur(dev, &sense_key);
3135 if (err_mask != 0 && err_mask != AC_ERR_DEV) {
3136 ata_dev_warn(dev,
3137 "TEST_UNIT_READY failed (err_mask=0x%x)\n",
3138 err_mask);
3139 return -EIO;
3140 }
3141
3142 if (!err_mask || sense_key != UNIT_ATTENTION)
3143 return 0;
3144
3145 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
3146 if (err_mask) {
3147 ata_dev_warn(dev, "failed to clear "
3148 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
3149 return -EIO;
3150 }
3151 }
3152
3153 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n",
3154 ATA_EH_UA_TRIES);
3155
3156 return 0;
3157 }
3158
/**
 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary
 * @dev: ATA device which may need FLUSH retry
 *
 * If @dev failed FLUSH, it needs to be reported upper layer
 * immediately as it means that @dev failed to remap and already
 * lost at least a sector and further FLUSH retrials won't make
 * any difference to the lost sector.  However, if FLUSH failed
 * for other reasons, for example transmission error, FLUSH needs
 * to be retried.
 *
 * This function determines whether FLUSH failure retry is
 * necessary and performs it if so.
 *
 * LOCKING:
 * EH context (may sleep, issues an internal command).
 *
 * RETURNS:
 * 0 if EH can continue, -errno if EH needs to be repeated.
 */
static int ata_eh_maybe_retry_flush(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	unsigned int err_mask;
	int rc = 0;

	/* did flush fail for this device? */
	if (!ata_tag_valid(link->active_tag))
		return 0;

	/* only interested if the active command is FLUSH (EXT) on @dev */
	qc = __ata_qc_from_tag(ap, link->active_tag);
	if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT &&
			       qc->tf.command != ATA_CMD_FLUSH))
		return 0;

	/* if the device failed it, it should be reported to upper layers */
	if (qc->err_mask & AC_ERR_DEV)
		return 0;

	/* flush failed for some other reason, give it another shot */
	ata_tf_init(dev, &tf);

	/* reissue the same FLUSH/FLUSH EXT opcode as the failed qc */
	tf.command = qc->tf.command;
	tf.flags |= ATA_TFLAG_DEVICE;
	tf.protocol = ATA_PROT_NODATA;

	ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n",
		       tf.command, qc->err_mask);

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	if (!err_mask) {
		/*
		 * FLUSH is complete but there's no way to
		 * successfully complete a failed command from EH.
		 * Making sure retry is allowed at least once and
		 * retrying it should do the trick - whatever was in
		 * the cache is already on the platter and this won't
		 * cause infinite loop.
		 */
		qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1);
	} else {
		ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n",
			       err_mask);
		rc = -EIO;

		/* if device failed it, report it to upper layers */
		if (err_mask & AC_ERR_DEV) {
			qc->err_mask |= AC_ERR_DEV;
			qc->result_tf = tf;
			/* a frozen port still needs EH repeated; otherwise done */
			if (!(ap->pflags & ATA_PFLAG_FROZEN))
				rc = 0;
		}
	}
	return rc;
}
3234
/**
 * ata_eh_set_lpm - configure SATA interface power management
 * @link: link to configure power management
 * @policy: the link power management policy
 * @r_failed_dev: out parameter for failed device
 *
 * Enable SATA Interface power management.  This will enable
 * Device Interface Power Management (DIPM) for min_power and
 * medium_power_with_dipm policies, and then call driver specific
 * callbacks for enabling Host Initiated Power management.
 *
 * LOCKING:
 * EH context.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
			  struct ata_device **r_failed_dev)
{
	/* @ap is NULL for links hanging off a port multiplier */
	struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL;
	enum ata_lpm_policy old_policy = link->lpm_policy;
	bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM;
	unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM;
	unsigned int err_mask;
	int rc;

	/* if the link or host doesn't do LPM, noop */
	if (!IS_ENABLED(CONFIG_SATA_HOST) ||
	    (link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm))
		return 0;

	/*
	 * DIPM is enabled only for MIN_POWER as some devices
	 * misbehave when the host NACKs transition to SLUMBER.  Order
	 * device and link configurations such that the host always
	 * allows DIPM requests.
	 */
	ata_for_each_dev(dev, link, ENABLED) {
		bool hipm = ata_id_has_hipm(dev->id);
		bool dipm = ata_id_has_dipm(dev->id) && !no_dipm;

		/* find the first enabled and LPM enabled devices */
		if (!link_dev)
			link_dev = dev;

		if (!lpm_dev && (hipm || dipm))
			lpm_dev = dev;

		/* at least one device present; drop hints it can't satisfy */
		hints &= ~ATA_LPM_EMPTY;
		if (!hipm)
			hints &= ~ATA_LPM_HIPM;

		/* disable DIPM before changing link config */
		if (policy < ATA_LPM_MED_POWER_WITH_DIPM && dipm) {
			err_mask = ata_dev_set_feature(dev,
					SETFEATURES_SATA_DISABLE, SATA_DIPM);
			if (err_mask && err_mask != AC_ERR_DEV) {
				ata_dev_warn(dev,
					     "failed to disable DIPM, Emask 0x%x\n",
					     err_mask);
				rc = -EIO;
				goto fail;
			}
		}
	}

	/* host link: use the LLD callback (mirrored onto any slave link);
	 * PMP downstream link: program through the port multiplier.
	 */
	if (ap) {
		rc = ap->ops->set_lpm(link, policy, hints);
		if (!rc && ap->slave_link)
			rc = ap->ops->set_lpm(ap->slave_link, policy, hints);
	} else
		rc = sata_pmp_set_lpm(link, policy, hints);

	/*
	 * Attribute link config failure to the first (LPM) enabled
	 * device on the link.
	 */
	if (rc) {
		if (rc == -EOPNOTSUPP) {
			link->flags |= ATA_LFLAG_NO_LPM;
			return 0;
		}
		dev = lpm_dev ? lpm_dev : link_dev;
		goto fail;
	}

	/*
	 * Low level driver acked the transition.  Issue DIPM command
	 * with the new policy set.
	 */
	link->lpm_policy = policy;
	if (ap && ap->slave_link)
		ap->slave_link->lpm_policy = policy;

	/* host config updated, enable DIPM if transitioning to MIN_POWER */
	ata_for_each_dev(dev, link, ENABLED) {
		if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && !no_dipm &&
		    ata_id_has_dipm(dev->id)) {
			err_mask = ata_dev_set_feature(dev,
					SETFEATURES_SATA_ENABLE, SATA_DIPM);
			if (err_mask && err_mask != AC_ERR_DEV) {
				ata_dev_warn(dev,
					"failed to enable DIPM, Emask 0x%x\n",
					err_mask);
				rc = -EIO;
				goto fail;
			}
		}
	}

	link->last_lpm_change = jiffies;
	link->flags |= ATA_LFLAG_CHANGED;

	return 0;

fail:
	/* restore the old policy */
	link->lpm_policy = old_policy;
	if (ap && ap->slave_link)
		ap->slave_link->lpm_policy = old_policy;

	/* if no device or only one more chance is left, disable LPM */
	if (!dev || ehc->tries[dev->devno] <= 2) {
		ata_link_warn(link, "disabling LPM on the link\n");
		link->flags |= ATA_LFLAG_NO_LPM;
	}
	if (r_failed_dev)
		*r_failed_dev = dev;
	return rc;
}
3368
ata_link_nr_enabled(struct ata_link * link)3369 int ata_link_nr_enabled(struct ata_link *link)
3370 {
3371 struct ata_device *dev;
3372 int cnt = 0;
3373
3374 ata_for_each_dev(dev, link, ENABLED)
3375 cnt++;
3376 return cnt;
3377 }
3378
ata_link_nr_vacant(struct ata_link * link)3379 static int ata_link_nr_vacant(struct ata_link *link)
3380 {
3381 struct ata_device *dev;
3382 int cnt = 0;
3383
3384 ata_for_each_dev(dev, link, ALL)
3385 if (dev->class == ATA_DEV_UNKNOWN)
3386 cnt++;
3387 return cnt;
3388 }
3389
ata_eh_skip_recovery(struct ata_link * link)3390 static int ata_eh_skip_recovery(struct ata_link *link)
3391 {
3392 struct ata_port *ap = link->ap;
3393 struct ata_eh_context *ehc = &link->eh_context;
3394 struct ata_device *dev;
3395
3396 /* skip disabled links */
3397 if (link->flags & ATA_LFLAG_DISABLED)
3398 return 1;
3399
3400 /* skip if explicitly requested */
3401 if (ehc->i.flags & ATA_EHI_NO_RECOVERY)
3402 return 1;
3403
3404 /* thaw frozen port and recover failed devices */
3405 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link))
3406 return 0;
3407
3408 /* reset at least once if reset is requested */
3409 if ((ehc->i.action & ATA_EH_RESET) &&
3410 !(ehc->i.flags & ATA_EHI_DID_RESET))
3411 return 0;
3412
3413 /* skip if class codes for all vacant slots are ATA_DEV_NONE */
3414 ata_for_each_dev(dev, link, ALL) {
3415 if (dev->class == ATA_DEV_UNKNOWN &&
3416 ehc->classes[dev->devno] != ATA_DEV_NONE)
3417 return 0;
3418 }
3419
3420 return 1;
3421 }
3422
ata_count_probe_trials_cb(struct ata_ering_entry * ent,void * void_arg)3423 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg)
3424 {
3425 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL);
3426 u64 now = get_jiffies_64();
3427 int *trials = void_arg;
3428
3429 if ((ent->eflags & ATA_EFLAG_OLD_ER) ||
3430 (ent->timestamp < now - min(now, interval)))
3431 return -1;
3432
3433 (*trials)++;
3434 return 0;
3435 }
3436
/**
 * ata_eh_schedule_probe - schedule probing of @dev if requested
 * @dev: ATA device to consider for probing
 *
 * If probing was requested for @dev and hasn't been performed in
 * this EH round yet, detach and re-initialize the device, request
 * a reset and record the trial on the error ring.  Too many failed
 * probe trials in a row force the link speed down to 1.5Gbps.
 *
 * LOCKING:
 * EH context.
 *
 * RETURNS:
 * 1 if probing was scheduled, 0 otherwise.
 */
static int ata_eh_schedule_probe(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	struct ata_link *link = ata_dev_phys_link(dev);
	int trials = 0;

	/* noop unless probing is requested and hasn't been done yet */
	if (!(ehc->i.probe_mask & (1 << dev->devno)) ||
	    (ehc->did_probe_mask & (1 << dev->devno)))
		return 0;

	/* start from a clean device state and force a reset */
	ata_eh_detach_dev(dev);
	ata_dev_init(dev);
	ehc->did_probe_mask |= (1 << dev->devno);
	ehc->i.action |= ATA_EH_RESET;
	/* forget the saved transfer-mode/NCQ state of the old device */
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	/* the link maybe in a deep sleep, wake it up */
	if (link->lpm_policy > ATA_LPM_MAX_POWER) {
		if (ata_is_host_link(link))
			link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER,
					       ATA_LPM_EMPTY);
		else
			sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER,
					 ATA_LPM_EMPTY);
	}

	/* Record and count probe trials on the ering.  The specific
	 * error mask used is irrelevant.  Because a successful device
	 * detection clears the ering, this count accumulates only if
	 * there are consecutive failed probes.
	 *
	 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS
	 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is
	 * forced to 1.5Gbps.
	 *
	 * This is to work around cases where failed link speed
	 * negotiation results in device misdetection leading to
	 * infinite DEVXCHG or PHRDY CHG events.
	 */
	ata_ering_record(&dev->ering, 0, AC_ERR_OTHER);
	ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials);

	if (trials > ATA_EH_PROBE_TRIALS)
		sata_down_spd_limit(link, 1);

	return 1;
}
3485
/**
 * ata_eh_handle_dev_fail - bookkeeping after @dev failed recovery
 * @dev: device which failed
 * @err: -errno describing the failure
 *
 * Consume one recovery try (except for -EAGAIN), apply per-error
 * speed-down policy, and either disable the exhausted device or
 * schedule another reset.
 *
 * LOCKING:
 * EH context.
 *
 * RETURNS:
 * 1 if @dev was disabled (tries used up), 0 if a further reset
 * was scheduled instead.
 */
static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	/* -EAGAIN from EH routine indicates retry without prejudice.
	 * The requester is responsible for ensuring forward progress.
	 */
	if (err != -EAGAIN)
		ehc->tries[dev->devno]--;

	/* the cases below deliberately cascade: -ENODEV also gets the
	 * -EINVAL and -EIO treatment, -EINVAL also gets the -EIO one.
	 */
	switch (err) {
	case -ENODEV:
		/* device missing or wrong IDENTIFY data, schedule probing */
		ehc->i.probe_mask |= (1 << dev->devno);
		fallthrough;
	case -EINVAL:
		/* give it just one more chance */
		ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
		fallthrough;
	case -EIO:
		if (ehc->tries[dev->devno] == 1) {
			/* This is the last chance, better to slow
			 * down than lose it.
			 */
			sata_down_spd_limit(ata_dev_phys_link(dev), 0);
			if (dev->pio_mode > XFER_PIO_0)
				ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
		}
	}

	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
		/* disable device if it has used up all its chances */
		ata_dev_disable(dev);

		/* detach if offline */
		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
			ata_eh_detach_dev(dev);

		/* schedule probe if necessary */
		if (ata_eh_schedule_probe(dev)) {
			/* probing gets a fresh set of tries and timeouts */
			ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
			memset(ehc->cmd_timeout_idx[dev->devno], 0,
			       sizeof(ehc->cmd_timeout_idx[dev->devno]));
		}

		return 1;
	} else {
		ehc->i.action |= ATA_EH_RESET;
		return 0;
	}
}
3537
/**
 * ata_eh_recover - recover host port after error
 * @ap: host port to recover
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 * @r_failed_link: out parameter for failed link
 *
 * This is the alpha and omega, eum and yang, heart and soul of
 * libata exception handling.  On entry, actions required to
 * recover each link and hotplug requests are recorded in the
 * link's eh_context.  This function executes all the operations
 * with appropriate retrials and fallbacks to resurrect failed
 * devices, detach goners and greet newcomers.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
		   ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
		   ata_postreset_fn_t postreset,
		   struct ata_link **r_failed_link)
{
	struct ata_link *link;
	struct ata_device *dev;
	int rc, nr_fails;
	unsigned long flags, deadline;

	/* prep for recovery */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* re-enable link? */
		if (ehc->i.action & ATA_EH_ENABLE_LINK) {
			ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
			spin_lock_irqsave(ap->lock, flags);
			link->flags &= ~ATA_LFLAG_DISABLED;
			spin_unlock_irqrestore(ap->lock, flags);
			ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
		}

		ata_for_each_dev(dev, link, ALL) {
			if (link->flags & ATA_LFLAG_NO_RETRY)
				ehc->tries[dev->devno] = 1;
			else
				ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

			/* collect port action mask recorded in dev actions */
			ehc->i.action |= ehc->i.dev_action[dev->devno] &
					 ~ATA_EH_PERDEV_MASK;
			ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK;

			/* process hotplug request */
			if (dev->flags & ATA_DFLAG_DETACH)
				ata_eh_detach_dev(dev);

			/* schedule probe if necessary */
			if (!ata_dev_enabled(dev))
				ata_eh_schedule_probe(dev);
		}
	}

 retry:
	rc = 0;

	/* if UNLOADING, finish immediately */
	if (ap->pflags & ATA_PFLAG_UNLOADING)
		goto out;

	/* prep for EH */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* skip EH if possible. */
		if (ata_eh_skip_recovery(link))
			ehc->i.action = 0;

		ata_for_each_dev(dev, link, ALL)
			ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
	}

	/* reset */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		if (!(ehc->i.action & ATA_EH_RESET))
			continue;

		rc = ata_eh_reset(link, ata_link_nr_vacant(link),
				  prereset, softreset, hardreset, postreset);
		if (rc) {
			ata_link_err(link, "reset failed, giving up\n");
			goto out;
		}
	}

	/* park heads as requested, waiting out per-device unpark deadlines */
	do {
		unsigned long now;

		/*
		 * clears ATA_EH_PARK in eh_info and resets
		 * ap->park_req_pending
		 */
		ata_eh_pull_park_action(ap);

		deadline = jiffies;
		ata_for_each_link(link, ap, EDGE) {
			ata_for_each_dev(dev, link, ALL) {
				struct ata_eh_context *ehc = &link->eh_context;
				unsigned long tmp;

				if (dev->class != ATA_DEV_ATA &&
				    dev->class != ATA_DEV_ZAC)
					continue;
				if (!(ehc->i.dev_action[dev->devno] &
				      ATA_EH_PARK))
					continue;
				tmp = dev->unpark_deadline;
				if (time_before(deadline, tmp))
					deadline = tmp;
				else if (time_before_eq(tmp, jiffies))
					continue;
				if (ehc->unloaded_mask & (1 << dev->devno))
					continue;

				ata_eh_park_issue_cmd(dev, 1);
			}
		}

		now = jiffies;
		if (time_before_eq(deadline, now))
			break;

		/* drop the EH lock while sleeping for new park requests */
		ata_eh_release(ap);
		deadline = wait_for_completion_timeout(&ap->park_req_pending,
						       deadline - now);
		ata_eh_acquire(ap);
	} while (deadline);
	/* unpark every device that was parked above */
	ata_for_each_link(link, ap, EDGE) {
		ata_for_each_dev(dev, link, ALL) {
			if (!(link->eh_context.unloaded_mask &
			      (1 << dev->devno)))
				continue;

			ata_eh_park_issue_cmd(dev, 0);
			ata_eh_done(link, dev, ATA_EH_PARK);
		}
	}

	/* the rest */
	nr_fails = 0;
	ata_for_each_link(link, ap, PMP_FIRST) {
		struct ata_eh_context *ehc = &link->eh_context;

		if (sata_pmp_attached(ap) && ata_is_host_link(link))
			goto config_lpm;

		/* revalidate existing devices and attach new ones */
		rc = ata_eh_revalidate_and_attach(link, &dev);
		if (rc)
			goto rest_fail;

		/* if PMP got attached, return, pmp EH will take care of it */
		if (link->device->class == ATA_DEV_PMP) {
			ehc->i.action = 0;
			return 0;
		}

		/* configure transfer mode if necessary */
		if (ehc->i.flags & ATA_EHI_SETMODE) {
			rc = ata_set_mode(link, &dev);
			if (rc)
				goto rest_fail;
			ehc->i.flags &= ~ATA_EHI_SETMODE;
		}

		/* If reset has been issued, clear UA to avoid
		 * disrupting the current users of the device.
		 */
		if (ehc->i.flags & ATA_EHI_DID_RESET) {
			ata_for_each_dev(dev, link, ALL) {
				if (dev->class != ATA_DEV_ATAPI)
					continue;
				rc = atapi_eh_clear_ua(dev);
				if (rc)
					goto rest_fail;
				if (zpodd_dev_enabled(dev))
					zpodd_post_poweron(dev);
			}
		}

		/* retry flush if necessary */
		ata_for_each_dev(dev, link, ALL) {
			if (dev->class != ATA_DEV_ATA &&
			    dev->class != ATA_DEV_ZAC)
				continue;
			rc = ata_eh_maybe_retry_flush(dev);
			if (rc)
				goto rest_fail;
		}

	config_lpm:
		/* configure link power saving */
		if (link->lpm_policy != ap->target_lpm_policy) {
			rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev);
			if (rc)
				goto rest_fail;
		}

		/* this link is okay now */
		ehc->i.flags = 0;
		continue;

	rest_fail:
		nr_fails++;
		if (dev)
			ata_eh_handle_dev_fail(dev, rc);

		if (ap->pflags & ATA_PFLAG_FROZEN) {
			/* PMP reset requires working host port.
			 * Can't retry if it's frozen.
			 */
			if (sata_pmp_attached(ap))
				goto out;
			break;
		}
	}

	/* any per-link failure restarts the whole recovery pass */
	if (nr_fails)
		goto retry;

 out:
	if (rc && r_failed_link)
		*r_failed_link = link;

	return rc;
}
3779
3780 /**
3781 * ata_eh_finish - finish up EH
3782 * @ap: host port to finish EH for
3783 *
3784 * Recovery is complete. Clean up EH states and retry or finish
3785 * failed qcs.
3786 *
3787 * LOCKING:
3788 * None.
3789 */
ata_eh_finish(struct ata_port * ap)3790 void ata_eh_finish(struct ata_port *ap)
3791 {
3792 struct ata_queued_cmd *qc;
3793 int tag;
3794
3795 /* retry or finish qcs */
3796 ata_qc_for_each_raw(ap, qc, tag) {
3797 if (!(qc->flags & ATA_QCFLAG_FAILED))
3798 continue;
3799
3800 if (qc->err_mask) {
3801 /* FIXME: Once EH migration is complete,
3802 * generate sense data in this function,
3803 * considering both err_mask and tf.
3804 */
3805 if (qc->flags & ATA_QCFLAG_RETRY)
3806 ata_eh_qc_retry(qc);
3807 else
3808 ata_eh_qc_complete(qc);
3809 } else {
3810 if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
3811 ata_eh_qc_complete(qc);
3812 } else {
3813 /* feed zero TF to sense generation */
3814 memset(&qc->result_tf, 0, sizeof(qc->result_tf));
3815 ata_eh_qc_retry(qc);
3816 }
3817 }
3818 }
3819
3820 /* make sure nr_active_links is zero after EH */
3821 WARN_ON(ap->nr_active_links);
3822 ap->nr_active_links = 0;
3823 }
3824
3825 /**
3826 * ata_do_eh - do standard error handling
3827 * @ap: host port to handle error for
3828 *
3829 * @prereset: prereset method (can be NULL)
3830 * @softreset: softreset method (can be NULL)
3831 * @hardreset: hardreset method (can be NULL)
3832 * @postreset: postreset method (can be NULL)
3833 *
3834 * Perform standard error handling sequence.
3835 *
3836 * LOCKING:
3837 * Kernel thread context (may sleep).
3838 */
ata_do_eh(struct ata_port * ap,ata_prereset_fn_t prereset,ata_reset_fn_t softreset,ata_reset_fn_t hardreset,ata_postreset_fn_t postreset)3839 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
3840 ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
3841 ata_postreset_fn_t postreset)
3842 {
3843 struct ata_device *dev;
3844 int rc;
3845
3846 ata_eh_autopsy(ap);
3847 ata_eh_report(ap);
3848
3849 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
3850 NULL);
3851 if (rc) {
3852 ata_for_each_dev(dev, &ap->link, ALL)
3853 ata_dev_disable(dev);
3854 }
3855
3856 ata_eh_finish(ap);
3857 }
3858
3859 /**
3860 * ata_std_error_handler - standard error handler
3861 * @ap: host port to handle error for
3862 *
3863 * Standard error handler
3864 *
3865 * LOCKING:
3866 * Kernel thread context (may sleep).
3867 */
ata_std_error_handler(struct ata_port * ap)3868 void ata_std_error_handler(struct ata_port *ap)
3869 {
3870 struct ata_port_operations *ops = ap->ops;
3871 ata_reset_fn_t hardreset = ops->hardreset;
3872
3873 /* ignore built-in hardreset if SCR access is not available */
3874 if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link))
3875 hardreset = NULL;
3876
3877 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
3878 }
3879 EXPORT_SYMBOL_GPL(ata_std_error_handler);
3880
3881 #ifdef CONFIG_PM
/**
 * ata_eh_handle_port_suspend - perform port suspend operation
 * @ap: port to suspend
 *
 * Suspend @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
	unsigned long flags;
	int rc = 0;
	struct ata_device *dev;

	/* are we suspending? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event & PM_EVENT_RESUME) {
		/* no PM request pending, or the pending request is a resume */
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

	/*
	 * If we have a ZPODD attached, check its zero
	 * power ready status before the port is frozen.
	 * Only needed for runtime suspend.
	 */
	if (PMSG_IS_AUTO(ap->pm_mesg)) {
		ata_for_each_dev(dev, &ap->link, ENABLED) {
			if (zpodd_dev_enabled(dev))
				zpodd_on_suspend(dev);
		}
	}

	/* suspend */
	ata_eh_freeze_port(ap);

	if (ap->ops->port_suspend)
		rc = ap->ops->port_suspend(ap, ap->pm_mesg);

	ata_acpi_set_state(ap, ap->pm_mesg);

	/* update the flags */
	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
	if (rc == 0)
		ap->pflags |= ATA_PFLAG_SUSPENDED;
	else if (ap->pflags & ATA_PFLAG_FROZEN)
		/* suspend failed while frozen - re-run EH to handle it */
		ata_port_schedule_eh(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	return;
}
3941
/**
 * ata_eh_handle_port_resume - perform port resume operation
 * @ap: port to resume
 *
 * Resume @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* are we resuming? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    !(ap->pm_mesg.event & PM_EVENT_RESUME)) {
		/* no PM request pending, or the pending request is a suspend */
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));

	/*
	 * Error timestamps are in jiffies which doesn't run while
	 * suspended and PHY events during resume isn't too uncommon.
	 * When the two are combined, it can lead to unnecessary speed
	 * downs if the machine is suspended and resumed repeatedly.
	 * Clear error history.
	 */
	ata_for_each_link(link, ap, HOST_FIRST)
		ata_for_each_dev(dev, link, ALL)
			ata_ering_clear(&dev->ering);

	ata_acpi_set_state(ap, ap->pm_mesg);

	if (ap->ops->port_resume)
		ap->ops->port_resume(ap);

	/* tell ACPI that we're resuming */
	ata_acpi_on_resume(ap);

	/* update the flags */
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
	spin_unlock_irqrestore(ap->lock, flags);
}
3992 #endif /* CONFIG_PM */
3993