1 /*
2 * scsi_error.c Copyright (C) 1997 Eric Youngdale
3 *
4 * SCSI error/timeout handling
5 * Initial versions: Eric Youngdale. Based upon conversations with
6 * Leonard Zubkoff and David Miller at Linux Expo,
7 * ideas originating from all over the place.
8 *
9 */
10
11 #define __NO_VERSION__
12 #include <linux/module.h>
13
14 #include <linux/sched.h>
15 #include <linux/timer.h>
16 #include <linux/string.h>
17 #include <linux/slab.h>
18 #include <linux/ioport.h>
19 #include <linux/kernel.h>
20 #include <linux/stat.h>
21 #include <linux/blk.h>
22 #include <linux/interrupt.h>
23 #include <linux/delay.h>
24 #include <linux/smp_lock.h>
25
26 #define __KERNEL_SYSCALLS__
27
28 #include <linux/unistd.h>
29
30 #include <asm/system.h>
31 #include <asm/irq.h>
32 #include <asm/dma.h>
33
34 #include "scsi.h"
35 #include "hosts.h"
36 #include "constants.h"
37
38 /*
39 * We must always allow SHUTDOWN_SIGS. Even if we are not a module,
40 * the host drivers that we are using may be loaded as modules, and
41 * when we unload these, we need to ensure that the error handler thread
42 * can be shut down.
43 *
44 * Note - when we unload a module, we send a SIGHUP. We mustn't
45 * enable SIGTERM, as this is how the init shuts things down when you
46 * go to single-user mode. For that matter, init also sends SIGKILL,
47 * so we mustn't enable that one either. We use SIGHUP instead. Other
48 * options would be SIGPWR, I suppose.
49 */
50 #define SHUTDOWN_SIGS (sigmask(SIGHUP))
51
52 #ifdef DEBUG
53 #define SENSE_TIMEOUT SCSI_TIMEOUT
54 #define ABORT_TIMEOUT SCSI_TIMEOUT
55 #define RESET_TIMEOUT SCSI_TIMEOUT
56 #else
57 #define SENSE_TIMEOUT (10*HZ)
58 #define RESET_TIMEOUT (2*HZ)
59 #define ABORT_TIMEOUT (15*HZ)
60 #endif
61
62 #define STATIC
63
/*
 * Settle times observed after a bus reset and after a host reset.
 *
 * These should *probably* be handled by the host itself.
 * Since it is allowed to sleep, it probably should.
 *
 * The expansions are parenthesized so that each macro behaves as a single
 * value when used inside a larger expression (for example as a divisor).
 */
#define BUS_RESET_SETTLE_TIME	(5*HZ)
#define HOST_RESET_SETTLE_TIME	(10*HZ)
70
71
72 static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_error.c,v 1.10 1997/12/08 04:50:35 eric Exp $";
73
74 STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt);
75 STATIC int scsi_request_sense(Scsi_Cmnd *);
76 STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout);
77 STATIC int scsi_try_to_abort_command(Scsi_Cmnd *, int);
78 STATIC int scsi_test_unit_ready(Scsi_Cmnd *);
79 STATIC int scsi_try_bus_device_reset(Scsi_Cmnd *, int timeout);
80 STATIC int scsi_try_bus_reset(Scsi_Cmnd *);
81 STATIC int scsi_try_host_reset(Scsi_Cmnd *);
82 STATIC int scsi_unit_is_ready(Scsi_Cmnd *);
83 STATIC void scsi_eh_action_done(Scsi_Cmnd *, int);
84 STATIC int scsi_eh_retry_command(Scsi_Cmnd *);
85 STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt);
86 STATIC void scsi_restart_operations(struct Scsi_Host *);
87 STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt);
88
89
90 /*
91 * Function: scsi_add_timer()
92 *
93 * Purpose: Start timeout timer for a single scsi command.
94 *
95 * Arguments: SCset - command that is about to start running.
96 * timeout - amount of time to allow this command to run.
97 * complete - timeout function to call if timer isn't
98 * canceled.
99 *
100 * Returns: Nothing
101 *
102 * Notes: This should be turned into an inline function.
103 *
104 * More Notes: Each scsi command has it's own timer, and as it is added to
105 * the queue, we set up the timer. When the command completes,
106 * we cancel the timer. Pretty simple, really, especially
107 * compared to the old way of handling this crap.
108 */
scsi_add_timer(Scsi_Cmnd * SCset,int timeout,void (* complete)(Scsi_Cmnd *))109 void scsi_add_timer(Scsi_Cmnd * SCset,
110 int timeout,
111 void (*complete) (Scsi_Cmnd *))
112 {
113 SCset->eh_timeout.data = (unsigned long) SCset;
114 SCset->eh_timeout.function = (void (*)(unsigned long)) complete;
115 mod_timer(&SCset->eh_timeout, jiffies + timeout);
116
117 SCset->done_late = 0;
118
119 SCSI_LOG_ERROR_RECOVERY(5, printk("Adding timer for command %p at %d (%p)\n", SCset, timeout, complete));
120 }
121
122 /*
123 * Function: scsi_delete_timer()
124 *
125 * Purpose: Delete/cancel timer for a given function.
126 *
127 * Arguments: SCset - command that we are canceling timer for.
128 *
129 * Returns: 1 if we were able to detach the timer. 0 if we
130 * blew it, and the timer function has already started
131 * to run.
132 *
133 * Notes: This should be turned into an inline function.
134 */
scsi_delete_timer(Scsi_Cmnd * SCset)135 int scsi_delete_timer(Scsi_Cmnd * SCset)
136 {
137 int rtn;
138
139 rtn = del_timer(&SCset->eh_timeout);
140
141 SCSI_LOG_ERROR_RECOVERY(5, printk("Clearing timer for command %p %d\n", SCset, rtn));
142
143 SCset->eh_timeout.data = (unsigned long) NULL;
144 SCset->eh_timeout.function = NULL;
145
146 return rtn;
147 }
148
149 /*
150 * Function: scsi_times_out()
151 *
152 * Purpose: Timeout function for normal scsi commands..
153 *
154 * Arguments: SCpnt - command that is timing out.
155 *
156 * Returns: Nothing.
157 *
158 * Notes: We do not need to lock this. There is the potential for
159 * a race only in that the normal completion handling might
160 * run, but if the normal completion function determines
161 * that the timer has already fired, then it mustn't do
162 * anything.
163 */
scsi_times_out(Scsi_Cmnd * SCpnt)164 void scsi_times_out(Scsi_Cmnd * SCpnt)
165 {
166 /*
167 * Notify the low-level code that this operation failed and we are
168 * reposessing the command.
169 */
170 #ifdef ERIC_neverdef
171 /*
172 * FIXME(eric)
173 * Allow the host adapter to push a queue ordering tag
174 * out to the bus to force the command in question to complete.
175 * If the host wants to do this, then we just restart the timer
176 * for the command. Before we really do this, some real thought
177 * as to the optimum way to handle this should be done. We *do*
178 * need to force ordering every so often to ensure that all requests
179 * do eventually complete, but I am not sure if this is the best way
180 * to actually go about it.
181 *
182 * Better yet, force a sync here, but don't block since we are in an
183 * interrupt.
184 */
185 if (SCpnt->host->hostt->eh_ordered_queue_tag) {
186 if ((*SCpnt->host->hostt->eh_ordered_queue_tag) (SCpnt)) {
187 scsi_add_timer(SCpnt, SCpnt->internal_timeout,
188 scsi_times_out);
189 return;
190 }
191 }
192 /*
193 * FIXME(eric) - add a second special interface to handle this
194 * case. Ideally that interface can also be used to request
195 * a queu
196 */
197 if (SCpnt->host->can_queue) {
198 SCpnt->host->hostt->queuecommand(SCpnt, NULL);
199 }
200 #endif
201
202 /* Set the serial_number_at_timeout to the current serial_number */
203 SCpnt->serial_number_at_timeout = SCpnt->serial_number;
204
205 SCpnt->eh_state = FAILED;
206 SCpnt->state = SCSI_STATE_TIMEOUT;
207 SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
208
209 SCpnt->host->in_recovery = 1;
210 SCpnt->host->host_failed++;
211
212 SCSI_LOG_TIMEOUT(3, printk("Command timed out active=%d busy=%d failed=%d\n",
213 atomic_read(&SCpnt->host->host_active),
214 SCpnt->host->host_busy,
215 SCpnt->host->host_failed));
216
217 /*
218 * If the host is having troubles, then look to see if this was the last
219 * command that might have failed. If so, wake up the error handler.
220 */
221 if( SCpnt->host->eh_wait == NULL ) {
222 panic("Error handler thread not present at %p %p %s %d",
223 SCpnt, SCpnt->host, __FILE__, __LINE__);
224 }
225 if (SCpnt->host->host_busy == SCpnt->host->host_failed) {
226 up(SCpnt->host->eh_wait);
227 }
228 }
229
230 /*
231 * Function scsi_block_when_processing_errors
232 *
233 * Purpose: Prevent more commands from being queued while error recovery
234 * is taking place.
235 *
236 * Arguments: SDpnt - device on which we are performing recovery.
237 *
238 * Returns: FALSE The device was taken offline by error recovery.
239 * TRUE OK to proceed.
240 *
241 * Notes: We block until the host is out of error recovery, and then
242 * check to see whether the host or the device is offline.
243 */
scsi_block_when_processing_errors(Scsi_Device * SDpnt)244 int scsi_block_when_processing_errors(Scsi_Device * SDpnt)
245 {
246
247 SCSI_SLEEP(&SDpnt->host->host_wait, SDpnt->host->in_recovery);
248
249 SCSI_LOG_ERROR_RECOVERY(5, printk("Open returning %d\n", SDpnt->online));
250
251 return SDpnt->online;
252 }
253
254 /*
255 * Function: scsi_eh_times_out()
256 *
257 * Purpose: Timeout function for error handling.
258 *
259 * Arguments: SCpnt - command that is timing out.
260 *
261 * Returns: Nothing.
262 *
263 * Notes: During error handling, the kernel thread will be sleeping
264 * waiting for some action to complete on the device. Our only
265 * job is to record that it timed out, and to wake up the
266 * thread.
267 */
268 STATIC
scsi_eh_times_out(Scsi_Cmnd * SCpnt)269 void scsi_eh_times_out(Scsi_Cmnd * SCpnt)
270 {
271 SCpnt->eh_state = SCSI_STATE_TIMEOUT;
272 SCSI_LOG_ERROR_RECOVERY(5, printk("In scsi_eh_times_out %p\n", SCpnt));
273
274 if (SCpnt->host->eh_action != NULL)
275 up(SCpnt->host->eh_action);
276 else
277 printk("Missing scsi error handler thread\n");
278 }
279
280
281 /*
282 * Function: scsi_eh_done()
283 *
284 * Purpose: Completion function for error handling.
285 *
286 * Arguments: SCpnt - command that is timing out.
287 *
288 * Returns: Nothing.
289 *
290 * Notes: During error handling, the kernel thread will be sleeping
291 * waiting for some action to complete on the device. Our only
292 * job is to record that the action completed, and to wake up the
293 * thread.
294 */
295 STATIC
scsi_eh_done(Scsi_Cmnd * SCpnt)296 void scsi_eh_done(Scsi_Cmnd * SCpnt)
297 {
298 int rtn;
299
300 /*
301 * If the timeout handler is already running, then just set the
302 * flag which says we finished late, and return. We have no
303 * way of stopping the timeout handler from running, so we must
304 * always defer to it.
305 */
306 rtn = del_timer(&SCpnt->eh_timeout);
307 if (!rtn) {
308 SCpnt->done_late = 1;
309 return;
310 }
311
312 SCpnt->request.rq_status = RQ_SCSI_DONE;
313
314 SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
315 SCpnt->eh_state = SUCCESS;
316
317 SCSI_LOG_ERROR_RECOVERY(5, printk("In eh_done %p result:%x\n", SCpnt,
318 SCpnt->result));
319
320 if (SCpnt->host->eh_action != NULL)
321 up(SCpnt->host->eh_action);
322 }
323
324 /*
325 * Function: scsi_eh_action_done()
326 *
327 * Purpose: Completion function for error handling.
328 *
329 * Arguments: SCpnt - command that is timing out.
330 * answer - boolean that indicates whether operation succeeded.
331 *
332 * Returns: Nothing.
333 *
334 * Notes: This callback is only used for abort and reset operations.
335 */
336 STATIC
scsi_eh_action_done(Scsi_Cmnd * SCpnt,int answer)337 void scsi_eh_action_done(Scsi_Cmnd * SCpnt, int answer)
338 {
339 SCpnt->request.rq_status = RQ_SCSI_DONE;
340
341 SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
342 SCpnt->eh_state = (answer ? SUCCESS : FAILED);
343
344 if (SCpnt->host->eh_action != NULL)
345 up(SCpnt->host->eh_action);
346 }
347
348 /*
349 * Function: scsi_sense_valid()
350 *
351 * Purpose: Determine whether a host has automatically obtained sense
352 * information or not. If we have it, then give a recommendation
353 * as to what we should do next.
354 */
scsi_sense_valid(Scsi_Cmnd * SCpnt)355 int scsi_sense_valid(Scsi_Cmnd * SCpnt)
356 {
357 if (((SCpnt->sense_buffer[0] & 0x70) >> 4) != 7) {
358 return FALSE;
359 }
360 return TRUE;
361 }
362
363 /*
364 * Function: scsi_eh_retry_command()
365 *
366 * Purpose: Retry the original command
367 *
368 * Returns: SUCCESS - we were able to get the sense data.
369 * FAILED - we were not able to get the sense data.
370 *
371 * Notes: This function will *NOT* return until the command either
372 * times out, or it completes.
373 */
scsi_eh_retry_command(Scsi_Cmnd * SCpnt)374 STATIC int scsi_eh_retry_command(Scsi_Cmnd * SCpnt)
375 {
376 memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
377 sizeof(SCpnt->data_cmnd));
378 SCpnt->request_buffer = SCpnt->buffer;
379 SCpnt->request_bufflen = SCpnt->bufflen;
380 SCpnt->use_sg = SCpnt->old_use_sg;
381 SCpnt->cmd_len = SCpnt->old_cmd_len;
382 SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
383 SCpnt->underflow = SCpnt->old_underflow;
384
385 scsi_send_eh_cmnd(SCpnt, SCpnt->timeout_per_command);
386
387 /*
388 * Hey, we are done. Let's look to see what happened.
389 */
390 return SCpnt->eh_state;
391 }
392
393 /*
394 * Function: scsi_request_sense()
395 *
396 * Purpose: Request sense data from a particular target.
397 *
398 * Returns: SUCCESS - we were able to get the sense data.
399 * FAILED - we were not able to get the sense data.
400 *
401 * Notes: Some hosts automatically obtain this information, others
402 * require that we obtain it on our own.
403 *
404 * This function will *NOT* return until the command either
405 * times out, or it completes.
406 */
scsi_request_sense(Scsi_Cmnd * SCpnt)407 STATIC int scsi_request_sense(Scsi_Cmnd * SCpnt)
408 {
409 static unsigned char generic_sense[6] =
410 {REQUEST_SENSE, 0, 0, 0, 255, 0};
411 unsigned char scsi_result0[256], *scsi_result = NULL;
412 int saved_result;
413 int saved_resid;
414
415 ASSERT_LOCK(&io_request_lock, 0);
416
417 memcpy((void *) SCpnt->cmnd, (void *) generic_sense,
418 sizeof(generic_sense));
419
420 if (SCpnt->device->scsi_level <= SCSI_2)
421 SCpnt->cmnd[1] = SCpnt->lun << 5;
422
423 scsi_result = (!SCpnt->host->hostt->unchecked_isa_dma)
424 ? &scsi_result0[0] : kmalloc(512, GFP_ATOMIC | GFP_DMA);
425
426 if (scsi_result == NULL) {
427 printk("cannot allocate scsi_result in scsi_request_sense.\n");
428 return FAILED;
429 }
430 /*
431 * Zero the sense buffer. Some host adapters automatically always request
432 * sense, so it is not a good idea that SCpnt->request_buffer and
433 * SCpnt->sense_buffer point to the same address (DB).
434 * 0 is not a valid sense code.
435 */
436 memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
437 memset((void *) scsi_result, 0, 256);
438
439 saved_result = SCpnt->result;
440 saved_resid = SCpnt->resid;
441 SCpnt->request_buffer = scsi_result;
442 SCpnt->request_bufflen = 256;
443 SCpnt->use_sg = 0;
444 SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
445 SCpnt->sc_data_direction = SCSI_DATA_READ;
446 SCpnt->underflow = 0;
447
448 scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);
449
450 /* Last chance to have valid sense data */
451 if (!scsi_sense_valid(SCpnt))
452 memcpy((void *) SCpnt->sense_buffer,
453 SCpnt->request_buffer,
454 sizeof(SCpnt->sense_buffer));
455
456 if (scsi_result != &scsi_result0[0] && scsi_result != NULL)
457 kfree(scsi_result);
458
459 /*
460 * When we eventually call scsi_finish, we really wish to complete
461 * the original request, so let's restore the original data. (DB)
462 */
463 memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
464 sizeof(SCpnt->data_cmnd));
465 SCpnt->result = saved_result;
466 SCpnt->resid = saved_resid;
467 SCpnt->request_buffer = SCpnt->buffer;
468 SCpnt->request_bufflen = SCpnt->bufflen;
469 SCpnt->use_sg = SCpnt->old_use_sg;
470 SCpnt->cmd_len = SCpnt->old_cmd_len;
471 SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
472 SCpnt->underflow = SCpnt->old_underflow;
473
474 /*
475 * Hey, we are done. Let's look to see what happened.
476 */
477 return SCpnt->eh_state;
478 }
479
480 /*
481 * Function: scsi_test_unit_ready()
482 *
483 * Purpose: Run test unit ready command to see if the device is talking to us or not.
484 *
485 */
scsi_test_unit_ready(Scsi_Cmnd * SCpnt)486 STATIC int scsi_test_unit_ready(Scsi_Cmnd * SCpnt)
487 {
488 static unsigned char tur_command[6] =
489 {TEST_UNIT_READY, 0, 0, 0, 0, 0};
490 int saved_resid;
491
492 memcpy((void *) SCpnt->cmnd, (void *) tur_command,
493 sizeof(tur_command));
494
495 if (SCpnt->device->scsi_level <= SCSI_2)
496 SCpnt->cmnd[1] = SCpnt->lun << 5;
497
498 /*
499 * Zero the sense buffer. The SCSI spec mandates that any
500 * untransferred sense data should be interpreted as being zero.
501 */
502 memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
503
504 saved_resid = SCpnt->resid;
505 SCpnt->request_buffer = NULL;
506 SCpnt->request_bufflen = 0;
507 SCpnt->use_sg = 0;
508 SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
509 SCpnt->underflow = 0;
510 SCpnt->sc_data_direction = SCSI_DATA_NONE;
511
512 scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);
513
514 /*
515 * When we eventually call scsi_finish, we really wish to complete
516 * the original request, so let's restore the original data. (DB)
517 */
518 memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
519 sizeof(SCpnt->data_cmnd));
520 SCpnt->resid = saved_resid;
521 SCpnt->request_buffer = SCpnt->buffer;
522 SCpnt->request_bufflen = SCpnt->bufflen;
523 SCpnt->use_sg = SCpnt->old_use_sg;
524 SCpnt->cmd_len = SCpnt->old_cmd_len;
525 SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
526 SCpnt->underflow = SCpnt->old_underflow;
527
528 /*
529 * Hey, we are done. Let's look to see what happened.
530 */
531 SCSI_LOG_ERROR_RECOVERY(3,
532 printk("scsi_test_unit_ready: SCpnt %p eh_state %x\n",
533 SCpnt, SCpnt->eh_state));
534 return SCpnt->eh_state;
535 }
536
537 /*
538 * This would normally need to get the IO request lock,
539 * but as it doesn't actually touch anything that needs
540 * to be locked we can avoid the lock here..
541 */
542 STATIC
scsi_sleep_done(struct semaphore * sem)543 void scsi_sleep_done(struct semaphore *sem)
544 {
545 if (sem != NULL) {
546 up(sem);
547 }
548 }
549
scsi_sleep(int timeout)550 void scsi_sleep(int timeout)
551 {
552 DECLARE_MUTEX_LOCKED(sem);
553 struct timer_list timer;
554
555 init_timer(&timer);
556 timer.data = (unsigned long) &sem;
557 timer.expires = jiffies + timeout;
558 timer.function = (void (*)(unsigned long)) scsi_sleep_done;
559
560 SCSI_LOG_ERROR_RECOVERY(5, printk("Sleeping for timer tics %d\n", timeout));
561
562 add_timer(&timer);
563
564 down(&sem);
565 del_timer(&timer);
566 }
567
568 /*
569 * Function: scsi_send_eh_cmnd
570 *
571 * Purpose: Send a command out to a device as part of error recovery.
572 *
573 * Notes: The initialization of the structures is quite a bit different
574 * in this case, and furthermore, there is a different completion
575 * handler.
576 */
scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt,int timeout)577 STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout)
578 {
579 unsigned long flags;
580 struct Scsi_Host *host;
581
582 ASSERT_LOCK(&io_request_lock, 0);
583
584 host = SCpnt->host;
585
586 retry:
587 /*
588 * We will use a queued command if possible, otherwise we will emulate the
589 * queuing and calling of completion function ourselves.
590 */
591 SCpnt->owner = SCSI_OWNER_LOWLEVEL;
592
593 if (host->can_queue) {
594 DECLARE_MUTEX_LOCKED(sem);
595
596 SCpnt->eh_state = SCSI_STATE_QUEUED;
597
598 scsi_add_timer(SCpnt, timeout, scsi_eh_times_out);
599
600 /*
601 * Set up the semaphore so we wait for the command to complete.
602 */
603 SCpnt->host->eh_action = &sem;
604 SCpnt->request.rq_status = RQ_SCSI_BUSY;
605
606 spin_lock_irqsave(&io_request_lock, flags);
607 host->hostt->queuecommand(SCpnt, scsi_eh_done);
608 spin_unlock_irqrestore(&io_request_lock, flags);
609
610 down(&sem);
611
612 SCpnt->host->eh_action = NULL;
613
614 /*
615 * See if timeout. If so, tell the host to forget about it.
616 * In other words, we don't want a callback any more.
617 */
618 if (SCpnt->eh_state == SCSI_STATE_TIMEOUT) {
619 SCpnt->owner = SCSI_OWNER_LOWLEVEL;
620
621 /*
622 * As far as the low level driver is
623 * concerned, this command is still active, so
624 * we must give the low level driver a chance
625 * to abort it. (DB)
626 *
627 * FIXME(eric) - we are not tracking whether we could
628 * abort a timed out command or not. Not sure how
629 * we should treat them differently anyways.
630 */
631 spin_lock_irqsave(&io_request_lock, flags);
632 if (SCpnt->host->hostt->eh_abort_handler)
633 SCpnt->host->hostt->eh_abort_handler(SCpnt);
634 spin_unlock_irqrestore(&io_request_lock, flags);
635
636 SCpnt->request.rq_status = RQ_SCSI_DONE;
637 SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
638
639 SCpnt->eh_state = FAILED;
640 }
641 SCSI_LOG_ERROR_RECOVERY(5, printk("send_eh_cmnd: %p eh_state:%x\n",
642 SCpnt, SCpnt->eh_state));
643 } else {
644 int temp;
645
646 /*
647 * We damn well had better never use this code. There is no timeout
648 * protection here, since we would end up waiting in the actual low
649 * level driver, we don't know how to wake it up.
650 */
651 spin_lock_irqsave(&io_request_lock, flags);
652 temp = host->hostt->command(SCpnt);
653 spin_unlock_irqrestore(&io_request_lock, flags);
654
655 SCpnt->result = temp;
656 /* Fall through to code below to examine status. */
657 SCpnt->eh_state = SUCCESS;
658 }
659
660 /*
661 * Now examine the actual status codes to see whether the command actually
662 * did complete normally.
663 */
664 if (SCpnt->eh_state == SUCCESS) {
665 int ret = scsi_eh_completed_normally(SCpnt);
666 SCSI_LOG_ERROR_RECOVERY(3,
667 printk("scsi_send_eh_cmnd: scsi_eh_completed_normally %x\n", ret));
668 switch (ret) {
669 case SUCCESS:
670 SCpnt->eh_state = SUCCESS;
671 break;
672 case NEEDS_RETRY:
673 if ((++SCpnt->retries) < SCpnt->allowed)
674 goto retry;
675 SCpnt->eh_state = SUCCESS;
676 break;
677 case FAILED:
678 default:
679 SCpnt->eh_state = FAILED;
680 break;
681 }
682 } else {
683 SCpnt->eh_state = FAILED;
684 }
685 }
686
687 /*
688 * Function: scsi_unit_is_ready()
689 *
690 * Purpose: Called after TEST_UNIT_READY is run, to test to see if
691 * the unit responded in a way that indicates it is ready.
692 */
scsi_unit_is_ready(Scsi_Cmnd * SCpnt)693 STATIC int scsi_unit_is_ready(Scsi_Cmnd * SCpnt)
694 {
695 if (SCpnt->result) {
696 if (((driver_byte(SCpnt->result) & DRIVER_SENSE) ||
697 (status_byte(SCpnt->result) & CHECK_CONDITION)) &&
698 ((SCpnt->sense_buffer[0] & 0x70) >> 4) == 7) {
699 if (((SCpnt->sense_buffer[2] & 0xf) != NOT_READY) &&
700 ((SCpnt->sense_buffer[2] & 0xf) != UNIT_ATTENTION) &&
701 ((SCpnt->sense_buffer[2] & 0xf) != ILLEGAL_REQUEST)) {
702 return 0;
703 }
704 }
705 }
706 return 1;
707 }
708
709 /*
710 * Function: scsi_eh_finish_command
711 *
712 * Purpose: Handle a command that we are finished with WRT error handling.
713 *
714 * Arguments: SClist - pointer to list into which we are putting completed commands.
715 * SCpnt - command that is completing
716 *
717 * Notes: We don't want to use the normal command completion while we are
718 * are still handling errors - it may cause other commands to be queued,
719 * and that would disturb what we are doing. Thus we really want to keep
720 * a list of pending commands for final completion, and once we
721 * are ready to leave error handling we handle completion for real.
722 */
scsi_eh_finish_command(Scsi_Cmnd ** SClist,Scsi_Cmnd * SCpnt)723 STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt)
724 {
725 SCpnt->state = SCSI_STATE_BHQUEUE;
726 SCpnt->bh_next = *SClist;
727 /*
728 * Set this back so that the upper level can correctly free up
729 * things.
730 */
731 SCpnt->use_sg = SCpnt->old_use_sg;
732 SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
733 SCpnt->underflow = SCpnt->old_underflow;
734 *SClist = SCpnt;
735 }
736
737 /*
738 * Function: scsi_try_to_abort_command
739 *
740 * Purpose: Ask host adapter to abort a running command.
741 *
742 * Returns: FAILED Operation failed or not supported.
743 * SUCCESS Succeeded.
744 *
745 * Notes: This function will not return until the user's completion
746 * function has been called. There is no timeout on this
747 * operation. If the author of the low-level driver wishes
748 * this operation to be timed, they can provide this facility
749 * themselves. Helper functions in scsi_error.c can be supplied
750 * to make this easier to do.
751 *
752 * Notes: It may be possible to combine this with all of the reset
753 * handling to eliminate a lot of code duplication. I don't
754 * know what makes more sense at the moment - this is just a
755 * prototype.
756 */
scsi_try_to_abort_command(Scsi_Cmnd * SCpnt,int timeout)757 STATIC int scsi_try_to_abort_command(Scsi_Cmnd * SCpnt, int timeout)
758 {
759 int rtn;
760 unsigned long flags;
761
762 SCpnt->eh_state = FAILED; /* Until we come up with something better */
763
764 if (SCpnt->host->hostt->eh_abort_handler == NULL) {
765 return FAILED;
766 }
767 /*
768 * scsi_done was called just after the command timed out and before
769 * we had a chance to process it. (DB)
770 */
771 if (SCpnt->serial_number == 0)
772 return SUCCESS;
773
774 SCpnt->owner = SCSI_OWNER_LOWLEVEL;
775
776 spin_lock_irqsave(&io_request_lock, flags);
777 rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt);
778 spin_unlock_irqrestore(&io_request_lock, flags);
779 return rtn;
780 }
781
782 /*
783 * Function: scsi_try_bus_device_reset
784 *
785 * Purpose: Ask host adapter to perform a bus device reset for a given
786 * device.
787 *
788 * Returns: FAILED Operation failed or not supported.
789 * SUCCESS Succeeded.
790 *
791 * Notes: There is no timeout for this operation. If this operation is
792 * unreliable for a given host, then the host itself needs to put a
793 * timer on it, and set the host back to a consistent state prior
794 * to returning.
795 */
scsi_try_bus_device_reset(Scsi_Cmnd * SCpnt,int timeout)796 STATIC int scsi_try_bus_device_reset(Scsi_Cmnd * SCpnt, int timeout)
797 {
798 unsigned long flags;
799 int rtn;
800
801 SCpnt->eh_state = FAILED; /* Until we come up with something better */
802
803 if (SCpnt->host->hostt->eh_device_reset_handler == NULL) {
804 return FAILED;
805 }
806 SCpnt->owner = SCSI_OWNER_LOWLEVEL;
807
808 spin_lock_irqsave(&io_request_lock, flags);
809 rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt);
810 spin_unlock_irqrestore(&io_request_lock, flags);
811
812 if (rtn == SUCCESS)
813 SCpnt->eh_state = SUCCESS;
814
815 return SCpnt->eh_state;
816 }
817
818 /*
819 * Function: scsi_try_bus_reset
820 *
821 * Purpose: Ask host adapter to perform a bus reset for a host.
822 *
823 * Returns: FAILED Operation failed or not supported.
824 * SUCCESS Succeeded.
825 *
826 * Notes:
827 */
scsi_try_bus_reset(Scsi_Cmnd * SCpnt)828 STATIC int scsi_try_bus_reset(Scsi_Cmnd * SCpnt)
829 {
830 unsigned long flags;
831 int rtn;
832
833 SCpnt->eh_state = FAILED; /* Until we come up with something better */
834 SCpnt->owner = SCSI_OWNER_LOWLEVEL;
835 SCpnt->serial_number_at_timeout = SCpnt->serial_number;
836
837 if (SCpnt->host->hostt->eh_bus_reset_handler == NULL) {
838 return FAILED;
839 }
840
841 spin_lock_irqsave(&io_request_lock, flags);
842 rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt);
843 spin_unlock_irqrestore(&io_request_lock, flags);
844
845 if (rtn == SUCCESS)
846 SCpnt->eh_state = SUCCESS;
847
848 /*
849 * If we had a successful bus reset, mark the command blocks to expect
850 * a condition code of unit attention.
851 */
852 scsi_sleep(BUS_RESET_SETTLE_TIME);
853 if (SCpnt->eh_state == SUCCESS) {
854 Scsi_Device *SDloop;
855 for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
856 if (SCpnt->channel == SDloop->channel) {
857 SDloop->was_reset = 1;
858 SDloop->expecting_cc_ua = 1;
859 }
860 }
861 }
862 return SCpnt->eh_state;
863 }
864
865 /*
866 * Function: scsi_try_host_reset
867 *
868 * Purpose: Ask host adapter to reset itself, and the bus.
869 *
870 * Returns: FAILED Operation failed or not supported.
871 * SUCCESS Succeeded.
872 *
873 * Notes:
874 */
scsi_try_host_reset(Scsi_Cmnd * SCpnt)875 STATIC int scsi_try_host_reset(Scsi_Cmnd * SCpnt)
876 {
877 unsigned long flags;
878 int rtn;
879
880 SCpnt->eh_state = FAILED; /* Until we come up with something better */
881 SCpnt->owner = SCSI_OWNER_LOWLEVEL;
882 SCpnt->serial_number_at_timeout = SCpnt->serial_number;
883
884 if (SCpnt->host->hostt->eh_host_reset_handler == NULL) {
885 return FAILED;
886 }
887 spin_lock_irqsave(&io_request_lock, flags);
888 rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt);
889 spin_unlock_irqrestore(&io_request_lock, flags);
890
891 if (rtn == SUCCESS)
892 SCpnt->eh_state = SUCCESS;
893
894 /*
895 * If we had a successful host reset, mark the command blocks to expect
896 * a condition code of unit attention.
897 */
898 scsi_sleep(HOST_RESET_SETTLE_TIME);
899 if (SCpnt->eh_state == SUCCESS) {
900 Scsi_Device *SDloop;
901 for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
902 SDloop->was_reset = 1;
903 SDloop->expecting_cc_ua = 1;
904 }
905 }
906 return SCpnt->eh_state;
907 }
908
909 /*
910 * Function: scsi_decide_disposition
911 *
912 * Purpose: Examine a command block that has come back from the low-level
913 * and figure out what to do next.
914 *
915 * Returns: SUCCESS - pass on to upper level.
916 * FAILED - pass on to error handler thread.
917 * RETRY - command should be retried.
918 * SOFTERR - command succeeded, but we need to log
919 * a soft error.
920 *
921 * Notes: This is *ONLY* called when we are examining the status
922 * after sending out the actual data command. Any commands
923 * that are queued for error recovery (i.e. TEST_UNIT_READY)
924 * do *NOT* come through here.
925 *
926 * NOTE - When this routine returns FAILED, it means the error
927 * handler thread is woken. In cases where the error code
928 * indicates an error that doesn't require the error handler
929 * thread (i.e. we don't need to abort/reset), then this function
930 * should return SUCCESS.
931 */
scsi_decide_disposition(Scsi_Cmnd * SCpnt)932 int scsi_decide_disposition(Scsi_Cmnd * SCpnt)
933 {
934 int rtn;
935
936 /*
937 * If the device is offline, then we clearly just pass the result back
938 * up to the top level.
939 */
940 if (SCpnt->device->online == FALSE) {
941 SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: device offline - report as SUCCESS\n"));
942 return SUCCESS;
943 }
944 /*
945 * First check the host byte, to see if there is anything in there
946 * that would indicate what we need to do.
947 */
948
949 switch (host_byte(SCpnt->result)) {
950 case DID_PASSTHROUGH:
951 /*
952 * No matter what, pass this through to the upper layer.
953 * Nuke this special code so that it looks like we are saying
954 * DID_OK.
955 */
956 SCpnt->result &= 0xff00ffff;
957 return SUCCESS;
958 case DID_OK:
959 /*
960 * Looks good. Drop through, and check the next byte.
961 */
962 break;
963 case DID_NO_CONNECT:
964 case DID_BAD_TARGET:
965 case DID_ABORT:
966 /*
967 * Note - this means that we just report the status back to the
968 * top level driver, not that we actually think that it indicates
969 * success.
970 */
971 return SUCCESS;
972 /*
973 * When the low level driver returns DID_SOFT_ERROR,
974 * it is responsible for keeping an internal retry counter
975 * in order to avoid endless loops (DB)
976 *
977 * Actually this is a bug in this function here. We should
978 * be mindful of the maximum number of retries specified
979 * and not get stuck in a loop.
980 */
981 case DID_SOFT_ERROR:
982 goto maybe_retry;
983
984 case DID_ERROR:
985 if (msg_byte(SCpnt->result) == COMMAND_COMPLETE &&
986 status_byte(SCpnt->result) == RESERVATION_CONFLICT)
987 /*
988 * execute reservation conflict processing code
989 * lower down
990 */
991 break;
992 /* FALLTHROUGH */
993
994 case DID_BUS_BUSY:
995 case DID_PARITY:
996 goto maybe_retry;
997 case DID_TIME_OUT:
998 /*
999 * When we scan the bus, we get timeout messages for
1000 * these commands if there is no device available.
1001 * Other hosts report DID_NO_CONNECT for the same thing.
1002 */
1003 if ((SCpnt->cmnd[0] == TEST_UNIT_READY ||
1004 SCpnt->cmnd[0] == INQUIRY)) {
1005 return SUCCESS;
1006 } else {
1007 return FAILED;
1008 }
1009 case DID_RESET:
1010 /*
1011 * In the normal case where we haven't initiated a reset, this is
1012 * a failure.
1013 */
1014 if (SCpnt->flags & IS_RESETTING) {
1015 SCpnt->flags &= ~IS_RESETTING;
1016 goto maybe_retry;
1017 }
1018 return SUCCESS;
1019 default:
1020 return FAILED;
1021 }
1022
1023 /*
1024 * Next, check the message byte.
1025 */
1026 if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
1027 return FAILED;
1028 }
1029 /*
1030 * Now, check the status byte to see if this indicates anything special.
1031 */
1032 switch (status_byte(SCpnt->result)) {
1033 case QUEUE_FULL:
1034 /*
1035 * The case of trying to send too many commands to a tagged queueing
1036 * device.
1037 */
1038 return ADD_TO_MLQUEUE;
1039 case GOOD:
1040 case COMMAND_TERMINATED:
1041 return SUCCESS;
1042 case CHECK_CONDITION:
1043 rtn = scsi_check_sense(SCpnt);
1044 if (rtn == NEEDS_RETRY) {
1045 goto maybe_retry;
1046 }
1047 return rtn;
1048 case CONDITION_GOOD:
1049 case INTERMEDIATE_GOOD:
1050 case INTERMEDIATE_C_GOOD:
1051 /*
1052 * Who knows? FIXME(eric)
1053 */
1054 return SUCCESS;
1055 case BUSY:
1056 goto maybe_retry;
1057
1058 case RESERVATION_CONFLICT:
1059 printk("scsi%d (%d,%d,%d) : RESERVATION CONFLICT\n",
1060 SCpnt->host->host_no, SCpnt->channel,
1061 SCpnt->device->id, SCpnt->device->lun);
1062 return SUCCESS; /* causes immediate I/O error */
1063 default:
1064 return FAILED;
1065 }
1066 return FAILED;
1067
1068 maybe_retry:
1069
1070 if ((++SCpnt->retries) < SCpnt->allowed) {
1071 return NEEDS_RETRY;
1072 } else {
1073 /*
1074 * No more retries - report this one back to upper level.
1075 */
1076 return SUCCESS;
1077 }
1078 }
1079
1080 /*
1081 * Function: scsi_eh_completed_normally
1082 *
1083 * Purpose: Examine a command block that has come back from the low-level
1084 * and figure out what to do next.
1085 *
1086 * Returns: SUCCESS - pass on to upper level.
1087 * FAILED - pass on to error handler thread.
1088 * RETRY - command should be retried.
1089 * SOFTERR - command succeeded, but we need to log
1090 * a soft error.
1091 *
1092 * Notes: This is *ONLY* called when we are examining the status
1093 * of commands queued during error recovery. The main
1094 * difference here is that we don't allow for the possibility
1095 * of retries here, and we are a lot more restrictive about what
1096 * we consider acceptable.
1097 */
scsi_eh_completed_normally(Scsi_Cmnd * SCpnt)1098 STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt)
1099 {
1100 /*
1101 * First check the host byte, to see if there is anything in there
1102 * that would indicate what we need to do.
1103 */
1104 if (host_byte(SCpnt->result) == DID_RESET) {
1105 if (SCpnt->flags & IS_RESETTING) {
1106 /*
1107 * OK, this is normal. We don't know whether in fact the
1108 * command in question really needs to be rerun or not -
1109 * if this was the original data command then the answer is yes,
1110 * otherwise we just flag it as success.
1111 */
1112 SCpnt->flags &= ~IS_RESETTING;
1113 return NEEDS_RETRY;
1114 }
1115 /*
1116 * Rats. We are already in the error handler, so we now get to try
1117 * and figure out what to do next. If the sense is valid, we have
1118 * a pretty good idea of what to do. If not, we mark it as failed.
1119 */
1120 return scsi_check_sense(SCpnt);
1121 }
1122 if (host_byte(SCpnt->result) != DID_OK) {
1123 return FAILED;
1124 }
1125 /*
1126 * Next, check the message byte.
1127 */
1128 if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
1129 return FAILED;
1130 }
1131 /*
1132 * Now, check the status byte to see if this indicates anything special.
1133 */
1134 switch (status_byte(SCpnt->result)) {
1135 case GOOD:
1136 case COMMAND_TERMINATED:
1137 return SUCCESS;
1138 case CHECK_CONDITION:
1139 return scsi_check_sense(SCpnt);
1140 case CONDITION_GOOD:
1141 case INTERMEDIATE_GOOD:
1142 case INTERMEDIATE_C_GOOD:
1143 /*
1144 * Who knows? FIXME(eric)
1145 */
1146 return SUCCESS;
1147 case BUSY:
1148 case QUEUE_FULL:
1149 case RESERVATION_CONFLICT:
1150 default:
1151 return FAILED;
1152 }
1153 return FAILED;
1154 }
1155
1156 /*
1157 * Function: scsi_check_sense
1158 *
1159 * Purpose: Examine sense information - give suggestion as to what
1160 * we should do with it.
1161 */
scsi_check_sense(Scsi_Cmnd * SCpnt)1162 STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt)
1163 {
1164 if (!scsi_sense_valid(SCpnt)) {
1165 return FAILED;
1166 }
1167 if (SCpnt->sense_buffer[2] & 0xe0)
1168 return SUCCESS;
1169
1170 switch (SCpnt->sense_buffer[2] & 0xf) {
1171 case NO_SENSE:
1172 return SUCCESS;
1173 case RECOVERED_ERROR:
1174 return /* SOFT_ERROR */ SUCCESS;
1175
1176 case ABORTED_COMMAND:
1177 return NEEDS_RETRY;
1178 case NOT_READY:
1179 case UNIT_ATTENTION:
1180 /*
1181 * If we are expecting a CC/UA because of a bus reset that we
1182 * performed, treat this just as a retry. Otherwise this is
1183 * information that we should pass up to the upper-level driver
1184 * so that we can deal with it there.
1185 */
1186 if (SCpnt->device->expecting_cc_ua) {
1187 SCpnt->device->expecting_cc_ua = 0;
1188 return NEEDS_RETRY;
1189 }
1190 /*
1191 * If the device is in the process of becoming ready, we
1192 * should retry.
1193 */
1194 if ((SCpnt->sense_buffer[12] == 0x04) &&
1195 (SCpnt->sense_buffer[13] == 0x01)) {
1196 return NEEDS_RETRY;
1197 }
1198 return SUCCESS;
1199
1200 /* these three are not supported */
1201 case COPY_ABORTED:
1202 case VOLUME_OVERFLOW:
1203 case MISCOMPARE:
1204 return SUCCESS;
1205
1206 case MEDIUM_ERROR:
1207 return NEEDS_RETRY;
1208
1209 case ILLEGAL_REQUEST:
1210 case BLANK_CHECK:
1211 case DATA_PROTECT:
1212 case HARDWARE_ERROR:
1213 default:
1214 return SUCCESS;
1215 }
1216 }
1217
1218
1219 /*
1220 * Function: scsi_restart_operations
1221 *
1222 * Purpose: Restart IO operations to the specified host.
1223 *
1224 * Arguments: host - host that we are restarting
1225 *
1226 * Lock status: Assumed that locks are not held upon entry.
1227 *
1228 * Returns: Nothing
1229 *
1230 * Notes: When we entered the error handler, we blocked all further
1231 * I/O to this device. We need to 'reverse' this process.
1232 */
scsi_restart_operations(struct Scsi_Host * host)1233 STATIC void scsi_restart_operations(struct Scsi_Host *host)
1234 {
1235 Scsi_Device *SDpnt;
1236 unsigned long flags;
1237
1238 ASSERT_LOCK(&io_request_lock, 0);
1239
1240 /*
1241 * Next free up anything directly waiting upon the host. This will be
1242 * requests for character device operations, and also for ioctls to queued
1243 * block devices.
1244 */
1245 SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: Waking up host to restart\n"));
1246
1247 wake_up(&host->host_wait);
1248
1249 /*
1250 * Finally we need to re-initiate requests that may be pending. We will
1251 * have had everything blocked while error handling is taking place, and
1252 * now that error recovery is done, we will need to ensure that these
1253 * requests are started.
1254 */
1255 spin_lock_irqsave(&io_request_lock, flags);
1256 for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
1257 request_queue_t *q;
1258 if ((host->can_queue > 0 && (host->host_busy >= host->can_queue))
1259 || (host->host_blocked)
1260 || (host->host_self_blocked)
1261 || (SDpnt->device_blocked)) {
1262 break;
1263 }
1264 q = &SDpnt->request_queue;
1265 q->request_fn(q);
1266 }
1267 spin_unlock_irqrestore(&io_request_lock, flags);
1268 }
1269
/*
 * Function:  scsi_unjam_host
 *
 * Purpose:     Attempt to fix a host which has a command that failed for
 *              some reason.
 *
 * Arguments:   host    - host that needs unjamming.
 *
 * Returns:     TRUE if host->host_failed dropped to zero at one of the
 *              recovery stages, FALSE if the final 'take device offline'
 *              stage had to be run.
 *
 * Notes:       When we come in here, we *know* that all commands on the
 *              bus have either completed, failed or timed out.  We also
 *              know that no further commands are being sent to the host,
 *              so things are relatively quiet and we have freedom to
 *              fiddle with things as we wish.
 *
 *              The stages, each one walking every command on every device
 *              queue of the host, are in order:
 *                1. log commands in unexpected states (diagnostic only)
 *                2. request sense for failed commands lacking valid sense
 *                3. count failed / timed out commands (for logging)
 *                4. abort each timed out command
 *                5. bus device reset for each device with a bad command
 *                6. bus reset
 *                7. host reset
 *                8. take the affected devices offline
 *              After stages 3 to 7 the function leaves early if
 *              host->host_failed has reached zero.  Every command that is
 *              dealt with is handed to scsi_eh_finish_command() together
 *              with the local SCdone list head, and host_failed is
 *              decremented for it; the collected commands are passed to
 *              scsi_done() on the way out.
 *
 * Additional note:  This is only the *default* implementation.  It is possible
 *              for individual drivers to supply their own version of this
 *              function, and if the maintainer wishes to do this, it is
 *              strongly suggested that this function be taken as a template
 *              and modified.  This function was designed to correctly handle
 *              problems for about 95% of the different cases out there, and
 *              it should always provide at least a reasonable amount of error
 *              recovery.
 *
 * Note3:       Any command marked 'FAILED' or 'TIMEOUT' must eventually
 *              have scsi_finish_command() called for it.  We do all of
 *              the retry stuff here, so when we restart the host after we
 *              return it should have an empty queue.
 */
STATIC int scsi_unjam_host(struct Scsi_Host *host)
{
	int devices_failed;
	int numfailed;
	int ourrtn;
	int rtn = FALSE;
	int result;
	Scsi_Cmnd *SCloop;
	Scsi_Cmnd *SCpnt;
	Scsi_Device *SDpnt;
	Scsi_Device *SDloop;
	Scsi_Cmnd *SCdone;
	int timed_out;

	ASSERT_LOCK(&io_request_lock, 0);

	/* Head of the list of commands finished during recovery. */
	SCdone = NULL;

	/*
	 * First, protect against any sort of race condition.  If any of the outstanding
	 * commands are in states that indicate that we are not yet blocked (i.e. we are
	 * not in a quiet state) then we got woken up in error.  If we ever end up here,
	 * we need to re-examine some of the assumptions.
	 *
	 * Note that this pass only logs; it does not change any state.
	 */
	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
		for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
			if (SCpnt->state == SCSI_STATE_FAILED
			    || SCpnt->state == SCSI_STATE_TIMEOUT
			    || SCpnt->state == SCSI_STATE_INITIALIZING
			    || SCpnt->state == SCSI_STATE_UNUSED) {
				continue;
			}
			/*
			 * Rats.  Something is still floating around out there.  This could
			 * be the result of the fact that the upper level drivers are still frobbing
			 * commands that might have succeeded.  There are two outcomes.  One is that
			 * the command block will eventually be freed, and the other one is that
			 * the command will be queued and will be finished along the way.
			 */
			SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target));

			/*
			 *        panic("SCSI Error handler woken too early\n");
			 *
			 * This is no longer a problem, since now the code cares only about
			 * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED.
			 * Other states are useful only to release active commands when devices are
			 * set offline.  If (host->host_active == host->host_busy) we can safely assume
			 * that there are no commands in state other than TIMEOUT or FAILED. (DB)
			 *
			 * FIXME:
			 * It is not easy to release correctly commands according to their state when
			 * devices are set offline, when the state is neither TIMEOUT nor FAILED.
			 * When a device is set offline, we can have some command with
			 * rq_status=RQ_SCSI_BUSY, owner=SCSI_STATE_HIGHLEVEL,
			 * state=SCSI_STATE_INITIALIZING and the driver module cannot be released.
			 * (DB, 17 May 1998)
			 */
		}
	}

	/*
	 * Next, see if we need to request sense information.  if so,
	 * then get it now, so we have a better idea of what to do.
	 * FIXME(eric) this has the unfortunate side effect that if a host
	 * adapter does not automatically request sense information, that we end
	 * up shutting it down before we request it.  All hosts should be doing this
	 * anyways, so for now all I have to say is tough noogies if you end up in here.
	 * On second thought, this is probably a good idea.  We *really* want to give
	 * authors an incentive to automatically request this.
	 */
	SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we need to request sense\n"));

	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
		for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
			/*
			 * Re-entered via goto after a failed retry, with the
			 * command put back into SCSI_STATE_FAILED.
			 */
		      recheck_sense_valid:
			if (SCpnt->state != SCSI_STATE_FAILED || scsi_sense_valid(SCpnt)) {
				continue;
			}
			SCSI_LOG_ERROR_RECOVERY(2, printk("scsi_unjam_host: Requesting sense for %d\n",
							  SCpnt->target));
			rtn = scsi_request_sense(SCpnt);
			if (rtn != SUCCESS) {
				continue;
			}
			SCSI_LOG_ERROR_RECOVERY(3, printk("Sense requested for %p - result %x\n",
							  SCpnt, SCpnt->result));
			SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", SCpnt));

			result = scsi_decide_disposition(SCpnt);

			/*
			 * If the result was normal, then just pass it along to the
			 * upper level.
			 */
			if (result == SUCCESS) {
				SCpnt->host->host_failed--;
				scsi_eh_finish_command(&SCdone, SCpnt);
			}
			/*
			 * Anything other than NEEDS_RETRY is finished with for
			 * this stage: SUCCESS was completed just above, every
			 * other disposition leaves the command for later stages.
			 */
			if (result != NEEDS_RETRY) {
				continue;
			}
			/*
			 * We only come in here if we want to retry a
			 * command.  The test to see whether the command
			 * should be retried should be keeping track of the
			 * number of tries, so we don't end up looping, of
			 * course.
			 *
			 * NOTE(review): NEEDS_RETRY looks like a disposition
			 * code rather than a SCSI_STATE_* value - confirm that
			 * storing it in ->state is intended.
			 */
			SCpnt->state = NEEDS_RETRY;
			rtn = scsi_eh_retry_command(SCpnt);
			if (rtn != SUCCESS) {
				SCpnt->state = SCSI_STATE_FAILED;
				goto recheck_sense_valid;
			}
			/*
			 * We eventually hand this one back to the top level.
			 */
			SCpnt->host->host_failed--;
			scsi_eh_finish_command(&SCdone, SCpnt);
		}
	}

	/*
	 * Go through the list of commands and figure out where we stand and how bad things
	 * really are.  The three counters are only used for the log message below;
	 * the decisions that follow are all based on host->host_failed.
	 */
	numfailed = 0;
	timed_out = 0;
	devices_failed = 0;
	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
		unsigned int device_error = 0;

		for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
			if (SCpnt->state == SCSI_STATE_FAILED) {
				SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d failed\n",
								  SCpnt->target));
				numfailed++;
				device_error++;
			}
			if (SCpnt->state == SCSI_STATE_TIMEOUT) {
				SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d timedout\n",
								  SCpnt->target));
				timed_out++;
				device_error++;
			}
		}
		if (device_error > 0) {
			devices_failed++;
		}
	}

	SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d+%d commands on %d devices require eh work\n",
					  numfailed, timed_out, devices_failed));

	if (host->host_failed == 0) {
		ourrtn = TRUE;
		goto leave;
	}
	/*
	 * Next, try and see whether or not it makes sense to try and abort
	 * the running command.  This only works out to be the case if we have
	 * one command that has timed out.  If the command simply failed, it
	 * makes no sense to try and abort the command, since as far as the
	 * host adapter is concerned, it isn't running.
	 */

	SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try abort\n"));

	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
		for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
			if (SCloop->state != SCSI_STATE_TIMEOUT) {
				continue;
			}
			/*
			 * Abort, then check the unit responds and is ready, then
			 * retry.  The command is only finished if all of these
			 * steps succeed; otherwise it is left for the next stage.
			 */
			rtn = scsi_try_to_abort_command(SCloop, ABORT_TIMEOUT);
			if (rtn == SUCCESS) {
				rtn = scsi_test_unit_ready(SCloop);

				if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
					rtn = scsi_eh_retry_command(SCloop);

					if (rtn == SUCCESS) {
						SCloop->host->host_failed--;
						scsi_eh_finish_command(&SCdone, SCloop);
					}
				}
			}
		}
	}

	/*
	 * If we have corrected all of the problems, then we are done.
	 */
	if (host->host_failed == 0) {
		ourrtn = TRUE;
		goto leave;
	}
	/*
	 * Either the abort wasn't appropriate, or it didn't succeed.
	 * Now try a bus device reset.  Still, look to see whether we have
	 * multiple devices that are jammed or not - if we have multiple devices,
	 * it makes no sense to try BUS_DEVICE_RESET - we really would need
	 * to try a BUS_RESET instead.
	 *
	 * Does this make sense - should we try BDR on each device individually?
	 * Yes, definitely.
	 */
	SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try BDR\n"));

	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
		/*
		 * Find the first failed or timed out command on this device;
		 * SCloop is NULL afterwards if there is none.
		 */
		for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
			if (SCloop->state == SCSI_STATE_FAILED
			    || SCloop->state == SCSI_STATE_TIMEOUT) {
				break;
			}
		}

		if (SCloop == NULL) {
			continue;
		}
		/*
		 * OK, we have a device that is having problems.  Try and send
		 * a bus device reset to it.
		 *
		 * FIXME(eric) - make sure we handle the case where multiple
		 * commands to the same device have failed. They all must
		 * get properly restarted.
		 */
		rtn = scsi_try_bus_device_reset(SCloop, RESET_TIMEOUT);

		if (rtn == SUCCESS) {
			rtn = scsi_test_unit_ready(SCloop);

			if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
				rtn = scsi_eh_retry_command(SCloop);

				if (rtn == SUCCESS) {
					SCloop->host->host_failed--;
					scsi_eh_finish_command(&SCdone, SCloop);
				}
			}
		}
	}

	if (host->host_failed == 0) {
		ourrtn = TRUE;
		goto leave;
	}
	/*
	 * If we ended up here, we have serious problems.  The only thing left
	 * to try is a full bus reset.  If someone has grabbed the bus and isn't
	 * letting go, then perhaps this will help.
	 */
	SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard bus reset\n"));

	/*
	 * We really want to loop over the various channels, and do this on
	 * a channel by channel basis.  We should also check to see if any
	 * of the failed commands are on soft_reset devices, and if so, skip
	 * the reset.
	 */
	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
		/*
		 * NOTE(review): jumping here restarts the scan of the *same*
		 * device (SDpnt is not advanced), so the soft-reset case below
		 * re-examines this device after each one second sleep.
		 */
	      next_device:
		for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
			if (SCpnt->state != SCSI_STATE_FAILED
			    && SCpnt->state != SCSI_STATE_TIMEOUT) {
				continue;
			}
			/*
			 * We have a failed command.  Make sure there are no other failed
			 * commands on the same channel that are timed out and implement a
			 * soft reset.
			 */
			for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
				for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
					if (SCloop->channel != SCpnt->channel) {
						continue;
					}
					if (SCloop->state != SCSI_STATE_FAILED
					    && SCloop->state != SCSI_STATE_TIMEOUT) {
						continue;
					}
					if (SDloop->soft_reset && SCloop->state == SCSI_STATE_TIMEOUT) {
						/*
						 * If this device uses the soft reset option, and this
						 * is one of the devices acting up, then our only
						 * option is to wait a bit, since the command is
						 * supposedly still running.
						 *
						 * FIXME(eric) - right now we will just end up falling
						 * through to the 'take device offline' case.
						 *
						 * FIXME(eric) - It is possible that the command completed
						 * *after* the error recovery procedure started, and if this
						 * is the case, we are worrying about nothing here.
						 */

						scsi_sleep(1 * HZ);
						goto next_device;
					}
				}
			}

			/*
			 * We now know that we are able to perform a reset for the
			 * bus that SCpnt points to.  There are no soft-reset devices
			 * with outstanding timed out commands.
			 */
			rtn = scsi_try_bus_reset(SCpnt);
			if (rtn == SUCCESS) {
				/*
				 * The reset went through: revisit every bad command on
				 * the same channel as SCpnt.
				 */
				for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
					for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
						if (SCloop->channel != SCpnt->channel) {
							continue;
						}
						if (SCloop->state != SCSI_STATE_FAILED
						    && SCloop->state != SCSI_STATE_TIMEOUT) {
							continue;
						}
						rtn = scsi_test_unit_ready(SCloop);

						if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
							rtn = scsi_eh_retry_command(SCloop);

							if (rtn == SUCCESS) {
								SCpnt->host->host_failed--;
								scsi_eh_finish_command(&SCdone, SCloop);
							}
						}
						/*
						 * If the bus reset worked, but we are still unable to
						 * talk to the device, take it offline.
						 * FIXME(eric) - is this really the correct thing to do?
						 *
						 * Note: if the test unit ready itself succeeded but
						 * scsi_unit_is_ready() said no, rtn is still SUCCESS
						 * here, so the command is neither finished nor is the
						 * device taken offline; it stays for the next stage.
						 */
						if (rtn != SUCCESS) {
							printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after bus reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);

							SDloop->online = FALSE;
							SDloop->host->host_failed--;
							scsi_eh_finish_command(&SCdone, SCloop);
						}
					}
				}
			}
		}
	}

	if (host->host_failed == 0) {
		ourrtn = TRUE;
		goto leave;
	}
	/*
	 * If we ended up here, we have serious problems.  The only thing left
	 * to try is a full host reset - perhaps the firmware on the device
	 * crashed, or something like that.
	 *
	 * It is assumed that a successful host reset will cause *all* information
	 * about the command to be flushed from both the host adapter *and* the
	 * device.
	 *
	 * FIXME(eric) - it isn't clear that devices that implement the soft reset
	 * option can ever be cleared except via cycling the power.  The problem is
	 * that sending the host reset command will cause the host to forget
	 * about the pending command, but the device won't forget.  For now, we
	 * skip the host reset option if any of the failed devices are configured
	 * to use the soft reset option.
	 */
	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
		/*
		 * NOTE(review): as with next_device above, jumping here rescans
		 * the same device without advancing SDpnt.
		 */
	      next_device2:
		for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
			if (SCpnt->state != SCSI_STATE_FAILED
			    && SCpnt->state != SCSI_STATE_TIMEOUT) {
				continue;
			}
			if (SDpnt->soft_reset && SCpnt->state == SCSI_STATE_TIMEOUT) {
				/*
				 * If this device uses the soft reset option, and this
				 * is one of the devices acting up, then our only
				 * option is to wait a bit, since the command is
				 * supposedly still running.
				 *
				 * FIXME(eric) - right now we will just end up falling
				 * through to the 'take device offline' case.
				 */
				SCSI_LOG_ERROR_RECOVERY(3,
							printk("scsi_unjam_host: Unable to try hard host reset\n"));

				/*
				 * Due to the spinlock, we will never get out of this
				 * loop without a proper wait. (DB)
				 */
				scsi_sleep(1 * HZ);

				goto next_device2;
			}
			SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard host reset\n"));

			/*
			 * FIXME(eric) - we need to obtain a valid SCpnt to perform this call.
			 */
			rtn = scsi_try_host_reset(SCpnt);
			if (rtn == SUCCESS) {
				/*
				 * FIXME(eric) we assume that all commands are flushed from the
				 * controller.  We should get a DID_RESET for all of the commands
				 * that were pending.  We should ignore these so that we can
				 * guarantee that we are in a consistent state.
				 *
				 * I believe this to be the case right now, but this needs to be
				 * tested.
				 *
				 * Unlike the bus reset stage there is no channel filter
				 * here: every bad command on the host is revisited.
				 */
				for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
					for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
						if (SCloop->state != SCSI_STATE_FAILED
						    && SCloop->state != SCSI_STATE_TIMEOUT) {
							continue;
						}
						rtn = scsi_test_unit_ready(SCloop);

						if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
							rtn = scsi_eh_retry_command(SCloop);

							if (rtn == SUCCESS) {
								SCpnt->host->host_failed--;
								scsi_eh_finish_command(&SCdone, SCloop);
							}
						}
						/*
						 * Test unit ready or the retry failed: give up
						 * on the device and complete the command.
						 */
						if (rtn != SUCCESS) {
							printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after host reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
							SDloop->online = FALSE;
							SDloop->host->host_failed--;
							scsi_eh_finish_command(&SCdone, SCloop);
						}
					}
				}
			}
		}
	}

	/*
	 * If we solved all of the problems, then let's rev up the engines again.
	 */
	if (host->host_failed == 0) {
		ourrtn = TRUE;
		goto leave;
	}
	/*
	 * If the HOST RESET failed, then for now we assume that the entire host
	 * adapter is too hosed to be of any use.  For our purposes, however, it is
	 * easier to simply take the devices offline that correspond to commands
	 * that failed.
	 */
	SCSI_LOG_ERROR_RECOVERY(1, printk("scsi_unjam_host: Take device offline\n"));

	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
		for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
			if (SCloop->state == SCSI_STATE_FAILED || SCloop->state == SCSI_STATE_TIMEOUT) {
				SDloop = SCloop->device;
				/*
				 * Only announce the transition once per device.
				 */
				if (SDloop->online == TRUE) {
					printk(KERN_INFO "scsi: device set offline - command error recover failed: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
					SDloop->online = FALSE;
				}

				/*
				 * This should pass the failure up to the top level driver, and
				 * it will have to try and do something intelligent with it.
				 */
				SCloop->host->host_failed--;

				/*
				 * Flag timed out commands as such in the top byte of
				 * the result.
				 */
				if (SCloop->state == SCSI_STATE_TIMEOUT) {
					SCloop->result |= (DRIVER_TIMEOUT << 24);
				}
				SCSI_LOG_ERROR_RECOVERY(3, printk("Finishing command for device %d %x\n",
								  SDloop->id, SCloop->result));

				scsi_eh_finish_command(&SCdone, SCloop);
			}
		}
	}

	/*
	 * Every failed or timed out command has now been accounted for, so
	 * anything left in host_failed means the bookkeeping went wrong.
	 */
	if (host->host_failed != 0) {
		panic("scsi_unjam_host: Miscount of number of failed commands.\n");
	}
	SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Returning\n"));

	ourrtn = FALSE;

      leave:

	/*
	 * We should have a list of commands that we 'finished' during the course of
	 * error recovery.  This should be the same as the list of commands that timed out
	 * or failed.  We are currently holding these things in a linked list - we didn't
	 * put them in the bottom half queue because we wanted to keep things quiet while
	 * we were working on recovery, and passing them up to the top level could easily
	 * cause the top level to try and queue something else again.
	 *
	 * Start by marking that the host is no longer in error recovery.
	 */
	host->in_recovery = 0;

	/*
	 * Take the list of commands, and stick them in the bottom half queue.
	 * The current implementation of scsi_done will do this for us - if need
	 * be we can create a special version of this function to do the
	 * same job for us.
	 *
	 * The list is unlinked through ->bh_next one element at a time before
	 * each command is handed over.
	 */
	for (SCpnt = SCdone; SCpnt != NULL; SCpnt = SCdone) {
		SCdone = SCpnt->bh_next;
		SCpnt->bh_next = NULL;
		/*
		 * Oh, this is a vile hack.  scsi_done() expects a timer
		 * to be running on the command.  If there isn't, it assumes
		 * that the command has actually timed out, and a timer
		 * handler is running.  That may well be how we got into
		 * this fix, but right now things are stable.  We add
		 * a timer back again so that we can report completion.
		 * scsi_done() will immediately remove said timer from
		 * the command, and then process it.
		 */
		scsi_add_timer(SCpnt, 100, scsi_eh_times_out);
		scsi_done(SCpnt);
	}

	return (ourrtn);
}
1826
1827
1828 /*
1829 * Function: scsi_error_handler
1830 *
1831 * Purpose: Handle errors/timeouts of scsi commands, try and clean up
1832 * and unjam the bus, and restart things.
1833 *
1834 * Arguments: host - host for which we are running.
1835 *
1836 * Returns: Never returns.
1837 *
1838 * Notes: This is always run in the context of a kernel thread. The
1839 * idea is that we start this thing up when the kernel starts
1840 * up (one per host that we detect), and it immediately goes to
1841 * sleep and waits for some event (i.e. failure). When this
1842 * takes place, we have the job of trying to unjam the bus
1843 * and restarting things.
1844 *
1845 */
scsi_error_handler(void * data)1846 void scsi_error_handler(void *data)
1847 {
1848 struct Scsi_Host *host = (struct Scsi_Host *) data;
1849 int rtn;
1850 DECLARE_MUTEX_LOCKED(sem);
1851
1852 /*
1853 * We only listen to signals if the HA was loaded as a module.
1854 * If the HA was compiled into the kernel, then we don't listen
1855 * to any signals.
1856 */
1857 if( host->loaded_as_module ) {
1858 siginitsetinv(¤t->blocked, SHUTDOWN_SIGS);
1859 } else {
1860 siginitsetinv(¤t->blocked, 0);
1861 }
1862
1863 lock_kernel();
1864
1865 /*
1866 * Flush resources
1867 */
1868
1869 daemonize();
1870 reparent_to_init();
1871
1872 /*
1873 * Set the name of this process.
1874 */
1875
1876 sprintf(current->comm, "scsi_eh_%d", host->host_no);
1877
1878 host->eh_wait = &sem;
1879 host->ehandler = current;
1880
1881 unlock_kernel();
1882
1883 /*
1884 * Wake up the thread that created us.
1885 */
1886 SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent %d\n", sem_getcount(host->eh_notify)));
1887
1888 up(host->eh_notify);
1889
1890 while (1) {
1891 /*
1892 * If we get a signal, it means we are supposed to go
1893 * away and die. This typically happens if the user is
1894 * trying to unload a module.
1895 */
1896 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler sleeping\n"));
1897
1898 /*
1899 * Note - we always use down_interruptible with the semaphore
1900 * even if the module was loaded as part of the kernel. The
1901 * reason is that down() will cause this thread to be counted
1902 * in the load average as a running process, and down
1903 * interruptible doesn't. Given that we need to allow this
1904 * thread to die if the driver was loaded as a module, using
1905 * semaphores isn't unreasonable.
1906 */
1907 down_interruptible(&sem);
1908 if( host->loaded_as_module ) {
1909 if (signal_pending(current))
1910 break;
1911 }
1912
1913 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler waking up\n"));
1914
1915 host->eh_active = 1;
1916
1917 /*
1918 * We have a host that is failing for some reason. Figure out
1919 * what we need to do to get it up and online again (if we can).
1920 * If we fail, we end up taking the thing offline.
1921 */
1922 if (host->hostt->eh_strategy_handler != NULL) {
1923 rtn = host->hostt->eh_strategy_handler(host);
1924 } else {
1925 rtn = scsi_unjam_host(host);
1926 }
1927
1928 host->eh_active = 0;
1929
1930 /*
1931 * Note - if the above fails completely, the action is to take
1932 * individual devices offline and flush the queue of any
1933 * outstanding requests that may have been pending. When we
1934 * restart, we restart any I/O to any other devices on the bus
1935 * which are still online.
1936 */
1937 scsi_restart_operations(host);
1938
1939 }
1940
1941 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler exiting\n"));
1942
1943 /*
1944 * Make sure that nobody tries to wake us up again.
1945 */
1946 host->eh_wait = NULL;
1947
1948 /*
1949 * Knock this down too. From this point on, the host is flying
1950 * without a pilot. If this is because the module is being unloaded,
1951 * that's fine. If the user sent a signal to this thing, we are
1952 * potentially in real danger.
1953 */
1954 host->in_recovery = 0;
1955 host->eh_active = 0;
1956 host->ehandler = NULL;
1957
1958 /*
1959 * If anyone is waiting for us to exit (i.e. someone trying to unload
1960 * a driver), then wake up that process to let them know we are on
1961 * the way out the door. This may be overkill - I *think* that we
1962 * could probably just unload the driver and send the signal, and when
1963 * the error handling thread wakes up that it would just exit without
1964 * needing to touch any memory associated with the driver itself.
1965 */
1966 if (host->eh_notify != NULL)
1967 up(host->eh_notify);
1968 }
1969
1970 /*
1971 * Function: scsi_new_reset
1972 *
1973 * Purpose: Send requested reset to a bus or device at any phase.
1974 *
1975 * Arguments: SCpnt - command ptr to send reset with (usually a dummy)
1976 * flag - reset type (see scsi.h)
1977 *
1978 * Returns: SUCCESS/FAILURE.
1979 *
1980 * Notes: This is used by the SCSI Generic driver to provide
1981 * Bus/Device reset capability.
1982 */
1983 int
scsi_new_reset(Scsi_Cmnd * SCpnt,int flag)1984 scsi_new_reset(Scsi_Cmnd *SCpnt, int flag)
1985 {
1986 int rtn;
1987
1988 switch(flag) {
1989 case SCSI_TRY_RESET_DEVICE:
1990 rtn = scsi_try_bus_device_reset(SCpnt, 0);
1991 if (rtn == SUCCESS)
1992 break;
1993 /* FALLTHROUGH */
1994 case SCSI_TRY_RESET_BUS:
1995 rtn = scsi_try_bus_reset(SCpnt);
1996 if (rtn == SUCCESS)
1997 break;
1998 /* FALLTHROUGH */
1999 case SCSI_TRY_RESET_HOST:
2000 rtn = scsi_try_host_reset(SCpnt);
2001 break;
2002 default:
2003 rtn = FAILED;
2004 }
2005
2006 return rtn;
2007 }
2008
2009 /*
2010 * Overrides for Emacs so that we follow Linus's tabbing style.
2011 * Emacs will notice this stuff at the end of the file and automatically
2012 * adjust the settings for this buffer only. This must remain at the end
2013 * of the file.
2014 * ---------------------------------------------------------------------------
2015 * Local variables:
2016 * c-indent-level: 4
2017 * c-brace-imaginary-offset: 0
2018 * c-brace-offset: -4
2019 * c-argdecl-indent: 4
2020 * c-label-offset: -4
2021 * c-continued-statement-offset: 4
2022 * c-continued-brace-offset: 0
2023 * indent-tabs-mode: nil
2024 * tab-width: 8
2025 * End:
2026 */
2027