diff options
Diffstat (limited to 'drivers/scsi/scsi_error.c')
| -rw-r--r-- | drivers/scsi/scsi_error.c | 802 | 
1 files changed, 608 insertions, 194 deletions
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index f3cf924a2cd..7e957918f33 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -3,14 +3,14 @@   *   *  SCSI error/timeout handling   *      Initial versions: Eric Youngdale.  Based upon conversations with - *                        Leonard Zubkoff and David Miller at Linux Expo,  + *                        Leonard Zubkoff and David Miller at Linux Expo,   *                        ideas originating from all over the place.   *   *	Restructured scsi_unjam_host and associated functions.   *	September 04, 2002 Mike Anderson (andmike@us.ibm.com)   *   *	Forward port of Russell King's (rmk@arm.linux.org.uk) changes and - *	minor  cleanups. + *	minor cleanups.   *	September 30, 2002 Mike Anderson (andmike@us.ibm.com)   */ @@ -25,11 +25,13 @@  #include <linux/interrupt.h>  #include <linux/blkdev.h>  #include <linux/delay.h> +#include <linux/jiffies.h>  #include <scsi/scsi.h>  #include <scsi/scsi_cmnd.h>  #include <scsi/scsi_dbg.h>  #include <scsi/scsi_device.h> +#include <scsi/scsi_driver.h>  #include <scsi/scsi_eh.h>  #include <scsi/scsi_transport.h>  #include <scsi/scsi_host.h> @@ -41,7 +43,7 @@  #include <trace/events/scsi.h> -#define SENSE_TIMEOUT		(10*HZ) +static void scsi_eh_done(struct scsi_cmnd *scmd);  /*   * These should *probably* be handled by the host itself. 
@@ -50,6 +52,10 @@  #define BUS_RESET_SETTLE_TIME   (10)  #define HOST_RESET_SETTLE_TIME  (10) +static int scsi_eh_try_stu(struct scsi_cmnd *scmd); +static int scsi_try_to_abort_cmd(struct scsi_host_template *, +				 struct scsi_cmnd *); +  /* called with shost->host_lock held */  void scsi_eh_wakeup(struct Scsi_Host *shost)  { @@ -83,6 +89,140 @@ void scsi_schedule_eh(struct Scsi_Host *shost)  }  EXPORT_SYMBOL_GPL(scsi_schedule_eh); +static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) +{ +	if (!shost->last_reset || shost->eh_deadline == -1) +		return 0; + +	/* +	 * 32bit accesses are guaranteed to be atomic +	 * (on all supported architectures), so instead +	 * of using a spinlock we can as well double check +	 * if eh_deadline has been set to 'off' during the +	 * time_before call. +	 */ +	if (time_before(jiffies, shost->last_reset + shost->eh_deadline) && +	    shost->eh_deadline > -1) +		return 0; + +	return 1; +} + +/** + * scmd_eh_abort_handler - Handle command aborts + * @work:	command to be aborted. 
+ */ +void +scmd_eh_abort_handler(struct work_struct *work) +{ +	struct scsi_cmnd *scmd = +		container_of(work, struct scsi_cmnd, abort_work.work); +	struct scsi_device *sdev = scmd->device; +	int rtn; + +	if (scsi_host_eh_past_deadline(sdev->host)) { +		SCSI_LOG_ERROR_RECOVERY(3, +			scmd_printk(KERN_INFO, scmd, +				    "scmd %p eh timeout, not aborting\n", +				    scmd)); +	} else { +		SCSI_LOG_ERROR_RECOVERY(3, +			scmd_printk(KERN_INFO, scmd, +				    "aborting command %p\n", scmd)); +		rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd); +		if (rtn == SUCCESS) { +			set_host_byte(scmd, DID_TIME_OUT); +			if (scsi_host_eh_past_deadline(sdev->host)) { +				SCSI_LOG_ERROR_RECOVERY(3, +					scmd_printk(KERN_INFO, scmd, +						    "scmd %p eh timeout, " +						    "not retrying aborted " +						    "command\n", scmd)); +			} else if (!scsi_noretry_cmd(scmd) && +			    (++scmd->retries <= scmd->allowed)) { +				SCSI_LOG_ERROR_RECOVERY(3, +					scmd_printk(KERN_WARNING, scmd, +						    "scmd %p retry " +						    "aborted command\n", scmd)); +				scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); +				return; +			} else { +				SCSI_LOG_ERROR_RECOVERY(3, +					scmd_printk(KERN_WARNING, scmd, +						    "scmd %p finish " +						    "aborted command\n", scmd)); +				scsi_finish_command(scmd); +				return; +			} +		} else { +			SCSI_LOG_ERROR_RECOVERY(3, +				scmd_printk(KERN_INFO, scmd, +					    "scmd %p abort failed, rtn %d\n", +					    scmd, rtn)); +		} +	} + +	if (!scsi_eh_scmd_add(scmd, 0)) { +		SCSI_LOG_ERROR_RECOVERY(3, +			scmd_printk(KERN_WARNING, scmd, +				    "scmd %p terminate " +				    "aborted command\n", scmd)); +		set_host_byte(scmd, DID_TIME_OUT); +		scsi_finish_command(scmd); +	} +} + +/** + * scsi_abort_command - schedule a command abort + * @scmd:	scmd to abort. 
+ * + * We only need to abort commands after a command timeout + */ +static int +scsi_abort_command(struct scsi_cmnd *scmd) +{ +	struct scsi_device *sdev = scmd->device; +	struct Scsi_Host *shost = sdev->host; +	unsigned long flags; + +	if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { +		/* +		 * Retry after abort failed, escalate to next level. +		 */ +		scmd->eh_eflags &= ~SCSI_EH_ABORT_SCHEDULED; +		SCSI_LOG_ERROR_RECOVERY(3, +			scmd_printk(KERN_INFO, scmd, +				    "scmd %p previous abort failed\n", scmd)); +		cancel_delayed_work(&scmd->abort_work); +		return FAILED; +	} + +	/* +	 * Do not try a command abort if +	 * SCSI EH has already started. +	 */ +	spin_lock_irqsave(shost->host_lock, flags); +	if (scsi_host_in_recovery(shost)) { +		spin_unlock_irqrestore(shost->host_lock, flags); +		SCSI_LOG_ERROR_RECOVERY(3, +			scmd_printk(KERN_INFO, scmd, +				    "scmd %p not aborting, host in recovery\n", +				    scmd)); +		return FAILED; +	} + +	if (shost->eh_deadline != -1 && !shost->last_reset) +		shost->last_reset = jiffies; +	spin_unlock_irqrestore(shost->host_lock, flags); + +	scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED; +	SCSI_LOG_ERROR_RECOVERY(3, +		scmd_printk(KERN_INFO, scmd, +			    "scmd %p abort scheduled\n", scmd)); +	queue_delayed_work(shost->tmf_work_q, &scmd->abort_work, HZ / 100); +	return SUCCESS; +} +  /**   * scsi_eh_scmd_add - add scsi cmd to error handling.   * @scmd:	scmd to run eh on. 
@@ -105,7 +245,12 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)  		if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))  			goto out_unlock; +	if (shost->eh_deadline != -1 && !shost->last_reset) +		shost->last_reset = jiffies; +  	ret = 1; +	if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) +		eh_flag &= ~SCSI_EH_CANCEL_CMD;  	scmd->eh_eflags |= eh_flag;  	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);  	shost->host_failed++; @@ -129,19 +274,27 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)  {  	struct scsi_cmnd *scmd = req->special;  	enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED; +	struct Scsi_Host *host = scmd->device->host;  	trace_scsi_dispatch_cmd_timeout(scmd);  	scsi_log_completion(scmd, TIMEOUT_ERROR); -	if (scmd->device->host->transportt->eh_timed_out) -		rtn = scmd->device->host->transportt->eh_timed_out(scmd); -	else if (scmd->device->host->hostt->eh_timed_out) -		rtn = scmd->device->host->hostt->eh_timed_out(scmd); +	if (host->eh_deadline != -1 && !host->last_reset) +		host->last_reset = jiffies; + +	if (host->transportt->eh_timed_out) +		rtn = host->transportt->eh_timed_out(scmd); +	else if (host->hostt->eh_timed_out) +		rtn = host->hostt->eh_timed_out(scmd); -	if (unlikely(rtn == BLK_EH_NOT_HANDLED && -		     !scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) { -		scmd->result |= DID_TIME_OUT << 16; -		rtn = BLK_EH_HANDLED; +	if (rtn == BLK_EH_NOT_HANDLED) { +		if (!host->hostt->no_async_abort && +		    scsi_abort_command(scmd) == SUCCESS) +			return BLK_EH_NOT_HANDLED; + +		set_host_byte(scmd, DID_TIME_OUT); +		if (!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD)) +			rtn = BLK_EH_HANDLED;  	}  	return rtn; @@ -195,7 +348,7 @@ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,  				++total_failures;  				if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD)  					++cmd_cancel; -				else  +				else  					++cmd_failed;  			}  		} @@ -214,16 +367,84 @@ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,  	
SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d"  					  " devices require eh work\n", -				  total_failures, devices_failed)); +				   total_failures, devices_failed));  }  #endif + /** + * scsi_report_lun_change - Set flag on all *other* devices on the same target + *                          to indicate that a UNIT ATTENTION is expected. + * @sdev:	Device reporting the UNIT ATTENTION + */ +static void scsi_report_lun_change(struct scsi_device *sdev) +{ +	sdev->sdev_target->expecting_lun_change = 1; +} + +/** + * scsi_report_sense - Examine scsi sense information and log messages for + *		       certain conditions, also issue uevents for some of them. + * @sdev:	Device reporting the sense code + * @sshdr:	sshdr to be examined + */ +static void scsi_report_sense(struct scsi_device *sdev, +			      struct scsi_sense_hdr *sshdr) +{ +	enum scsi_device_event evt_type = SDEV_EVT_MAXBITS;	/* i.e. none */ + +	if (sshdr->sense_key == UNIT_ATTENTION) { +		if (sshdr->asc == 0x3f && sshdr->ascq == 0x03) { +			evt_type = SDEV_EVT_INQUIRY_CHANGE_REPORTED; +			sdev_printk(KERN_WARNING, sdev, +				    "Inquiry data has changed"); +		} else if (sshdr->asc == 0x3f && sshdr->ascq == 0x0e) { +			evt_type = SDEV_EVT_LUN_CHANGE_REPORTED; +			scsi_report_lun_change(sdev); +			sdev_printk(KERN_WARNING, sdev, +				    "Warning! Received an indication that the " +				    "LUN assignments on this target have " +				    "changed. The Linux SCSI layer does not " +				    "automatically remap LUN assignments.\n"); +		} else if (sshdr->asc == 0x3f) +			sdev_printk(KERN_WARNING, sdev, +				    "Warning! Received an indication that the " +				    "operating parameters on this target have " +				    "changed. The Linux SCSI layer does not " +				    "automatically adjust these parameters.\n"); + +		if (sshdr->asc == 0x38 && sshdr->ascq == 0x07) { +			evt_type = SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED; +			sdev_printk(KERN_WARNING, sdev, +				    "Warning! 
Received an indication that the " +				    "LUN reached a thin provisioning soft " +				    "threshold.\n"); +		} + +		if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) { +			evt_type = SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED; +			sdev_printk(KERN_WARNING, sdev, +				    "Mode parameters changed"); +		} else if (sshdr->asc == 0x2a && sshdr->ascq == 0x09) { +			evt_type = SDEV_EVT_CAPACITY_CHANGE_REPORTED; +			sdev_printk(KERN_WARNING, sdev, +				    "Capacity data has changed"); +		} else if (sshdr->asc == 0x2a) +			sdev_printk(KERN_WARNING, sdev, +				    "Parameters changed"); +	} + +	if (evt_type != SDEV_EVT_MAXBITS) { +		set_bit(evt_type, sdev->pending_events); +		schedule_work(&sdev->event_work); +	} +} +  /**   * scsi_check_sense - Examine scsi cmd sense   * @scmd:	Cmd to have sense checked.   *   * Return value: - * 	SUCCESS or FAILED or NEEDS_RETRY + *	SUCCESS or FAILED or NEEDS_RETRY or ADD_TO_MLQUEUE   *   * Notes:   *	When a deferred error is detected the current command has @@ -237,6 +458,16 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)  	if (! scsi_command_normalize_sense(scmd, &sshdr))  		return FAILED;	/* no valid sense data */ +	if (scmd->cmnd[0] == TEST_UNIT_READY && scmd->scsi_done != scsi_eh_done) +		/* +		 * nasty: for mid-layer issued TURs, we need to return the +		 * actual sense data without any recovery attempt.  For eh +		 * issued ones, we need to try to recover and interpret +		 */ +		return SUCCESS; + +	scsi_report_sense(sdev, &sshdr); +  	if (scsi_sense_is_deferred(&sshdr))  		return NEEDS_RETRY; @@ -290,11 +521,27 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)  		 * so that we can deal with it there.  		 */  		if (scmd->device->expecting_cc_ua) { -			scmd->device->expecting_cc_ua = 0; -			return NEEDS_RETRY; +			/* +			 * Because some device does not queue unit +			 * attentions correctly, we carefully check +			 * additional sense code and qualifier so as +			 * not to squash media change unit attention. 
+			 */ +			if (sshdr.asc != 0x28 || sshdr.ascq != 0x00) { +				scmd->device->expecting_cc_ua = 0; +				return NEEDS_RETRY; +			}  		}  		/* -		 * if the device is in the process of becoming ready, we  +		 * we might also expect a cc/ua if another LUN on the target +		 * reported a UA with an ASC/ASCQ of 3F 0E - +		 * REPORTED LUNS DATA HAS CHANGED. +		 */ +		if (scmd->device->sdev_target->expecting_lun_change && +		    sshdr.asc == 0x3f && sshdr.ascq == 0x0e) +			return NEEDS_RETRY; +		/* +		 * if the device is in the process of becoming ready, we  		 * should retry.  		 */  		if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01)) @@ -306,36 +553,31 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)  		if (scmd->device->allow_restart &&  		    (sshdr.asc == 0x04) && (sshdr.ascq == 0x02))  			return FAILED; - -		if (sshdr.asc == 0x3f && sshdr.ascq == 0x0e) -			scmd_printk(KERN_WARNING, scmd, -				    "Warning! Received an indication that the " -				    "LUN assignments on this target have " -				    "changed. The Linux SCSI layer does not " -				    "automatically remap LUN assignments.\n"); -		else if (sshdr.asc == 0x3f) -			scmd_printk(KERN_WARNING, scmd, -				    "Warning! Received an indication that the " -				    "operating parameters on this target have " -				    "changed. The Linux SCSI layer does not " -				    "automatically adjust these parameters.\n"); -  		/*  		 * Pass the UA upwards for a determination in the completion  		 * functions.  		 
*/  		return SUCCESS; -		/* these three are not supported */ +		/* these are not supported */ +	case DATA_PROTECT: +		if (sshdr.asc == 0x27 && sshdr.ascq == 0x07) { +			/* Thin provisioning hard threshold reached */ +			set_host_byte(scmd, DID_ALLOC_FAILURE); +			return SUCCESS; +		}  	case COPY_ABORTED:  	case VOLUME_OVERFLOW:  	case MISCOMPARE: +	case BLANK_CHECK: +		set_host_byte(scmd, DID_TARGET_FAILURE);  		return SUCCESS;  	case MEDIUM_ERROR:  		if (sshdr.asc == 0x11 || /* UNRECOVERED READ ERR */  		    sshdr.asc == 0x13 || /* AMNF DATA FIELD */  		    sshdr.asc == 0x14) { /* RECORD NOT FOUND */ +			set_host_byte(scmd, DID_MEDIUM_ERROR);  			return SUCCESS;  		}  		return NEEDS_RETRY; @@ -344,11 +586,17 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)  		if (scmd->device->retry_hwerror)  			return ADD_TO_MLQUEUE;  		else -			return SUCCESS; +			set_host_byte(scmd, DID_TARGET_FAILURE);  	case ILLEGAL_REQUEST: -	case BLANK_CHECK: -	case DATA_PROTECT: +		if (sshdr.asc == 0x20 || /* Invalid command operation code */ +		    sshdr.asc == 0x21 || /* Logical block address out of range */ +		    sshdr.asc == 0x24 || /* Invalid field in cdb */ +		    sshdr.asc == 0x26) { /* Parameter value invalid */ +			set_host_byte(scmd, DID_TARGET_FAILURE); +		} +		return SUCCESS; +  	default:  		return SUCCESS;  	} @@ -488,7 +736,7 @@ static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)   */  static void scsi_eh_done(struct scsi_cmnd *scmd)  { -	struct completion     *eh_action; +	struct completion *eh_action;  	SCSI_LOG_ERROR_RECOVERY(3,  		printk("%s scmd: %p result: %x\n", @@ -501,28 +749,29 @@ static void scsi_eh_done(struct scsi_cmnd *scmd)  /**   * scsi_try_host_reset - ask host adapter to reset itself - * @scmd:	SCSI cmd to send hsot reset. + * @scmd:	SCSI cmd to send host reset.   
*/  static int scsi_try_host_reset(struct scsi_cmnd *scmd)  {  	unsigned long flags;  	int rtn; +	struct Scsi_Host *host = scmd->device->host; +	struct scsi_host_template *hostt = host->hostt;  	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",  					  __func__)); -	if (!scmd->device->host->hostt->eh_host_reset_handler) +	if (!hostt->eh_host_reset_handler)  		return FAILED; -	rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd); +	rtn = hostt->eh_host_reset_handler(scmd);  	if (rtn == SUCCESS) { -		if (!scmd->device->host->hostt->skip_settle_delay) +		if (!hostt->skip_settle_delay)  			ssleep(HOST_RESET_SETTLE_TIME); -		spin_lock_irqsave(scmd->device->host->host_lock, flags); -		scsi_report_bus_reset(scmd->device->host, -				      scmd_channel(scmd)); -		spin_unlock_irqrestore(scmd->device->host->host_lock, flags); +		spin_lock_irqsave(host->host_lock, flags); +		scsi_report_bus_reset(host, scmd_channel(scmd)); +		spin_unlock_irqrestore(host->host_lock, flags);  	}  	return rtn; @@ -536,22 +785,23 @@ static int scsi_try_bus_reset(struct scsi_cmnd *scmd)  {  	unsigned long flags;  	int rtn; +	struct Scsi_Host *host = scmd->device->host; +	struct scsi_host_template *hostt = host->hostt;  	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",  					  __func__)); -	if (!scmd->device->host->hostt->eh_bus_reset_handler) +	if (!hostt->eh_bus_reset_handler)  		return FAILED; -	rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd); +	rtn = hostt->eh_bus_reset_handler(scmd);  	if (rtn == SUCCESS) { -		if (!scmd->device->host->hostt->skip_settle_delay) +		if (!hostt->skip_settle_delay)  			ssleep(BUS_RESET_SETTLE_TIME); -		spin_lock_irqsave(scmd->device->host->host_lock, flags); -		scsi_report_bus_reset(scmd->device->host, -				      scmd_channel(scmd)); -		spin_unlock_irqrestore(scmd->device->host->host_lock, flags); +		spin_lock_irqsave(host->host_lock, flags); +		scsi_report_bus_reset(host, scmd_channel(scmd)); +		
spin_unlock_irqrestore(host->host_lock, flags);  	}  	return rtn; @@ -577,16 +827,18 @@ static int scsi_try_target_reset(struct scsi_cmnd *scmd)  {  	unsigned long flags;  	int rtn; +	struct Scsi_Host *host = scmd->device->host; +	struct scsi_host_template *hostt = host->hostt; -	if (!scmd->device->host->hostt->eh_target_reset_handler) +	if (!hostt->eh_target_reset_handler)  		return FAILED; -	rtn = scmd->device->host->hostt->eh_target_reset_handler(scmd); +	rtn = hostt->eh_target_reset_handler(scmd);  	if (rtn == SUCCESS) { -		spin_lock_irqsave(scmd->device->host->host_lock, flags); +		spin_lock_irqsave(host->host_lock, flags);  		__starget_for_each_device(scsi_target(scmd->device), NULL,  					  __scsi_report_device_reset); -		spin_unlock_irqrestore(scmd->device->host->host_lock, flags); +		spin_unlock_irqrestore(host->host_lock, flags);  	}  	return rtn; @@ -605,49 +857,28 @@ static int scsi_try_target_reset(struct scsi_cmnd *scmd)  static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)  {  	int rtn; +	struct scsi_host_template *hostt = scmd->device->host->hostt; -	if (!scmd->device->host->hostt->eh_device_reset_handler) +	if (!hostt->eh_device_reset_handler)  		return FAILED; -	rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd); +	rtn = hostt->eh_device_reset_handler(scmd);  	if (rtn == SUCCESS)  		__scsi_report_device_reset(scmd->device, NULL);  	return rtn;  } -static int __scsi_try_to_abort_cmd(struct scsi_cmnd *scmd) +static int scsi_try_to_abort_cmd(struct scsi_host_template *hostt, struct scsi_cmnd *scmd)  { -	if (!scmd->device->host->hostt->eh_abort_handler) +	if (!hostt->eh_abort_handler)  		return FAILED; -	return scmd->device->host->hostt->eh_abort_handler(scmd); -} - -/** - * scsi_try_to_abort_cmd - Ask host to abort a running command. - * @scmd:	SCSI cmd to abort from Lower Level. - * - * Notes: - *    This function will not return until the user's completion function - *    has been called.  there is no timeout on this operation.  
if the - *    author of the low-level driver wishes this operation to be timed, - *    they can provide this facility themselves.  helper functions in - *    scsi_error.c can be supplied to make this easier to do. - */ -static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd) -{ -	/* -	 * scsi_done was called just after the command timed out and before -	 * we had a chance to process it. (db) -	 */ -	if (scmd->serial_number == 0) -		return SUCCESS; -	return __scsi_try_to_abort_cmd(scmd); +	return hostt->eh_abort_handler(scmd);  }  static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd)  { -	if (__scsi_try_to_abort_cmd(scmd) != SUCCESS) +	if (scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd) != SUCCESS)  		if (scsi_try_bus_device_reset(scmd) != SUCCESS)  			if (scsi_try_target_reset(scmd) != SUCCESS)  				if (scsi_try_bus_reset(scmd) != SUCCESS) @@ -655,7 +886,7 @@ static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd)  }  /** - * scsi_eh_prep_cmnd  - Save a scsi command info as part of error recory + * scsi_eh_prep_cmnd  - Save a scsi command info as part of error recovery   * @scmd:       SCSI command structure to hijack   * @ses:        structure to save restore information   * @cmnd:       CDB to send. 
Can be NULL if no new cmnd is needed @@ -690,10 +921,12 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses,  	ses->prot_op = scmd->prot_op;  	scmd->prot_op = SCSI_PROT_NORMAL; +	scmd->eh_eflags = 0;  	scmd->cmnd = ses->eh_cmnd;  	memset(scmd->cmnd, 0, BLK_MAX_CDB);  	memset(&scmd->sdb, 0, sizeof(scmd->sdb));  	scmd->request->next_rq = NULL; +	scmd->result = 0;  	if (sense_bytes) {  		scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE, @@ -730,7 +963,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses,  EXPORT_SYMBOL(scsi_eh_prep_cmnd);  /** - * scsi_eh_restore_cmnd  - Restore a scsi command info as part of error recory + * scsi_eh_restore_cmnd  - Restore a scsi command info as part of error recovery   * @scmd:       SCSI command structure to restore   * @ses:        saved information from a coresponding call to scsi_eh_prep_cmnd   * @@ -753,7 +986,7 @@ void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses)  EXPORT_SYMBOL(scsi_eh_restore_cmnd);  /** - * scsi_send_eh_cmnd  - submit a scsi command as part of error recory + * scsi_send_eh_cmnd  - submit a scsi command as part of error recovery   * @scmd:       SCSI command structure to hijack   * @cmnd:       CDB to send   * @cmnd_size:  size in bytes of @cmnd @@ -772,34 +1005,49 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,  	struct scsi_device *sdev = scmd->device;  	struct Scsi_Host *shost = sdev->host;  	DECLARE_COMPLETION_ONSTACK(done); -	unsigned long timeleft; -	unsigned long flags; +	unsigned long timeleft = timeout;  	struct scsi_eh_save ses; +	const unsigned long stall_for = msecs_to_jiffies(100);  	int rtn; +retry:  	scsi_eh_prep_cmnd(scmd, &ses, cmnd, cmnd_size, sense_bytes);  	shost->eh_action = &done; -	spin_lock_irqsave(shost->host_lock, flags);  	scsi_log_send(scmd); -	shost->hostt->queuecommand(scmd, scsi_eh_done); -	spin_unlock_irqrestore(shost->host_lock, flags); - -	timeleft = 
wait_for_completion_timeout(&done, timeout); +	scmd->scsi_done = scsi_eh_done; +	rtn = shost->hostt->queuecommand(shost, scmd); +	if (rtn) { +		if (timeleft > stall_for) { +			scsi_eh_restore_cmnd(scmd, &ses); +			timeleft -= stall_for; +			msleep(jiffies_to_msecs(stall_for)); +			goto retry; +		} +		/* signal not to enter either branch of the if () below */ +		timeleft = 0; +		rtn = NEEDS_RETRY; +	} else { +		timeleft = wait_for_completion_timeout(&done, timeout); +		rtn = SUCCESS; +	}  	shost->eh_action = NULL; -	scsi_log_completion(scmd, SUCCESS); +	scsi_log_completion(scmd, rtn);  	SCSI_LOG_ERROR_RECOVERY(3,  		printk("%s: scmd: %p, timeleft: %ld\n",  			__func__, scmd, timeleft));  	/* -	 * If there is time left scsi_eh_done got called, and we will -	 * examine the actual status codes to see whether the command -	 * actually did complete normally, else tell the host to forget -	 * about this command. +	 * If there is time left scsi_eh_done got called, and we will examine +	 * the actual status codes to see whether the command actually did +	 * complete normally, else if we have a zero return and no time left, +	 * the command must still be pending, so abort it and return FAILED. 
+	 * If we never actually managed to issue the command, because +	 * ->queuecommand() kept returning non zero, use the rtn = FAILED +	 * value above (so don't execute either branch of the if)  	 */  	if (timeleft) {  		rtn = scsi_eh_completed_normally(scmd); @@ -819,12 +1067,13 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,  			rtn = FAILED;  			break;  		} -	} else { +	} else if (!rtn) {  		scsi_abort_eh_cmnd(scmd);  		rtn = FAILED;  	}  	scsi_eh_restore_cmnd(scmd, &ses); +  	return rtn;  } @@ -839,7 +1088,17 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,   */  static int scsi_request_sense(struct scsi_cmnd *scmd)  { -	return scsi_send_eh_cmnd(scmd, NULL, 0, SENSE_TIMEOUT, ~0); +	return scsi_send_eh_cmnd(scmd, NULL, 0, scmd->device->eh_timeout, ~0); +} + +static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn) +{ +	if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { +		struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); +		if (sdrv->eh_action) +			rtn = sdrv->eh_action(scmd, rtn); +	} +	return rtn;  }  /** @@ -869,7 +1128,7 @@ EXPORT_SYMBOL(scsi_eh_finish_cmd);   *   * Description:   *    See if we need to request sense information.  if so, then get it - *    now, so we have a better idea of what to do.   + *    now, so we have a better idea of what to do.   
*   * Notes:   *    This has the unfortunate side effect that if a shost adapter does @@ -886,6 +1145,7 @@ int scsi_eh_get_sense(struct list_head *work_q,  		      struct list_head *done_q)  {  	struct scsi_cmnd *scmd, *next; +	struct Scsi_Host *shost;  	int rtn;  	list_for_each_entry_safe(scmd, next, work_q, eh_entry) { @@ -893,6 +1153,23 @@ int scsi_eh_get_sense(struct list_head *work_q,  		    SCSI_SENSE_VALID(scmd))  			continue; +		shost = scmd->device->host; +		if (scsi_host_eh_past_deadline(shost)) { +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh deadline\n", +					     __func__)); +			break; +		} +		if (status_byte(scmd->result) != CHECK_CONDITION) +			/* +			 * don't request sense if there's no check condition +			 * status because the error we're processing isn't one +			 * that has a sense code (and some devices get +			 * confused by sense requests out of the blue) +			 */ +			continue; +  		SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,  						  "%s: requesting sense\n",  						  current->comm)); @@ -940,7 +1217,8 @@ static int scsi_eh_tur(struct scsi_cmnd *scmd)  	int retry_cnt = 1, rtn;  retry_tur: -	rtn = scsi_send_eh_cmnd(scmd, tur_command, 6, SENSE_TIMEOUT, 0); +	rtn = scsi_send_eh_cmnd(scmd, tur_command, 6, +				scmd->device->eh_timeout, 0);  	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",  		__func__, scmd, rtn)); @@ -958,6 +1236,62 @@ retry_tur:  }  /** + * scsi_eh_test_devices - check if devices are responding from error recovery. + * @cmd_list:	scsi commands in error recovery. + * @work_q:     queue for commands which still need more error recovery + * @done_q:     queue for commands which are finished + * @try_stu:    boolean on if a STU command should be tried in addition to TUR. + * + * Description: + *    Tests if devices are in a working state.  
Commands to devices now in + *    a working state are sent to the done_q while commands to devices which + *    are still failing to respond are returned to the work_q for more + *    processing. + **/ +static int scsi_eh_test_devices(struct list_head *cmd_list, +				struct list_head *work_q, +				struct list_head *done_q, int try_stu) +{ +	struct scsi_cmnd *scmd, *next; +	struct scsi_device *sdev; +	int finish_cmds; + +	while (!list_empty(cmd_list)) { +		scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry); +		sdev = scmd->device; + +		if (!try_stu) { +			if (scsi_host_eh_past_deadline(sdev->host)) { +				/* Push items back onto work_q */ +				list_splice_init(cmd_list, work_q); +				SCSI_LOG_ERROR_RECOVERY(3, +					shost_printk(KERN_INFO, sdev->host, +						     "skip %s, past eh deadline", +						     __func__)); +				break; +			} +		} + +		finish_cmds = !scsi_device_online(scmd->device) || +			(try_stu && !scsi_eh_try_stu(scmd) && +			 !scsi_eh_tur(scmd)) || +			!scsi_eh_tur(scmd); + +		list_for_each_entry_safe(scmd, next, cmd_list, eh_entry) +			if (scmd->device == sdev) { +				if (finish_cmds && +				    (try_stu || +				     scsi_eh_action(scmd, SUCCESS) == SUCCESS)) +					scsi_eh_finish_cmd(scmd, done_q); +				else +					list_move_tail(&scmd->eh_entry, work_q); +			} +	} +	return list_empty(work_q); +} + + +/**   * scsi_eh_abort_cmds - abort pending commands.   * @work_q:	&list_head for pending commands.   * @done_q:	&list_head for processed commands. 
@@ -973,32 +1307,43 @@ static int scsi_eh_abort_cmds(struct list_head *work_q,  			      struct list_head *done_q)  {  	struct scsi_cmnd *scmd, *next; +	LIST_HEAD(check_list);  	int rtn; +	struct Scsi_Host *shost;  	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {  		if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))  			continue; +		shost = scmd->device->host; +		if (scsi_host_eh_past_deadline(shost)) { +			list_splice_init(&check_list, work_q); +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh deadline\n", +					     __func__)); +			return list_empty(work_q); +		}  		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"  						  "0x%p\n", current->comm,  						  scmd)); -		rtn = scsi_try_to_abort_cmd(scmd); -		if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { -			scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD; -			if (!scsi_device_online(scmd->device) || -			    rtn == FAST_IO_FAIL || -			    !scsi_eh_tur(scmd)) { -				scsi_eh_finish_cmd(scmd, done_q); -			} -				 -		} else +		rtn = scsi_try_to_abort_cmd(shost->hostt, scmd); +		if (rtn == FAILED) {  			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"  							  " cmd failed:"  							  "0x%p\n",  							  current->comm,  							  scmd)); +			list_splice_init(&check_list, work_q); +			return list_empty(work_q); +		} +		scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD; +		if (rtn == FAST_IO_FAIL) +			scsi_eh_finish_cmd(scmd, done_q); +		else +			list_move_tail(&scmd->eh_entry, &check_list);  	} -	return list_empty(work_q); +	return scsi_eh_test_devices(&check_list, work_q, done_q, 0);  }  /** @@ -1033,7 +1378,7 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd)   *   * Notes:   *    If commands are failing due to not ready, initializing command required, - *	try revalidating the device, which will end up sending a start unit.  + *	try revalidating the device, which will end up sending a start unit.   
*/  static int scsi_eh_stu(struct Scsi_Host *shost,  			      struct list_head *work_q, @@ -1043,6 +1388,13 @@ static int scsi_eh_stu(struct Scsi_Host *shost,  	struct scsi_device *sdev;  	shost_for_each_device(sdev, shost) { +		if (scsi_host_eh_past_deadline(shost)) { +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh deadline\n", +					     __func__)); +			break; +		}  		stu_scmd = NULL;  		list_for_each_entry(scmd, work_q, eh_entry)  			if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) && @@ -1062,7 +1414,8 @@ static int scsi_eh_stu(struct Scsi_Host *shost,  			    !scsi_eh_tur(stu_scmd)) {  				list_for_each_entry_safe(scmd, next,  							  work_q, eh_entry) { -					if (scmd->device == sdev) +					if (scmd->device == sdev && +					    scsi_eh_action(scmd, SUCCESS) == SUCCESS)  						scsi_eh_finish_cmd(scmd, done_q);  				}  			} @@ -1087,7 +1440,7 @@ static int scsi_eh_stu(struct Scsi_Host *shost,   *    Try a bus device reset.  Still, look to see whether we have multiple   *    devices that are jammed or not - if we have multiple devices, it   *    makes no sense to try bus_device_reset - we really would need to try - *    a bus_reset instead.  + *    a bus_reset instead.   
*/  static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,  				    struct list_head *work_q, @@ -1098,6 +1451,13 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,  	int rtn;  	shost_for_each_device(sdev, shost) { +		if (scsi_host_eh_past_deadline(shost)) { +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh deadline\n", +					     __func__)); +			break; +		}  		bdr_scmd = NULL;  		list_for_each_entry(scmd, work_q, eh_entry)  			if (scmd->device == sdev) { @@ -1118,7 +1478,8 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,  			    !scsi_eh_tur(bdr_scmd)) {  				list_for_each_entry_safe(scmd, next,  							 work_q, eh_entry) { -					if (scmd->device == sdev) +					if (scmd->device == sdev && +					    scsi_eh_action(scmd, rtn) != FAILED)  						scsi_eh_finish_cmd(scmd,  								   done_q);  				} @@ -1148,57 +1509,58 @@ static int scsi_eh_target_reset(struct Scsi_Host *shost,  				struct list_head *work_q,  				struct list_head *done_q)  { -	struct scsi_cmnd *scmd, *tgtr_scmd, *next; -	unsigned int id = 0; -	int rtn; +	LIST_HEAD(tmp_list); +	LIST_HEAD(check_list); -	do { -		tgtr_scmd = NULL; -		list_for_each_entry(scmd, work_q, eh_entry) { -			if (id == scmd_id(scmd)) { -				tgtr_scmd = scmd; -				break; -			} -		} -		if (!tgtr_scmd) { -			/* not one exactly equal; find the next highest */ -			list_for_each_entry(scmd, work_q, eh_entry) { -				if (scmd_id(scmd) > id && -				    (!tgtr_scmd || -				     scmd_id(tgtr_scmd) > scmd_id(scmd))) -						tgtr_scmd = scmd; -			} +	list_splice_init(work_q, &tmp_list); + +	while (!list_empty(&tmp_list)) { +		struct scsi_cmnd *next, *scmd; +		int rtn; +		unsigned int id; + +		if (scsi_host_eh_past_deadline(shost)) { +			/* push back on work queue for further processing */ +			list_splice_init(&check_list, work_q); +			list_splice_init(&tmp_list, work_q); +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh 
deadline\n", +					     __func__)); +			return list_empty(work_q);  		} -		if (!tgtr_scmd) -			/* no more commands, that's it */ -			break; + +		scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry); +		id = scmd_id(scmd);  		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending target reset "  						  "to target %d\n",  						  current->comm, id)); -		rtn = scsi_try_target_reset(tgtr_scmd); -		if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { -			list_for_each_entry_safe(scmd, next, work_q, eh_entry) { -				if (id == scmd_id(scmd)) -					if (!scsi_device_online(scmd->device) || -					    rtn == FAST_IO_FAIL || -					    !scsi_eh_tur(tgtr_scmd)) -						scsi_eh_finish_cmd(scmd, -								   done_q); -			} -		} else +		rtn = scsi_try_target_reset(scmd); +		if (rtn != SUCCESS && rtn != FAST_IO_FAIL)  			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Target reset"  							  " failed target: "  							  "%d\n",  							  current->comm, id)); -		id++; -	} while(id != 0); +		list_for_each_entry_safe(scmd, next, &tmp_list, eh_entry) { +			if (scmd_id(scmd) != id) +				continue; -	return list_empty(work_q); +			if (rtn == SUCCESS) +				list_move_tail(&scmd->eh_entry, &check_list); +			else if (rtn == FAST_IO_FAIL) +				scsi_eh_finish_cmd(scmd, done_q); +			else +				/* push back on work queue for further processing */ +				list_move(&scmd->eh_entry, work_q); +		} +	} + +	return scsi_eh_test_devices(&check_list, work_q, done_q, 0);  }  /** - * scsi_eh_bus_reset - send a bus reset  + * scsi_eh_bus_reset - send a bus reset   * @shost:	&scsi host being recovered.   * @work_q:     &list_head for pending commands.   * @done_q:	&list_head for processed commands. 
@@ -1208,6 +1570,7 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,  			     struct list_head *done_q)  {  	struct scsi_cmnd *scmd, *chan_scmd, *next; +	LIST_HEAD(check_list);  	unsigned int channel;  	int rtn; @@ -1215,10 +1578,19 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,  	 * we really want to loop over the various channels, and do this on  	 * a channel by channel basis.  we should also check to see if any  	 * of the failed commands are on soft_reset devices, and if so, skip -	 * the reset.   +	 * the reset.  	 */  	for (channel = 0; channel <= shost->max_channel; channel++) { +		if (scsi_host_eh_past_deadline(shost)) { +			list_splice_init(&check_list, work_q); +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh deadline\n", +					     __func__)); +			return list_empty(work_q); +		} +  		chan_scmd = NULL;  		list_for_each_entry(scmd, work_q, eh_entry) {  			if (channel == scmd_channel(scmd)) { @@ -1239,12 +1611,14 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,  		rtn = scsi_try_bus_reset(chan_scmd);  		if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {  			list_for_each_entry_safe(scmd, next, work_q, eh_entry) { -				if (channel == scmd_channel(scmd)) -					if (!scsi_device_online(scmd->device) || -					    rtn == FAST_IO_FAIL || -					    !scsi_eh_tur(scmd)) +				if (channel == scmd_channel(scmd)) { +					if (rtn == FAST_IO_FAIL)  						scsi_eh_finish_cmd(scmd,  								   done_q); +					else +						list_move_tail(&scmd->eh_entry, +							       &check_list); +				}  			}  		} else {  			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST" @@ -1253,11 +1627,11 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,  							  channel));  		}  	} -	return list_empty(work_q); +	return scsi_eh_test_devices(&check_list, work_q, done_q, 0);  }  /** - * scsi_eh_host_reset - send a host reset  + * scsi_eh_host_reset - send a host reset   * @work_q:	list_head for processed commands.   
* @done_q:	list_head for processed commands.   */ @@ -1265,6 +1639,7 @@ static int scsi_eh_host_reset(struct list_head *work_q,  			      struct list_head *done_q)  {  	struct scsi_cmnd *scmd, *next; +	LIST_HEAD(check_list);  	int rtn;  	if (!list_empty(work_q)) { @@ -1275,12 +1650,10 @@ static int scsi_eh_host_reset(struct list_head *work_q,  						  , current->comm));  		rtn = scsi_try_host_reset(scmd); -		if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { +		if (rtn == SUCCESS) { +			list_splice_init(work_q, &check_list); +		} else if (rtn == FAST_IO_FAIL) {  			list_for_each_entry_safe(scmd, next, work_q, eh_entry) { -				if (!scsi_device_online(scmd->device) || -				    rtn == FAST_IO_FAIL || -				    (!scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) || -				    !scsi_eh_tur(scmd))  					scsi_eh_finish_cmd(scmd, done_q);  			}  		} else { @@ -1289,7 +1662,7 @@ static int scsi_eh_host_reset(struct list_head *work_q,  							  current->comm));  		}  	} -	return list_empty(work_q); +	return scsi_eh_test_devices(&check_list, work_q, done_q, 1);  }  /** @@ -1317,7 +1690,7 @@ static void scsi_eh_offline_sdevs(struct list_head *work_q,  }  /** - * scsi_noretry_cmd - determinte if command should be failed fast + * scsi_noretry_cmd - determine if command should be failed fast   * @scmd:	SCSI cmd to examine.   */  int scsi_noretry_cmd(struct scsi_cmnd *scmd) @@ -1325,6 +1698,8 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)  	switch (host_byte(scmd->result)) {  	case DID_OK:  		break; +	case DID_TIME_OUT: +		goto check_type;  	case DID_BUS_BUSY:  		return (scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT);  	case DID_PARITY: @@ -1338,18 +1713,19 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)  		return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER);  	} -	switch (status_byte(scmd->result)) { -	case CHECK_CONDITION: -		/* -		 * assume caller has checked sense and determinted -		 * the check condition was retryable. 
-		 */ -		if (scmd->request->cmd_flags & REQ_FAILFAST_DEV || -		    scmd->request->cmd_type == REQ_TYPE_BLOCK_PC) -			return 1; -	} +	if (status_byte(scmd->result) != CHECK_CONDITION) +		return 0; -	return 0; +check_type: +	/* +	 * assume caller has checked sense and determined +	 * the check condition was retryable. +	 */ +	if (scmd->request->cmd_flags & REQ_FAILFAST_DEV || +	    scmd->request->cmd_type == REQ_TYPE_BLOCK_PC) +		return 1; +	else +		return 0;  }  /** @@ -1399,9 +1775,13 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)  		 * looks good.  drop through, and check the next byte.  		 */  		break; +	case DID_ABORT: +		if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { +			set_host_byte(scmd, DID_TIME_OUT); +			return SUCCESS; +		}  	case DID_NO_CONNECT:  	case DID_BAD_TARGET: -	case DID_ABORT:  		/*  		 * note - this means that we just report the status back  		 * to the top level driver, not that we actually think @@ -1410,7 +1790,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)  		return SUCCESS;  		/*  		 * when the low level driver returns did_soft_error, -		 * it is responsible for keeping an internal retry counter  +		 * it is responsible for keeping an internal retry counter  		 * in order to avoid endless loops (db)  		 *  		 * actually this is a bug in this function here.  
we should @@ -1448,7 +1828,6 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)  			 */  			break;  		/* fallthrough */ -  	case DID_BUS_BUSY:  	case DID_PARITY:  		goto maybe_retry; @@ -1495,6 +1874,8 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)  		 */  		return ADD_TO_MLQUEUE;  	case GOOD: +		if (scmd->cmnd[0] == REPORT_LUNS) +			scmd->device->sdev_target->expecting_lun_change = 0;  		scsi_handle_queue_ramp_up(scmd->device);  	case COMMAND_TERMINATED:  		return SUCCESS; @@ -1521,6 +1902,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)  	case RESERVATION_CONFLICT:  		sdev_printk(KERN_INFO, scmd->device,  			    "reservation conflict\n"); +		set_host_byte(scmd, DID_NEXUS_FAILURE);  		return SUCCESS; /* causes immediate i/o error */  	default:  		return FAILED; @@ -1570,6 +1952,8 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)  	 */  	req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); +	blk_rq_set_block_pc(req); +  	req->cmd[0] = ALLOW_MEDIUM_REMOVAL;  	req->cmd[1] = 0;  	req->cmd[2] = 0; @@ -1579,7 +1963,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)  	req->cmd_len = COMMAND_SIZE(req->cmd[0]); -	req->cmd_type = REQ_TYPE_BLOCK_PC;  	req->cmd_flags |= REQ_QUIET;  	req->timeout = 10 * HZ;  	req->retries = 5; @@ -1615,8 +1998,9 @@ static void scsi_restart_operations(struct Scsi_Host *shost)  	 * will be requests for character device operations, and also for  	 * ioctls to queued block devices.  	 */ -	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n", -					  __func__)); +	SCSI_LOG_ERROR_RECOVERY(3, +		printk("scsi_eh_%d waking up host to restart\n", +		       shost->host_no));  	spin_lock_irqsave(shost->host_lock, flags);  	if (scsi_host_set_state(shost, SHOST_RUNNING)) @@ -1633,6 +2017,20 @@ static void scsi_restart_operations(struct Scsi_Host *shost)  	 * requests are started.  	 
*/  	scsi_run_host_queues(shost); + +	/* +	 * if eh is active and host_eh_scheduled is pending we need to re-run +	 * recovery.  we do this check after scsi_run_host_queues() to allow +	 * everything pent up since the last eh run a chance to make forward +	 * progress before we sync again.  Either we'll immediately re-run +	 * recovery or scsi_device_unbusy() will wake us again when these +	 * pending commands complete. +	 */ +	spin_lock_irqsave(shost->host_lock, flags); +	if (shost->host_eh_scheduled) +		if (scsi_host_set_state(shost, SHOST_RECOVERY)) +			WARN_ON(scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)); +	spin_unlock_irqrestore(shost->host_lock, flags);  }  /** @@ -1729,6 +2127,10 @@ static void scsi_unjam_host(struct Scsi_Host *shost)  		if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))  			scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q); +	spin_lock_irqsave(shost->host_lock, flags); +	if (shost->eh_deadline != -1) +		shost->last_reset = 0; +	spin_unlock_irqrestore(shost->host_lock, flags);  	scsi_eh_flush_done_q(&eh_done_q);  } @@ -1750,29 +2152,29 @@ int scsi_error_handler(void *data)  	 * We never actually get interrupted because kthread_run  	 * disables signal delivery for the created thread.  	 
*/ -	set_current_state(TASK_INTERRUPTIBLE);  	while (!kthread_should_stop()) { +		set_current_state(TASK_INTERRUPTIBLE);  		if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||  		    shost->host_failed != shost->host_busy) {  			SCSI_LOG_ERROR_RECOVERY(1, -				printk("Error handler scsi_eh_%d sleeping\n", +				printk("scsi_eh_%d: sleeping\n",  					shost->host_no));  			schedule(); -			set_current_state(TASK_INTERRUPTIBLE);  			continue;  		}  		__set_current_state(TASK_RUNNING);  		SCSI_LOG_ERROR_RECOVERY(1, -			printk("Error handler scsi_eh_%d waking up\n", -				shost->host_no)); +			printk("scsi_eh_%d: waking up %d/%d/%d\n", +			       shost->host_no, shost->host_eh_scheduled, +			       shost->host_failed, shost->host_busy));  		/*  		 * We have a host that is failing for some reason.  Figure out  		 * what we need to do to get it up and online again (if we can).  		 * If we fail, we end up taking the thing offline.  		 */ -		if (scsi_autopm_get_host(shost) != 0) { +		if (!shost->eh_noresume && scsi_autopm_get_host(shost) != 0) {  			SCSI_LOG_ERROR_RECOVERY(1,  				printk(KERN_ERR "Error handler scsi_eh_%d "  						"unable to autoresume\n", @@ -1793,8 +2195,8 @@ int scsi_error_handler(void *data)  		 * which are still online.  		 
*/  		scsi_restart_operations(shost); -		scsi_autopm_put_host(shost); -		set_current_state(TASK_INTERRUPTIBLE); +		if (!shost->eh_noresume) +			scsi_autopm_put_host(shost);  	}  	__set_current_state(TASK_RUNNING); @@ -1900,7 +2302,18 @@ scsi_reset_provider(struct scsi_device *dev, int flag)  	if (scsi_autopm_get_host(shost) < 0)  		return FAILED; +	if (!get_device(&dev->sdev_gendev)) { +		rtn = FAILED; +		goto out_put_autopm_host; +	} +  	scmd = scsi_get_command(dev, GFP_KERNEL); +	if (!scmd) { +		rtn = FAILED; +		put_device(&dev->sdev_gendev); +		goto out_put_autopm_host; +	} +  	blk_rq_init(NULL, &req);  	scmd->request = &req; @@ -1957,6 +2370,7 @@ scsi_reset_provider(struct scsi_device *dev, int flag)  	scsi_run_host_queues(shost);  	scsi_next_command(scmd); +out_put_autopm_host:  	scsi_autopm_put_host(shost);  	return rtn;  } @@ -2007,7 +2421,7 @@ int scsi_normalize_sense(const u8 *sense_buffer, int sb_len,  		if (sb_len > 7)  			sshdr->additional_length = sense_buffer[7];  	} else { -		/*  +		/*  		 * fixed format  		 */  		if (sb_len > 2)  | 
