diff options
Diffstat (limited to 'drivers/scsi/scsi_error.c')
| -rw-r--r-- | drivers/scsi/scsi_error.c | 341 | 
1 file changed, 301 insertions(+), 40 deletions(-)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 83e591b6019..7e957918f33 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -53,6 +53,8 @@ static void scsi_eh_done(struct scsi_cmnd *scmd);  #define HOST_RESET_SETTLE_TIME  (10)  static int scsi_eh_try_stu(struct scsi_cmnd *scmd); +static int scsi_try_to_abort_cmd(struct scsi_host_template *, +				 struct scsi_cmnd *);  /* called with shost->host_lock held */  void scsi_eh_wakeup(struct Scsi_Host *shost) @@ -87,6 +89,140 @@ void scsi_schedule_eh(struct Scsi_Host *shost)  }  EXPORT_SYMBOL_GPL(scsi_schedule_eh); +static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) +{ +	if (!shost->last_reset || shost->eh_deadline == -1) +		return 0; + +	/* +	 * 32bit accesses are guaranteed to be atomic +	 * (on all supported architectures), so instead +	 * of using a spinlock we can as well double check +	 * if eh_deadline has been set to 'off' during the +	 * time_before call. +	 */ +	if (time_before(jiffies, shost->last_reset + shost->eh_deadline) && +	    shost->eh_deadline > -1) +		return 0; + +	return 1; +} + +/** + * scmd_eh_abort_handler - Handle command aborts + * @work:	command to be aborted. 
+ */ +void +scmd_eh_abort_handler(struct work_struct *work) +{ +	struct scsi_cmnd *scmd = +		container_of(work, struct scsi_cmnd, abort_work.work); +	struct scsi_device *sdev = scmd->device; +	int rtn; + +	if (scsi_host_eh_past_deadline(sdev->host)) { +		SCSI_LOG_ERROR_RECOVERY(3, +			scmd_printk(KERN_INFO, scmd, +				    "scmd %p eh timeout, not aborting\n", +				    scmd)); +	} else { +		SCSI_LOG_ERROR_RECOVERY(3, +			scmd_printk(KERN_INFO, scmd, +				    "aborting command %p\n", scmd)); +		rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd); +		if (rtn == SUCCESS) { +			set_host_byte(scmd, DID_TIME_OUT); +			if (scsi_host_eh_past_deadline(sdev->host)) { +				SCSI_LOG_ERROR_RECOVERY(3, +					scmd_printk(KERN_INFO, scmd, +						    "scmd %p eh timeout, " +						    "not retrying aborted " +						    "command\n", scmd)); +			} else if (!scsi_noretry_cmd(scmd) && +			    (++scmd->retries <= scmd->allowed)) { +				SCSI_LOG_ERROR_RECOVERY(3, +					scmd_printk(KERN_WARNING, scmd, +						    "scmd %p retry " +						    "aborted command\n", scmd)); +				scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); +				return; +			} else { +				SCSI_LOG_ERROR_RECOVERY(3, +					scmd_printk(KERN_WARNING, scmd, +						    "scmd %p finish " +						    "aborted command\n", scmd)); +				scsi_finish_command(scmd); +				return; +			} +		} else { +			SCSI_LOG_ERROR_RECOVERY(3, +				scmd_printk(KERN_INFO, scmd, +					    "scmd %p abort failed, rtn %d\n", +					    scmd, rtn)); +		} +	} + +	if (!scsi_eh_scmd_add(scmd, 0)) { +		SCSI_LOG_ERROR_RECOVERY(3, +			scmd_printk(KERN_WARNING, scmd, +				    "scmd %p terminate " +				    "aborted command\n", scmd)); +		set_host_byte(scmd, DID_TIME_OUT); +		scsi_finish_command(scmd); +	} +} + +/** + * scsi_abort_command - schedule a command abort + * @scmd:	scmd to abort. 
+ * + * We only need to abort commands after a command timeout + */ +static int +scsi_abort_command(struct scsi_cmnd *scmd) +{ +	struct scsi_device *sdev = scmd->device; +	struct Scsi_Host *shost = sdev->host; +	unsigned long flags; + +	if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { +		/* +		 * Retry after abort failed, escalate to next level. +		 */ +		scmd->eh_eflags &= ~SCSI_EH_ABORT_SCHEDULED; +		SCSI_LOG_ERROR_RECOVERY(3, +			scmd_printk(KERN_INFO, scmd, +				    "scmd %p previous abort failed\n", scmd)); +		cancel_delayed_work(&scmd->abort_work); +		return FAILED; +	} + +	/* +	 * Do not try a command abort if +	 * SCSI EH has already started. +	 */ +	spin_lock_irqsave(shost->host_lock, flags); +	if (scsi_host_in_recovery(shost)) { +		spin_unlock_irqrestore(shost->host_lock, flags); +		SCSI_LOG_ERROR_RECOVERY(3, +			scmd_printk(KERN_INFO, scmd, +				    "scmd %p not aborting, host in recovery\n", +				    scmd)); +		return FAILED; +	} + +	if (shost->eh_deadline != -1 && !shost->last_reset) +		shost->last_reset = jiffies; +	spin_unlock_irqrestore(shost->host_lock, flags); + +	scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED; +	SCSI_LOG_ERROR_RECOVERY(3, +		scmd_printk(KERN_INFO, scmd, +			    "scmd %p abort scheduled\n", scmd)); +	queue_delayed_work(shost->tmf_work_q, &scmd->abort_work, HZ / 100); +	return SUCCESS; +} +  /**   * scsi_eh_scmd_add - add scsi cmd to error handling.   * @scmd:	scmd to run eh on. 
@@ -109,7 +245,12 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)  		if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))  			goto out_unlock; +	if (shost->eh_deadline != -1 && !shost->last_reset) +		shost->last_reset = jiffies; +  	ret = 1; +	if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) +		eh_flag &= ~SCSI_EH_CANCEL_CMD;  	scmd->eh_eflags |= eh_flag;  	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);  	shost->host_failed++; @@ -138,16 +279,23 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)  	trace_scsi_dispatch_cmd_timeout(scmd);  	scsi_log_completion(scmd, TIMEOUT_ERROR); +	if (host->eh_deadline != -1 && !host->last_reset) +		host->last_reset = jiffies; +  	if (host->transportt->eh_timed_out)  		rtn = host->transportt->eh_timed_out(scmd);  	else if (host->hostt->eh_timed_out)  		rtn = host->hostt->eh_timed_out(scmd); -	scmd->result |= DID_TIME_OUT << 16; +	if (rtn == BLK_EH_NOT_HANDLED) { +		if (!host->hostt->no_async_abort && +		    scsi_abort_command(scmd) == SUCCESS) +			return BLK_EH_NOT_HANDLED; -	if (unlikely(rtn == BLK_EH_NOT_HANDLED && -		     !scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) -		rtn = BLK_EH_HANDLED; +		set_host_byte(scmd, DID_TIME_OUT); +		if (!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD)) +			rtn = BLK_EH_HANDLED; +	}  	return rtn;  } @@ -773,10 +921,12 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses,  	ses->prot_op = scmd->prot_op;  	scmd->prot_op = SCSI_PROT_NORMAL; +	scmd->eh_eflags = 0;  	scmd->cmnd = ses->eh_cmnd;  	memset(scmd->cmnd, 0, BLK_MAX_CDB);  	memset(&scmd->sdb, 0, sizeof(scmd->sdb));  	scmd->request->next_rq = NULL; +	scmd->result = 0;  	if (sense_bytes) {  		scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE, @@ -879,6 +1029,7 @@ retry:  		rtn = NEEDS_RETRY;  	} else {  		timeleft = wait_for_completion_timeout(&done, timeout); +		rtn = SUCCESS;  	}  	shost->eh_action = NULL; @@ -923,12 +1074,6 @@ retry:  	scsi_eh_restore_cmnd(scmd, &ses); -	if 
(scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { -		struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); -		if (sdrv->eh_action) -			rtn = sdrv->eh_action(scmd, cmnd, cmnd_size, rtn); -	} -  	return rtn;  } @@ -946,6 +1091,16 @@ static int scsi_request_sense(struct scsi_cmnd *scmd)  	return scsi_send_eh_cmnd(scmd, NULL, 0, scmd->device->eh_timeout, ~0);  } +static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn) +{ +	if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { +		struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); +		if (sdrv->eh_action) +			rtn = sdrv->eh_action(scmd, rtn); +	} +	return rtn; +} +  /**   * scsi_eh_finish_cmd - Handle a cmd that eh is finished with.   * @scmd:	Original SCSI cmd that eh has finished. @@ -990,6 +1145,7 @@ int scsi_eh_get_sense(struct list_head *work_q,  		      struct list_head *done_q)  {  	struct scsi_cmnd *scmd, *next; +	struct Scsi_Host *shost;  	int rtn;  	list_for_each_entry_safe(scmd, next, work_q, eh_entry) { @@ -997,6 +1153,23 @@ int scsi_eh_get_sense(struct list_head *work_q,  		    SCSI_SENSE_VALID(scmd))  			continue; +		shost = scmd->device->host; +		if (scsi_host_eh_past_deadline(shost)) { +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh deadline\n", +					     __func__)); +			break; +		} +		if (status_byte(scmd->result) != CHECK_CONDITION) +			/* +			 * don't request sense if there's no check condition +			 * status because the error we're processing isn't one +			 * that has a sense code (and some devices get +			 * confused by sense requests out of the blue) +			 */ +			continue; +  		SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,  						  "%s: requesting sense\n",  						  current->comm)); @@ -1087,6 +1260,18 @@ static int scsi_eh_test_devices(struct list_head *cmd_list,  		scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry);  		sdev = scmd->device; +		if (!try_stu) { +			if (scsi_host_eh_past_deadline(sdev->host)) { +				/* Push items back 
onto work_q */ +				list_splice_init(cmd_list, work_q); +				SCSI_LOG_ERROR_RECOVERY(3, +					shost_printk(KERN_INFO, sdev->host, +						     "skip %s, past eh deadline", +						     __func__)); +				break; +			} +		} +  		finish_cmds = !scsi_device_online(scmd->device) ||  			(try_stu && !scsi_eh_try_stu(scmd) &&  			 !scsi_eh_tur(scmd)) || @@ -1094,7 +1279,9 @@ static int scsi_eh_test_devices(struct list_head *cmd_list,  		list_for_each_entry_safe(scmd, next, cmd_list, eh_entry)  			if (scmd->device == sdev) { -				if (finish_cmds) +				if (finish_cmds && +				    (try_stu || +				     scsi_eh_action(scmd, SUCCESS) == SUCCESS))  					scsi_eh_finish_cmd(scmd, done_q);  				else  					list_move_tail(&scmd->eh_entry, work_q); @@ -1122,26 +1309,38 @@ static int scsi_eh_abort_cmds(struct list_head *work_q,  	struct scsi_cmnd *scmd, *next;  	LIST_HEAD(check_list);  	int rtn; +	struct Scsi_Host *shost;  	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {  		if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))  			continue; +		shost = scmd->device->host; +		if (scsi_host_eh_past_deadline(shost)) { +			list_splice_init(&check_list, work_q); +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh deadline\n", +					     __func__)); +			return list_empty(work_q); +		}  		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"  						  "0x%p\n", current->comm,  						  scmd)); -		rtn = scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd); -		if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { -			scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD; -			if (rtn == FAST_IO_FAIL) -				scsi_eh_finish_cmd(scmd, done_q); -			else -				list_move_tail(&scmd->eh_entry, &check_list); -		} else +		rtn = scsi_try_to_abort_cmd(shost->hostt, scmd); +		if (rtn == FAILED) {  			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"  							  " cmd failed:"  							  "0x%p\n",  							  current->comm,  							  scmd)); +			list_splice_init(&check_list, work_q); +			return 
list_empty(work_q); +		} +		scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD; +		if (rtn == FAST_IO_FAIL) +			scsi_eh_finish_cmd(scmd, done_q); +		else +			list_move_tail(&scmd->eh_entry, &check_list);  	}  	return scsi_eh_test_devices(&check_list, work_q, done_q, 0); @@ -1189,6 +1388,13 @@ static int scsi_eh_stu(struct Scsi_Host *shost,  	struct scsi_device *sdev;  	shost_for_each_device(sdev, shost) { +		if (scsi_host_eh_past_deadline(shost)) { +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh deadline\n", +					     __func__)); +			break; +		}  		stu_scmd = NULL;  		list_for_each_entry(scmd, work_q, eh_entry)  			if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) && @@ -1208,7 +1414,8 @@ static int scsi_eh_stu(struct Scsi_Host *shost,  			    !scsi_eh_tur(stu_scmd)) {  				list_for_each_entry_safe(scmd, next,  							  work_q, eh_entry) { -					if (scmd->device == sdev) +					if (scmd->device == sdev && +					    scsi_eh_action(scmd, SUCCESS) == SUCCESS)  						scsi_eh_finish_cmd(scmd, done_q);  				}  			} @@ -1244,6 +1451,13 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,  	int rtn;  	shost_for_each_device(sdev, shost) { +		if (scsi_host_eh_past_deadline(shost)) { +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh deadline\n", +					     __func__)); +			break; +		}  		bdr_scmd = NULL;  		list_for_each_entry(scmd, work_q, eh_entry)  			if (scmd->device == sdev) { @@ -1264,7 +1478,8 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,  			    !scsi_eh_tur(bdr_scmd)) {  				list_for_each_entry_safe(scmd, next,  							 work_q, eh_entry) { -					if (scmd->device == sdev) +					if (scmd->device == sdev && +					    scsi_eh_action(scmd, rtn) != FAILED)  						scsi_eh_finish_cmd(scmd,  								   done_q);  				} @@ -1304,6 +1519,17 @@ static int scsi_eh_target_reset(struct Scsi_Host *shost,  		int rtn;  		unsigned int id; +		if (scsi_host_eh_past_deadline(shost)) 
{ +			/* push back on work queue for further processing */ +			list_splice_init(&check_list, work_q); +			list_splice_init(&tmp_list, work_q); +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh deadline\n", +					     __func__)); +			return list_empty(work_q); +		} +  		scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry);  		id = scmd_id(scmd); @@ -1356,6 +1582,15 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,  	 */  	for (channel = 0; channel <= shost->max_channel; channel++) { +		if (scsi_host_eh_past_deadline(shost)) { +			list_splice_init(&check_list, work_q); +			SCSI_LOG_ERROR_RECOVERY(3, +				shost_printk(KERN_INFO, shost, +					    "skip %s, past eh deadline\n", +					     __func__)); +			return list_empty(work_q); +		} +  		chan_scmd = NULL;  		list_for_each_entry(scmd, work_q, eh_entry) {  			if (channel == scmd_channel(scmd)) { @@ -1455,7 +1690,7 @@ static void scsi_eh_offline_sdevs(struct list_head *work_q,  }  /** - * scsi_noretry_cmd - determinte if command should be failed fast + * scsi_noretry_cmd - determine if command should be failed fast   * @scmd:	SCSI cmd to examine.   */  int scsi_noretry_cmd(struct scsi_cmnd *scmd) @@ -1463,6 +1698,8 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)  	switch (host_byte(scmd->result)) {  	case DID_OK:  		break; +	case DID_TIME_OUT: +		goto check_type;  	case DID_BUS_BUSY:  		return (scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT);  	case DID_PARITY: @@ -1476,18 +1713,19 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)  		return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER);  	} -	switch (status_byte(scmd->result)) { -	case CHECK_CONDITION: -		/* -		 * assume caller has checked sense and determinted -		 * the check condition was retryable. 
-		 */ -		if (scmd->request->cmd_flags & REQ_FAILFAST_DEV || -		    scmd->request->cmd_type == REQ_TYPE_BLOCK_PC) -			return 1; -	} +	if (status_byte(scmd->result) != CHECK_CONDITION) +		return 0; -	return 0; +check_type: +	/* +	 * assume caller has checked sense and determined +	 * the check condition was retryable. +	 */ +	if (scmd->request->cmd_flags & REQ_FAILFAST_DEV || +	    scmd->request->cmd_type == REQ_TYPE_BLOCK_PC) +		return 1; +	else +		return 0;  }  /** @@ -1537,9 +1775,13 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)  		 * looks good.  drop through, and check the next byte.  		 */  		break; +	case DID_ABORT: +		if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { +			set_host_byte(scmd, DID_TIME_OUT); +			return SUCCESS; +		}  	case DID_NO_CONNECT:  	case DID_BAD_TARGET: -	case DID_ABORT:  		/*  		 * note - this means that we just report the status back  		 * to the top level driver, not that we actually think @@ -1710,6 +1952,8 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)  	 */  	req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); +	blk_rq_set_block_pc(req); +  	req->cmd[0] = ALLOW_MEDIUM_REMOVAL;  	req->cmd[1] = 0;  	req->cmd[2] = 0; @@ -1719,7 +1963,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)  	req->cmd_len = COMMAND_SIZE(req->cmd[0]); -	req->cmd_type = REQ_TYPE_BLOCK_PC;  	req->cmd_flags |= REQ_QUIET;  	req->timeout = 10 * HZ;  	req->retries = 5; @@ -1755,8 +1998,9 @@ static void scsi_restart_operations(struct Scsi_Host *shost)  	 * will be requests for character device operations, and also for  	 * ioctls to queued block devices.  	 
*/ -	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n", -					  __func__)); +	SCSI_LOG_ERROR_RECOVERY(3, +		printk("scsi_eh_%d waking up host to restart\n", +		       shost->host_no));  	spin_lock_irqsave(shost->host_lock, flags);  	if (scsi_host_set_state(shost, SHOST_RUNNING)) @@ -1883,6 +2127,10 @@ static void scsi_unjam_host(struct Scsi_Host *shost)  		if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))  			scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q); +	spin_lock_irqsave(shost->host_lock, flags); +	if (shost->eh_deadline != -1) +		shost->last_reset = 0; +	spin_unlock_irqrestore(shost->host_lock, flags);  	scsi_eh_flush_done_q(&eh_done_q);  } @@ -1909,7 +2157,7 @@ int scsi_error_handler(void *data)  		if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||  		    shost->host_failed != shost->host_busy) {  			SCSI_LOG_ERROR_RECOVERY(1, -				printk("Error handler scsi_eh_%d sleeping\n", +				printk("scsi_eh_%d: sleeping\n",  					shost->host_no));  			schedule();  			continue; @@ -1917,8 +2165,9 @@ int scsi_error_handler(void *data)  		__set_current_state(TASK_RUNNING);  		SCSI_LOG_ERROR_RECOVERY(1, -			printk("Error handler scsi_eh_%d waking up\n", -				shost->host_no)); +			printk("scsi_eh_%d: waking up %d/%d/%d\n", +			       shost->host_no, shost->host_eh_scheduled, +			       shost->host_failed, shost->host_busy));  		/*  		 * We have a host that is failing for some reason.  
Figure out @@ -2053,7 +2302,18 @@ scsi_reset_provider(struct scsi_device *dev, int flag)  	if (scsi_autopm_get_host(shost) < 0)  		return FAILED; +	if (!get_device(&dev->sdev_gendev)) { +		rtn = FAILED; +		goto out_put_autopm_host; +	} +  	scmd = scsi_get_command(dev, GFP_KERNEL); +	if (!scmd) { +		rtn = FAILED; +		put_device(&dev->sdev_gendev); +		goto out_put_autopm_host; +	} +  	blk_rq_init(NULL, &req);  	scmd->request = &req; @@ -2110,6 +2370,7 @@ scsi_reset_provider(struct scsi_device *dev, int flag)  	scsi_run_host_queues(shost);  	scsi_next_command(scmd); +out_put_autopm_host:  	scsi_autopm_put_host(shost);  	return rtn;  }  | 
