Linux v2.1.105 changed the algorithm for polling for the BSY signal in NCR5380_select() and NCR5380_main(). Presently, this code has a bug. Back then, NCR5380_set_timer(hostdata, 1) meant reschedule main() after sleeping for 10 ms. Repeated 25 times this provided the recommended 250 ms selection time-out delay. This got broken when HZ became configurable. We could fix this but there's no need to reschedule the main loop. This BSY polling presently happens when the NCR5380_main() work queue item calls NCR5380_select(), which in turn schedules NCR5380_main(), which calls NCR5380_select() again, and so on. This algorithm is a deviation from the simpler one in atari_NCR5380.c. The extra complexity and state is pointless. There's no reason to stop selection half-way and return to the main loop when the main loop can do nothing useful until selection completes. So just poll for BSY. We can sleep while polling now that we have a suitable workqueue. Signed-off-by: Finn Thain --- drivers/scsi/NCR5380.c | 74 ++++++++----------------------------------- drivers/scsi/NCR5380.h | 2 - drivers/scsi/atari_NCR5380.c | 49 ++++++++-------------------- 3 files changed, 29 insertions(+), 96 deletions(-) Index: linux/drivers/scsi/NCR5380.c =================================================================== --- linux.orig/drivers/scsi/NCR5380.c 2015-12-06 12:29:54.000000000 +1100 +++ linux/drivers/scsi/NCR5380.c 2015-12-06 12:29:56.000000000 +1100 @@ -983,7 +983,7 @@ static void NCR5380_main(struct work_str do { /* Lock held here */ done = 1; - if (!hostdata->connected && !hostdata->selecting) { + if (!hostdata->connected) { dprintk(NDEBUG_MAIN, "scsi%d : not connected\n", instance->host_no); /* * Search through the issue_queue for a command destined @@ -1013,9 +1013,6 @@ static void NCR5380_main(struct work_str */ dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main() : command for target %d lun %llu removed from issue_queue\n", instance->host_no, tmp->device->id, tmp->device->lun); - 
hostdata->selecting = NULL; - /* RvC: have to preset this to indicate a new command is being performed */ - /* * REQUEST SENSE commands are issued without tagged * queueing, even on SCSI-II devices because the @@ -1033,26 +1030,13 @@ static void NCR5380_main(struct work_str done = 0; dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main(): select() failed, returned to issue_queue\n", instance->host_no); } - if (hostdata->connected || - hostdata->selecting) + if (hostdata->connected) break; /* lock held here still */ } /* if target/lun is not busy */ } /* for */ /* exited locked */ } /* if (!hostdata->connected) */ - if (hostdata->selecting) { - tmp = (struct scsi_cmnd *) hostdata->selecting; - /* Selection will drop and retake the lock */ - if (!NCR5380_select(instance, tmp)) { - /* OK or bad target */ - } else { - LIST(tmp, hostdata->issue_queue); - tmp->host_scribble = (unsigned char *) hostdata->issue_queue; - hostdata->issue_queue = tmp; - done = 0; - } - } /* if hostdata->selecting */ if (hostdata->connected #ifdef REAL_DMA && !hostdata->dmalen @@ -1171,7 +1155,6 @@ static irqreturn_t NCR5380_intr(int dumm * Returns : -1 if selection failed but should be retried. * 0 if selection failed and should not be retried. * 0 if selection succeeded completely (hostdata->connected == cmd). - * 0 if selection in progress (hostdata->selecting == cmd). * * Side effects : * If bus busy, arbitration failed, etc, NCR5380_select() will exit @@ -1195,13 +1178,8 @@ static int NCR5380_select(struct Scsi_Ho unsigned char tmp[3], phase; unsigned char *data; int len; - unsigned long timeout; - unsigned char value; int err; - if (hostdata->selecting) - goto part2; - NCR5380_dprint(NDEBUG_ARBITRATION, instance); dprintk(NDEBUG_ARBITRATION, "scsi%d : starting arbitration, id = %d\n", instance->host_no, instance->this_id); @@ -1337,33 +1315,9 @@ static int NCR5380_select(struct Scsi_Ho * selection. 
*/ - timeout = jiffies + msecs_to_jiffies(250); - - /* - * XXX very interesting - we're seeing a bounce where the BSY we - * asserted is being reflected / still asserted (propagation delay?) - * and it's detecting as true. Sigh. - */ - - hostdata->select_time = 0; /* we count the clock ticks at which we polled */ - hostdata->selecting = cmd; - -part2: - /* RvC: here we enter after a sleeping period, or immediately after - execution of part 1 - we poll only once ech clock tick */ - value = NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO); - - if (!value && (hostdata->select_time < HZ/4)) { - /* RvC: we still must wait for a device response */ - hostdata->select_time++; /* after 25 ticks the device has failed */ - NCR5380_set_timer(hostdata, 1); - return 0; /* RvC: we return here with hostdata->selecting set, - to go to sleep */ - } + err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY, + msecs_to_jiffies(250)); - hostdata->selecting = NULL;/* clear this pointer, because we passed the - waiting period */ if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) { NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE); NCR5380_reselect(instance); @@ -1371,6 +1325,17 @@ part2: NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask); return -1; } + + if (err < 0) { + NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE); + cmd->result = DID_BAD_TARGET << 16; + cmd->scsi_done(cmd); + NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask); + dprintk(NDEBUG_SELECTION, "scsi%d : target did not respond within 250ms\n", + instance->host_no); + return 0; + } + /* * No less than two deskew delays after the initiator detects the * BSY signal is true, it shall release the SEL signal and may @@ -1381,15 +1346,6 @@ part2: NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN); - if (!(NCR5380_read(STATUS_REG) & SR_BSY)) { - NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE); - cmd->result = DID_BAD_TARGET << 16; - cmd->scsi_done(cmd); - NCR5380_write(SELECT_ENABLE_REG, 
hostdata->id_mask); - dprintk(NDEBUG_SELECTION, "scsi%d : target did not respond within 250ms\n", instance->host_no); - return 0; - } - /* * Since we followed the SCSI spec, and raised ATN while SEL * was true but before BSY was false during selection, the information Index: linux/drivers/scsi/NCR5380.h =================================================================== --- linux.orig/drivers/scsi/NCR5380.h 2015-12-06 12:29:54.000000000 +1100 +++ linux/drivers/scsi/NCR5380.h 2015-12-06 12:29:56.000000000 +1100 @@ -267,8 +267,6 @@ struct NCR5380_hostdata { volatile struct scsi_cmnd *disconnected_queue; /* waiting for reconnect */ int flags; unsigned long time_expires; /* in jiffies, set prior to sleeping */ - int select_time; /* timer in select for target response */ - volatile struct scsi_cmnd *selecting; struct delayed_work coroutine; /* our co-routine */ struct scsi_eh_save ses; char info[256]; Index: linux/drivers/scsi/atari_NCR5380.c =================================================================== --- linux.orig/drivers/scsi/atari_NCR5380.c 2015-12-06 12:29:54.000000000 +1100 +++ linux/drivers/scsi/atari_NCR5380.c 2015-12-06 12:29:56.000000000 +1100 @@ -1426,7 +1426,7 @@ static int NCR5380_select(struct Scsi_Ho unsigned char tmp[3], phase; unsigned char *data; int len; - unsigned long timeout; + int err; unsigned long flags; NCR5380_dprint(NDEBUG_ARBITRATION, instance); @@ -1600,25 +1600,8 @@ static int NCR5380_select(struct Scsi_Ho * selection. */ - timeout = jiffies + msecs_to_jiffies(250); - - /* - * XXX very interesting - we're seeing a bounce where the BSY we - * asserted is being reflected / still asserted (propagation delay?) - * and it's detecting as true. Sigh. - */ - -#if 0 - /* ++roman: If a target conformed to the SCSI standard, it wouldn't assert - * IO while SEL is true. But again, there are some disks out the in the - * world that do that nevertheless. (Somebody claimed that this announces - * reselection capability of the target.) 
So we better skip that test and - * only wait for BSY... (Famous german words: Der Klügere gibt nach :-) - */ - - while (time_before(jiffies, timeout) && - !(NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO))) - ; + err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY, + msecs_to_jiffies(250)); if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) { NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE); @@ -1628,22 +1611,8 @@ static int NCR5380_select(struct Scsi_Ho NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask); return -1; } -#else - while (time_before(jiffies, timeout) && !(NCR5380_read(STATUS_REG) & SR_BSY)) - ; -#endif - - /* - * No less than two deskew delays after the initiator detects the - * BSY signal is true, it shall release the SEL signal and may - * change the DATA BUS. -wingel - */ - udelay(1); - - NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN); - - if (!(NCR5380_read(STATUS_REG) & SR_BSY)) { + if (err < 0) { NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE); cmd->result = DID_BAD_TARGET << 16; #ifdef SUPPORT_TAGS @@ -1656,6 +1625,16 @@ static int NCR5380_select(struct Scsi_Ho } /* + * No less than two deskew delays after the initiator detects the + * BSY signal is true, it shall release the SEL signal and may + * change the DATA BUS. -wingel + */ + + udelay(1); + + NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN); + + /* * Since we followed the SCSI spec, and raised ATN while SEL * was true but before BSY was false during selection, the information * transfer phase should be a MESSAGE OUT phase so that we can send the -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/