diff mbox series

[11/15] s390-bios: cio error handling

Message ID 1548768562-20007-12-git-send-email-jjherne@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390: vfio-ccw dasd ipl support | expand

Commit Message

Jason J. Herne Jan. 29, 2019, 1:29 p.m. UTC
Add verbose error output for when unexpected i/o errors happen. This eases the
burden of debugging and reporting i/o errors. No error information is printed
in the success case. Here is an example of what is output on error:

vfio-ccw device I/O error - Interrupt Response Block Data:
    Function Ctrl : [Start]
    Activity Ctrl : [Start-Pending]
    Status Ctrl : [Alert] [Primary] [Secondary] [Status-Pending]
    Device Status : [Unit-Check]
    Channel Status :
    cpa=: 0x0000000001e67098
    prev_ccw=: 0x0000000000000000
    this_ccw=: 0x0000000000000000

Sense Data (fmt 32-bytes):
    Sense Condition Flags : [Equipment-Check]
    Residual Count     =: 0x0000000000000000
    Phys Drive ID      =: 0x000000000000009e
    low cyl address    =: 0x0000000000000000
    head addr & hi cyl =: 0x0000000000000000
    format/message     =: 0x0000000000000008
    fmt-dependent[0-7] =: 0x0000000000000004
    fmt-dependent[8-15]=: 0xe561282305082fff
    prog action code   =: 0x0000000000000016
    Configuration info =: 0x00000000000040e0
    mcode / hi-cyl     =: 0x0000000000000000
    cyl & head addr [0]=: 0x0000000000000000
    cyl & head addr [1]=: 0x0000000000000000
    cyl & head addr [2]=: 0x0000000000000000

Signed-off-by: Jason J. Herne <jjherne@linux.ibm.com>

# Conflicts:
#	pc-bios/s390-ccw/cio.c

Signed-off-by: Jason J. Herne <jjherne@linux.ibm.com>
---
 pc-bios/s390-ccw/cio.c  | 225 ++++++++++++++++++++++++++++++++++++++++++++++++
 pc-bios/s390-ccw/libc.h |  11 +++
 2 files changed, 236 insertions(+)

Comments

Cornelia Huck Feb. 4, 2019, 11:41 a.m. UTC | #1
On Tue, 29 Jan 2019 08:29:18 -0500
"Jason J. Herne" <jjherne@linux.ibm.com> wrote:

> Add verbose error output for when unexpected i/o errors happen. This eases the
> burden of debugging and reporting i/o errors. No error information is printed
> in the success case, here is an example of what is output on error:
> 
> vfio-ccw device I/O error - Interrupt Response Block Data:
>     Function Ctrl : [Start]
>     Activity Ctrl : [Start-Pending]
>     Status Ctrl : [Alert] [Primary] [Secondary] [Status-Pending]
>     Device Status : [Unit-Check]
>     Channel Status :
>     cpa=: 0x0000000001e67098
>     prev_ccw=: 0x0000000000000000
>     this_ccw=: 0x0000000000000000
> 
> Sense Data (fmt 32-bytes):
>     Sense Condition Flags : [Equipment-Check]
>     Residual Count     =: 0x0000000000000000
>     Phys Drive ID      =: 0x000000000000009e
>     low cyl address    =: 0x0000000000000000
>     head addr & hi cyl =: 0x0000000000000000
>     format/message     =: 0x0000000000000008
>     fmt-dependent[0-7] =: 0x0000000000000004
>     fmt-dependent[8-15]=: 0xe561282305082fff
>     prog action code   =: 0x0000000000000016
>     Configuration info =: 0x00000000000040e0
>     mcode / hi-cyl     =: 0x0000000000000000
>     cyl & head addr [0]=: 0x0000000000000000
>     cyl & head addr [1]=: 0x0000000000000000
>     cyl & head addr [2]=: 0x0000000000000000

Looks cool.

> 
> Signed-off-by: Jason J. Herne <jjherne@linux.ibm.com>
> 
> # Conflicts:
> #	pc-bios/s390-ccw/cio.c

You probably don't want to keep that :)

> 
> Signed-off-by: Jason J. Herne <jjherne@linux.ibm.com>
> ---
>  pc-bios/s390-ccw/cio.c  | 225 ++++++++++++++++++++++++++++++++++++++++++++++++
>  pc-bios/s390-ccw/libc.h |  11 +++
>  2 files changed, 236 insertions(+)
> 

(...)

> +static void print_irb_err(Irb *irb)
> +{
> +    Ccw0 *this_ccw = u32toptr(irb->scsw.cpa);
> +    Ccw0 *prev_ccw = u32toptr(irb->scsw.cpa - 8);

I don't think you can cast this unconditionally to format 0 -- I'd pass
in the format from do_cio and handle it accordingly.

> +    char msgline[256];
> +
> +    sclp_print("vfio-ccw device I/O error - Interrupt Response Block Data:\n");

If you call this from the generic function, you shouldn't talk about
vfio-ccw here; but it might make sense to print subchannel/devno and
the cu type.

(...)

> @@ -148,6 +370,9 @@ int do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt)
>              continue;
>          }
>  
> +        print_irb_err(&irb);
> +        basic_sense(schid, &sd, sizeof(sd));
> +        print_eckd_dasd_sense_data(&sd);

I think this should only be printed for actual dasds (and maybe only
print it if there is actually sense data available)?

>          break;
>      }
>
Jason J. Herne Feb. 28, 2019, 3:59 p.m. UTC | #2
On 2/4/19 6:41 AM, Cornelia Huck wrote:
...
> 
>> +static void print_irb_err(Irb *irb)
>> +{
>> +    Ccw0 *this_ccw = u32toptr(irb->scsw.cpa);
>> +    Ccw0 *prev_ccw = u32toptr(irb->scsw.cpa - 8);
> 
> I don't think you can cast this conditionally to format 0 -- I'd pass
> in the format from do_cio and handle it accordingly.
> 
>> +    char msgline[256];
>> +
>> +    sclp_print("vfio-ccw device I/O error - Interrupt Response Block Data:\n");
> 
> If you call this from the generic function, you shouldn't talk about
> vfio-ccw here; but it might make sense to print subchannel/devno and
> the cu type.
> 
> (...)
>

Fixed for v3.


>> @@ -148,6 +370,9 @@ int do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt)
>>               continue;
>>           }
>>   
>> +        print_irb_err(&irb);
>> +        basic_sense(schid, &sd, sizeof(sd));
>> +        print_eckd_dasd_sense_data(&sd);
> 
> I think this should only be printed for actual dasds (and maybe only
> print it if there is actually sense data available)?
> 

Good point. Currently do_cio doesn't have any info on its device type. Should I pass in
the controller type? That's the easiest way I can think of to make the decision.
Cornelia Huck Feb. 28, 2019, 4:11 p.m. UTC | #3
On Thu, 28 Feb 2019 10:59:13 -0500
"Jason J. Herne" <jjherne@linux.ibm.com> wrote:

> On 2/4/19 6:41 AM, Cornelia Huck wrote:

> >> @@ -148,6 +370,9 @@ int do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt)
> >>               continue;
> >>           }
> >>   
> >> +        print_irb_err(&irb);
> >> +        basic_sense(schid, &sd, sizeof(sd));
> >> +        print_eckd_dasd_sense_data(&sd);  
> > 
> > I think this should only be printed for actual dasds (and maybe only
> > print it if there is actually sense data available)?
> >   
> 
> Good point. Currently do_cio doesn't have any info on its device type. Should I pass in 
> the controller type? Thats the easiest way I can think of to make the decision.
> 

Sounds reasonable to me!
diff mbox series

Patch

diff --git a/pc-bios/s390-ccw/cio.c b/pc-bios/s390-ccw/cio.c
index 63581c6..bd6051b 100644
--- a/pc-bios/s390-ccw/cio.c
+++ b/pc-bios/s390-ccw/cio.c
@@ -79,6 +79,228 @@  static bool irb_error(Irb *irb)
     return irb->scsw.dstat != (SCSW_DSTAT_DEVEND | SCSW_DSTAT_CHEND);
 }
 
+/*
+ * Print the contents of an ECKD DASD sense data block using sclp_print() and
+ * print_int(): a header naming the sense format, one line listing every
+ * condition flag that is set in status[0..2], then the remaining fields as
+ * integers.
+ *
+ * sd: sense data to decode; it is only read.
+ */
+static void print_eckd_dasd_sense_data(sense_data_eckd_dasd *sd)
+{
+    char msgline[512];
+
+    /* Bit 0x8000 of config_info selects which format header is printed. */
+    if (sd->config_info & 0x8000) {
+        sclp_print("Eckd Dasd Sense Data (fmt 24-bytes):\n");
+    } else {
+        sclp_print("Eckd Dasd Sense Data (fmt 32-bytes):\n");
+    }
+
+    /*
+     * strcat() appends at the first NUL byte it finds, so the uninitialized
+     * stack buffer must be turned into an empty string before the first
+     * append.
+     */
+    msgline[0] = '\0';
+    strcat(msgline, "    Sense Condition Flags :");
+
+    /* Flags from status byte 0. */
+    if (sd->status[0] & SNS_STAT0_CMD_REJECT) {
+        strcat(msgline, " [Cmd-Reject]");
+    }
+    if (sd->status[0] & SNS_STAT0_INTERVENTION_REQ) {
+        strcat(msgline, " [Intervention-Required]");
+    }
+    if (sd->status[0] & SNS_STAT0_BUS_OUT_CHECK) {
+        strcat(msgline, " [Bus-Out-Parity-Check]");
+    }
+    if (sd->status[0] & SNS_STAT0_EQUIPMENT_CHECK) {
+        strcat(msgline, " [Equipment-Check]");
+    }
+    if (sd->status[0] & SNS_STAT0_DATA_CHECK) {
+        strcat(msgline, " [Data-Check]");
+    }
+    if (sd->status[0] & SNS_STAT0_OVERRUN) {
+        strcat(msgline, " [Overrun]");
+    }
+    if (sd->status[0] & SNS_STAT0_INCOMPL_DOMAIN) {
+        strcat(msgline, " [Incomplete-Domain]");
+    }
+
+    /* Flags from status byte 1. */
+    if (sd->status[1] & SNS_STAT1_PERM_ERR) {
+        strcat(msgline, " [Permanent-Error]");
+    }
+    if (sd->status[1] & SNS_STAT1_INV_TRACK_FORMAT) {
+        strcat(msgline, " [Invalid-Track-Fmt]");
+    }
+    if (sd->status[1] & SNS_STAT1_EOC) {
+        strcat(msgline, " [End-of-Cyl]");
+    }
+    if (sd->status[1] & SNS_STAT1_MESSAGE_TO_OPER) {
+        strcat(msgline, " [Operator-Msg]");
+    }
+    if (sd->status[1] & SNS_STAT1_NO_REC_FOUND) {
+        strcat(msgline, " [No-Record-Found]");
+    }
+    if (sd->status[1] & SNS_STAT1_FILE_PROTECTED) {
+        strcat(msgline, " [File-Protected]");
+    }
+    if (sd->status[1] & SNS_STAT1_WRITE_INHIBITED) {
+        strcat(msgline, " [Write-Inhibited]");
+    }
+    if (sd->status[1] & SNS_STAT1_IMPRECISE_END) {
+        strcat(msgline, " [Imprecise-Ending]");
+    }
+
+    /* Flags from status byte 2. */
+    if (sd->status[2] & SNS_STAT2_REQ_INH_WRITE) {
+        strcat(msgline, " [Req-Inhibit-Write]");
+    }
+    if (sd->status[2] & SNS_STAT2_CORRECTABLE) {
+        strcat(msgline, " [Correctable-Data-Check]");
+    }
+    if (sd->status[2] & SNS_STAT2_FIRST_LOG_ERR) {
+        strcat(msgline, " [First-Error-Log]");
+    }
+    if (sd->status[2] & SNS_STAT2_ENV_DATA_PRESENT) {
+        strcat(msgline, " [Env-Data-Present]");
+    }
+    if (sd->status[2] & SNS_STAT2_IMPRECISE_END) {
+        strcat(msgline, " [Imprecise-End]");
+    }
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    /* The remaining fields are printed as labelled integers, one per line. */
+    print_int("    Residual Count     =", sd->res_count);
+    print_int("    Phys Drive ID      =", sd->phys_drive_id);
+    print_int("    low cyl address    =", sd->low_cyl_addr);
+    print_int("    head addr & hi cyl =", sd->head_high_cyl_addr);
+    print_int("    format/message     =", sd->fmt_msg);
+    print_int("    fmt-dependent[0-7] =", sd->fmt_dependent_info[0]);
+    print_int("    fmt-dependent[8-15]=", sd->fmt_dependent_info[1]);
+    print_int("    prog action code   =", sd->program_action_code);
+    print_int("    Configuration info =", sd->config_info);
+    print_int("    mcode / hi-cyl     =", sd->mcode_hicyl);
+    print_int("    cyl & head addr [0]=", sd->cyl_head_addr[0]);
+    print_int("    cyl & head addr [1]=", sd->cyl_head_addr[1]);
+    print_int("    cyl & head addr [2]=", sd->cyl_head_addr[2]);
+}
+
+/*
+ * Print a decoded view of an Interrupt Response Block using sclp_print() and
+ * print_int(): one line each for the function control, activity control,
+ * status control, device status and channel status flags found in irb->scsw,
+ * followed by the channel program address (cpa) and the 8 bytes stored at
+ * cpa - 8 and at cpa.
+ *
+ * irb: interrupt response block to decode; it is only read.
+ */
+static void print_irb_err(Irb *irb)
+{
+    Ccw0 *this_ccw = u32toptr(irb->scsw.cpa);
+    /*
+     * NOTE(review): nothing here guards against cpa < 8 before computing
+     * cpa - 8 -- confirm that this cannot happen for the callers.
+     */
+    Ccw0 *prev_ccw = u32toptr(irb->scsw.cpa - 8);
+    char msgline[256];
+
+    sclp_print("vfio-ccw device I/O error - Interrupt Response Block Data:\n");
+
+    /*
+     * Every section below builds its line with strcat(), which appends at the
+     * first NUL byte, so msgline is reset to an empty string before each one.
+     * This matters most for the first section, where the buffer would
+     * otherwise still hold uninitialized stack contents.
+     */
+    msgline[0] = '\0';
+    strcat(msgline, "    Function Ctrl :");
+    if (irb->scsw.ctrl & SCSW_FCTL_START_FUNC) {
+        strcat(msgline, " [Start]");
+    }
+    if (irb->scsw.ctrl & SCSW_FCTL_HALT_FUNC) {
+        strcat(msgline, " [Halt]");
+    }
+    if (irb->scsw.ctrl & SCSW_FCTL_CLEAR_FUNC) {
+        strcat(msgline, " [Clear]");
+    }
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    msgline[0] = '\0';
+    strcat(msgline, "    Activity Ctrl :");
+    if (irb->scsw.ctrl & SCSW_ACTL_RESUME_PEND) {
+        strcat(msgline, " [Resume-Pending]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_START_PEND) {
+        strcat(msgline, " [Start-Pending]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_HALT_PEND) {
+        strcat(msgline, " [Halt-Pending]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_CLEAR_PEND) {
+        strcat(msgline, " [Clear-Pending]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_CH_ACTIVE) {
+        strcat(msgline, " [Channel-Active]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_DEV_ACTIVE) {
+        strcat(msgline, " [Device-Active]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_SUSPENDED) {
+        strcat(msgline, " [Suspended]");
+    }
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    msgline[0] = '\0';
+    strcat(msgline, "    Status Ctrl :");
+    if (irb->scsw.ctrl & SCSW_SCTL_ALERT) {
+        strcat(msgline, " [Alert]");
+    }
+    if (irb->scsw.ctrl & SCSW_SCTL_INTERMED) {
+        strcat(msgline, " [Intermediate]");
+    }
+    if (irb->scsw.ctrl & SCSW_SCTL_PRIMARY) {
+        strcat(msgline, " [Primary]");
+    }
+    if (irb->scsw.ctrl & SCSW_SCTL_SECONDARY) {
+        strcat(msgline, " [Secondary]");
+    }
+    if (irb->scsw.ctrl & SCSW_SCTL_STATUS_PEND) {
+        strcat(msgline, " [Status-Pending]");
+    }
+
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    msgline[0] = '\0';
+    strcat(msgline, "    Device Status :");
+    if (irb->scsw.dstat & SCSW_DSTAT_ATTN) {
+        strcat(msgline, " [Attention]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_STATMOD) {
+        strcat(msgline, " [Status-Modifier]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_CUEND) {
+        strcat(msgline, " [Ctrl-Unit-End]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_BUSY) {
+        strcat(msgline, " [Busy]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_CHEND) {
+        strcat(msgline, " [Channel-End]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_DEVEND) {
+        strcat(msgline, " [Device-End]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_UCHK) {
+        strcat(msgline, " [Unit-Check]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_UEXCP) {
+        strcat(msgline, " [Unit-Exception]");
+    }
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    msgline[0] = '\0';
+    strcat(msgline, "    Channel Status :");
+    if (irb->scsw.cstat & SCSW_CSTAT_PCINT) {
+        strcat(msgline, " [Program-Ctrl-Interruption]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_BADLEN) {
+        strcat(msgline, " [Incorrect-Length]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_PROGCHK) {
+        strcat(msgline, " [Program-Check]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_PROTCHK) {
+        strcat(msgline, " [Protection-Check]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_CHDCHK) {
+        strcat(msgline, " [Channel-Data-Check]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_CHCCHK) {
+        strcat(msgline, " [Channel-Ctrl-Check]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_ICCHK) {
+        strcat(msgline, " [Interface-Ctrl-Check]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_CHAINCHK) {
+        strcat(msgline, " [Chaining-Check]");
+    }
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    /* The first 8 bytes at each CCW address are printed as one 64-bit value. */
+    print_int("    cpa=", irb->scsw.cpa);
+    print_int("    prev_ccw=", *((uint64_t *)prev_ccw));
+    print_int("    this_ccw=", *((uint64_t *)this_ccw));
+}
+
 /*
  * Executes a channel program at a given subchannel. The request to run the
  * channel program is sent to the subchannel, we then wait for the interrupt
@@ -148,6 +370,9 @@  int do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt)
             continue;
         }
 
+        print_irb_err(&irb);
+        basic_sense(schid, &sd, sizeof(sd));
+        print_eckd_dasd_sense_data(&sd);
         break;
     }
 
diff --git a/pc-bios/s390-ccw/libc.h b/pc-bios/s390-ccw/libc.h
index e198f0b..01b5de0 100644
--- a/pc-bios/s390-ccw/libc.h
+++ b/pc-bios/s390-ccw/libc.h
@@ -79,6 +79,17 @@  static inline size_t strlen(const char *str)
     return i;
 }
 
+/*
+ * Append the string src, including its terminating NUL, to the end of the
+ * string dest and return dest. No bounds checking is done: the caller has to
+ * make sure that dest is NUL-terminated and has room for the result.
+ */
+static inline char *strcat(char *dest, const char *src)
+{
+    size_t i;
+    size_t src_len = strlen(src); /* loop-invariant, so compute it only once */
+    char *dest_end = dest + strlen(dest);
+
+    /* "<=" so that the terminating NUL of src is copied as well. */
+    for (i = 0; i <= src_len; i++) {
+        dest_end[i] = src[i];
+    }
+    return dest;
+}
+
 static inline int isdigit(int c)
 {
     return (c >= '0') && (c <= '9');