diff mbox

[v7,2/3] qmp event: Refactor QUORUM_REPORT_BAD

Message ID 1456450742-13905-3-git-send-email-xiecl.fnst@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Changlong Xie Feb. 26, 2016, 1:39 a.m. UTC
Introduce QuorumOpType, and make QUORUM_REPORT_BAD compatible
with it.

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
---
 block/quorum.c      | 17 ++++++++++++-----
 docs/qmp-events.txt | 11 ++++++++++-
 qapi/block.json     | 16 ++++++++++++++++
 qapi/event.json     |  4 +++-
 4 files changed, 41 insertions(+), 7 deletions(-)

Comments

Alberto Garcia Feb. 26, 2016, 7:33 a.m. UTC | #1
On Fri 26 Feb 2016 02:39:01 AM CET, Changlong Xie wrote:
> Introduce QuorumOpType, and make QUORUM_REPORT_BAD compatible
> with it.
>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Cc: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>

Reviewed-by: Alberto Garcia <berto@igalia.com>

Berto
Eric Blake March 1, 2016, 3:57 p.m. UTC | #2
On 02/25/2016 06:39 PM, Changlong Xie wrote:
> Introduce QuorumOpType, and make QUORUM_REPORT_BAD compatible
> with it.
> 
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Cc: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
> ---

> +Read operation:
>  { "event": "QUORUM_REPORT_BAD",
> -     "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5 },
> +     "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5,
> +               "type": "read" },
>       "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
>  
> +Flush operation:
> +{ "event": "QUORUM_REPORT_BAD",
> +     "data": { "node-name": "node0", "sector-num": 0, "sectors-count": 2097120,
> +               "type": "flush", "error": "Broken pipe" },
> +     "timestamp": { "seconds": 1456406829, "microseconds": 291763 } }
> +
>  Note: this event is rate-limited.

Question - do we care if rate limiting masks one type of failure due to
another?  Or put another way, are we okay with a single rate-limiting
queue for all three types, or do we want three queues?  Also, shouldn't
this have a queue per child node (I don't want to be flooded with
multiple notifications in one second that child1 has failed, but I _do_
want notifications if both child1 and child2 fail in the same second).

But that's for future patches to change; it does not need to hold up the
current series.
Alberto Garcia March 3, 2016, 2:48 p.m. UTC | #3
>> +Read operation:
>>  { "event": "QUORUM_REPORT_BAD",
>> -     "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5 },
>> +     "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5,
>> +               "type": "read" },
>>       "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
>>  
>> +Flush operation:
>> +{ "event": "QUORUM_REPORT_BAD",
>> +     "data": { "node-name": "node0", "sector-num": 0, "sectors-count": 2097120,
>> +               "type": "flush", "error": "Broken pipe" },
>> +     "timestamp": { "seconds": 1456406829, "microseconds": 291763 } }
>> +
>>  Note: this event is rate-limited.
>
> Question - do we care if rate limiting masks one type of failure due
> to another?  Or put another way, are we okay with a single
> rate-limiting queue for all three types, or do we want three queues?
> Also, shouldn't this have a queue per child node (I don't want to be
> flooded with multiple notifications in one second that child1 has
> failed, but I _do_ want notifications if both child1 and child2 fail
> in the same second).

I think you are right, thanks for pointing it out.

Berto
diff mbox

Patch

diff --git a/block/quorum.c b/block/quorum.c
index 11cc60b..8f83574 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -215,14 +215,16 @@  static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
     return acb;
 }
 
-static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
+                              int nb_sectors, char *node_name, int ret)
 {
     const char *msg = NULL;
     if (ret < 0) {
         msg = strerror(-ret);
     }
-    qapi_event_send_quorum_report_bad(!!msg, msg, node_name,
-                                      acb->sector_num, acb->nb_sectors, &error_abort);
+
+    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
+                                      sector_num, nb_sectors, &error_abort);
 }
 
 static void quorum_report_failure(QuorumAIOCB *acb)
@@ -282,6 +284,7 @@  static void quorum_aio_cb(void *opaque, int ret)
     QuorumChildRequest *sacb = opaque;
     QuorumAIOCB *acb = sacb->parent;
     BDRVQuorumState *s = acb->common.bs->opaque;
+    QuorumOpType type;
     bool rewrite = false;
 
     if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
@@ -300,12 +303,14 @@  static void quorum_aio_cb(void *opaque, int ret)
         return;
     }
 
+    type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
     sacb->ret = ret;
     acb->count++;
     if (ret == 0) {
         acb->success_count++;
     } else {
-        quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret);
+        quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
+                          sacb->aiocb->bs->node_name, ret);
     }
     assert(acb->count <= s->num_children);
     assert(acb->success_count <= s->num_children);
@@ -338,7 +343,9 @@  static void quorum_report_bad_versions(BDRVQuorumState *s,
             continue;
         }
         QLIST_FOREACH(item, &version->items, next) {
-            quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0);
+            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
+                              acb->nb_sectors,
+                              s->children[item->index]->bs->node_name, 0);
         }
     }
 }
diff --git a/docs/qmp-events.txt b/docs/qmp-events.txt
index d6b9aea..fa7574d 100644
--- a/docs/qmp-events.txt
+++ b/docs/qmp-events.txt
@@ -325,6 +325,7 @@  Emitted to report a corruption of a Quorum file.
 
 Data:
 
+- "type":          Quorum operation type
 - "error":         Error message (json-string, optional)
                    Only present on failure.  This field contains a human-readable
                    error message.  There are no semantics other than that the
@@ -336,10 +337,18 @@  Data:
 
 Example:
 
+Read operation:
 { "event": "QUORUM_REPORT_BAD",
-     "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5 },
+     "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5,
+               "type": "read" },
      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
 
+Flush operation:
+{ "event": "QUORUM_REPORT_BAD",
+     "data": { "node-name": "node0", "sector-num": 0, "sectors-count": 2097120,
+               "type": "flush", "error": "Broken pipe" },
+     "timestamp": { "seconds": 1456406829, "microseconds": 291763 } }
+
 Note: this event is rate-limited.
 
 RESET
diff --git a/qapi/block.json b/qapi/block.json
index 58e6b30..937337d 100644
--- a/qapi/block.json
+++ b/qapi/block.json
@@ -196,3 +196,19 @@ 
 ##
 { 'event': 'DEVICE_TRAY_MOVED',
   'data': { 'device': 'str', 'tray-open': 'bool' } }
+
+##
+# @QuorumOpType
+#
+# An enumeration of the quorum operation types
+#
+# @read: read operation
+#
+# @write: write operation
+#
+# @flush: flush operation
+#
+# Since: 2.6
+##
+{ 'enum': 'QuorumOpType',
+  'data': [ 'read', 'write', 'flush' ] }
diff --git a/qapi/event.json b/qapi/event.json
index 1a45a6c..8642052 100644
--- a/qapi/event.json
+++ b/qapi/event.json
@@ -325,6 +325,8 @@ 
 #
 # Emitted to report a corruption of a Quorum file
 #
+# @type: quorum operation type (Since 2.6)
+#
 # @error: #optional, error message. Only present on failure. This field
 #         contains a human-readable error message. There are no semantics other
 #         than that the block layer reported an error and clients should not
@@ -339,7 +341,7 @@ 
 # Since: 2.0
 ##
 { 'event': 'QUORUM_REPORT_BAD',
-  'data': { '*error': 'str', 'node-name': 'str',
+  'data': { 'type': 'QuorumOpType', '*error': 'str', 'node-name': 'str',
             'sector-num': 'int', 'sectors-count': 'int' } }
 
 ##