Message ID | 1460977906-25218-5-git-send-email-zhangchen.fnst@cn.fujitsu.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 04/18/2016 07:11 PM, Zhang Chen wrote: > Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> > Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > --- > net/colo-compare.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 154 insertions(+), 4 deletions(-) I'm not a tcp expert, this patch may need some guys who are good at TCP to review. > diff --git a/net/colo-compare.c b/net/colo-compare.c > index 4b5a2d4..3dad461 100644 > --- a/net/colo-compare.c > +++ b/net/colo-compare.c > @@ -385,9 +385,148 @@ static int colo_packet_compare(Packet *ppkt, Packet *spkt) > } > } > > -static int colo_packet_compare_all(Packet *spkt, Packet *ppkt) > +/* > + * called from the compare thread on the primary > + * for compare tcp packet > + * compare_tcp copied from Dr. David Alan Gilbert's branch > + */ > +static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) > +{ > + struct tcphdr *ptcp, *stcp; > + int res; > + char *sdebug, *ddebug; > + ptrdiff_t offset; > + > + trace_colo_compare_main("compare tcp"); > + ptcp = (struct tcphdr *)ppkt->transport_layer; > + stcp = (struct tcphdr *)spkt->transport_layer; > + > + /* Initial is compare the whole packet */ > + offset = 12; /* Hack! Skip virtio header */ So this won't work for e1000 I believe? > + > + if (ptcp->th_flags == stcp->th_flags && > + ((ptcp->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { > + /* This is the syn/ack response from the guest to an incoming > + * connection; the secondary won't have matched the sequence number > + * Note: We should probably compare the IP level? > + * Note hack: This already has the virtio offset > + */ > + offset = sizeof(ptcp->th_ack) + (void *)&ptcp->th_ack - ppkt->data; > + } > + /* Note - we want to compare everything as long as it's not the syn/ack? */ > + assert(offset > 0); > + assert(spkt->size > offset); > + > + /* The 'identification' field in the IP header is *very* random > + * it almost never matches. Fudge this by ignoring differences in > + * unfragmented packets; they'll normally sort themselves out if different > + * anyway, and it should recover at the TCP level. > + * An alternative would be to get both the primary and secondary to rewrite > + * somehow; but that would need some sync traffic to sync the state This is very tricky I believe, I wonder this will work. Would it be easier if we reassembly the packet? And I believe we should check for the length before all the other examinations? > + */ > + if (ntohs(ppkt->ip->ip_off) & IP_DF) { > + spkt->ip->ip_id = ppkt->ip->ip_id; > + /* and the sum will be different if the IDs were different */ > + spkt->ip->ip_sum = ppkt->ip->ip_sum; > + } > + > + res = memcmp(ppkt->data + offset, spkt->data + offset, > + (spkt->size - offset)); > + > + if (res && DEBUG_TCP_COMPARE) { > + sdebug = strdup(inet_ntoa(ppkt->ip->ip_src)); > + ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst)); > + fprintf(stderr, "%s: src/dst: %s/%s offset=%zd p: seq/ack=%u/%u" > + " s: seq/ack=%u/%u res=%d flags=%x/%x\n", __func__, > + sdebug, ddebug, offset, > + ntohl(ptcp->th_seq), ntohl(ptcp->th_ack), > + ntohl(stcp->th_seq), ntohl(stcp->th_ack), > + res, ptcp->th_flags, stcp->th_flags); > + if (res && (ptcp->th_seq == stcp->th_seq)) { > + trace_colo_compare_with_int("Primary len", ppkt->size); > + colo_dump_packet(ppkt); > + trace_colo_compare_with_int("Secondary len", spkt->size); > + colo_dump_packet(spkt); > + } > + g_free(sdebug); > + g_free(ddebug); > + } > + > + return res; > +} > + > +/* > + * called from the compare thread on the primary > + * for compare udp packet > + */ > +static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) > +{ > + int ret = 1; > + > + trace_colo_compare_main("compare udp"); > + ret = colo_packet_compare(ppkt, spkt); > + > + if (ret) { > + trace_colo_compare_main("primary udp"); > + colo_dump_packet(ppkt); > + trace_colo_compare_main("secondary udp"); > + colo_dump_packet(spkt); > + } > + > + return ret; > +} > + > +/* > + * called from the compare thread on the primary > + * for compare icmp packet > + */ > +static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) > +{ > + int network_length; > + struct icmp *icmp_ppkt, *icmp_spkt; > + > + trace_colo_compare_main("compare icmp"); > + network_length = (ppkt->ip->ip_hl & 0x0F) * 4; > + icmp_ppkt = (struct icmp *)(ppkt->data + network_length + ETH_HLEN); > + icmp_spkt = (struct icmp *)(spkt->data + network_length + ETH_HLEN); > + > + if ((icmp_ppkt->icmp_type == icmp_spkt->icmp_type) && > + (icmp_ppkt->icmp_code == icmp_spkt->icmp_code)) { > + if (icmp_ppkt->icmp_type == ICMP_REDIRECT) { > + if (icmp_ppkt->icmp_gwaddr.s_addr != > + icmp_spkt->icmp_gwaddr.s_addr) { > + trace_colo_compare_main("icmp_gwaddr.s_addr not same"); > + trace_colo_compare_with_char("ppkt s_addr", > + inet_ntoa(icmp_ppkt->icmp_gwaddr)); > + trace_colo_compare_with_char("spkt s_addr", > + inet_ntoa(icmp_spkt->icmp_gwaddr)); > + return -1; > + } > + } else if ((icmp_ppkt->icmp_type == ICMP_UNREACH) && > + (icmp_ppkt->icmp_type == ICMP_UNREACH_NEEDFRAG)) { > + if (icmp_ppkt->icmp_nextmtu != icmp_spkt->icmp_nextmtu) { > + trace_colo_compare_main("icmp_nextmtu not same"); > + trace_colo_compare_with_int("ppkt s_addr", > + icmp_ppkt->icmp_nextmtu); > + trace_colo_compare_with_int("spkt s_addr", > + icmp_spkt->icmp_nextmtu); > + return -1; > + } > + } > + } else { > + return -1; > + } > + > + return 0; > +} > + > +/* > + * called from the compare thread on the primary > + * for compare other packet > + */ > +static int colo_packet_compare_other(Packet *spkt, Packet *ppkt) > { > - trace_colo_compare_main("compare all"); > + trace_colo_compare_main("compare other"); > return colo_packet_compare(ppkt, spkt); > } > > @@ -406,8 +545,19 @@ static void colo_compare_connection(void *opaque, void *user_data) > while (!g_queue_is_empty(&conn->primary_list) && > !g_queue_is_empty(&conn->secondary_list)) { > pkt = g_queue_pop_head(&conn->primary_list); > - result = g_queue_find_custom(&conn->secondary_list, > - pkt, (GCompareFunc)colo_packet_compare_all); > + if (conn->ip_proto == IPPROTO_TCP) { > + result = g_queue_find_custom(&conn->secondary_list, > + pkt, (GCompareFunc)colo_packet_compare_tcp); > + } else if (conn->ip_proto == IPPROTO_UDP) { > + result = g_queue_find_custom(&conn->secondary_list, > + pkt, (GCompareFunc)colo_packet_compare_udp); > + } else if (conn->ip_proto == IPPROTO_ICMP) { > + result = g_queue_find_custom(&conn->secondary_list, > + pkt, (GCompareFunc)colo_packet_compare_icmp); > + } else { > + result = g_queue_find_custom(&conn->secondary_list, > + pkt, (GCompareFunc)colo_packet_compare_other); > + } > A switch...case looks better. > if (result) { > ret = compare_chr_send(pkt->s->chr_out, pkt->data, pkt->size);
* Zhang Chen (zhangchen.fnst@cn.fujitsu.com) wrote: > Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> > Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > --- > net/colo-compare.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 154 insertions(+), 4 deletions(-) > > diff --git a/net/colo-compare.c b/net/colo-compare.c > index 4b5a2d4..3dad461 100644 > --- a/net/colo-compare.c > +++ b/net/colo-compare.c > @@ -385,9 +385,148 @@ static int colo_packet_compare(Packet *ppkt, Packet *spkt) > } > } > > -static int colo_packet_compare_all(Packet *spkt, Packet *ppkt) > +/* > + * called from the compare thread on the primary > + * for compare tcp packet > + * compare_tcp copied from Dr. David Alan Gilbert's branch > + */ > +static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) > +{ > + struct tcphdr *ptcp, *stcp; > + int res; > + char *sdebug, *ddebug; > + ptrdiff_t offset; > + > + trace_colo_compare_main("compare tcp"); > + ptcp = (struct tcphdr *)ppkt->transport_layer; > + stcp = (struct tcphdr *)spkt->transport_layer; > + > + /* Initial is compare the whole packet */ > + offset = 12; /* Hack! Skip virtio header */ So, when I post a set of patches and mark it saying that I know they've got a lot of hacks in them, it's good for those reusing those patches to check they need the hacks! In my world I found I needed to skip over that header and I didn't understand why; but hadn't figured out the details yet, and I'd added the 12 everywhere - I think this is the only place you've got it, so it's almost certainly wrong. > + if (ptcp->th_flags == stcp->th_flags && > + ((ptcp->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { > + /* This is the syn/ack response from the guest to an incoming > + * connection; the secondary won't have matched the sequence number > + * Note: We should probably compare the IP level? > + * Note hack: This already has the virtio offset > + */ > + offset = sizeof(ptcp->th_ack) + (void *)&ptcp->th_ack - ppkt->data; > + } > + /* Note - we want to compare everything as long as it's not the syn/ack? */ > + assert(offset > 0); > + assert(spkt->size > offset); > + > + /* The 'identification' field in the IP header is *very* random > + * it almost never matches. Fudge this by ignoring differences in > + * unfragmented packets; they'll normally sort themselves out if different > + * anyway, and it should recover at the TCP level. > + * An alternative would be to get both the primary and secondary to rewrite > + * somehow; but that would need some sync traffic to sync the state > + */ > + if (ntohs(ppkt->ip->ip_off) & IP_DF) { > + spkt->ip->ip_id = ppkt->ip->ip_id; > + /* and the sum will be different if the IDs were different */ > + spkt->ip->ip_sum = ppkt->ip->ip_sum; > + } > + > + res = memcmp(ppkt->data + offset, spkt->data + offset, > + (spkt->size - offset)); > + > + if (res && DEBUG_TCP_COMPARE) { > + sdebug = strdup(inet_ntoa(ppkt->ip->ip_src)); > + ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst)); > + fprintf(stderr, "%s: src/dst: %s/%s offset=%zd p: seq/ack=%u/%u" > + " s: seq/ack=%u/%u res=%d flags=%x/%x\n", __func__, > + sdebug, ddebug, offset, > + ntohl(ptcp->th_seq), ntohl(ptcp->th_ack), > + ntohl(stcp->th_seq), ntohl(stcp->th_ack), > + res, ptcp->th_flags, stcp->th_flags); > + if (res && (ptcp->th_seq == stcp->th_seq)) { > + trace_colo_compare_with_int("Primary len", ppkt->size); > + colo_dump_packet(ppkt); > + trace_colo_compare_with_int("Secondary len", spkt->size); > + colo_dump_packet(spkt); > + } Try and use meaningful traceing for this - don't use a 'compare_with_int' trace; but use a name that says what you're doing - for example trace_colo_tcp_miscompare ; that way if you're running COLO and just want to see why you're getting so many miscompares, you can look at this without turning on all the rest of the debug. Also, in my version instead of using a DEBUG_TCP macro, I again used the trace system, so, my code here was: if (trace_event_get_state(TRACE_COLO_PROXY_MISCOMPARE) && res) { that means you can switch it on and off at runtime using the trace system. Then just as it's running I can get to the (qemu) prompt and do: trace-event colo_proxy_miscompare on and see what's happening without recompiling. > + g_free(sdebug); > + g_free(ddebug); > + } > + > + return res; > +} > + > +/* > + * called from the compare thread on the primary > + * for compare udp packet > + */ > +static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) > +{ > + int ret = 1; > + > + trace_colo_compare_main("compare udp"); > + ret = colo_packet_compare(ppkt, spkt); > + > + if (ret) { > + trace_colo_compare_main("primary udp"); > + colo_dump_packet(ppkt); > + trace_colo_compare_main("secondary udp"); > + colo_dump_packet(spkt); > + } > + > + return ret; > +} > + > +/* > + * called from the compare thread on the primary > + * for compare icmp packet > + */ > +static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) > +{ > + int network_length; > + struct icmp *icmp_ppkt, *icmp_spkt; > + > + trace_colo_compare_main("compare icmp"); > + network_length = (ppkt->ip->ip_hl & 0x0F) * 4; Do you need that & 0x0f - the definition in ip.h is ip_hl:4 ? > + icmp_ppkt = (struct icmp *)(ppkt->data + network_length + ETH_HLEN); > + icmp_spkt = (struct icmp *)(spkt->data + network_length + ETH_HLEN); > + > + if ((icmp_ppkt->icmp_type == icmp_spkt->icmp_type) && > + (icmp_ppkt->icmp_code == icmp_spkt->icmp_code)) { > + if (icmp_ppkt->icmp_type == ICMP_REDIRECT) { Do you need to check the lengths again before reading any of these fields? > + if (icmp_ppkt->icmp_gwaddr.s_addr != > + icmp_spkt->icmp_gwaddr.s_addr) { > + trace_colo_compare_main("icmp_gwaddr.s_addr not same"); > + trace_colo_compare_with_char("ppkt s_addr", > + inet_ntoa(icmp_ppkt->icmp_gwaddr)); > + trace_colo_compare_with_char("spkt s_addr", > + inet_ntoa(icmp_spkt->icmp_gwaddr)); > + return -1; > + } > + } else if ((icmp_ppkt->icmp_type == ICMP_UNREACH) && > + (icmp_ppkt->icmp_type == ICMP_UNREACH_NEEDFRAG)) { > + if (icmp_ppkt->icmp_nextmtu != icmp_spkt->icmp_nextmtu) { > + trace_colo_compare_main("icmp_nextmtu not same"); > + trace_colo_compare_with_int("ppkt s_addr", > + icmp_ppkt->icmp_nextmtu); > + trace_colo_compare_with_int("spkt s_addr", > + icmp_spkt->icmp_nextmtu); > + return -1; > + } > + } > + } else { > + return -1; > + } > + > + return 0; > +} > + > +/* > + * called from the compare thread on the primary > + * for compare other packet > + */ > +static int colo_packet_compare_other(Packet *spkt, Packet *ppkt) > { > - trace_colo_compare_main("compare all"); > + trace_colo_compare_main("compare other"); Try and make the traces give you all the information you're likely to need - for example, adding ip_proto here would be really useful for when you find you've suddenly got a lot of miscompare compare others and want to figure out why. > return colo_packet_compare(ppkt, spkt); > } > > @@ -406,8 +545,19 @@ static void colo_compare_connection(void *opaque, void *user_data) > while (!g_queue_is_empty(&conn->primary_list) && > !g_queue_is_empty(&conn->secondary_list)) { > pkt = g_queue_pop_head(&conn->primary_list); > - result = g_queue_find_custom(&conn->secondary_list, > - pkt, (GCompareFunc)colo_packet_compare_all); > + if (conn->ip_proto == IPPROTO_TCP) { > + result = g_queue_find_custom(&conn->secondary_list, > + pkt, (GCompareFunc)colo_packet_compare_tcp); > + } else if (conn->ip_proto == IPPROTO_UDP) { > + result = g_queue_find_custom(&conn->secondary_list, > + pkt, (GCompareFunc)colo_packet_compare_udp); > + } else if (conn->ip_proto == IPPROTO_ICMP) { > + result = g_queue_find_custom(&conn->secondary_list, > + pkt, (GCompareFunc)colo_packet_compare_icmp); > + } else { > + result = g_queue_find_custom(&conn->secondary_list, > + pkt, (GCompareFunc)colo_packet_compare_other); > + } > > if (result) { > ret = compare_chr_send(pkt->s->chr_out, pkt->data, pkt->size); > -- > 1.9.1 Dave -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 04/29/2016 03:44 AM, Dr. David Alan Gilbert wrote: > * Zhang Chen (zhangchen.fnst@cn.fujitsu.com) wrote: >> Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> >> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >> --- >> net/colo-compare.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++-- >> 1 file changed, 154 insertions(+), 4 deletions(-) >> >> diff --git a/net/colo-compare.c b/net/colo-compare.c >> index 4b5a2d4..3dad461 100644 >> --- a/net/colo-compare.c >> +++ b/net/colo-compare.c >> @@ -385,9 +385,148 @@ static int colo_packet_compare(Packet *ppkt, Packet *spkt) >> } >> } >> >> -static int colo_packet_compare_all(Packet *spkt, Packet *ppkt) >> +/* >> + * called from the compare thread on the primary >> + * for compare tcp packet >> + * compare_tcp copied from Dr. David Alan Gilbert's branch >> + */ >> +static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) >> +{ >> + struct tcphdr *ptcp, *stcp; >> + int res; >> + char *sdebug, *ddebug; >> + ptrdiff_t offset; >> + >> + trace_colo_compare_main("compare tcp"); >> + ptcp = (struct tcphdr *)ppkt->transport_layer; >> + stcp = (struct tcphdr *)spkt->transport_layer; >> + >> + /* Initial is compare the whole packet */ >> + offset = 12; /* Hack! Skip virtio header */ > So, when I post a set of patches and mark it saying that I know they've > got a lot of hacks in them, it's good for those reusing those patches > to check they need the hacks! > > In my world I found I needed to skip over that header and I didn't understand > why; but hadn't figured out the details yet, and I'd added the 12 everywhere - > I think this is the only place you've got it, so it's almost certainly wrong. I test in my world it hadn't that header,so if I remove the 12 offset,then the function is almost OK? > >> + if (ptcp->th_flags == stcp->th_flags && >> + ((ptcp->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { >> + /* This is the syn/ack response from the guest to an incoming >> + * connection; the secondary won't have matched the sequence number >> + * Note: We should probably compare the IP level? >> + * Note hack: This already has the virtio offset >> + */ >> + offset = sizeof(ptcp->th_ack) + (void *)&ptcp->th_ack - ppkt->data; >> + } >> + /* Note - we want to compare everything as long as it's not the syn/ack? */ >> + assert(offset > 0); >> + assert(spkt->size > offset); >> + >> + /* The 'identification' field in the IP header is *very* random >> + * it almost never matches. Fudge this by ignoring differences in >> + * unfragmented packets; they'll normally sort themselves out if different >> + * anyway, and it should recover at the TCP level. >> + * An alternative would be to get both the primary and secondary to rewrite >> + * somehow; but that would need some sync traffic to sync the state >> + */ >> + if (ntohs(ppkt->ip->ip_off) & IP_DF) { >> + spkt->ip->ip_id = ppkt->ip->ip_id; >> + /* and the sum will be different if the IDs were different */ >> + spkt->ip->ip_sum = ppkt->ip->ip_sum; >> + } >> + >> + res = memcmp(ppkt->data + offset, spkt->data + offset, >> + (spkt->size - offset)); >> + >> + if (res && DEBUG_TCP_COMPARE) { >> + sdebug = strdup(inet_ntoa(ppkt->ip->ip_src)); >> + ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst)); >> + fprintf(stderr, "%s: src/dst: %s/%s offset=%zd p: seq/ack=%u/%u" >> + " s: seq/ack=%u/%u res=%d flags=%x/%x\n", __func__, >> + sdebug, ddebug, offset, >> + ntohl(ptcp->th_seq), ntohl(ptcp->th_ack), >> + ntohl(stcp->th_seq), ntohl(stcp->th_ack), >> + res, ptcp->th_flags, stcp->th_flags); >> + if (res && (ptcp->th_seq == stcp->th_seq)) { >> + trace_colo_compare_with_int("Primary len", ppkt->size); >> + colo_dump_packet(ppkt); >> + trace_colo_compare_with_int("Secondary len", spkt->size); >> + colo_dump_packet(spkt); >> + } > Try and use meaningful traceing for this - don't use a 'compare_with_int' > trace; but use a name that says what you're doing - for example > trace_colo_tcp_miscompare ; that way if you're running COLO and just > want to see why you're getting so many miscompares, you can look > at this without turning on all the rest of the debug. OK,I will fix in next version. > > Also, in my version instead of using a DEBUG_TCP macro, I again used > the trace system, so, my code here was: > > if (trace_event_get_state(TRACE_COLO_PROXY_MISCOMPARE) && res) { > > that means you can switch it on and off at runtime using the > trace system. Then just as it's running I can get to the (qemu) prompt > and do: > trace-event colo_proxy_miscompare on > > and see what's happening without recompiling. OK,I will fix. > >> + g_free(sdebug); >> + g_free(ddebug); >> + } >> + >> + return res; >> +} >> + >> +/* >> + * called from the compare thread on the primary >> + * for compare udp packet >> + */ >> +static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) >> +{ >> + int ret = 1; >> + >> + trace_colo_compare_main("compare udp"); >> + ret = colo_packet_compare(ppkt, spkt); >> + >> + if (ret) { >> + trace_colo_compare_main("primary udp"); >> + colo_dump_packet(ppkt); >> + trace_colo_compare_main("secondary udp"); >> + colo_dump_packet(spkt); >> + } >> + >> + return ret; >> +} >> + >> +/* >> + * called from the compare thread on the primary >> + * for compare icmp packet >> + */ >> +static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) >> +{ >> + int network_length; >> + struct icmp *icmp_ppkt, *icmp_spkt; >> + >> + trace_colo_compare_main("compare icmp"); >> + network_length = (ppkt->ip->ip_hl & 0x0F) * 4; > Do you need that & 0x0f - the definition in ip.h is ip_hl:4 ? I will fix it in next version. > >> + icmp_ppkt = (struct icmp *)(ppkt->data + network_length + ETH_HLEN); >> + icmp_spkt = (struct icmp *)(spkt->data + network_length + ETH_HLEN); >> + >> + if ((icmp_ppkt->icmp_type == icmp_spkt->icmp_type) && >> + (icmp_ppkt->icmp_code == icmp_spkt->icmp_code)) { >> + if (icmp_ppkt->icmp_type == ICMP_REDIRECT) { > Do you need to check the lengths again before reading any of these fields? OK, I will check it. Thanks Zhang Chen > >> + if (icmp_ppkt->icmp_gwaddr.s_addr != >> + icmp_spkt->icmp_gwaddr.s_addr) { >> + trace_colo_compare_main("icmp_gwaddr.s_addr not same"); >> + trace_colo_compare_with_char("ppkt s_addr", >> + inet_ntoa(icmp_ppkt->icmp_gwaddr)); >> + trace_colo_compare_with_char("spkt s_addr", >> + inet_ntoa(icmp_spkt->icmp_gwaddr)); >> + return -1; >> + } >> + } else if ((icmp_ppkt->icmp_type == ICMP_UNREACH) && >> + (icmp_ppkt->icmp_type == ICMP_UNREACH_NEEDFRAG)) { >> + if (icmp_ppkt->icmp_nextmtu != icmp_spkt->icmp_nextmtu) { >> + trace_colo_compare_main("icmp_nextmtu not same"); >> + trace_colo_compare_with_int("ppkt s_addr", >> + icmp_ppkt->icmp_nextmtu); >> + trace_colo_compare_with_int("spkt s_addr", >> + icmp_spkt->icmp_nextmtu); >> + return -1; >> + } >> + } >> + } else { >> + return -1; >> + } >> + >> + return 0; >> +} >> + >> +/* >> + * called from the compare thread on the primary >> + * for compare other packet >> + */ >> +static int colo_packet_compare_other(Packet *spkt, Packet *ppkt) >> { >> - trace_colo_compare_main("compare all"); >> + trace_colo_compare_main("compare other"); > Try and make the traces give you all the information you're likely to need - for > example, adding ip_proto here would be really useful for when you find you've > suddenly got a lot of miscompare compare others and want to figure out why. > >> return colo_packet_compare(ppkt, spkt); >> } >> >> @@ -406,8 +545,19 @@ static void colo_compare_connection(void *opaque, void *user_data) >> while (!g_queue_is_empty(&conn->primary_list) && >> !g_queue_is_empty(&conn->secondary_list)) { >> pkt = g_queue_pop_head(&conn->primary_list); >> - result = g_queue_find_custom(&conn->secondary_list, >> - pkt, (GCompareFunc)colo_packet_compare_all); >> + if (conn->ip_proto == IPPROTO_TCP) { >> + result = g_queue_find_custom(&conn->secondary_list, >> + pkt, (GCompareFunc)colo_packet_compare_tcp); >> + } else if (conn->ip_proto == IPPROTO_UDP) { >> + result = g_queue_find_custom(&conn->secondary_list, >> + pkt, (GCompareFunc)colo_packet_compare_udp); >> + } else if (conn->ip_proto == IPPROTO_ICMP) { >> + result = g_queue_find_custom(&conn->secondary_list, >> + pkt, (GCompareFunc)colo_packet_compare_icmp); >> + } else { >> + result = g_queue_find_custom(&conn->secondary_list, >> + pkt, (GCompareFunc)colo_packet_compare_other); >> + } >> >> if (result) { >> ret = compare_chr_send(pkt->s->chr_out, pkt->data, pkt->size); >> -- >> 1.9.1 > Dave > > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > > > . >
On 05/05/2016 11:03 AM, Zhang Chen wrote: > > > On 04/29/2016 03:44 AM, Dr. David Alan Gilbert wrote: >> * Zhang Chen (zhangchen.fnst@cn.fujitsu.com) wrote: >>> Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> >>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> >>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >>> --- >>> net/colo-compare.c | 158 >>> +++++++++++++++++++++++++++++++++++++++++++++++++++-- >>> 1 file changed, 154 insertions(+), 4 deletions(-) >>> >>> diff --git a/net/colo-compare.c b/net/colo-compare.c >>> index 4b5a2d4..3dad461 100644 >>> --- a/net/colo-compare.c >>> +++ b/net/colo-compare.c >>> @@ -385,9 +385,148 @@ static int colo_packet_compare(Packet *ppkt, >>> Packet *spkt) >>> } >>> } >>> -static int colo_packet_compare_all(Packet *spkt, Packet *ppkt) >>> +/* >>> + * called from the compare thread on the primary >>> + * for compare tcp packet >>> + * compare_tcp copied from Dr. David Alan Gilbert's branch >>> + */ >>> +static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) >>> +{ >>> + struct tcphdr *ptcp, *stcp; >>> + int res; >>> + char *sdebug, *ddebug; >>> + ptrdiff_t offset; >>> + >>> + trace_colo_compare_main("compare tcp"); >>> + ptcp = (struct tcphdr *)ppkt->transport_layer; >>> + stcp = (struct tcphdr *)spkt->transport_layer; >>> + >>> + /* Initial is compare the whole packet */ >>> + offset = 12; /* Hack! Skip virtio header */ >> So, when I post a set of patches and mark it saying that I know they've >> got a lot of hacks in them, it's good for those reusing those patches >> to check they need the hacks! >> >> In my world I found I needed to skip over that header and I didn't >> understand >> why; but hadn't figured out the details yet, and I'd added the 12 >> everywhere - >> I think this is the only place you've got it, so it's almost >> certainly wrong. > > I test in my world it hadn't that header,so if I remove the > 12 offset,then the function is almost OK? > >> >>> + if (ptcp->th_flags == stcp->th_flags && >>> + ((ptcp->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { >>> + /* This is the syn/ack response from the guest to an incoming >>> + * connection; the secondary won't have matched the >>> sequence number >>> + * Note: We should probably compare the IP level? >>> + * Note hack: This already has the virtio offset >>> + */ >>> + offset = sizeof(ptcp->th_ack) + (void *)&ptcp->th_ack - >>> ppkt->data; >>> + } >>> + /* Note - we want to compare everything as long as it's not the >>> syn/ack? */ >>> + assert(offset > 0); >>> + assert(spkt->size > offset); >>> + >>> + /* The 'identification' field in the IP header is *very* random >>> + * it almost never matches. Fudge this by ignoring differences in >>> + * unfragmented packets; they'll normally sort themselves out >>> if different >>> + * anyway, and it should recover at the TCP level. >>> + * An alternative would be to get both the primary and >>> secondary to rewrite >>> + * somehow; but that would need some sync traffic to sync the >>> state >>> + */ >>> + if (ntohs(ppkt->ip->ip_off) & IP_DF) { >>> + spkt->ip->ip_id = ppkt->ip->ip_id; >>> + /* and the sum will be different if the IDs were different */ >>> + spkt->ip->ip_sum = ppkt->ip->ip_sum; >>> + } >>> + >>> + res = memcmp(ppkt->data + offset, spkt->data + offset, >>> + (spkt->size - offset)); >>> + >>> + if (res && DEBUG_TCP_COMPARE) { >>> + sdebug = strdup(inet_ntoa(ppkt->ip->ip_src)); >>> + ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst)); >>> + fprintf(stderr, "%s: src/dst: %s/%s offset=%zd p: >>> seq/ack=%u/%u" >>> + " s: seq/ack=%u/%u res=%d flags=%x/%x\n", __func__, >>> + sdebug, ddebug, offset, >>> + ntohl(ptcp->th_seq), ntohl(ptcp->th_ack), >>> + ntohl(stcp->th_seq), ntohl(stcp->th_ack), >>> + res, ptcp->th_flags, stcp->th_flags); >>> + if (res && (ptcp->th_seq == stcp->th_seq)) { >>> + trace_colo_compare_with_int("Primary len", ppkt->size); >>> + colo_dump_packet(ppkt); >>> + trace_colo_compare_with_int("Secondary len", spkt->size); >>> + colo_dump_packet(spkt); >>> + } >> Try and use meaningful traceing for this - don't use a >> 'compare_with_int' >> trace; but use a name that says what you're doing - for example >> trace_colo_tcp_miscompare ; that way if you're running COLO and just >> want to see why you're getting so many miscompares, you can look >> at this without turning on all the rest of the debug. > > OK,I will fix in next version. > >> >> Also, in my version instead of using a DEBUG_TCP macro, I again used >> the trace system, so, my code here was: >> >> if (trace_event_get_state(TRACE_COLO_PROXY_MISCOMPARE) && res) { >> >> that means you can switch it on and off at runtime using the >> trace system. Then just as it's running I can get to the (qemu) prompt >> and do: >> trace-event colo_proxy_miscompare on >> >> and see what's happening without recompiling. > > OK,I will fix. > >> >>> + g_free(sdebug); >>> + g_free(ddebug); >>> + } >>> + >>> + return res; >>> +} >>> + >>> +/* >>> + * called from the compare thread on the primary >>> + * for compare udp packet >>> + */ >>> +static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) >>> +{ >>> + int ret = 1; >>> + >>> + trace_colo_compare_main("compare udp"); >>> + ret = colo_packet_compare(ppkt, spkt); >>> + >>> + if (ret) { >>> + trace_colo_compare_main("primary udp"); >>> + colo_dump_packet(ppkt); >>> + trace_colo_compare_main("secondary udp"); >>> + colo_dump_packet(spkt); >>> + } >>> + >>> + return ret; >>> +} >>> + >>> +/* >>> + * called from the compare thread on the primary >>> + * for compare icmp packet >>> + */ >>> +static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) >>> +{ >>> + int network_length; >>> + struct icmp *icmp_ppkt, *icmp_spkt; >>> + >>> + trace_colo_compare_main("compare icmp"); >>> + network_length = (ppkt->ip->ip_hl & 0x0F) * 4; >> Do you need that & 0x0f - the definition in ip.h is ip_hl:4 ? > > I will fix it in next version. > >> >>> + icmp_ppkt = (struct icmp *)(ppkt->data + network_length + >>> ETH_HLEN); >>> + icmp_spkt = (struct icmp *)(spkt->data + network_length + >>> ETH_HLEN); >>> + >>> + if ((icmp_ppkt->icmp_type == icmp_spkt->icmp_type) && >>> + (icmp_ppkt->icmp_code == icmp_spkt->icmp_code)) { >>> + if (icmp_ppkt->icmp_type == ICMP_REDIRECT) { >> Do you need to check the lengths again before reading any of these >> fields? > > OK, I will check it. > > Thanks > Zhang Chen > >> >>> + if (icmp_ppkt->icmp_gwaddr.s_addr != >>> + icmp_spkt->icmp_gwaddr.s_addr) { >>> + trace_colo_compare_main("icmp_gwaddr.s_addr not >>> same"); >>> + trace_colo_compare_with_char("ppkt s_addr", >>> + inet_ntoa(icmp_ppkt->icmp_gwaddr)); >>> + trace_colo_compare_with_char("spkt s_addr", >>> + inet_ntoa(icmp_spkt->icmp_gwaddr)); >>> + return -1; >>> + } >>> + } else if ((icmp_ppkt->icmp_type == ICMP_UNREACH) && >>> + (icmp_ppkt->icmp_type == ICMP_UNREACH_NEEDFRAG)) { >>> + if (icmp_ppkt->icmp_nextmtu != icmp_spkt->icmp_nextmtu) { >>> + trace_colo_compare_main("icmp_nextmtu not same"); >>> + trace_colo_compare_with_int("ppkt s_addr", >>> + icmp_ppkt->icmp_nextmtu); >>> + trace_colo_compare_with_int("spkt s_addr", >>> + icmp_spkt->icmp_nextmtu); >>> + return -1; >>> + } >>> + } >>> + } else { >>> + return -1; >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +/* >>> + * called from the compare thread on the primary >>> + * for compare other packet >>> + */ >>> +static int colo_packet_compare_other(Packet *spkt, Packet *ppkt) >>> { >>> - trace_colo_compare_main("compare all"); >>> + trace_colo_compare_main("compare other"); >> Try and make the traces give you all the information you're likely to >> need - for >> example, adding ip_proto here would be really useful for when you >> find you've >> suddenly got a lot of miscompare compare others and want to figure >> out why. OK,I will add more info. Thanks Zhang Chen >> >>> return colo_packet_compare(ppkt, spkt); >>> } >>> @@ -406,8 +545,19 @@ static void colo_compare_connection(void >>> *opaque, void *user_data) >>> while (!g_queue_is_empty(&conn->primary_list) && >>> !g_queue_is_empty(&conn->secondary_list)) { >>> pkt = g_queue_pop_head(&conn->primary_list); >>> - result = g_queue_find_custom(&conn->secondary_list, >>> - pkt, >>> (GCompareFunc)colo_packet_compare_all); >>> + if (conn->ip_proto == IPPROTO_TCP) { >>> + result = g_queue_find_custom(&conn->secondary_list, >>> + pkt, (GCompareFunc)colo_packet_compare_tcp); >>> + } else if (conn->ip_proto == IPPROTO_UDP) { >>> + result = g_queue_find_custom(&conn->secondary_list, >>> + pkt, (GCompareFunc)colo_packet_compare_udp); >>> + } else if (conn->ip_proto == IPPROTO_ICMP) { >>> + result = g_queue_find_custom(&conn->secondary_list, >>> + pkt, (GCompareFunc)colo_packet_compare_icmp); >>> + } else { >>> + result = g_queue_find_custom(&conn->secondary_list, >>> + pkt, (GCompareFunc)colo_packet_compare_other); >>> + } >>> if (result) { >>> ret = compare_chr_send(pkt->s->chr_out, pkt->data, >>> pkt->size); >>> -- >>> 1.9.1 >> Dave >> >> -- >> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK >> >> >> . >> >
diff --git a/net/colo-compare.c b/net/colo-compare.c index 4b5a2d4..3dad461 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -385,9 +385,148 @@ static int colo_packet_compare(Packet *ppkt, Packet *spkt) } } -static int colo_packet_compare_all(Packet *spkt, Packet *ppkt) +/* + * called from the compare thread on the primary + * for compare tcp packet + * compare_tcp copied from Dr. David Alan Gilbert's branch + */ +static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) +{ + struct tcphdr *ptcp, *stcp; + int res; + char *sdebug, *ddebug; + ptrdiff_t offset; + + trace_colo_compare_main("compare tcp"); + ptcp = (struct tcphdr *)ppkt->transport_layer; + stcp = (struct tcphdr *)spkt->transport_layer; + + /* Initial is compare the whole packet */ + offset = 12; /* Hack! Skip virtio header */ + + if (ptcp->th_flags == stcp->th_flags && + ((ptcp->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { + /* This is the syn/ack response from the guest to an incoming + * connection; the secondary won't have matched the sequence number + * Note: We should probably compare the IP level? + * Note hack: This already has the virtio offset + */ + offset = sizeof(ptcp->th_ack) + (void *)&ptcp->th_ack - ppkt->data; + } + /* Note - we want to compare everything as long as it's not the syn/ack? */ + assert(offset > 0); + assert(spkt->size > offset); + + /* The 'identification' field in the IP header is *very* random + * it almost never matches. Fudge this by ignoring differences in + * unfragmented packets; they'll normally sort themselves out if different + * anyway, and it should recover at the TCP level. + * An alternative would be to get both the primary and secondary to rewrite + * somehow; but that would need some sync traffic to sync the state + */ + if (ntohs(ppkt->ip->ip_off) & IP_DF) { + spkt->ip->ip_id = ppkt->ip->ip_id; + /* and the sum will be different if the IDs were different */ + spkt->ip->ip_sum = ppkt->ip->ip_sum; + } + + res = memcmp(ppkt->data + offset, spkt->data + offset, + (spkt->size - offset)); + + if (res && DEBUG_TCP_COMPARE) { + sdebug = strdup(inet_ntoa(ppkt->ip->ip_src)); + ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst)); + fprintf(stderr, "%s: src/dst: %s/%s offset=%zd p: seq/ack=%u/%u" + " s: seq/ack=%u/%u res=%d flags=%x/%x\n", __func__, + sdebug, ddebug, offset, + ntohl(ptcp->th_seq), ntohl(ptcp->th_ack), + ntohl(stcp->th_seq), ntohl(stcp->th_ack), + res, ptcp->th_flags, stcp->th_flags); + if (res && (ptcp->th_seq == stcp->th_seq)) { + trace_colo_compare_with_int("Primary len", ppkt->size); + colo_dump_packet(ppkt); + trace_colo_compare_with_int("Secondary len", spkt->size); + colo_dump_packet(spkt); + } + g_free(sdebug); + g_free(ddebug); + } + + return res; +} + +/* + * called from the compare thread on the primary + * for compare udp packet + */ +static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) +{ + int ret = 1; + + trace_colo_compare_main("compare udp"); + ret = colo_packet_compare(ppkt, spkt); + + if (ret) { + trace_colo_compare_main("primary udp"); + colo_dump_packet(ppkt); + trace_colo_compare_main("secondary udp"); + colo_dump_packet(spkt); + } + + return ret; +} + +/* + * called from the compare thread on the primary + * for compare icmp packet + */ +static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) +{ + int network_length; + struct icmp *icmp_ppkt, *icmp_spkt; + + trace_colo_compare_main("compare icmp"); + network_length = (ppkt->ip->ip_hl & 0x0F) * 4; + icmp_ppkt = (struct icmp *)(ppkt->data + network_length + ETH_HLEN); + icmp_spkt = (struct icmp *)(spkt->data + network_length + ETH_HLEN); + + if ((icmp_ppkt->icmp_type == icmp_spkt->icmp_type) && + (icmp_ppkt->icmp_code == icmp_spkt->icmp_code)) { + if (icmp_ppkt->icmp_type == ICMP_REDIRECT) { + if (icmp_ppkt->icmp_gwaddr.s_addr != + icmp_spkt->icmp_gwaddr.s_addr) { + trace_colo_compare_main("icmp_gwaddr.s_addr not same"); + trace_colo_compare_with_char("ppkt s_addr", + inet_ntoa(icmp_ppkt->icmp_gwaddr)); + trace_colo_compare_with_char("spkt s_addr", + inet_ntoa(icmp_spkt->icmp_gwaddr)); + return -1; + } + } else if ((icmp_ppkt->icmp_type == ICMP_UNREACH) && + (icmp_ppkt->icmp_type == ICMP_UNREACH_NEEDFRAG)) { + if (icmp_ppkt->icmp_nextmtu != icmp_spkt->icmp_nextmtu) { + trace_colo_compare_main("icmp_nextmtu not same"); + trace_colo_compare_with_int("ppkt s_addr", + icmp_ppkt->icmp_nextmtu); + trace_colo_compare_with_int("spkt s_addr", + icmp_spkt->icmp_nextmtu); + return -1; + } + } + } else { + return -1; + } + + return 0; +} + +/* + * called from the compare thread on the primary + * for compare other packet + */ +static int colo_packet_compare_other(Packet *spkt, Packet *ppkt) { - trace_colo_compare_main("compare all"); + trace_colo_compare_main("compare other"); return colo_packet_compare(ppkt, spkt); } @@ -406,8 +545,19 @@ static void colo_compare_connection(void *opaque, void *user_data) while (!g_queue_is_empty(&conn->primary_list) && !g_queue_is_empty(&conn->secondary_list)) { pkt = g_queue_pop_head(&conn->primary_list); - result = g_queue_find_custom(&conn->secondary_list, - pkt, (GCompareFunc)colo_packet_compare_all); + if (conn->ip_proto == IPPROTO_TCP) { + result = g_queue_find_custom(&conn->secondary_list, + pkt, (GCompareFunc)colo_packet_compare_tcp); + } else if (conn->ip_proto == IPPROTO_UDP) { + result = g_queue_find_custom(&conn->secondary_list, + pkt, (GCompareFunc)colo_packet_compare_udp); + } else if (conn->ip_proto == IPPROTO_ICMP) { + result = g_queue_find_custom(&conn->secondary_list, + pkt, (GCompareFunc)colo_packet_compare_icmp); + } else { + result = g_queue_find_custom(&conn->secondary_list, + pkt, (GCompareFunc)colo_packet_compare_other); + } if (result) { ret = compare_chr_send(pkt->s->chr_out, pkt->data, pkt->size);