Message ID | 20250127223840.67280-2-ebiggers@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | RDMA: switch to using CRC32 library functions | expand |
在 2025/1/27 23:38, Eric Biggers 写道: > From: Eric Biggers <ebiggers@google.com> > > The usual big endian convention of InfiniBand does not apply to the > ICRC field, whose transmission is specified in terms of the CRC32 > polynomial coefficients. The coefficients are transmitted in > descending order from the x^31 coefficient to the x^0 one. When the > result is interpreted as a 32-bit integer using the required reverse > mapping between bits and polynomial coefficients, it's a __le32. > > The code used __be32, but it did not actually do an endianness > conversion. So it actually just used the CPU's native endianness, > making it comply with the spec on little endian systems only. > > Fix the code to use __le32 and do the needed le32_to_cpu() and > cpu_to_le32() so that it complies with the spec on big endian systems. > > Also update the lower-level helper functions to use u32, as they are > working with CPU native values. This part does not change behavior. > > Found through code review. I don't know whether anyone is actually > using this code on big endian systems. Probably not. > > Signed-off-by: Eric Biggers <ebiggers@google.com> In this commit, the little endian is used instead of big endian in the rxe source code. To x86_64 architecture, I am fine with this commit. To other big endian architecture, e.g. ppc/ppc64, I do not have such host, thus, I can not make tests on the big endian host. Thanks, Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev> Zhu Yanjun > --- > drivers/infiniband/sw/rxe/rxe_icrc.c | 41 +++++++++++++++------------- > 1 file changed, 22 insertions(+), 19 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c > index fdf5f08cd8f17..ee417a0bbbdca 100644 > --- a/drivers/infiniband/sw/rxe/rxe_icrc.c > +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c > @@ -38,26 +38,26 @@ int rxe_icrc_init(struct rxe_dev *rxe) > * @next: starting address of current segment > * @len: length of current segment > * > * Return: the cumulative crc32 checksum > */ > -static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) > +static u32 rxe_crc32(struct rxe_dev *rxe, u32 crc, void *next, size_t len) > { > - __be32 icrc; > + u32 icrc; > int err; > > SHASH_DESC_ON_STACK(shash, rxe->tfm); > > shash->tfm = rxe->tfm; > - *(__be32 *)shash_desc_ctx(shash) = crc; > + *(u32 *)shash_desc_ctx(shash) = crc; > err = crypto_shash_update(shash, next, len); > if (unlikely(err)) { > rxe_dbg_dev(rxe, "failed crc calculation, err: %d\n", err); > - return (__force __be32)crc32_le((__force u32)crc, next, len); > + return crc32_le(crc, next, len); > } > > - icrc = *(__be32 *)shash_desc_ctx(shash); > + icrc = *(u32 *)shash_desc_ctx(shash); > barrier_data(shash_desc_ctx(shash)); > > return icrc; > } > > @@ -67,18 +67,18 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) > * @skb: packet buffer > * @pkt: packet information > * > * Return: the partial ICRC > */ > -static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) > +static u32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) > { > unsigned int bth_offset = 0; > struct iphdr *ip4h = NULL; > struct ipv6hdr *ip6h = NULL; > struct udphdr *udph; > struct rxe_bth *bth; > - __be32 crc; > + u32 crc; > int length; > int hdr_size = sizeof(struct udphdr) + > (skb->protocol == htons(ETH_P_IP) ? > sizeof(struct iphdr) : sizeof(struct ipv6hdr)); > /* pseudo header buffer size is calculate using ipv6 header size since > @@ -89,11 +89,11 @@ static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) > RXE_BTH_BYTES]; > > /* This seed is the result of computing a CRC with a seed of > * 0xfffffff and 8 bytes of 0xff representing a masked LRH. > */ > - crc = (__force __be32)0xdebb20e3; > + crc = 0xdebb20e3; > > if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */ > memcpy(pshdr, ip_hdr(skb), hdr_size); > ip4h = (struct iphdr *)pshdr; > udph = (struct udphdr *)(ip4h + 1); > @@ -137,23 +137,27 @@ static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) > * > * Return: 0 if the values match else an error > */ > int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt) > { > - __be32 *icrcp; > - __be32 pkt_icrc; > - __be32 icrc; > - > - icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); > - pkt_icrc = *icrcp; > + /* > + * The usual big endian convention of InfiniBand does not apply to the > + * ICRC field, whose transmission is specified in terms of the CRC32 > + * polynomial coefficients. The coefficients are transmitted in > + * descending order from the x^31 coefficient to the x^0 one. When the > + * result is interpreted as a 32-bit integer using the required reverse > + * mapping between bits and polynomial coefficients, it's a __le32. > + */ > + __le32 *icrcp = (__le32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); > + u32 icrc; > > icrc = rxe_icrc_hdr(skb, pkt); > icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt), > payload_size(pkt) + bth_pad(pkt)); > icrc = ~icrc; > > - if (unlikely(icrc != pkt_icrc)) > + if (unlikely(icrc != le32_to_cpu(*icrcp))) > return -EINVAL; > > return 0; > } > > @@ -162,14 +166,13 @@ int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt) > * @skb: packet buffer > * @pkt: packet information > */ > void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt) > { > - __be32 *icrcp; > - __be32 icrc; > + __le32 *icrcp = (__le32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); > + u32 icrc; > > - icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); > icrc = rxe_icrc_hdr(skb, pkt); > icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt), > payload_size(pkt) + bth_pad(pkt)); > - *icrcp = ~icrc; > + *icrcp = cpu_to_le32(~icrc); > }
在 2025/1/27 23:38, Eric Biggers 写道: > From: Eric Biggers <ebiggers@google.com> > > The usual big endian convention of InfiniBand does not apply to the > ICRC field, whose transmission is specified in terms of the CRC32 > polynomial coefficients. The coefficients are transmitted in > descending order from the x^31 coefficient to the x^0 one. When the > result is interpreted as a 32-bit integer using the required reverse > mapping between bits and polynomial coefficients, it's a __le32. In InfiniBand Architecture Specification, the following " The resulting bits are sent in order from the bit representing the coefficient of the highest term of the remainder polynomial. The least significant bit, most significant byte first ordering of the packet does not apply to the ICRC field. " and " The 32 Flip-Flops are initialized to 1 before every ICRC generation. The packet is fed to the reference design of Figure 57 one bit at a time in the order specified in Section 7.8.1 on page 222. The Remainder is the bitwise NOT of the value stored at the 32 Flip-Flops after the last bit of the packet was clocked into the ICRC Generator. ICRC Field is obtained from the Remainder as shown in Figure 57. ICRC Field is transmitted using Big Endian byte ordering like every field of an InfiniBand packet. " It seems that big endian byte ordering is needed in ICRC field. I am not sure if MLX NIC can connect to RXE after this patchset is applied. Thanks, Zhu Yanjun > > The code used __be32, but it did not actually do an endianness > conversion. So it actually just used the CPU's native endianness, > making it comply with the spec on little endian systems only. > > Fix the code to use __le32 and do the needed le32_to_cpu() and > cpu_to_le32() so that it complies with the spec on big endian systems. > > Also update the lower-level helper functions to use u32, as they are > working with CPU native values. This part does not change behavior. > > Found through code review. I don't know whether anyone is actually > using this code on big endian systems. Probably not. > > Signed-off-by: Eric Biggers <ebiggers@google.com> > --- > drivers/infiniband/sw/rxe/rxe_icrc.c | 41 +++++++++++++++------------- > 1 file changed, 22 insertions(+), 19 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c > index fdf5f08cd8f17..ee417a0bbbdca 100644 > --- a/drivers/infiniband/sw/rxe/rxe_icrc.c > +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c > @@ -38,26 +38,26 @@ int rxe_icrc_init(struct rxe_dev *rxe) > * @next: starting address of current segment > * @len: length of current segment > * > * Return: the cumulative crc32 checksum > */ > -static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) > +static u32 rxe_crc32(struct rxe_dev *rxe, u32 crc, void *next, size_t len) > { > - __be32 icrc; > + u32 icrc; > int err; > > SHASH_DESC_ON_STACK(shash, rxe->tfm); > > shash->tfm = rxe->tfm; > - *(__be32 *)shash_desc_ctx(shash) = crc; > + *(u32 *)shash_desc_ctx(shash) = crc; > err = crypto_shash_update(shash, next, len); > if (unlikely(err)) { > rxe_dbg_dev(rxe, "failed crc calculation, err: %d\n", err); > - return (__force __be32)crc32_le((__force u32)crc, next, len); > + return crc32_le(crc, next, len); > } > > - icrc = *(__be32 *)shash_desc_ctx(shash); > + icrc = *(u32 *)shash_desc_ctx(shash); > barrier_data(shash_desc_ctx(shash)); > > return icrc; > } > > @@ -67,18 +67,18 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) > * @skb: packet buffer > * @pkt: packet information > * > * Return: the partial ICRC > */ > -static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) > +static u32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) > { > unsigned int bth_offset = 0; > struct iphdr *ip4h = NULL; > struct ipv6hdr *ip6h = NULL; > struct udphdr *udph; > struct rxe_bth *bth; > - __be32 crc; > + u32 crc; > int length; > int hdr_size = sizeof(struct udphdr) + > (skb->protocol == htons(ETH_P_IP) ? > sizeof(struct iphdr) : sizeof(struct ipv6hdr)); > /* pseudo header buffer size is calculate using ipv6 header size since > @@ -89,11 +89,11 @@ static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) > RXE_BTH_BYTES]; > > /* This seed is the result of computing a CRC with a seed of > * 0xfffffff and 8 bytes of 0xff representing a masked LRH. > */ > - crc = (__force __be32)0xdebb20e3; > + crc = 0xdebb20e3; > > if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */ > memcpy(pshdr, ip_hdr(skb), hdr_size); > ip4h = (struct iphdr *)pshdr; > udph = (struct udphdr *)(ip4h + 1); > @@ -137,23 +137,27 @@ static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) > * > * Return: 0 if the values match else an error > */ > int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt) > { > - __be32 *icrcp; > - __be32 pkt_icrc; > - __be32 icrc; > - > - icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); > - pkt_icrc = *icrcp; > + /* > + * The usual big endian convention of InfiniBand does not apply to the > + * ICRC field, whose transmission is specified in terms of the CRC32 > + * polynomial coefficients. The coefficients are transmitted in > + * descending order from the x^31 coefficient to the x^0 one. When the > + * result is interpreted as a 32-bit integer using the required reverse > + * mapping between bits and polynomial coefficients, it's a __le32. > + */ > + __le32 *icrcp = (__le32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); > + u32 icrc; > > icrc = rxe_icrc_hdr(skb, pkt); > icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt), > payload_size(pkt) + bth_pad(pkt)); > icrc = ~icrc; > > - if (unlikely(icrc != pkt_icrc)) > + if (unlikely(icrc != le32_to_cpu(*icrcp))) > return -EINVAL; > > return 0; > } > > @@ -162,14 +166,13 @@ int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt) > * @skb: packet buffer > * @pkt: packet information > */ > void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt) > { > - __be32 *icrcp; > - __be32 icrc; > + __le32 *icrcp = (__le32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); > + u32 icrc; > > - icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); > icrc = rxe_icrc_hdr(skb, pkt); > icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt), > payload_size(pkt) + bth_pad(pkt)); > - *icrcp = ~icrc; > + *icrcp = cpu_to_le32(~icrc); > }
On Wed, Jan 29, 2025 at 10:44:39AM +0100, Zhu Yanjun wrote: > 在 2025/1/27 23:38, Eric Biggers 写道: > > From: Eric Biggers <ebiggers@google.com> > > > > The usual big endian convention of InfiniBand does not apply to the > > ICRC field, whose transmission is specified in terms of the CRC32 > > polynomial coefficients. This patch is on to something but this is not a good explanation. The CRC32 in IB is stored as big endian and computed in big endian, the spec says so explicitly: 2) The CRC calculation is done in big endian byte order with the least 31 significant bit of the most significant byte being the first bits in the 32 CRC calculation. In this context saying it is not "big endian" doesn't seem to be quite right.. The spec gives a sample data packet (in offset/value pairs): 0 0xF0 15 0xB3 30 0x7A 45 0x8B 1 0x12 16 0x00 31 0x05 46 0xC0 2 0x37 17 0x0D 32 0x00 47 0x69 3 0x5C 18 0xEC 33 0x00 48 0x0E 4 0x00 19 0x2A 34 0x00 49 0xD4 5 0x0E 20 0x01 35 0x0E 50 0x00 6 0x17 21 0x71 36 0xBB 51 0x00 7 0xD2 22 0x0A 37 0x88 8 0x0A 23 0x1C 38 0x4D 9 0x20 24 0x01 39 0x85 10 0x24 25 0x5D 40 0xFD 11 0x87 26 0x40 41 0x5C 12 0xFF 27 0x02 42 0xFB 13 0x87 28 0x38 43 0xA4 14 0xB1 29 0xF2 44 0x72 If you feed that to the CRC32 you should get 0x9625B75A in a CPU register u32. cpu_to_be32() will put it in the right order for on the wire. Since rxe doesn't have any cpu_to_be32() on this path, I'm guessing the Linux CRC32 implementations gives a u32 with the value = 0x5AB72596 ie swapped. Probably the issue here is that the Linux CRC32 and the IBTA CRC32 are using a different mapping of LFSR bits. I love CRCs. So many different ways to implement the same thing. Thus, I guess, the code should really read: linux_crc32 = swab32(be32_to_cpu(val)); Which is a NOP on x86 and requires a swap on BE. Zhu, can you check it for Eric? (this is page 229 in the spec). I assume the Linux CRC32 always gives the same CPU value regardless of LE or BE? Jason
Hi Jason, On Wed, Jan 29, 2025 at 02:30:09PM -0400, Jason Gunthorpe wrote: > On Wed, Jan 29, 2025 at 10:44:39AM +0100, Zhu Yanjun wrote: > > 在 2025/1/27 23:38, Eric Biggers 写道: > > > From: Eric Biggers <ebiggers@google.com> > > > > > > The usual big endian convention of InfiniBand does not apply to the > > > ICRC field, whose transmission is specified in terms of the CRC32 > > > polynomial coefficients. > > This patch is on to something but this is not a good explanation. > > The CRC32 in IB is stored as big endian and computed in big endian, > the spec says so explicitly: > > 2) The CRC calculation is done in big endian byte order with the least > significant bit of the most significant byte being the first > bits in the CRC calculation. (2) just specifies the order in which the bits are passed to the CRC. It says nothing about how the CRC value is stored; that's in (4) instead. The mention of "big endian" seems to refer to the bytes being passed from first to last, which is the nearly universal convention. (I would not have used the term "big endian" here, as it's confusing.) The rest clarifies that it's a least-significant-bit (LSB)-first CRC, i.e. "little endian" or "le" in Linux terminology. (The Linux terminology here is a mistake too -- it would be clearer to call it lsb rather than le, given that it's about bits.) > In this context saying it is not "big endian" doesn't seem to be quite > right.. > > The spec gives a sample data packet (in offset/value pairs): > > 0 0xF0 15 0xB3 30 0x7A 45 0x8B > 1 0x12 16 0x00 31 0x05 46 0xC0 > 2 0x37 17 0x0D 32 0x00 47 0x69 > 3 0x5C 18 0xEC 33 0x00 48 0x0E > 4 0x00 19 0x2A 34 0x00 49 0xD4 > 5 0x0E 20 0x01 35 0x0E 50 0x00 > 6 0x17 21 0x71 36 0xBB 51 0x00 > 7 0xD2 22 0x0A 37 0x88 > 8 0x0A 23 0x1C 38 0x4D > 9 0x20 24 0x01 39 0x85 > 10 0x24 25 0x5D 40 0xFD > 11 0x87 26 0x40 41 0x5C > 12 0xFF 27 0x02 42 0xFB > 13 0x87 28 0x38 43 0xA4 > 14 0xB1 29 0xF2 44 0x72 > > If you feed that to the CRC32 you should get 0x9625B75A in a CPU > register u32. > > cpu_to_be32() will put it in the right order for on the wire. I think cpu_to_be32() would put it in the wrong order. Refer to the following: 4. The resulting bits are sent in order from the bit representing the coefficient of the highest term of the remainder polynomial. The least significant bit, most significant byte first ordering of the packet does not apply to the ICRC field. As per (2) it's a least-significant-bit first CRC, i.e. bit 0 represents the coefficient of x^31 and bit 31 represents the coefficient of x^0. So the least significant byte of the u32 has the highest 8 polynomial terms (the coefficients of x^24 through x^31) and must be sent first. That's an __le32. Yes, the fact that the mapping between bits and polynomial terms is backwards makes it confusing, but that's how LSB-first CRCs work. > I assume the Linux CRC32 always gives the same CPU value regardless of > LE or BE? Yes, they return a u32. (Unless you use crypto_shash_final or crypto_shash_digest in which case you get a u8[4] a.k.a. __le32.) - Eric
On Wed, Jan 29, 2025 at 07:27:20PM +0100, Zhu Yanjun wrote: > 在 2025/1/27 23:38, Eric Biggers 写道: > > From: Eric Biggers <ebiggers@google.com> > > > > The usual big endian convention of InfiniBand does not apply to the > > ICRC field, whose transmission is specified in terms of the CRC32 > > polynomial coefficients. The coefficients are transmitted in > > descending order from the x^31 coefficient to the x^0 one. When the > > result is interpreted as a 32-bit integer using the required reverse > > mapping between bits and polynomial coefficients, it's a __le32. > In InfiniBand Architecture Specification, the following > " > The resulting bits are sent in order from the bit representing the > coefficient of the highest term of the remainder polynomial. The least > significant bit, most significant byte first ordering of the packet does not > apply to the ICRC field. > " > and > " > The 32 Flip-Flops are initialized to 1 before every ICRC generation. The > packet is fed to the reference design of Figure 57 one bit at a time in the > order specified in Section 7.8.1 on page 222. The Remainder is the bitwise > NOT of the value stored at the 32 Flip-Flops after the last bit of the > packet was clocked into the ICRC Generator. ICRC Field is obtained from the > Remainder as shown in Figure 57. ICRC Field is transmitted using Big Endian > byte ordering like every field of an InfiniBand packet. > " > > It seems that big endian byte ordering is needed in ICRC field. I am not > sure if MLX NIC can connect to RXE after this patchset is applied. As I mentioned in my response to Jason, it's a least-significant-bit first CRC, so the mapping between bits and polynomial coefficients is reversed from the natural order. So that needs to be kept in mind. In "Figure 56 ICRC Generator" (looking at https://www.afs.enea.it/asantoro/V1r1_2_1.Release_12062007.pdf) that shows the sample hardware, it looks like what's going on is that the bytes are already reversed in the hardware, i.e. what it calls bits 0 through 7 are the coefficients of x^0 through x^7, which in a software implementation are actually in bits 31 through 24. Thus in software it would be a __le32. - Eric
On Wed, Jan 29, 2025 at 06:51:15PM +0000, Eric Biggers wrote: > Hi Jason, > > On Wed, Jan 29, 2025 at 02:30:09PM -0400, Jason Gunthorpe wrote: > > On Wed, Jan 29, 2025 at 10:44:39AM +0100, Zhu Yanjun wrote: > > > 在 2025/1/27 23:38, Eric Biggers 写道: > > > > From: Eric Biggers <ebiggers@google.com> > > > > > > > > The usual big endian convention of InfiniBand does not apply to the > > > > ICRC field, whose transmission is specified in terms of the CRC32 > > > > polynomial coefficients. > > > > This patch is on to something but this is not a good explanation. > > > > The CRC32 in IB is stored as big endian and computed in big endian, > > the spec says so explicitly: > > > > 2) The CRC calculation is done in big endian byte order with the least > > significant bit of the most significant byte being the first > > bits in the CRC calculation. > > (2) just specifies the order in which the bits are passed to the CRC. It says > nothing about how the CRC value is stored; that's in (4) instead. It could be. The other parts of the spec are more definitive. > The mention of "big endian" seems to refer to the bytes being passed > from first to last, which is the nearly universal convention. The LFSR Figure 57 shows how the numerical representation the spec uses (ie the 0x9625B75A) maps to the LFSR, and it could be called big endian. Regardless, table 29 makes this crystal clear, it shows how the numerical representation of 0x9625B75A is placed on the wire, it is big endian as all IBTA values are - byte 52 is 0x96, byte 55 is 0x5A. > (I would not have used the term "big endian" here, as it's > confusing.) It could be read as going byte-by-byte, or it could be referring to the layout of the numerical representation.. > > If you feed that to the CRC32 you should get 0x9625B75A in a CPU > > register u32. > > > > cpu_to_be32() will put it in the right order for on the wire. > > I think cpu_to_be32() would put it in the wrong order. Refer to the following: It is fully explicit in table 29, 0x9625B75A is stored in big endian format starting at byte 52. It is easy to answer without guessing, Zhu should just run the sample packet through the new crc library code and determine what the u32 value is. It is either 0x9625B75A or swab(0x9625B75A) and that will tell what the code should be. Jason
On Wed, Jan 29, 2025 at 03:43:44PM -0400, Jason Gunthorpe wrote: > On Wed, Jan 29, 2025 at 06:51:15PM +0000, Eric Biggers wrote: > > Hi Jason, > > > > On Wed, Jan 29, 2025 at 02:30:09PM -0400, Jason Gunthorpe wrote: > > > On Wed, Jan 29, 2025 at 10:44:39AM +0100, Zhu Yanjun wrote: > > > > 在 2025/1/27 23:38, Eric Biggers 写道: > > > > > From: Eric Biggers <ebiggers@google.com> > > > > > > > > > > The usual big endian convention of InfiniBand does not apply to the > > > > > ICRC field, whose transmission is specified in terms of the CRC32 > > > > > polynomial coefficients. > > > > > > This patch is on to something but this is not a good explanation. > > > > > > The CRC32 in IB is stored as big endian and computed in big endian, > > > the spec says so explicitly: > > > > > > 2) The CRC calculation is done in big endian byte order with the least > > > significant bit of the most significant byte being the first > > > bits in the CRC calculation. > > > > (2) just specifies the order in which the bits are passed to the CRC. It says > > nothing about how the CRC value is stored; that's in (4) instead. > > It could be. The other parts of the spec are more definitive. > > > The mention of "big endian" seems to refer to the bytes being passed > > from first to last, which is the nearly universal convention. > > The LFSR Figure 57 shows how the numerical representation the spec > uses (ie the 0x9625B75A) maps to the LFSR, and it could be called big > endian. > > Regardless, table 29 makes this crystal clear, it shows how the > numerical representation of 0x9625B75A is placed on the wire, it is > big endian as all IBTA values are - byte 52 is 0x96, byte 55 is 0x5A. > > > (I would not have used the term "big endian" here, as it's > > confusing.) > > It could be read as going byte-by-byte, or it could be referring to > the layout of the numerical representation.. > > > > If you feed that to the CRC32 you should get 0x9625B75A in a CPU > > > register u32. > > > > > > cpu_to_be32() will put it in the right order for on the wire. > > > > I think cpu_to_be32() would put it in the wrong order. Refer to the following: > > It is fully explicit in table 29, 0x9625B75A is stored in big > endian format starting at byte 52. > > It is easy to answer without guessing, Zhu should just run the sample > packet through the new crc library code and determine what the u32 > value is. It is either 0x9625B75A or swab(0x9625B75A) and that will > tell what the code should be. > > Jason static const u8 data[52] = { 0xf0, 0x12, 0x37, 0x5c, 0x00, 0x0e, 0x17, 0xd2, 0x0a, 0x20, 0x24, 0x87, 0xff, 0x87, 0xb1, 0xb3, 0x00, 0x0d, 0xec, 0x2a, 0x01, 0x71, 0x0a, 0x1c, 0x01, 0x5d, 0x40, 0x02, 0x38, 0xf2, 0x7a, 0x05, 0x00, 0x00, 0x00, 0x0e, 0xbb, 0x88, 0x4d, 0x85, 0xfd, 0x5c, 0xfb, 0xa4, 0x72, 0x8b, 0xc0, 0x69, 0x0e, 0xd4, 0x00, 0x00 }; pr_info("crcval=0x%x\n", ~crc32_le(~0,data,sizeof(data))); crcval=0x5ab72596 So yes, the InfiniBand spec gives swab(0x5ab72596) = 0x9625B75A. It's a least-significant-bit first CRC32, so bits 0 through 31 of 0x5ab72596 represent the coefficients of x^31 through x^0 in that order. The byte swap to 0x9625B75A reorders the coefficients to x^7 through x^0, x^15 through x^8, x^23 through x^16, x^31 through x^24. (This is no longer a sequential order, so this order is not usually used.) The spec then stores the swapped value in big endian order to cancel out the extra swap, resulting in the polynomial coefficients being in a sequential order again. IMO, it's easier to think about this as storing the least-significant-bit first CRC32 value using little endian byte order. - Eric
On Wed, Jan 29, 2025 at 12:25:37PM -0800, Eric Biggers wrote: > static const u8 data[52] = { > 0xf0, 0x12, 0x37, 0x5c, 0x00, 0x0e, 0x17, 0xd2, 0x0a, 0x20, 0x24, 0x87, 0xff, 0x87, 0xb1, > 0xb3, 0x00, 0x0d, 0xec, 0x2a, 0x01, 0x71, 0x0a, 0x1c, 0x01, 0x5d, 0x40, 0x02, 0x38, 0xf2, > 0x7a, 0x05, 0x00, 0x00, 0x00, 0x0e, 0xbb, 0x88, 0x4d, 0x85, 0xfd, 0x5c, 0xfb, 0xa4, 0x72, > 0x8b, 0xc0, 0x69, 0x0e, 0xd4, 0x00, 0x00 > }; > pr_info("crcval=0x%x\n", ~crc32_le(~0,data,sizeof(data))); > > crcval=0x5ab72596 > > So yes, the InfiniBand spec gives swab(0x5ab72596) = 0x9625B75A. > > It's a least-significant-bit first CRC32, so bits 0 through 31 of 0x5ab72596 > represent the coefficients of x^31 through x^0 in that order. The byte swap to > 0x9625B75A reorders the coefficients to x^7 through x^0, x^15 through x^8, x^23 > through x^16, x^31 through x^24. (This is no longer a sequential order, so this > order is not usually used.) The spec then stores the swapped value in big > endian order to cancel out the extra swap, resulting in the polynomial > coefficients being in a sequential order again. > IMO, it's easier to think about this as storing the least-significant-bit first > CRC32 value using little endian byte order. To be most clear this should be written as: u32 ibta_crc = swab32(~crc32_le(..)) // Gives you the IBTA defined value *packet = cpu_to_be32(ibta_crc); // Puts it in the packet It follows the spec clearly and exactly. Yes, you can get the same net effect using le: u32 not_ibta_crc = ~crc32_le(..) *packet = cpu_to_le32(not_ibta_crc) It does work, but it is very hard to follow how that relates to the specification when the u32 is not in the spec's format anymore. What matters here, in rxe, is how to use the Linux crc32 library to get exactly the value written down in the spec. IMHO the le approach is an optimization to avoid the dobule swap, and it should simply be described as such in a comment: The crc32 library gives a byte swapped result compared to the IBTA specification. swab32(~crc32_le(..)) will give values that match IBTA. To avoid double swapping we can instead write: *icrc = cpu_to_le32(~crc32_le(..)) The value will still be big endian on the network. No need to talk about coefficients. Jason
On Wed, Jan 29, 2025 at 05:16:51PM -0400, Jason Gunthorpe wrote: > On Wed, Jan 29, 2025 at 12:25:37PM -0800, Eric Biggers wrote: > > static const u8 data[52] = { > > 0xf0, 0x12, 0x37, 0x5c, 0x00, 0x0e, 0x17, 0xd2, 0x0a, 0x20, 0x24, 0x87, 0xff, 0x87, 0xb1, > > 0xb3, 0x00, 0x0d, 0xec, 0x2a, 0x01, 0x71, 0x0a, 0x1c, 0x01, 0x5d, 0x40, 0x02, 0x38, 0xf2, > > 0x7a, 0x05, 0x00, 0x00, 0x00, 0x0e, 0xbb, 0x88, 0x4d, 0x85, 0xfd, 0x5c, 0xfb, 0xa4, 0x72, > > 0x8b, 0xc0, 0x69, 0x0e, 0xd4, 0x00, 0x00 > > }; > > pr_info("crcval=0x%x\n", ~crc32_le(~0,data,sizeof(data))); > > > > crcval=0x5ab72596 > > > > So yes, the InfiniBand spec gives swab(0x5ab72596) = 0x9625B75A. > > > > It's a least-significant-bit first CRC32, so bits 0 through 31 of 0x5ab72596 > > represent the coefficients of x^31 through x^0 in that order. The byte swap to > > 0x9625B75A reorders the coefficients to x^7 through x^0, x^15 through x^8, x^23 > > through x^16, x^31 through x^24. (This is no longer a sequential order, so this > > order is not usually used.) The spec then stores the swapped value in big > > endian order to cancel out the extra swap, resulting in the polynomial > > coefficients being in a sequential order again. > > > IMO, it's easier to think about this as storing the least-significant-bit first > > CRC32 value using little endian byte order. > > To be most clear this should be written as: > > u32 ibta_crc = swab32(~crc32_le(..)) // Gives you the IBTA defined value > *packet = cpu_to_be32(ibta_crc); // Puts it in the packet > > It follows the spec clearly and exactly. > > Yes, you can get the same net effect using le: > > u32 not_ibta_crc = ~crc32_le(..) > *packet = cpu_to_le32(not_ibta_crc) > > It does work, but it is very hard to follow how that relates to the > specification when the u32 is not in the spec's format anymore. > > What matters here, in rxe, is how to use the Linux crc32 library to > get exactly the value written down in the spec. > > IMHO the le approach is an optimization to avoid the dobule swap, and > it should simply be described as such in a comment: > > The crc32 library gives a byte swapped result compared to the IBTA > specification. swab32(~crc32_le(..)) will give values that match > IBTA. > > To avoid double swapping we can instead write: > *icrc = cpu_to_le32(~crc32_le(..)) > The value will still be big endian on the network. > > No need to talk about coefficients. We are looking at the same spec, right? The following is the specification for the ICRC field: The resulting bits are sent in order from the bit representing the coefficient of the highest term of the remainder polynomial. The least significant bit, most significant byte first ordering of the packet does not apply to the ICRC field. So it does talk about the polynomial coefficients. It sounds like you want to add two unnecessary byteswaps to match the example in Table 25, which misleadingly shows a byte-swapped ICRC value as a u32 without mentioning it is byte-swapped. I don't agree that would be clearer, but we can do it if you prefer. - Eric
On Wed, Jan 29, 2025 at 10:21:47PM +0000, Eric Biggers wrote: > > To be most clear this should be written as: > > > > u32 ibta_crc = swab32(~crc32_le(..)) // Gives you the IBTA defined value > > *packet = cpu_to_be32(ibta_crc); // Puts it in the packet > > > > It follows the spec clearly and exactly. > > > > Yes, you can get the same net effect using le: > > > > u32 not_ibta_crc = ~crc32_le(..) > > *packet = cpu_to_le32(not_ibta_crc) > > > > It does work, but it is very hard to follow how that relates to the > > specification when the u32 is not in the spec's format anymore. > > > > What matters here, in rxe, is how to use the Linux crc32 library to > > get exactly the value written down in the spec. > > > > IMHO the le approach is an optimization to avoid the dobule swap, and > > it should simply be described as such in a comment: > > > > The crc32 library gives a byte swapped result compared to the IBTA > > specification. swab32(~crc32_le(..)) will give values that match > > IBTA. > > > > To avoid double swapping we can instead write: > > *icrc = cpu_to_le32(~crc32_le(..)) > > The value will still be big endian on the network. > > > > No need to talk about coefficients. > > We are looking at the same spec, right? The following is the specification for > the ICRC field: > > The resulting bits are sent in order from the bit representing the > coefficient of the highest term of the remainder polynomial. The least > significant bit, most significant byte first ordering of the packet does not > apply to the ICRC field. > > So it does talk about the polynomial coefficients. Of course it does, it is defining a CRC. The above text is reflected in Figure 57 which shows the Remainder being swapped all around to produce the ICRC. The spec goes on to say: CRC Field is obtained from the Remainder as shown in Figure 57. ICRC Field is transmitted using Big Endian byte ordering like every field of an InfiniBand packet. From a spec perspective is *total nonsense* to describe something the spec explicitly says is big endian as little endian. Yes from a maths perspective coefficients are reversed and whatever, but that doesn't matter to someone reading the code. Clearly state how to calculate the u32 "ICRC Field" as called out in the spec using Linux. That is swab32(~crc32_le(..)) - that detail clarifies everything. > It sounds like you want to add two unnecessary byteswaps to match > the example in Table 25, which misleadingly shows a byte-swapped > ICRC value as a u32 without mentioning it is byte-swapped. There are an obnoxious number of ways to make, label and describe these LFSRs. IBTA choose their representation, Linux choose a different one. It isn't misleadingly byte-swapped, it is self consistent with the rest of the spec, and different from Linux. > I don't agree that would be clearer, but we can do it if you prefer. If you want to keep the le32 optimization, then keep it, but have a comment to explain that it is an optimization based on the logical be32 double swap that matches the plain text of the spec. I gave an example comment above. Jason
On Wed, Jan 29, 2025 at 09:29:51PM -0400, Jason Gunthorpe wrote: > On Wed, Jan 29, 2025 at 10:21:47PM +0000, Eric Biggers wrote: > > > > To be most clear this should be written as: > > > > > > u32 ibta_crc = swab32(~crc32_le(..)) // Gives you the IBTA defined value > > > *packet = cpu_to_be32(ibta_crc); // Puts it in the packet > > > > > > It follows the spec clearly and exactly. > > > > > > Yes, you can get the same net effect using le: > > > > > > u32 not_ibta_crc = ~crc32_le(..) > > > *packet = cpu_to_le32(not_ibta_crc) > > > > > > It does work, but it is very hard to follow how that relates to the > > > specification when the u32 is not in the spec's format anymore. > > > > > > What matters here, in rxe, is how to use the Linux crc32 library to > > > get exactly the value written down in the spec. > > > > > > IMHO the le approach is an optimization to avoid the dobule swap, and > > > it should simply be described as such in a comment: > > > > > > The crc32 library gives a byte swapped result compared to the IBTA > > > specification. swab32(~crc32_le(..)) will give values that match > > > IBTA. > > > > > > To avoid double swapping we can instead write: > > > *icrc = cpu_to_le32(~crc32_le(..)) > > > The value will still be big endian on the network. > > > > > > No need to talk about coefficients. > > > > We are looking at the same spec, right? The following is the specification for > > the ICRC field: > > > > The resulting bits are sent in order from the bit representing the > > coefficient of the highest term of the remainder polynomial. The least > > significant bit, most significant byte first ordering of the packet does not > > apply to the ICRC field. > > > > So it does talk about the polynomial coefficients. > > Of course it does, it is defining a CRC. Yes. > From a spec perspective is *total nonsense* to describe something the > spec explicitly says is big endian as little endian. The spec also says "The least significant bit, most significant byte first ordering of the packet does not apply to the ICRC field." I.e. it is not big endian. The full explanation would need to point out the two seemingly conflicting parts of the spec and how they are resolved. > Yes from a maths perspective coefficients are reversed and whatever, > but that doesn't matter to someone reading the code. It does matter. Besides the above, it's also the only way to know whether the least-significant-bit-first CRC32 (crc32_le()) or most-significant-bit-first CRC32 (crc32_be()) is the correct function to call. > There are an obnoxious number of ways to make, label and describe these LFSRs. > IBTA choose their representation, Linux choose a different one. It's a CRC, not an LFSR. LFSR is an implementation choice. > If you want to keep the le32 optimization, then keep it, but have a > comment to explain that it is an optimization based on the logical > be32 double swap that matches the plain text of the spec. I will plan to clarify the comment to point out the two seemingly conflicting parts of the spec and how they apply. But feel free to send your own patches that you would approve of though, since you seem very opinionated about this. - Eric
在 2025/1/29 19:30, Jason Gunthorpe 写道: > On Wed, Jan 29, 2025 at 10:44:39AM +0100, Zhu Yanjun wrote: >> 在 2025/1/27 23:38, Eric Biggers 写道: >>> From: Eric Biggers <ebiggers@google.com> >>> >>> The usual big endian convention of InfiniBand does not apply to the >>> ICRC field, whose transmission is specified in terms of the CRC32 >>> polynomial coefficients. > > This patch is on to something but this is not a good explanation. > > The CRC32 in IB is stored as big endian and computed in big endian, > the spec says so explicitly: > > 2) The CRC calculation is done in big endian byte order with the least > 31 significant bit of the most significant byte being the first > bits in the 32 CRC calculation. > > In this context saying it is not "big endian" doesn't seem to be quite > right.. > > The spec gives a sample data packet (in offset/value pairs): > > 0 0xF0 15 0xB3 30 0x7A 45 0x8B > 1 0x12 16 0x00 31 0x05 46 0xC0 > 2 0x37 17 0x0D 32 0x00 47 0x69 > 3 0x5C 18 0xEC 33 0x00 48 0x0E > 4 0x00 19 0x2A 34 0x00 49 0xD4 > 5 0x0E 20 0x01 35 0x0E 50 0x00 > 6 0x17 21 0x71 36 0xBB 51 0x00 > 7 0xD2 22 0x0A 37 0x88 > 8 0x0A 23 0x1C 38 0x4D > 9 0x20 24 0x01 39 0x85 > 10 0x24 25 0x5D 40 0xFD > 11 0x87 26 0x40 41 0x5C > 12 0xFF 27 0x02 42 0xFB > 13 0x87 28 0x38 43 0xA4 > 14 0xB1 29 0xF2 44 0x72 > > If you feed that to the CRC32 you should get 0x9625B75A in a CPU > register u32. > > cpu_to_be32() will put it in the right order for on the wire. > > Since rxe doesn't have any cpu_to_be32() on this path, I'm guessing > the Linux CRC32 implementations gives a u32 with the > value = 0x5AB72596 ie swapped. > > Probably the issue here is that the Linux CRC32 and the IBTA CRC32 are > using a different mapping of LFSR bits. I love CRCs. So many different > ways to implement the same thing. > > Thus, I guess, the code should really read: > linux_crc32 = swab32(be32_to_cpu(val)); > > Which is a NOP on x86 and requires a swap on BE. > > Zhu, can you check it for Eric? (this is page 229 in the spec). Got it. In my IBTA spec, I can find what you mentioned in the IBTA spec. Your explanation is in details and very nice. Thanks a lot. Zhu Yanjun > > I assume the Linux CRC32 always gives the same CPU value regardless of > LE or BE? > > Jason
On 29.01.25 20:43, Jason Gunthorpe wrote: > On Wed, Jan 29, 2025 at 06:51:15PM +0000, Eric Biggers wrote: >> Hi Jason, >> >> On Wed, Jan 29, 2025 at 02:30:09PM -0400, Jason Gunthorpe wrote: >>> On Wed, Jan 29, 2025 at 10:44:39AM +0100, Zhu Yanjun wrote: >>>> 在 2025/1/27 23:38, Eric Biggers 写道: >>>>> From: Eric Biggers <ebiggers@google.com> >>>>> >>>>> The usual big endian convention of InfiniBand does not apply to the >>>>> ICRC field, whose transmission is specified in terms of the CRC32 >>>>> polynomial coefficients. >>> >>> This patch is on to something but this is not a good explanation. >>> >>> The CRC32 in IB is stored as big endian and computed in big endian, >>> the spec says so explicitly: >>> >>> 2) The CRC calculation is done in big endian byte order with the least >>> significant bit of the most significant byte being the first >>> bits in the CRC calculation. >> >> (2) just specifies the order in which the bits are passed to the CRC. It says >> nothing about how the CRC value is stored; that's in (4) instead. > > It could be. The other parts of the spec are more definitive. > >> The mention of "big endian" seems to refer to the bytes being passed >> from first to last, which is the nearly universal convention. > > The LFSR Figure 57 shows how the numerical representation the spec > uses (ie the 0x9625B75A) maps to the LFSR, and it could be called big > endian. > > Regardless, table 29 makes this crystal clear, it shows how the > numerical representation of 0x9625B75A is placed on the wire, it is > big endian as all IBTA values are - byte 52 is 0x96, byte 55 is 0x5A. > >> (I would not have used the term "big endian" here, as it's >> confusing.) > > It could be read as going byte-by-byte, or it could be referring to > the layout of the numerical representation.. > >>> If you feed that to the CRC32 you should get 0x9625B75A in a CPU >>> register u32. >>> >>> cpu_to_be32() will put it in the right order for on the wire. >> >> I think cpu_to_be32() would put it in the wrong order. Refer to the following: > > It is fully explicit in table 29, 0x9625B75A is stored in big > endian format starting at byte 52. > > It is easy to answer without guessing, Zhu should just run the sample > packet through the new crc library code and determine what the u32 > value is. It is either 0x9625B75A or swab(0x9625B75A) and that will > tell what the code should be. Got it. I will run the sample packet through the new crc library code and determine what the u32 value is. Zhu Yanjun > > Jason
On Wed, Jan 29, 2025 at 06:04:03PM -0800, Eric Biggers wrote: > > From a spec perspective is *total nonsense* to describe something the > > spec explicitly says is big endian as little endian. > > The spec also says "The least significant bit, most significant byte first > ordering of the packet does not apply to the ICRC field." I'm pretty sure this is text trying to explain the Remainder -> ICRC transformation in Figure 57. Jason
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c index fdf5f08cd8f17..ee417a0bbbdca 100644 --- a/drivers/infiniband/sw/rxe/rxe_icrc.c +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -38,26 +38,26 @@ int rxe_icrc_init(struct rxe_dev *rxe) * @next: starting address of current segment * @len: length of current segment * * Return: the cumulative crc32 checksum */ -static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) +static u32 rxe_crc32(struct rxe_dev *rxe, u32 crc, void *next, size_t len) { - __be32 icrc; + u32 icrc; int err; SHASH_DESC_ON_STACK(shash, rxe->tfm); shash->tfm = rxe->tfm; - *(__be32 *)shash_desc_ctx(shash) = crc; + *(u32 *)shash_desc_ctx(shash) = crc; err = crypto_shash_update(shash, next, len); if (unlikely(err)) { rxe_dbg_dev(rxe, "failed crc calculation, err: %d\n", err); - return (__force __be32)crc32_le((__force u32)crc, next, len); + return crc32_le(crc, next, len); } - icrc = *(__be32 *)shash_desc_ctx(shash); + icrc = *(u32 *)shash_desc_ctx(shash); barrier_data(shash_desc_ctx(shash)); return icrc; } @@ -67,18 +67,18 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) * @skb: packet buffer * @pkt: packet information * * Return: the partial ICRC */ -static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) +static u32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) { unsigned int bth_offset = 0; struct iphdr *ip4h = NULL; struct ipv6hdr *ip6h = NULL; struct udphdr *udph; struct rxe_bth *bth; - __be32 crc; + u32 crc; int length; int hdr_size = sizeof(struct udphdr) + (skb->protocol == htons(ETH_P_IP) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)); /* pseudo header buffer size is calculate using ipv6 header size since @@ -89,11 +89,11 @@ static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) RXE_BTH_BYTES]; /* This seed is the result of computing a CRC with a seed of * 0xfffffff and 8 bytes of 0xff representing a masked LRH. */ - crc = (__force __be32)0xdebb20e3; + crc = 0xdebb20e3; if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */ memcpy(pshdr, ip_hdr(skb), hdr_size); ip4h = (struct iphdr *)pshdr; udph = (struct udphdr *)(ip4h + 1); @@ -137,23 +137,27 @@ static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) * * Return: 0 if the values match else an error */ int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt) { - __be32 *icrcp; - __be32 pkt_icrc; - __be32 icrc; - - icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); - pkt_icrc = *icrcp; + /* + * The usual big endian convention of InfiniBand does not apply to the + * ICRC field, whose transmission is specified in terms of the CRC32 + * polynomial coefficients. The coefficients are transmitted in + * descending order from the x^31 coefficient to the x^0 one. When the + * result is interpreted as a 32-bit integer using the required reverse + * mapping between bits and polynomial coefficients, it's a __le32. + */ + __le32 *icrcp = (__le32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); + u32 icrc; icrc = rxe_icrc_hdr(skb, pkt); icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt), payload_size(pkt) + bth_pad(pkt)); icrc = ~icrc; - if (unlikely(icrc != pkt_icrc)) + if (unlikely(icrc != le32_to_cpu(*icrcp))) return -EINVAL; return 0; } @@ -162,14 +166,13 @@ int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt) * @skb: packet buffer * @pkt: packet information */ void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt) { - __be32 *icrcp; - __be32 icrc; + __le32 *icrcp = (__le32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); + u32 icrc; - icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); icrc = rxe_icrc_hdr(skb, pkt); icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt), payload_size(pkt) + bth_pad(pkt)); - *icrcp = ~icrc; + *icrcp = cpu_to_le32(~icrc); }