
[net-next,v3,2/3] net: qualcomm: rmnet: add tx packets aggregation

Message ID 20221205093359.49350-3-dnlplm@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Series add tx packets aggregation to ethtool and rmnet

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success
netdev/cover_letter success Series has a cover letter
netdev/patch_count success
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Daniele Palmas Dec. 5, 2022, 9:33 a.m. UTC
Add tx packets aggregation.

Bidirectional TCP throughput tests through iperf with low-cat
Thread-x based modems revealed performance issues in both tx
and rx.

The Windows driver does not show this issue: inspecting USB
packets revealed that the only notable change is the driver
enabling tx packets aggregation.

Tx packets aggregation is disabled by default and can be enabled
by increasing the value of ETHTOOL_A_COALESCE_TX_MAX_AGGR_FRAMES.

The maximum aggregated size is set by default to a reasonably low
value in order to support the majority of modems.

This implementation is based on patches available in Code Aurora
repositories (msm kernel) whose main authors are

Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Sean Tranchetti <stranche@codeaurora.org>

Signed-off-by: Daniele Palmas <dnlplm@gmail.com>
---
v3
- Fixed 'no previous prototype for rmnet_map_flush_tx_packet_queue'
  warning reported by the kernel test robot
v2
- Removed icmp packets direct sending
- Changed spin_lock_irqsave to spin_lock_bh
- Increased the possible maximum size of an aggregated block
- Aligned rmnet_egress_agg_params and types to ethtool ones
- Changed bypass time from variable to define
- Fixed RCT style in rmnet_map_tx_aggregate
- Fixed order of skb freeing in rmnet_map_tx_aggregate
- rmnet_map_tx_aggregate refactoring
- Changed aggregation function to use frag_list
- Removed RMNET_FLAGS_EGRESS_AGGREGATION
---
 .../ethernet/qualcomm/rmnet/rmnet_config.c    |   5 +
 .../ethernet/qualcomm/rmnet/rmnet_config.h    |  20 ++
 .../ethernet/qualcomm/rmnet/rmnet_handlers.c  |  18 +-
 .../net/ethernet/qualcomm/rmnet/rmnet_map.h   |   6 +
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c  | 191 ++++++++++++++++++
 .../net/ethernet/qualcomm/rmnet/rmnet_vnd.c   |   9 +-
 .../net/ethernet/qualcomm/rmnet/rmnet_vnd.h   |   1 +
 7 files changed, 246 insertions(+), 4 deletions(-)

Comments

Paolo Abeni Dec. 7, 2022, 12:45 p.m. UTC | #1
On Mon, 2022-12-05 at 10:33 +0100, Daniele Palmas wrote:
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> index a313242a762e..914ef03b5438 100644
> --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> @@ -164,8 +164,18 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
>  
>  	map_header->mux_id = mux_id;
>  
> -	skb->protocol = htons(ETH_P_MAP);
> +	if (port->egress_agg_params.count > 1) {

This is racy. Here you read 'count' outside the 'agg_lock' lock and
later, in rmnet_map_tx_aggregate() the code assumes the above condition
holds, but ethtool could have changed the value in the meantime.

You need a READ_ONCE() above, a WRITE_ONCE() on update, and to cope with
a 0 value in rmnet_map_tx_aggregate().
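
For illustration, a minimal sketch of that pattern, reusing the field
names from this patch (the exact placement is only a suggestion):

	/* egress path: read the configured count once, outside agg_lock */
	if (READ_ONCE(port->egress_agg_params.count) > 1) {
		...
	}

	/* update path, e.g. rmnet_map_update_ul_agg_config(): publish the
	 * new value so lockless readers see either the old or the new one
	 */
	spin_lock_bh(&port->agg_lock);
	WRITE_ONCE(port->egress_agg_params.count, count);
	spin_unlock_bh(&port->agg_lock);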

[...]

> +static void rmnet_map_flush_tx_packet_work(struct work_struct *work)
> +{
> +	struct sk_buff *skb = NULL;
> +	struct rmnet_port *port;
> +
> +	port = container_of(work, struct rmnet_port, agg_wq);
> +
> +	spin_lock_bh(&port->agg_lock);
> +	if (likely(port->agg_state == -EINPROGRESS)) {
> +		/* Buffer may have already been shipped out */
> +		if (likely(port->skbagg_head)) {
> +			skb = port->skbagg_head;
> +			reset_aggr_params(port);
> +		}
> +		port->agg_state = 0;
> +	}
> +
> +	spin_unlock_bh(&port->agg_lock);
> +	if (skb)
> +		rmnet_send_skb(port, skb);
> +}
> +
> +static enum hrtimer_restart rmnet_map_flush_tx_packet_queue(struct hrtimer *t)
> +{
> +	struct rmnet_port *port;
> +
> +	port = container_of(t, struct rmnet_port, hrtimer);
> +
> +	schedule_work(&port->agg_wq);

Why do you need to schedule a work and can't instead call the core of
rmnet_map_flush_tx_packet_work() here? It looks like the latter does
not need process context...

> +
> +	return HRTIMER_NORESTART;
> +}
> +
> +unsigned int rmnet_map_tx_aggregate(struct sk_buff *skb, struct rmnet_port *port,
> +				    struct net_device *orig_dev)
> +{
> +	struct timespec64 diff, last;
> +	unsigned int len = skb->len;
> +	struct sk_buff *agg_skb;
> +	int size;
> +
> +	spin_lock_bh(&port->agg_lock);
> +	memcpy(&last, &port->agg_last, sizeof(struct timespec64));
> +	ktime_get_real_ts64(&port->agg_last);
> +
> +	if (!port->skbagg_head) {
> +		/* Check to see if we should agg first. If the traffic is very
> +		 * sparse, don't aggregate.
> +		 */
> +new_packet:
> +		diff = timespec64_sub(port->agg_last, last);
> +		size = port->egress_agg_params.bytes - skb->len;
> +
> +		if (size < 0) {
> +			/* dropped */
> +			spin_unlock_bh(&port->agg_lock);
> +			return 0;
> +		}
> +
> +		if (diff.tv_sec > 0 || diff.tv_nsec > RMNET_AGG_BYPASS_TIME_NSEC ||
> +		    size == 0) {

You can avoid some code duplication by moving the following lines under an
'error' label and jumping to it here and in the next error case.
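
For example (sketch only; the 'no_aggr' label name is illustrative):

		if (diff.tv_sec > 0 || diff.tv_nsec > RMNET_AGG_BYPASS_TIME_NSEC ||
		    size == 0)
			goto no_aggr;

		port->skbagg_head = skb_copy_expand(skb, 0, size, GFP_ATOMIC);
		if (!port->skbagg_head)
			goto no_aggr;
		...

no_aggr:
	spin_unlock_bh(&port->agg_lock);
	skb->protocol = htons(ETH_P_MAP);
	dev_queue_xmit(skb);
	return len;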

> +			spin_unlock_bh(&port->agg_lock);
> +			skb->protocol = htons(ETH_P_MAP);
> +			dev_queue_xmit(skb);
> +			return len;
> +		}
> +
> +		port->skbagg_head = skb_copy_expand(skb, 0, size, GFP_ATOMIC);
> +		if (!port->skbagg_head) {
> +			spin_unlock_bh(&port->agg_lock);
> +			skb->protocol = htons(ETH_P_MAP);
> +			dev_queue_xmit(skb);
> +			return len;
> +		}
> +		dev_kfree_skb_any(skb);
> +		port->skbagg_head->protocol = htons(ETH_P_MAP);
> +		port->agg_count = 1;
> +		ktime_get_real_ts64(&port->agg_time);
> +		skb_frag_list_init(port->skbagg_head);
> +		goto schedule;
> +	}
> +	diff = timespec64_sub(port->agg_last, port->agg_time);
> +	size = port->egress_agg_params.bytes - port->skbagg_head->len;
> +
> +	if (skb->len > size) {
> +		agg_skb = port->skbagg_head;
> +		reset_aggr_params(port);
> +		spin_unlock_bh(&port->agg_lock);
> +		hrtimer_cancel(&port->hrtimer);
> +		rmnet_send_skb(port, agg_skb);
> +		spin_lock_bh(&port->agg_lock);
> +		goto new_packet;
> +	}
> +
> +	if (skb_has_frag_list(port->skbagg_head))
> +		port->skbagg_tail->next = skb;
> +	else
> +		skb_shinfo(port->skbagg_head)->frag_list = skb;
> +
> +	port->skbagg_head->len += skb->len;
> +	port->skbagg_head->data_len += skb->len;
> +	port->skbagg_head->truesize += skb->truesize;
> +	port->skbagg_tail = skb;
> +	port->agg_count++;
> +
> +	if (diff.tv_sec > 0 || diff.tv_nsec > port->egress_agg_params.time_nsec ||
> +	    port->agg_count == port->egress_agg_params.count ||

At this point port->egress_agg_params.count can be 0, you need to check
for:
	port->agg_count >= port->egress_agg_params.count	
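
In context, the resulting check would read (sketch):

	if (diff.tv_sec > 0 || diff.tv_nsec > port->egress_agg_params.time_nsec ||
	    port->agg_count >= port->egress_agg_params.count ||
	    port->skbagg_head->len == port->egress_agg_params.bytes) {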


Cheers,

Paolo
Daniele Palmas Dec. 9, 2022, 7:38 a.m. UTC | #2
Il giorno mer 7 dic 2022 alle ore 13:46 Paolo Abeni
<pabeni@redhat.com> ha scritto:
>
> On Mon, 2022-12-05 at 10:33 +0100, Daniele Palmas wrote:
> > diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> > index a313242a762e..914ef03b5438 100644
> > --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> > +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> > @@ -164,8 +164,18 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
> >
> >       map_header->mux_id = mux_id;
> >
> > -     skb->protocol = htons(ETH_P_MAP);
> > +     if (port->egress_agg_params.count > 1) {
>
> This is racy. Here you read 'count' outside the 'agg_lock' lock and
> later, in rmnet_map_tx_aggregate() the code assumes the above condition
> holds, but ethtool could have changed the value in the meantime.
>
> You need a READ_ONCE() above, a WRITE_ONCE() on update, and to cope with
> a 0 value in rmnet_map_tx_aggregate().
>

Ack.

> [...]
>
> > +static void rmnet_map_flush_tx_packet_work(struct work_struct *work)
> > +{
> > +     struct sk_buff *skb = NULL;
> > +     struct rmnet_port *port;
> > +
> > +     port = container_of(work, struct rmnet_port, agg_wq);
> > +
> > +     spin_lock_bh(&port->agg_lock);
> > +     if (likely(port->agg_state == -EINPROGRESS)) {
> > +             /* Buffer may have already been shipped out */
> > +             if (likely(port->skbagg_head)) {
> > +                     skb = port->skbagg_head;
> > +                     reset_aggr_params(port);
> > +             }
> > +             port->agg_state = 0;
> > +     }
> > +
> > +     spin_unlock_bh(&port->agg_lock);
> > +     if (skb)
> > +             rmnet_send_skb(port, skb);
> > +}
> > +
> > +static enum hrtimer_restart rmnet_map_flush_tx_packet_queue(struct hrtimer *t)
> > +{
> > +     struct rmnet_port *port;
> > +
> > +     port = container_of(t, struct rmnet_port, hrtimer);
> > +
> > +     schedule_work(&port->agg_wq);
>
> Why do you need to schedule a work and can't instead call the core of
> rmnet_map_flush_tx_packet_work() here? It looks like the latter does
> not need process context...
>

Ack.

> > +
> > +     return HRTIMER_NORESTART;
> > +}
> > +
> > +unsigned int rmnet_map_tx_aggregate(struct sk_buff *skb, struct rmnet_port *port,
> > +                                 struct net_device *orig_dev)
> > +{
> > +     struct timespec64 diff, last;
> > +     unsigned int len = skb->len;
> > +     struct sk_buff *agg_skb;
> > +     int size;
> > +
> > +     spin_lock_bh(&port->agg_lock);
> > +     memcpy(&last, &port->agg_last, sizeof(struct timespec64));
> > +     ktime_get_real_ts64(&port->agg_last);
> > +
> > +     if (!port->skbagg_head) {
> > +             /* Check to see if we should agg first. If the traffic is very
> > +              * sparse, don't aggregate.
> > +              */
> > +new_packet:
> > +             diff = timespec64_sub(port->agg_last, last);
> > +             size = port->egress_agg_params.bytes - skb->len;
> > +
> > +             if (size < 0) {
> > +                     /* dropped */
> > +                     spin_unlock_bh(&port->agg_lock);
> > +                     return 0;
> > +             }
> > +
> > +             if (diff.tv_sec > 0 || diff.tv_nsec > RMNET_AGG_BYPASS_TIME_NSEC ||
> > +                 size == 0) {
>
> You can avoid some code duplication by moving the following lines under an
> 'error' label and jumping to it here and in the next error case.
>

Ack.

> > +                     spin_unlock_bh(&port->agg_lock);
> > +                     skb->protocol = htons(ETH_P_MAP);
> > +                     dev_queue_xmit(skb);
> > +                     return len;
> > +             }
> > +
> > +             port->skbagg_head = skb_copy_expand(skb, 0, size, GFP_ATOMIC);
> > +             if (!port->skbagg_head) {
> > +                     spin_unlock_bh(&port->agg_lock);
> > +                     skb->protocol = htons(ETH_P_MAP);
> > +                     dev_queue_xmit(skb);
> > +                     return len;
> > +             }
> > +             dev_kfree_skb_any(skb);
> > +             port->skbagg_head->protocol = htons(ETH_P_MAP);
> > +             port->agg_count = 1;
> > +             ktime_get_real_ts64(&port->agg_time);
> > +             skb_frag_list_init(port->skbagg_head);
> > +             goto schedule;
> > +     }
> > +     diff = timespec64_sub(port->agg_last, port->agg_time);
> > +     size = port->egress_agg_params.bytes - port->skbagg_head->len;
> > +
> > +     if (skb->len > size) {
> > +             agg_skb = port->skbagg_head;
> > +             reset_aggr_params(port);
> > +             spin_unlock_bh(&port->agg_lock);
> > +             hrtimer_cancel(&port->hrtimer);
> > +             rmnet_send_skb(port, agg_skb);
> > +             spin_lock_bh(&port->agg_lock);
> > +             goto new_packet;
> > +     }
> > +
> > +     if (skb_has_frag_list(port->skbagg_head))
> > +             port->skbagg_tail->next = skb;
> > +     else
> > +             skb_shinfo(port->skbagg_head)->frag_list = skb;
> > +
> > +     port->skbagg_head->len += skb->len;
> > +     port->skbagg_head->data_len += skb->len;
> > +     port->skbagg_head->truesize += skb->truesize;
> > +     port->skbagg_tail = skb;
> > +     port->agg_count++;
> > +
> > +     if (diff.tv_sec > 0 || diff.tv_nsec > port->egress_agg_params.time_nsec ||
> > +         port->agg_count == port->egress_agg_params.count ||
>
> At this point port->egress_agg_params.count can be 0, you need to check
> for:
>         port->agg_count >= port->egress_agg_params.count
>

Got it, thanks for the review: I'll fix all the above and send v4.

Regards,
Daniele
Daniele Palmas Dec. 20, 2022, 2:28 p.m. UTC | #3
Hello Paolo,

Il giorno ven 9 dic 2022 alle ore 08:38 Daniele Palmas
<dnlplm@gmail.com> ha scritto:
>
> Il giorno mer 7 dic 2022 alle ore 13:46 Paolo Abeni
> <pabeni@redhat.com> ha scritto:
> > > +static void rmnet_map_flush_tx_packet_work(struct work_struct *work)
> > > +{
> > > +     struct sk_buff *skb = NULL;
> > > +     struct rmnet_port *port;
> > > +
> > > +     port = container_of(work, struct rmnet_port, agg_wq);
> > > +
> > > +     spin_lock_bh(&port->agg_lock);
> > > +     if (likely(port->agg_state == -EINPROGRESS)) {
> > > +             /* Buffer may have already been shipped out */
> > > +             if (likely(port->skbagg_head)) {
> > > +                     skb = port->skbagg_head;
> > > +                     reset_aggr_params(port);
> > > +             }
> > > +             port->agg_state = 0;
> > > +     }
> > > +
> > > +     spin_unlock_bh(&port->agg_lock);
> > > +     if (skb)
> > > +             rmnet_send_skb(port, skb);
> > > +}
> > > +
> > > +static enum hrtimer_restart rmnet_map_flush_tx_packet_queue(struct hrtimer *t)
> > > +{
> > > +     struct rmnet_port *port;
> > > +
> > > +     port = container_of(t, struct rmnet_port, hrtimer);
> > > +
> > > +     schedule_work(&port->agg_wq);
> >
> > Why do you need to schedule a work and can't instead call the core of
> > rmnet_map_flush_tx_packet_work() here? It looks like the latter does
> > not need process context...
> >
>
> Ack.
>

Looks like removing the work is not as straightforward as I thought.

Now the timer cb has become:

static enum hrtimer_restart rmnet_map_flush_tx_packet_cb(struct hrtimer *t)
{
    struct sk_buff *skb = NULL;
    struct rmnet_port *port;

    port = container_of(t, struct rmnet_port, hrtimer);

    spin_lock_bh(&port->agg_lock);
    if (likely(port->agg_state == -EINPROGRESS)) {
        /* Buffer may have already been shipped out */
        if (likely(port->skbagg_head)) {
            skb = port->skbagg_head;
            reset_aggr_params(port);
        }
        port->agg_state = 0;
    }
    spin_unlock_bh(&port->agg_lock);

    if (skb)
        rmnet_send_skb(port, skb);

    return HRTIMER_NORESTART;
}

but this is causing the following warning:

[ 3106.701296] WARNING: CPU: 15 PID: 0 at kernel/softirq.c:375
__local_bh_enable_ip+0x54/0x70
...
[ 3106.701537] CPU: 15 PID: 0 Comm: swapper/15 Tainted: G           OE
     6.1.0-rc5-rmnet-v4-warn #1
[ 3106.701543] Hardware name: LENOVO 30DH00H2IX/1048, BIOS S08KT40A 08/23/2021
[ 3106.701546] RIP: 0010:__local_bh_enable_ip+0x54/0x70
[ 3106.701554] Code: a9 00 ff ff 00 74 27 65 ff 0d 08 bb 75 61 65 8b
05 01 bb 75 61 85 c0 74 06 5d c3 cc cc cc cc 0f 1f 44 00 00 5d c3 cc
cc cc cc <0f> 0b eb bf 65 66 8b 05 e0 ca 76 61 66 85 c0 74 cc e8 e6 fd
ff ff
[ 3106.701559] RSP: 0018:ffffb8aa80510ec8 EFLAGS: 00010006
[ 3106.701564] RAX: 0000000080010202 RBX: ffff932d7b687868 RCX: 0000000000000000
[ 3106.701569] RDX: 0000000000000001 RSI: 0000000000000201 RDI: ffffffffc0bd5f7c
[ 3106.701573] RBP: ffffb8aa80510ec8 R08: ffff933bdc3e31a0 R09: 000002d355c2f99d
[ 3106.701576] R10: 0000000000000000 R11: ffffb8aa80510ff8 R12: ffff932d7b687828
[ 3106.701580] R13: ffff932d7b687000 R14: ffff932cc1a76400 R15: ffff933bdc3e3180
[ 3106.701584] FS:  0000000000000000(0000) GS:ffff933bdc3c0000(0000)
knlGS:0000000000000000
[ 3106.701589] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3106.701593] CR2: 00007ffc26dae080 CR3: 0000000209f04003 CR4: 00000000007706e0
[ 3106.701597] PKRU: 55555554
[ 3106.701599] Call Trace:
[ 3106.701602]  <IRQ>
[ 3106.701608]  _raw_spin_unlock_bh+0x1d/0x30
[ 3106.701623]  rmnet_map_flush_tx_packet_cb+0x4c/0x90 [rmnet]
[ 3106.701640]  ? rmnet_send_skb+0x90/0x90 [rmnet]
[ 3106.701655]  __hrtimer_run_queues+0x106/0x260
[ 3106.701664]  hrtimer_interrupt+0x101/0x220
[ 3106.701671]  __sysvec_apic_timer_interrupt+0x61/0x110
[ 3106.701677]  sysvec_apic_timer_interrupt+0x7b/0x90
[ 3106.701685]  </IRQ>
[ 3106.701687]  <TASK>
[ 3106.701689]  asm_sysvec_apic_timer_interrupt+0x1b/0x20
[ 3106.701694] RIP: 0010:cpuidle_enter_state+0xde/0x6e0
[ 3106.701704] Code: eb d6 60 e8 74 01 68 ff 8b 53 04 49 89 c7 0f 1f
44 00 00 31 ff e8 b2 23 67 ff 80 7d d0 00 0f 85 da 00 00 00 fb 0f 1f
44 00 00 <45> 85 f6 0f 88 01 02 00 00 4d 63 ee 49 83 fd 09 0f 87 b6 04
00 00
[ 3106.701709] RSP: 0018:ffffb8aa801dbe38 EFLAGS: 00000246
[ 3106.701713] RAX: ffff933bdc3f1380 RBX: ffffd8aa7fbc0700 RCX: 000000000000001f
[ 3106.701717] RDX: 000000000000000f RSI: 0000000000000002 RDI: 0000000000000000
[ 3106.701720] RBP: ffffb8aa801dbe88 R08: 000002d355d34146 R09: 000000000006e988
[ 3106.701723] R10: 0000000000000004 R11: 071c71c71c71c71c R12: ffffffffa04ba5c0
[ 3106.701727] R13: 0000000000000001 R14: 0000000000000001 R15: 000002d355d34146
[ 3106.701735]  ? cpuidle_enter_state+0xce/0x6e0
[ 3106.701744]  cpuidle_enter+0x2e/0x50
[ 3106.701751]  do_idle+0x204/0x290
[ 3106.701758]  cpu_startup_entry+0x20/0x30
[ 3106.701763]  start_secondary+0x122/0x160
[ 3106.701773]  secondary_startup_64_no_verify+0xe5/0xeb
[ 3106.701784]  </TASK>

The reason is not obvious to me, so I need to dig further...

Regards,
Daniele
Paolo Abeni Dec. 20, 2022, 3:28 p.m. UTC | #4
On Tue, 2022-12-20 at 15:28 +0100, Daniele Palmas wrote:
> Hello Paolo,
> 
> Il giorno ven 9 dic 2022 alle ore 08:38 Daniele Palmas
> <dnlplm@gmail.com> ha scritto:
> > 
> > Il giorno mer 7 dic 2022 alle ore 13:46 Paolo Abeni
> > <pabeni@redhat.com> ha scritto:
> > > > +static void rmnet_map_flush_tx_packet_work(struct work_struct *work)
> > > > +{
> > > > +     struct sk_buff *skb = NULL;
> > > > +     struct rmnet_port *port;
> > > > +
> > > > +     port = container_of(work, struct rmnet_port, agg_wq);
> > > > +
> > > > +     spin_lock_bh(&port->agg_lock);
> > > > +     if (likely(port->agg_state == -EINPROGRESS)) {
> > > > +             /* Buffer may have already been shipped out */
> > > > +             if (likely(port->skbagg_head)) {
> > > > +                     skb = port->skbagg_head;
> > > > +                     reset_aggr_params(port);
> > > > +             }
> > > > +             port->agg_state = 0;
> > > > +     }
> > > > +
> > > > +     spin_unlock_bh(&port->agg_lock);
> > > > +     if (skb)
> > > > +             rmnet_send_skb(port, skb);
> > > > +}
> > > > +
> > > > +static enum hrtimer_restart rmnet_map_flush_tx_packet_queue(struct hrtimer *t)
> > > > +{
> > > > +     struct rmnet_port *port;
> > > > +
> > > > +     port = container_of(t, struct rmnet_port, hrtimer);
> > > > +
> > > > +     schedule_work(&port->agg_wq);
> > > 
> > > Why do you need to schedule a work and can't instead call the core of
> > > rmnet_map_flush_tx_packet_work() here? It looks like the latter does
> > > not need process context...
> > > 
> > 
> > Ack.
> > 
> 
> > Looks like removing the work is not as straightforward as I thought.
> 
> Now the timer cb has become:
> 
> static enum hrtimer_restart rmnet_map_flush_tx_packet_cb(struct hrtimer *t)
> {
>     struct sk_buff *skb = NULL;
>     struct rmnet_port *port;
> 
>     port = container_of(t, struct rmnet_port, hrtimer);
> 
>     spin_lock_bh(&port->agg_lock);
>     if (likely(port->agg_state == -EINPROGRESS)) {
>         /* Buffer may have already been shipped out */
>         if (likely(port->skbagg_head)) {
>             skb = port->skbagg_head;
>             reset_aggr_params(port);
>         }
>         port->agg_state = 0;
>     }
>     spin_unlock_bh(&port->agg_lock);
> 
>     if (skb)
>         rmnet_send_skb(port, skb);
> 
>     return HRTIMER_NORESTART;
> }
> 
> but this is causing the following warning:
> 
> [ 3106.701296] WARNING: CPU: 15 PID: 0 at kernel/softirq.c:375
> __local_bh_enable_ip+0x54/0x70
> ...
> [ 3106.701537] CPU: 15 PID: 0 Comm: swapper/15 Tainted: G           OE
>      6.1.0-rc5-rmnet-v4-warn #1
> [ 3106.701543] Hardware name: LENOVO 30DH00H2IX/1048, BIOS S08KT40A 08/23/2021
> [ 3106.701546] RIP: 0010:__local_bh_enable_ip+0x54/0x70
> [ 3106.701554] Code: a9 00 ff ff 00 74 27 65 ff 0d 08 bb 75 61 65 8b
> 05 01 bb 75 61 85 c0 74 06 5d c3 cc cc cc cc 0f 1f 44 00 00 5d c3 cc
> cc cc cc <0f> 0b eb bf 65 66 8b 05 e0 ca 76 61 66 85 c0 74 cc e8 e6 fd
> ff ff
> [ 3106.701559] RSP: 0018:ffffb8aa80510ec8 EFLAGS: 00010006
> [ 3106.701564] RAX: 0000000080010202 RBX: ffff932d7b687868 RCX: 0000000000000000
> [ 3106.701569] RDX: 0000000000000001 RSI: 0000000000000201 RDI: ffffffffc0bd5f7c
> [ 3106.701573] RBP: ffffb8aa80510ec8 R08: ffff933bdc3e31a0 R09: 000002d355c2f99d
> [ 3106.701576] R10: 0000000000000000 R11: ffffb8aa80510ff8 R12: ffff932d7b687828
> [ 3106.701580] R13: ffff932d7b687000 R14: ffff932cc1a76400 R15: ffff933bdc3e3180
> [ 3106.701584] FS:  0000000000000000(0000) GS:ffff933bdc3c0000(0000)
> knlGS:0000000000000000
> [ 3106.701589] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 3106.701593] CR2: 00007ffc26dae080 CR3: 0000000209f04003 CR4: 00000000007706e0
> [ 3106.701597] PKRU: 55555554
> [ 3106.701599] Call Trace:
> [ 3106.701602]  <IRQ>
> [ 3106.701608]  _raw_spin_unlock_bh+0x1d/0x30
> [ 3106.701623]  rmnet_map_flush_tx_packet_cb+0x4c/0x90 [rmnet]
> [ 3106.701640]  ? rmnet_send_skb+0x90/0x90 [rmnet]
> [ 3106.701655]  __hrtimer_run_queues+0x106/0x260
> [ 3106.701664]  hrtimer_interrupt+0x101/0x220
> [ 3106.701671]  __sysvec_apic_timer_interrupt+0x61/0x110
> [ 3106.701677]  sysvec_apic_timer_interrupt+0x7b/0x90
> [ 3106.701685]  </IRQ>
> [ 3106.701687]  <TASK>
> [ 3106.701689]  asm_sysvec_apic_timer_interrupt+0x1b/0x20
> [ 3106.701694] RIP: 0010:cpuidle_enter_state+0xde/0x6e0
> [ 3106.701704] Code: eb d6 60 e8 74 01 68 ff 8b 53 04 49 89 c7 0f 1f
> 44 00 00 31 ff e8 b2 23 67 ff 80 7d d0 00 0f 85 da 00 00 00 fb 0f 1f
> 44 00 00 <45> 85 f6 0f 88 01 02 00 00 4d 63 ee 49 83 fd 09 0f 87 b6 04
> 00 00
> [ 3106.701709] RSP: 0018:ffffb8aa801dbe38 EFLAGS: 00000246
> [ 3106.701713] RAX: ffff933bdc3f1380 RBX: ffffd8aa7fbc0700 RCX: 000000000000001f
> [ 3106.701717] RDX: 000000000000000f RSI: 0000000000000002 RDI: 0000000000000000
> [ 3106.701720] RBP: ffffb8aa801dbe88 R08: 000002d355d34146 R09: 000000000006e988
> [ 3106.701723] R10: 0000000000000004 R11: 071c71c71c71c71c R12: ffffffffa04ba5c0
> [ 3106.701727] R13: 0000000000000001 R14: 0000000000000001 R15: 000002d355d34146
> [ 3106.701735]  ? cpuidle_enter_state+0xce/0x6e0
> [ 3106.701744]  cpuidle_enter+0x2e/0x50
> [ 3106.701751]  do_idle+0x204/0x290
> [ 3106.701758]  cpu_startup_entry+0x20/0x30
> [ 3106.701763]  start_secondary+0x122/0x160
> [ 3106.701773]  secondary_startup_64_no_verify+0xe5/0xeb
> [ 3106.701784]  </TASK>
> 
> The reason is not obvious to me, so I need to dig further...

It happens because __hrtimer_run_queues() runs in hard-IRQ context.

To address the above you need to replace all the

spin_lock_bh(&port->agg_lock);

instances with the spin_lock_irqsave() variant. With one exception: in
rmnet_map_flush_tx_packet_cb() you can simply use spin_lock() as such
function is already in hard-irq context.
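
For example (sketch):

	unsigned long flags;

	spin_lock_irqsave(&port->agg_lock, flags);
	...
	spin_unlock_irqrestore(&port->agg_lock, flags);

and, in the timer callback, where hard IRQs are already disabled:

	spin_lock(&port->agg_lock);
	...
	spin_unlock(&port->agg_lock);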

Cheers,

Paolo
Daniele Palmas Dec. 21, 2022, 3:22 p.m. UTC | #5
Hello Paolo,

Il giorno mar 20 dic 2022 alle ore 16:28 Paolo Abeni
<pabeni@redhat.com> ha scritto:
>
> On Tue, 2022-12-20 at 15:28 +0100, Daniele Palmas wrote:
> > Hello Paolo,
> >
> > Il giorno ven 9 dic 2022 alle ore 08:38 Daniele Palmas
> > <dnlplm@gmail.com> ha scritto:
> > >
> > > Il giorno mer 7 dic 2022 alle ore 13:46 Paolo Abeni
> > > <pabeni@redhat.com> ha scritto:
> > > > > +static enum hrtimer_restart rmnet_map_flush_tx_packet_queue(struct hrtimer *t)
> > > > > +{
> > > > > +     struct rmnet_port *port;
> > > > > +
> > > > > +     port = container_of(t, struct rmnet_port, hrtimer);
> > > > > +
> > > > > +     schedule_work(&port->agg_wq);
> > > >
> > > > Why do you need to schedule a work and can't instead call the core of
> > > > rmnet_map_flush_tx_packet_work() here? It looks like the latter does
> > > > not need process context...
> > > >
> > >
> > > Ack.
> > >
> >
> > Looks like removing the work is not as straightforward as I thought.
> >
> > Now the timer cb has become:
> >
> > static enum hrtimer_restart rmnet_map_flush_tx_packet_cb(struct hrtimer *t)
> > {
> >     struct sk_buff *skb = NULL;
> >     struct rmnet_port *port;
> >
> >     port = container_of(t, struct rmnet_port, hrtimer);
> >
> >     spin_lock_bh(&port->agg_lock);
> >     if (likely(port->agg_state == -EINPROGRESS)) {
> >         /* Buffer may have already been shipped out */
> >         if (likely(port->skbagg_head)) {
> >             skb = port->skbagg_head;
> >             reset_aggr_params(port);
> >         }
> >         port->agg_state = 0;
> >     }
> >     spin_unlock_bh(&port->agg_lock);
> >
> >     if (skb)
> >         rmnet_send_skb(port, skb);
> >
> >     return HRTIMER_NORESTART;
> > }
> >
> > but this is causing the following warning:
> >
> > [ 3106.701296] WARNING: CPU: 15 PID: 0 at kernel/softirq.c:375
> > __local_bh_enable_ip+0x54/0x70
> > ...
> > [ 3106.701537] CPU: 15 PID: 0 Comm: swapper/15 Tainted: G           OE
> >      6.1.0-rc5-rmnet-v4-warn #1
> > [ 3106.701543] Hardware name: LENOVO 30DH00H2IX/1048, BIOS S08KT40A 08/23/2021
> > [ 3106.701546] RIP: 0010:__local_bh_enable_ip+0x54/0x70
> > [ 3106.701554] Code: a9 00 ff ff 00 74 27 65 ff 0d 08 bb 75 61 65 8b
> > 05 01 bb 75 61 85 c0 74 06 5d c3 cc cc cc cc 0f 1f 44 00 00 5d c3 cc
> > cc cc cc <0f> 0b eb bf 65 66 8b 05 e0 ca 76 61 66 85 c0 74 cc e8 e6 fd
> > ff ff
> > [ 3106.701559] RSP: 0018:ffffb8aa80510ec8 EFLAGS: 00010006
> > [ 3106.701564] RAX: 0000000080010202 RBX: ffff932d7b687868 RCX: 0000000000000000
> > [ 3106.701569] RDX: 0000000000000001 RSI: 0000000000000201 RDI: ffffffffc0bd5f7c
> > [ 3106.701573] RBP: ffffb8aa80510ec8 R08: ffff933bdc3e31a0 R09: 000002d355c2f99d
> > [ 3106.701576] R10: 0000000000000000 R11: ffffb8aa80510ff8 R12: ffff932d7b687828
> > [ 3106.701580] R13: ffff932d7b687000 R14: ffff932cc1a76400 R15: ffff933bdc3e3180
> > [ 3106.701584] FS:  0000000000000000(0000) GS:ffff933bdc3c0000(0000)
> > knlGS:0000000000000000
> > [ 3106.701589] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 3106.701593] CR2: 00007ffc26dae080 CR3: 0000000209f04003 CR4: 00000000007706e0
> > [ 3106.701597] PKRU: 55555554
> > [ 3106.701599] Call Trace:
> > [ 3106.701602]  <IRQ>
> > [ 3106.701608]  _raw_spin_unlock_bh+0x1d/0x30
> > [ 3106.701623]  rmnet_map_flush_tx_packet_cb+0x4c/0x90 [rmnet]
> > [ 3106.701640]  ? rmnet_send_skb+0x90/0x90 [rmnet]
> > [ 3106.701655]  __hrtimer_run_queues+0x106/0x260
> > [ 3106.701664]  hrtimer_interrupt+0x101/0x220
> > [ 3106.701671]  __sysvec_apic_timer_interrupt+0x61/0x110
> > [ 3106.701677]  sysvec_apic_timer_interrupt+0x7b/0x90
> > [ 3106.701685]  </IRQ>
> > [ 3106.701687]  <TASK>
> > [ 3106.701689]  asm_sysvec_apic_timer_interrupt+0x1b/0x20
> > [ 3106.701694] RIP: 0010:cpuidle_enter_state+0xde/0x6e0
> > [ 3106.701704] Code: eb d6 60 e8 74 01 68 ff 8b 53 04 49 89 c7 0f 1f
> > 44 00 00 31 ff e8 b2 23 67 ff 80 7d d0 00 0f 85 da 00 00 00 fb 0f 1f
> > 44 00 00 <45> 85 f6 0f 88 01 02 00 00 4d 63 ee 49 83 fd 09 0f 87 b6 04
> > 00 00
> > [ 3106.701709] RSP: 0018:ffffb8aa801dbe38 EFLAGS: 00000246
> > [ 3106.701713] RAX: ffff933bdc3f1380 RBX: ffffd8aa7fbc0700 RCX: 000000000000001f
> > [ 3106.701717] RDX: 000000000000000f RSI: 0000000000000002 RDI: 0000000000000000
> > [ 3106.701720] RBP: ffffb8aa801dbe88 R08: 000002d355d34146 R09: 000000000006e988
> > [ 3106.701723] R10: 0000000000000004 R11: 071c71c71c71c71c R12: ffffffffa04ba5c0
> > [ 3106.701727] R13: 0000000000000001 R14: 0000000000000001 R15: 000002d355d34146
> > [ 3106.701735]  ? cpuidle_enter_state+0xce/0x6e0
> > [ 3106.701744]  cpuidle_enter+0x2e/0x50
> > [ 3106.701751]  do_idle+0x204/0x290
> > [ 3106.701758]  cpu_startup_entry+0x20/0x30
> > [ 3106.701763]  start_secondary+0x122/0x160
> > [ 3106.701773]  secondary_startup_64_no_verify+0xe5/0xeb
> > [ 3106.701784]  </TASK>
> >
> > The reason is not obvious to me, so I need to dig further...
>
> It happens because __hrtimer_run_queues() runs in hard-IRQ context.
>
> To address the above you need to replace all the
>
> spin_lock_bh(&port->agg_lock);
>
> instances with the spin_lock_irqsave() variant. With one exception: in
> rmnet_map_flush_tx_packet_cb() you can simply use spin_lock() as such
> function is already in hard-irq context.
>

Thanks for your hints and I apologize in advance if I'm making naive mistakes.

I've modified the spinlock usage according to your advice,
which results in a different warning:

[ 5403.446269] WARNING: CPU: 1 PID: 0 at kernel/softirq.c:321
__local_bh_disable_ip+0x67/0x80
...
[ 5403.446349] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G        W  OE
   6.1.0-rc5-rmnet-v4-warn-debug #1
[ 5403.446352] Hardware name: LENOVO 30DH00H2IX/1048, BIOS S08KT40A 08/23/2021
[ 5403.446353] RIP: 0010:__local_bh_disable_ip+0x67/0x80
[ 5403.446355] Code: 00 74 20 9c 58 0f 1f 40 00 f6 c4 02 75 20 80 e7
02 74 06 fb 0f 1f 44 00 00 48 8b 5d f8 c9 c3 cc cc cc cc e8 db e2 08
00 eb d9 <0f> 0b eb ad e8 a0 4b eb 00 eb d9 66 66 2e 0f 1f 84 00 00 00
00 00
[ 5403.446357] RSP: 0018:ffffb8e700238df8 EFLAGS: 00010006
[ 5403.446359] RAX: 0000000080010001 RBX: ffff92d209a248a0 RCX: 0000000000000000
[ 5403.446361] RDX: ffff92d21adf0000 RSI: 0000000000000200 RDI: ffffffffa7de4e22
[ 5403.446362] RBP: ffffb8e700238e00 R08: 0000000000000001 R09: 0000000000000000
[ 5403.446363] R10: ffffb8e700238ec0 R11: 0000000000000000 R12: 0000000000000000
[ 5403.446364] R13: ffff92d209a24000 R14: ffff92d209e7a000 R15: ffff92d203232400
[ 5403.446366] FS:  0000000000000000(0000) GS:ffff92e11bc80000(0000)
knlGS:0000000000000000
[ 5403.446367] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 5403.446369] CR2: 00007ffe4db2e080 CR3: 000000017f1dc006 CR4: 00000000007706e0
[ 5403.446370] PKRU: 55555554
[ 5403.446371] Call Trace:
[ 5403.446372]  <IRQ>
[ 5403.446376]  __dev_queue_xmit+0x83/0x11d0
[ 5403.446379]  ? slab_free_freelist_hook.constprop.0+0x8f/0x180
[ 5403.446383]  ? lock_acquire+0x1a9/0x310
[ 5403.446385]  ? rcu_read_lock_sched_held+0x16/0x90
[ 5403.446389]  rmnet_send_skb+0x56/0x90 [rmnet]
[ 5403.446394]  ? rmnet_send_skb+0x56/0x90 [rmnet]
[ 5403.446397]  rmnet_map_flush_tx_packet_cb+0x57/0x90 [rmnet]
[ 5403.446401]  ? rmnet_send_skb+0x90/0x90 [rmnet]
[ 5403.446405]  __hrtimer_run_queues+0x19f/0x3b0
[ 5403.446408]  hrtimer_interrupt+0x101/0x220
[ 5403.446410]  __sysvec_apic_timer_interrupt+0x78/0x230
[ 5403.446412]  sysvec_apic_timer_interrupt+0x7b/0x90
[ 5403.446415]  </IRQ>
[ 5403.446416]  <TASK>
[ 5403.446417]  asm_sysvec_apic_timer_interrupt+0x1b/0x20
[ 5403.446420] RIP: 0010:cpuidle_enter_state+0xeb/0x5e0
[ 5403.446423] Code: 04 bf ff ff ff ff 49 89 c7 e8 21 f6 ff ff 31 ff
e8 4a 48 5c ff 80 7d d0 00 0f 85 db 00 00 00 e8 fb 99 6d ff fb 0f 1f
44 00 00 <45> 85 f6 0f 88 64 01 00 00 4d 63 ee 49 83 fd 09 0f 87 ed 03
00 00
[ 5403.446424] RSP: 0018:ffffb8e70016be40 EFLAGS: 00000246
[ 5403.446426] RAX: ffffb8e70016be88 RBX: ffffd8e6ff483500 RCX: 000000000000001f
[ 5403.446427] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffffffffa7d6f7d5
[ 5403.446428] RBP: ffffb8e70016be88 R08: 0000000000000000 R09: 0000000000000000
[ 5403.446429] R10: 0000000000000004 R11: 071c71c71c71c71c R12: ffffffffa8f4de40
[ 5403.446430] R13: 0000000000000002 R14: 0000000000000002 R15: 000004ea167ea79d
[ 5403.446432]  ? cpuidle_enter_state+0xe5/0x5e0
[ 5403.446436]  cpuidle_enter+0x2e/0x50
[ 5403.446439]  do_idle+0x21e/0x2b0
[ 5403.446441]  ? raw_spin_rq_unlock+0x10/0x40
[ 5403.446445]  cpu_startup_entry+0x20/0x30
[ 5403.446447]  start_secondary+0x127/0x160
[ 5403.446450]  secondary_startup_64_no_verify+0xe5/0xeb
[ 5403.446454]  </TASK>
[ 5403.446455] irq event stamp: 94314
[ 5403.446456] hardirqs last  enabled at (94313): [<ffffffffa73c0f97>]
tick_nohz_idle_enter+0x67/0xa0
[ 5403.446460] hardirqs last disabled at (94314): [<ffffffffa733418c>]
do_idle+0xbc/0x2b0
[ 5403.446462] softirqs last  enabled at (93926): [<ffffffffa72cc923>]
__irq_exit_rcu+0xa3/0xd0
[ 5403.446463] softirqs last disabled at (93921): [<ffffffffa72cc923>]
__irq_exit_rcu+0xa3/0xd0
[ 5403.446465] ---[ end trace 0000000000000000 ]---

And gdb says:

(gdb) l* __dev_queue_xmit+0x83
0xffffffff81be4e33 is in __dev_queue_xmit (./include/linux/rcupdate.h:316).
311
312     #ifdef CONFIG_DEBUG_LOCK_ALLOC
313
314     static inline void rcu_lock_acquire(struct lockdep_map *map)
315     {
316             lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_);
317     }
318
319     static inline void rcu_lock_release(struct lockdep_map *map)
320     {

(gdb) l* __local_bh_disable_ip+0x67
0xffffffff810cc447 is in __local_bh_disable_ip (kernel/softirq.c:321).
316     #ifdef CONFIG_TRACE_IRQFLAGS
317     void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
318     {
319             unsigned long flags;
320
321             WARN_ON_ONCE(in_hardirq());
322
323             raw_local_irq_save(flags);
324             /*
325              * The preempt tracer hooks into preempt_count_add and
will break

Does this mean that __dev_queue_xmit can't be called in such a scenario?

Thanks,
Daniele

Patch

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 27b1663c476e..39d24e07f306 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -12,6 +12,7 @@ 
 #include "rmnet_handlers.h"
 #include "rmnet_vnd.h"
 #include "rmnet_private.h"
+#include "rmnet_map.h"
 
 /* Local Definitions and Declarations */
 
@@ -39,6 +40,8 @@  static int rmnet_unregister_real_device(struct net_device *real_dev)
 	if (port->nr_rmnet_devs)
 		return -EINVAL;
 
+	rmnet_map_tx_aggregate_exit(port);
+
 	netdev_rx_handler_unregister(real_dev);
 
 	kfree(port);
@@ -79,6 +82,8 @@  static int rmnet_register_real_device(struct net_device *real_dev,
 	for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++)
 		INIT_HLIST_HEAD(&port->muxed_ep[entry]);
 
+	rmnet_map_tx_aggregate_init(port);
+
 	netdev_dbg(real_dev, "registered with rmnet\n");
 	return 0;
 }
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index 3d3cba56c516..ed112d51ac5a 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -6,6 +6,7 @@ 
  */
 
 #include <linux/skbuff.h>
+#include <linux/time.h>
 #include <net/gro_cells.h>
 
 #ifndef _RMNET_CONFIG_H_
@@ -19,6 +20,12 @@  struct rmnet_endpoint {
 	struct hlist_node hlnode;
 };
 
+struct rmnet_egress_agg_params {
+	u32 bytes;
+	u32 count;
+	u64 time_nsec;
+};
+
 /* One instance of this structure is instantiated for each real_dev associated
  * with rmnet.
  */
@@ -30,6 +37,19 @@  struct rmnet_port {
 	struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP];
 	struct net_device *bridge_ep;
 	struct net_device *rmnet_dev;
+
+	/* Egress aggregation information */
+	struct rmnet_egress_agg_params egress_agg_params;
+	/* Protect aggregation related elements */
+	spinlock_t agg_lock;
+	struct sk_buff *skbagg_head;
+	struct sk_buff *skbagg_tail;
+	int agg_state;
+	u8 agg_count;
+	struct timespec64 agg_time;
+	struct timespec64 agg_last;
+	struct hrtimer hrtimer;
+	struct work_struct agg_wq;
 };
 
 extern struct rtnl_link_ops rmnet_link_ops;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index a313242a762e..914ef03b5438 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -164,8 +164,18 @@  static int rmnet_map_egress_handler(struct sk_buff *skb,
 
 	map_header->mux_id = mux_id;
 
-	skb->protocol = htons(ETH_P_MAP);
+	if (port->egress_agg_params.count > 1) {
+		unsigned int len;
+
+		len = rmnet_map_tx_aggregate(skb, port, orig_dev);
+		if (likely(len)) {
+			rmnet_vnd_tx_fixup_len(len, orig_dev);
+			return -EINPROGRESS;
+		}
+		return -ENOMEM;
+	}
 
+	skb->protocol = htons(ETH_P_MAP);
 	return 0;
 }
 
@@ -235,6 +245,7 @@  void rmnet_egress_handler(struct sk_buff *skb)
 	struct rmnet_port *port;
 	struct rmnet_priv *priv;
 	u8 mux_id;
+	int err;
 
 	sk_pacing_shift_update(skb->sk, 8);
 
@@ -247,8 +258,11 @@  void rmnet_egress_handler(struct sk_buff *skb)
 	if (!port)
 		goto drop;
 
-	if (rmnet_map_egress_handler(skb, port, mux_id, orig_dev))
+	err = rmnet_map_egress_handler(skb, port, mux_id, orig_dev);
+	if (err == -ENOMEM)
 		goto drop;
+	else if (err == -EINPROGRESS)
+		return;
 
 	rmnet_vnd_tx_fixup(skb, orig_dev);
 
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 2b033060fc20..b70284095568 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -53,5 +53,11 @@  void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
 				      struct net_device *orig_dev,
 				      int csum_type);
 int rmnet_map_process_next_hdr_packet(struct sk_buff *skb, u16 len);
+unsigned int rmnet_map_tx_aggregate(struct sk_buff *skb, struct rmnet_port *port,
+				    struct net_device *orig_dev);
+void rmnet_map_tx_aggregate_init(struct rmnet_port *port);
+void rmnet_map_tx_aggregate_exit(struct rmnet_port *port);
+void rmnet_map_update_ul_agg_config(struct rmnet_port *port, u32 size,
+				    u32 count, u32 time);
 
 #endif /* _RMNET_MAP_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index ba194698cc14..2c094640f245 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -12,6 +12,7 @@ 
 #include "rmnet_config.h"
 #include "rmnet_map.h"
 #include "rmnet_private.h"
+#include "rmnet_vnd.h"
 
 #define RMNET_MAP_DEAGGR_SPACING  64
 #define RMNET_MAP_DEAGGR_HEADROOM (RMNET_MAP_DEAGGR_SPACING / 2)
@@ -518,3 +519,193 @@  int rmnet_map_process_next_hdr_packet(struct sk_buff *skb,
 
 	return 0;
 }
+
+#define RMNET_AGG_BYPASS_TIME_NSEC 10000000L
+
+static void reset_aggr_params(struct rmnet_port *port)
+{
+	port->skbagg_head = NULL;
+	port->agg_count = 0;
+	port->agg_state = 0;
+	memset(&port->agg_time, 0, sizeof(struct timespec64));
+}
+
+static void rmnet_send_skb(struct rmnet_port *port, struct sk_buff *skb)
+{
+	if (skb_needs_linearize(skb, port->dev->features)) {
+		if (unlikely(__skb_linearize(skb))) {
+			struct rmnet_priv *priv;
+
+			priv = netdev_priv(port->rmnet_dev);
+			this_cpu_inc(priv->pcpu_stats->stats.tx_drops);
+			dev_kfree_skb_any(skb);
+			return;
+		}
+	}
+
+	dev_queue_xmit(skb);
+}
+
+static void rmnet_map_flush_tx_packet_work(struct work_struct *work)
+{
+	struct sk_buff *skb = NULL;
+	struct rmnet_port *port;
+
+	port = container_of(work, struct rmnet_port, agg_wq);
+
+	spin_lock_bh(&port->agg_lock);
+	if (likely(port->agg_state == -EINPROGRESS)) {
+		/* Buffer may have already been shipped out */
+		if (likely(port->skbagg_head)) {
+			skb = port->skbagg_head;
+			reset_aggr_params(port);
+		}
+		port->agg_state = 0;
+	}
+
+	spin_unlock_bh(&port->agg_lock);
+	if (skb)
+		rmnet_send_skb(port, skb);
+}
+
+static enum hrtimer_restart rmnet_map_flush_tx_packet_queue(struct hrtimer *t)
+{
+	struct rmnet_port *port;
+
+	port = container_of(t, struct rmnet_port, hrtimer);
+
+	schedule_work(&port->agg_wq);
+
+	return HRTIMER_NORESTART;
+}
+
+unsigned int rmnet_map_tx_aggregate(struct sk_buff *skb, struct rmnet_port *port,
+				    struct net_device *orig_dev)
+{
+	struct timespec64 diff, last;
+	unsigned int len = skb->len;
+	struct sk_buff *agg_skb;
+	int size;
+
+	spin_lock_bh(&port->agg_lock);
+	memcpy(&last, &port->agg_last, sizeof(struct timespec64));
+	ktime_get_real_ts64(&port->agg_last);
+
+	if (!port->skbagg_head) {
+		/* Check to see if we should agg first. If the traffic is very
+		 * sparse, don't aggregate.
+		 */
+new_packet:
+		diff = timespec64_sub(port->agg_last, last);
+		size = port->egress_agg_params.bytes - skb->len;
+
+		if (size < 0) {
+			/* dropped */
+			spin_unlock_bh(&port->agg_lock);
+			return 0;
+		}
+
+		if (diff.tv_sec > 0 || diff.tv_nsec > RMNET_AGG_BYPASS_TIME_NSEC ||
+		    size == 0) {
+			spin_unlock_bh(&port->agg_lock);
+			skb->protocol = htons(ETH_P_MAP);
+			dev_queue_xmit(skb);
+			return len;
+		}
+
+		port->skbagg_head = skb_copy_expand(skb, 0, size, GFP_ATOMIC);
+		if (!port->skbagg_head) {
+			spin_unlock_bh(&port->agg_lock);
+			skb->protocol = htons(ETH_P_MAP);
+			dev_queue_xmit(skb);
+			return len;
+		}
+		dev_kfree_skb_any(skb);
+		port->skbagg_head->protocol = htons(ETH_P_MAP);
+		port->agg_count = 1;
+		ktime_get_real_ts64(&port->agg_time);
+		skb_frag_list_init(port->skbagg_head);
+		goto schedule;
+	}
+	diff = timespec64_sub(port->agg_last, port->agg_time);
+	size = port->egress_agg_params.bytes - port->skbagg_head->len;
+
+	if (skb->len > size) {
+		agg_skb = port->skbagg_head;
+		reset_aggr_params(port);
+		spin_unlock_bh(&port->agg_lock);
+		hrtimer_cancel(&port->hrtimer);
+		rmnet_send_skb(port, agg_skb);
+		spin_lock_bh(&port->agg_lock);
+		goto new_packet;
+	}
+
+	if (skb_has_frag_list(port->skbagg_head))
+		port->skbagg_tail->next = skb;
+	else
+		skb_shinfo(port->skbagg_head)->frag_list = skb;
+
+	port->skbagg_head->len += skb->len;
+	port->skbagg_head->data_len += skb->len;
+	port->skbagg_head->truesize += skb->truesize;
+	port->skbagg_tail = skb;
+	port->agg_count++;
+
+	if (diff.tv_sec > 0 || diff.tv_nsec > port->egress_agg_params.time_nsec ||
+	    port->agg_count == port->egress_agg_params.count ||
+	    port->skbagg_head->len == port->egress_agg_params.bytes) {
+		agg_skb = port->skbagg_head;
+		reset_aggr_params(port);
+		spin_unlock_bh(&port->agg_lock);
+		hrtimer_cancel(&port->hrtimer);
+		rmnet_send_skb(port, agg_skb);
+		return len;
+	}
+
+schedule:
+	if (!hrtimer_active(&port->hrtimer) && port->agg_state != -EINPROGRESS) {
+		port->agg_state = -EINPROGRESS;
+		hrtimer_start(&port->hrtimer,
+			      ns_to_ktime(port->egress_agg_params.time_nsec),
+			      HRTIMER_MODE_REL);
+	}
+	spin_unlock_bh(&port->agg_lock);
+
+	return len;
+}
+
+void rmnet_map_update_ul_agg_config(struct rmnet_port *port, u32 size,
+				    u32 count, u32 time)
+{
+	spin_lock_bh(&port->agg_lock);
+	port->egress_agg_params.bytes = size;
+	port->egress_agg_params.count = count;
+	port->egress_agg_params.time_nsec = time * NSEC_PER_USEC;
+	spin_unlock_bh(&port->agg_lock);
+}
+
+void rmnet_map_tx_aggregate_init(struct rmnet_port *port)
+{
+	hrtimer_init(&port->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	port->hrtimer.function = rmnet_map_flush_tx_packet_queue;
+	spin_lock_init(&port->agg_lock);
+	rmnet_map_update_ul_agg_config(port, 4096, 1, 800);
+	INIT_WORK(&port->agg_wq, rmnet_map_flush_tx_packet_work);
+}
+
+void rmnet_map_tx_aggregate_exit(struct rmnet_port *port)
+{
+	hrtimer_cancel(&port->hrtimer);
+	cancel_work_sync(&port->agg_wq);
+
+	spin_lock_bh(&port->agg_lock);
+	if (port->agg_state == -EINPROGRESS) {
+		if (port->skbagg_head) {
+			dev_kfree_skb_any(port->skbagg_head);
+			reset_aggr_params(port);
+		}
+
+		port->agg_state = 0;
+	}
+	spin_unlock_bh(&port->agg_lock);
+}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 3f5e6572d20e..6d8b8fdb9d03 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -29,7 +29,7 @@  void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev)
 	u64_stats_update_end(&pcpu_ptr->syncp);
 }
 
-void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev)
+void rmnet_vnd_tx_fixup_len(unsigned int len, struct net_device *dev)
 {
 	struct rmnet_priv *priv = netdev_priv(dev);
 	struct rmnet_pcpu_stats *pcpu_ptr;
@@ -38,10 +38,15 @@  void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev)
 
 	u64_stats_update_begin(&pcpu_ptr->syncp);
 	pcpu_ptr->stats.tx_pkts++;
-	pcpu_ptr->stats.tx_bytes += skb->len;
+	pcpu_ptr->stats.tx_bytes += len;
 	u64_stats_update_end(&pcpu_ptr->syncp);
 }
 
+void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev)
+{
+	rmnet_vnd_tx_fixup_len(skb->len, dev);
+}
+
 /* Network Device Operations */
 
 static netdev_tx_t rmnet_vnd_start_xmit(struct sk_buff *skb,
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
index dc3a4443ef0a..c2b2baf86894 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
@@ -16,6 +16,7 @@  int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 int rmnet_vnd_dellink(u8 id, struct rmnet_port *port,
 		      struct rmnet_endpoint *ep);
 void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev);
+void rmnet_vnd_tx_fixup_len(unsigned int len, struct net_device *dev);
 void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev);
 void rmnet_vnd_setup(struct net_device *dev);
 int rmnet_vnd_validate_real_dev_mtu(struct net_device *real_dev);