Message ID | 20201203160245.1014867-8-jackmanb@google.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | Atomics for eBPF | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for bpf-next |
netdev/subject_prefix | success | Link |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 15753 this patch: 15753 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | fail | CHECK: Lines should not end with a '(' CHECK: No space is necessary after a cast ERROR: Remove Gerrit Change-Id's before submitting upstream WARNING: line length of 110 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 15665 this patch: 15665 |
netdev/header_inline | success | Link |
netdev/stable | success | Stable not CCed |
On 12/3/20 8:02 AM, Brendan Jackman wrote: > This value can be set in bpf_insn.imm, for BPF_ATOMIC instructions, it is not clear what "this value" means here. Maybe more specific using "The BPF_FETCH field"? > in order to have the previous value of the atomically-modified memory > location loaded into the src register after an atomic op is carried > out. > > Suggested-by: Yonghong Song <yhs@fb.com> > Signed-off-by: Brendan Jackman <jackmanb@google.com> Ack with the above nit. Acked-by: Yonghong Song <yhs@fb.com>
On 12/3/20 8:02 AM, Brendan Jackman wrote: > This value can be set in bpf_insn.imm, for BPF_ATOMIC instructions, > in order to have the previous value of the atomically-modified memory > location loaded into the src register after an atomic op is carried > out. > > Suggested-by: Yonghong Song <yhs@fb.com> > Signed-off-by: Brendan Jackman <jackmanb@google.com> > Change-Id: I649ad48edb565a32ccdf72924ffe96a8c8da57ad > --- > arch/x86/net/bpf_jit_comp.c | 4 ++++ > include/linux/filter.h | 9 +++++++++ > include/uapi/linux/bpf.h | 3 +++ > kernel/bpf/core.c | 13 +++++++++++++ > kernel/bpf/disasm.c | 7 +++++++ > kernel/bpf/verifier.c | 35 ++++++++++++++++++++++++---------- > tools/include/linux/filter.h | 10 ++++++++++ > tools/include/uapi/linux/bpf.h | 3 +++ > 8 files changed, 74 insertions(+), 10 deletions(-) > > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c > index 5e5a132b3d52..88cb09fa3bfb 100644 > --- a/arch/x86/net/bpf_jit_comp.c > +++ b/arch/x86/net/bpf_jit_comp.c > @@ -827,6 +827,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op, > /* lock *(u32/u64*)(dst_reg + off) <op>= src_reg */ > EMIT1(simple_alu_opcodes[atomic_op]); > break; > + case BPF_ADD | BPF_FETCH: > + /* src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */ > + EMIT2(0x0F, 0xC1); > + break; > default: > pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op); > return -EFAULT; > diff --git a/include/linux/filter.h b/include/linux/filter.h > index ce19988fb312..4e04d0fc454f 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -270,6 +270,15 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) > .imm = BPF_ADD }) > #define BPF_STX_XADD BPF_ATOMIC_ADD /* alias */ > > +/* Atomic memory add with fetch, src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */ > + > +#define BPF_ATOMIC_FETCH_ADD(SIZE, DST, SRC, OFF) \ > + ((struct bpf_insn) { \ > + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ > + .dst_reg = DST, \ > + .src_reg = SRC, \ > + .off = OFF, \ > + .imm = BPF_ADD | BPF_FETCH }) > > /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index d0adc48db43c..025e377e7229 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -44,6 +44,9 @@ > #define BPF_CALL 0x80 /* function call */ > #define BPF_EXIT 0x90 /* function return */ > > +/* atomic op type fields (stored in immediate) */ > +#define BPF_FETCH 0x01 /* fetch previous value into src reg */ > + > /* Register numbers */ > enum { > BPF_REG_0 = 0, > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c > index 3abc6b250b18..61e93eb7d363 100644 > --- a/kernel/bpf/core.c > +++ b/kernel/bpf/core.c > @@ -1624,16 +1624,29 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) > /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ > atomic_add((u32) SRC, (atomic_t *)(unsigned long) > (DST + insn->off)); > + break; > + case BPF_ADD | BPF_FETCH: > + SRC = (u32) atomic_fetch_add( > + (u32) SRC, > + (atomic_t *)(unsigned long) (DST + insn->off)); > + break; > default: > goto default_label; > } > CONT; > + > STX_ATOMIC_DW: > switch (IMM) { > case BPF_ADD: > /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */ > atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) > (DST + insn->off)); > + break; > + case BPF_ADD | BPF_FETCH: > + SRC = (u64) atomic64_fetch_add( > + (u64) SRC, > + (atomic64_t *)(s64) (DST + insn->off)); > + break; > default: > goto default_label; > } > diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c > index 37c8d6e9b4cc..3ee2246a52ef 100644 > --- a/kernel/bpf/disasm.c > +++ b/kernel/bpf/disasm.c > @@ -160,6 +160,13 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, > bpf_ldst_string[BPF_SIZE(insn->code) >> 3], > insn->dst_reg, insn->off, > insn->src_reg); > + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && > + insn->imm == (BPF_ADD | BPF_FETCH)) { > + verbose(cbs->private_data, "(%02x) r%d = atomic%s_fetch_add(*(%s *)(r%d %+d), r%d)\n", We should not do dereference here (withough first *), right? since the input is actually an address. something like below? r2 = atomic[64]_fetch_add((u64/u32 *)(r3 +40), r2) > + insn->code, insn->src_reg, > + BPF_SIZE(insn->code) == BPF_DW ? "64" : "", > + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], > + insn->dst_reg, insn->off, insn->src_reg); > } else { > verbose(cbs->private_data, "BUG_%02x\n", insn->code); > } > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index e8b41ccdfb90..a68adbcee370 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -3602,7 +3602,11 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i > { > int err; > > - if (insn->imm != BPF_ADD) { > + switch (insn->imm) { > + case BPF_ADD: > + case BPF_ADD | BPF_FETCH: > + break; > + default: > verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm); > return -EINVAL; > } > @@ -3631,7 +3635,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i > is_pkt_reg(env, insn->dst_reg) || > is_flow_key_reg(env, insn->dst_reg) || > is_sk_reg(env, insn->dst_reg)) { > - verbose(env, "atomic stores into R%d %s is not allowed\n", > + verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", > insn->dst_reg, > reg_type_str[reg_state(env, insn->dst_reg)->type]); > return -EACCES; > @@ -3644,8 +3648,20 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i > return err; > > /* check whether we can write into the same memory */ > - return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, > - BPF_SIZE(insn->code), BPF_WRITE, -1, true); > + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, > + BPF_SIZE(insn->code), BPF_WRITE, -1, true); > + if (err) > + return err; > + > + if (!(insn->imm & BPF_FETCH)) > + return 0; > + > + /* check and record load of old value into src reg */ > + err = check_reg_arg(env, insn->src_reg, DST_OP); > + if (err) > + return err; > + > + return 0; > } > > static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, > @@ -9501,12 +9517,6 @@ static int do_check(struct bpf_verifier_env *env) > } else if (class == BPF_STX) { > enum bpf_reg_type *prev_dst_type, dst_reg_type; > > - if (((BPF_MODE(insn->code) != BPF_MEM && > - BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm != 0)) { > - verbose(env, "BPF_STX uses reserved fields\n"); > - return -EINVAL; > - } > - > if (BPF_MODE(insn->code) == BPF_ATOMIC) { > err = check_atomic(env, env->insn_idx, insn); > if (err) > @@ -9515,6 +9525,11 @@ static int do_check(struct bpf_verifier_env *env) > continue; > } > > + if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) { > + verbose(env, "BPF_STX uses reserved fields\n"); > + return -EINVAL; > + } > + > /* check src1 operand */ > err = check_reg_arg(env, insn->src_reg, SRC_OP); > if (err) > diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h > index 95ff51d97f25..ac7701678e1a 100644 > --- a/tools/include/linux/filter.h > +++ b/tools/include/linux/filter.h > @@ -180,6 +180,16 @@ > .imm = BPF_ADD }) > #define BPF_STX_XADD BPF_ATOMIC_ADD /* alias */ > > +/* Atomic memory add with fetch, src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */ Maybe src_reg = atomic_fetch_add(dst_reg + off, src_reg)? > + > +#define BPF_ATOMIC_FETCH_ADD(SIZE, DST, SRC, OFF) \ > + ((struct bpf_insn) { \ > + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ > + .dst_reg = DST, \ > + .src_reg = SRC, \ > + .off = OFF, \ > + .imm = BPF_ADD | BPF_FETCH }) > + > /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ > > #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index d0adc48db43c..025e377e7229 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -44,6 +44,9 @@ > #define BPF_CALL 0x80 /* function call */ > #define BPF_EXIT 0x90 /* function return */ > > +/* atomic op type fields (stored in immediate) */ > +#define BPF_FETCH 0x01 /* fetch previous value into src reg */ > + > /* Register numbers */ > enum { > BPF_REG_0 = 0, >
On Thu, Dec 03, 2020 at 09:27:04PM -0800, Yonghong Song wrote: > On 12/3/20 8:02 AM, Brendan Jackman wrote: [...] > > diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c > > index 37c8d6e9b4cc..3ee2246a52ef 100644 > > --- a/kernel/bpf/disasm.c > > +++ b/kernel/bpf/disasm.c > > @@ -160,6 +160,13 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, > > bpf_ldst_string[BPF_SIZE(insn->code) >> 3], > > insn->dst_reg, insn->off, > > insn->src_reg); > > + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && > > + insn->imm == (BPF_ADD | BPF_FETCH)) { > > + verbose(cbs->private_data, "(%02x) r%d = atomic%s_fetch_add(*(%s *)(r%d %+d), r%d)\n", > > We should not do dereference here (withough first *), right? since the input > is actually an address. something like below? > r2 = atomic[64]_fetch_add((u64/u32 *)(r3 +40), r2) Ah yep - thanks! [...] > > diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h > > index 95ff51d97f25..ac7701678e1a 100644 > > --- a/tools/include/linux/filter.h > > +++ b/tools/include/linux/filter.h > > @@ -180,6 +180,16 @@ > > .imm = BPF_ADD }) > > #define BPF_STX_XADD BPF_ATOMIC_ADD /* alias */ > > +/* Atomic memory add with fetch, src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */ > > Maybe src_reg = atomic_fetch_add(dst_reg + off, src_reg)? Yep - and the same for the bitwise ops in the later patch.
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5e5a132b3d52..88cb09fa3bfb 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -827,6 +827,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op, /* lock *(u32/u64*)(dst_reg + off) <op>= src_reg */ EMIT1(simple_alu_opcodes[atomic_op]); break; + case BPF_ADD | BPF_FETCH: + /* src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */ + EMIT2(0x0F, 0xC1); + break; default: pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op); return -EFAULT; diff --git a/include/linux/filter.h b/include/linux/filter.h index ce19988fb312..4e04d0fc454f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -270,6 +270,15 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .imm = BPF_ADD }) #define BPF_STX_XADD BPF_ATOMIC_ADD /* alias */ +/* Atomic memory add with fetch, src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */ + +#define BPF_ATOMIC_FETCH_ADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = BPF_ADD | BPF_FETCH }) /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d0adc48db43c..025e377e7229 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -44,6 +44,9 @@ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ +/* atomic op type fields (stored in immediate) */ +#define BPF_FETCH 0x01 /* fetch previous value into src reg */ + /* Register numbers */ enum { BPF_REG_0 = 0, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 3abc6b250b18..61e93eb7d363 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1624,16 +1624,29 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ atomic_add((u32) SRC, (atomic_t *)(unsigned long) (DST + insn->off)); + break; + case BPF_ADD | BPF_FETCH: + SRC = (u32) atomic_fetch_add( + (u32) SRC, + (atomic_t *)(unsigned long) (DST + insn->off)); + break; default: goto default_label; } CONT; + STX_ATOMIC_DW: switch (IMM) { case BPF_ADD: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */ atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) (DST + insn->off)); + break; + case BPF_ADD | BPF_FETCH: + SRC = (u64) atomic64_fetch_add( + (u64) SRC, + (atomic64_t *)(s64) (DST + insn->off)); + break; default: goto default_label; } diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 37c8d6e9b4cc..3ee2246a52ef 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -160,6 +160,13 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == (BPF_ADD | BPF_FETCH)) { + verbose(cbs->private_data, "(%02x) r%d = atomic%s_fetch_add(*(%s *)(r%d %+d), r%d)\n", + insn->code, insn->src_reg, + BPF_SIZE(insn->code) == BPF_DW ? "64" : "", + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, insn->off, insn->src_reg); } else { verbose(cbs->private_data, "BUG_%02x\n", insn->code); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e8b41ccdfb90..a68adbcee370 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3602,7 +3602,11 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i { int err; - if (insn->imm != BPF_ADD) { + switch (insn->imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + break; + default: verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm); return -EINVAL; } @@ -3631,7 +3635,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i is_pkt_reg(env, insn->dst_reg) || is_flow_key_reg(env, insn->dst_reg) || is_sk_reg(env, insn->dst_reg)) { - verbose(env, "atomic stores into R%d %s is not allowed\n", + verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, reg_type_str[reg_state(env, insn->dst_reg)->type]); return -EACCES; @@ -3644,8 +3648,20 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i return err; /* check whether we can write into the same memory */ - return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_WRITE, -1, true); + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_WRITE, -1, true); + if (err) + return err; + + if (!(insn->imm & BPF_FETCH)) + return 0; + + /* check and record load of old value into src reg */ + err = check_reg_arg(env, insn->src_reg, DST_OP); + if (err) + return err; + + return 0; } static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, @@ -9501,12 +9517,6 @@ static int do_check(struct bpf_verifier_env *env) } else if (class == BPF_STX) { enum bpf_reg_type *prev_dst_type, dst_reg_type; - if (((BPF_MODE(insn->code) != BPF_MEM && - BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm != 0)) { - verbose(env, "BPF_STX uses reserved fields\n"); - return -EINVAL; - } - if (BPF_MODE(insn->code) == BPF_ATOMIC) { err = check_atomic(env, env->insn_idx, insn); if (err) @@ -9515,6 +9525,11 @@ static int do_check(struct bpf_verifier_env *env) continue; } + if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) { + verbose(env, "BPF_STX uses reserved fields\n"); + return -EINVAL; + } + /* check src1 operand */ err = check_reg_arg(env, insn->src_reg, SRC_OP); if (err) diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 95ff51d97f25..ac7701678e1a 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -180,6 +180,16 @@ .imm = BPF_ADD }) #define BPF_STX_XADD BPF_ATOMIC_ADD /* alias */ +/* Atomic memory add with fetch, src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */ + +#define BPF_ATOMIC_FETCH_ADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = BPF_ADD | BPF_FETCH }) + /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d0adc48db43c..025e377e7229 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -44,6 +44,9 @@ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ +/* atomic op type fields (stored in immediate) */ +#define BPF_FETCH 0x01 /* fetch previous value into src reg */ + /* Register numbers */ enum { BPF_REG_0 = 0,
This value can be set in bpf_insn.imm, for BPF_ATOMIC instructions, in order to have the previous value of the atomically-modified memory location loaded into the src register after an atomic op is carried out. Suggested-by: Yonghong Song <yhs@fb.com> Signed-off-by: Brendan Jackman <jackmanb@google.com> Change-Id: I649ad48edb565a32ccdf72924ffe96a8c8da57ad --- arch/x86/net/bpf_jit_comp.c | 4 ++++ include/linux/filter.h | 9 +++++++++ include/uapi/linux/bpf.h | 3 +++ kernel/bpf/core.c | 13 +++++++++++++ kernel/bpf/disasm.c | 7 +++++++ kernel/bpf/verifier.c | 35 ++++++++++++++++++++++++---------- tools/include/linux/filter.h | 10 ++++++++++ tools/include/uapi/linux/bpf.h | 3 +++ 8 files changed, 74 insertions(+), 10 deletions(-)