Skip to content

Commit 5ffa255

Browse files
bjackmanAlexei Starovoitov
authored andcommitted
bpf: Add instructions for atomic_[cmp]xchg
This adds two atomic opcodes, both of which include the BPF_FETCH flag. XCHG without the BPF_FETCH flag would naturally encode atomic_set. This is not supported because it would be of limited value to userspace (it doesn't imply any barriers). CMPXCHG without BPF_FETCH woulud be an atomic compare-and-write. We don't have such an operation in the kernel so it isn't provided to BPF either. There are two significant design decisions made for the CMPXCHG instruction: - To solve the issue that this operation fundamentally has 3 operands, but we only have two register fields. Therefore the operand we compare against (the kernel's API calls it 'old') is hard-coded to be R0. x86 has similar design (and A64 doesn't have this problem). A potential alternative might be to encode the other operand's register number in the immediate field. - The kernel's atomic_cmpxchg returns the old value, while the C11 userspace APIs return a boolean indicating the comparison result. Which should BPF do? A64 returns the old value. x86 returns the old value in the hard-coded register (and also sets a flag). That means return-old-value is easier to JIT, so that's what we use. Signed-off-by: Brendan Jackman <jackmanb@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Yonghong Song <yhs@fb.com> Link: https://lore.kernel.org/bpf/20210114181751.768687-8-jackmanb@google.com
1 parent 5ca419f commit 5ffa255

File tree

8 files changed

+70
-4
lines changed

8 files changed

+70
-4
lines changed

arch/x86/net/bpf_jit_comp.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,14 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
815815
/* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */
816816
EMIT2(0x0F, 0xC1);
817817
break;
818+
case BPF_XCHG:
819+
/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
820+
EMIT1(0x87);
821+
break;
822+
case BPF_CMPXCHG:
823+
/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
824+
EMIT2(0x0F, 0xB1);
825+
break;
818826
default:
819827
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
820828
return -EFAULT;

include/linux/filter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,8 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
265265
*
266266
* BPF_ADD *(uint *) (dst_reg + off16) += src_reg
267267
* BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg);
268+
* BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
269+
* BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
268270
*/
269271

270272
#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \

include/uapi/linux/bpf.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@
4545
#define BPF_EXIT 0x90 /* function return */
4646

4747
/* atomic op type fields (stored in immediate) */
48-
#define BPF_FETCH 0x01 /* fetch previous value into src reg */
48+
#define BPF_FETCH 0x01 /* not an opcode on its own, used to build others */
49+
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
50+
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
4951

5052
/* Register numbers */
5153
enum {

kernel/bpf/core.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1630,6 +1630,16 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
16301630
(u32) SRC,
16311631
(atomic_t *)(unsigned long) (DST + insn->off));
16321632
break;
1633+
case BPF_XCHG:
1634+
SRC = (u32) atomic_xchg(
1635+
(atomic_t *)(unsigned long) (DST + insn->off),
1636+
(u32) SRC);
1637+
break;
1638+
case BPF_CMPXCHG:
1639+
BPF_R0 = (u32) atomic_cmpxchg(
1640+
(atomic_t *)(unsigned long) (DST + insn->off),
1641+
(u32) BPF_R0, (u32) SRC);
1642+
break;
16331643
default:
16341644
goto default_label;
16351645
}
@@ -1647,6 +1657,16 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
16471657
(u64) SRC,
16481658
(atomic64_t *)(unsigned long) (DST + insn->off));
16491659
break;
1660+
case BPF_XCHG:
1661+
SRC = (u64) atomic64_xchg(
1662+
(atomic64_t *)(unsigned long) (DST + insn->off),
1663+
(u64) SRC);
1664+
break;
1665+
case BPF_CMPXCHG:
1666+
BPF_R0 = (u64) atomic64_cmpxchg(
1667+
(atomic64_t *)(unsigned long) (DST + insn->off),
1668+
(u64) BPF_R0, (u64) SRC);
1669+
break;
16501670
default:
16511671
goto default_label;
16521672
}

kernel/bpf/disasm.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,21 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
167167
BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
168168
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
169169
insn->dst_reg, insn->off, insn->src_reg);
170+
} else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
171+
insn->imm == BPF_CMPXCHG) {
172+
verbose(cbs->private_data, "(%02x) r0 = atomic%s_cmpxchg((%s *)(r%d %+d), r0, r%d)\n",
173+
insn->code,
174+
BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
175+
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
176+
insn->dst_reg, insn->off,
177+
insn->src_reg);
178+
} else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
179+
insn->imm == BPF_XCHG) {
180+
verbose(cbs->private_data, "(%02x) r%d = atomic%s_xchg((%s *)(r%d %+d), r%d)\n",
181+
insn->code, insn->src_reg,
182+
BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
183+
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
184+
insn->dst_reg, insn->off, insn->src_reg);
170185
} else {
171186
verbose(cbs->private_data, "BUG_%02x\n", insn->code);
172187
}

kernel/bpf/verifier.c

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3606,11 +3606,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
36063606

36073607
static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
36083608
{
3609+
int load_reg;
36093610
int err;
36103611

36113612
switch (insn->imm) {
36123613
case BPF_ADD:
36133614
case BPF_ADD | BPF_FETCH:
3615+
case BPF_XCHG:
3616+
case BPF_CMPXCHG:
36143617
break;
36153618
default:
36163619
verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
@@ -3632,6 +3635,13 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
36323635
if (err)
36333636
return err;
36343637

3638+
if (insn->imm == BPF_CMPXCHG) {
3639+
/* Check comparison of R0 with memory location */
3640+
err = check_reg_arg(env, BPF_REG_0, SRC_OP);
3641+
if (err)
3642+
return err;
3643+
}
3644+
36353645
if (is_pointer_value(env, insn->src_reg)) {
36363646
verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
36373647
return -EACCES;
@@ -3662,8 +3672,13 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
36623672
if (!(insn->imm & BPF_FETCH))
36633673
return 0;
36643674

3665-
/* check and record load of old value into src reg */
3666-
err = check_reg_arg(env, insn->src_reg, DST_OP);
3675+
if (insn->imm == BPF_CMPXCHG)
3676+
load_reg = BPF_REG_0;
3677+
else
3678+
load_reg = insn->src_reg;
3679+
3680+
/* check and record load of old value */
3681+
err = check_reg_arg(env, load_reg, DST_OP);
36673682
if (err)
36683683
return err;
36693684

tools/include/linux/filter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@
174174
*
175175
* BPF_ADD *(uint *) (dst_reg + off16) += src_reg
176176
* BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg);
177+
* BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
178+
* BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
177179
*/
178180

179181
#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \

tools/include/uapi/linux/bpf.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@
4545
#define BPF_EXIT 0x90 /* function return */
4646

4747
/* atomic op type fields (stored in immediate) */
48-
#define BPF_FETCH 0x01 /* fetch previous value into src reg */
48+
#define BPF_FETCH 0x01 /* not an opcode on its own, used to build others */
49+
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
50+
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
4951

5052
/* Register numbers */
5153
enum {

0 commit comments

Comments
 (0)