Skip to content

Commit 33c9805

Browse files
liu-song-6 authored and
Alexei Starovoitov committed
bpf: Introduce bpf_jit_binary_pack_[alloc|finalize|free]
This is the jit binary allocator built on top of bpf_prog_pack. bpf_prog_pack allocates RO memory, which cannot be used directly by the JIT engine. Therefore, a temporary rw buffer is allocated for the JIT engine. Once JIT is done, bpf_jit_binary_pack_finalize is used to copy the program to the RO memory. bpf_jit_binary_pack_alloc reserves 16 bytes of extra space for illegal instructions, which is smaller than the 128 bytes of space reserved by bpf_jit_binary_alloc. This change is necessary for bpf_jit_binary_hdr to find the correct header. Also, flag use_bpf_prog_pack is added to differentiate a program allocated by bpf_jit_binary_pack_alloc. Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20220204185742.271030-9-song@kernel.org
1 parent 5763105 commit 33c9805

File tree

3 files changed

+120
-10
lines changed

3 files changed

+120
-10
lines changed

include/linux/bpf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -953,6 +953,7 @@ struct bpf_prog_aux {
953953
bool sleepable;
954954
bool tail_call_reachable;
955955
bool xdp_has_frags;
956+
bool use_bpf_prog_pack;
956957
struct hlist_node tramp_hlist;
957958
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
958959
const struct btf_type *attach_func_proto;

include/linux/filter.h

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -890,15 +890,6 @@ static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
890890
set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
891891
}
892892

893-
static inline struct bpf_binary_header *
894-
bpf_jit_binary_hdr(const struct bpf_prog *fp)
895-
{
896-
unsigned long real_start = (unsigned long)fp->bpf_func;
897-
unsigned long addr = real_start & PAGE_MASK;
898-
899-
return (void *)addr;
900-
}
901-
902893
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
903894
static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
904895
{
@@ -1068,6 +1059,18 @@ void *bpf_jit_alloc_exec(unsigned long size);
10681059
void bpf_jit_free_exec(void *addr);
10691060
void bpf_jit_free(struct bpf_prog *fp);
10701061

1062+
struct bpf_binary_header *
1063+
bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image,
1064+
unsigned int alignment,
1065+
struct bpf_binary_header **rw_hdr,
1066+
u8 **rw_image,
1067+
bpf_jit_fill_hole_t bpf_fill_ill_insns);
1068+
int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
1069+
struct bpf_binary_header *ro_header,
1070+
struct bpf_binary_header *rw_header);
1071+
void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
1072+
struct bpf_binary_header *rw_header);
1073+
10711074
int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
10721075
struct bpf_jit_poke_descriptor *poke);
10731076

kernel/bpf/core.c

Lines changed: 107 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,109 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
10311031
bpf_jit_uncharge_modmem(size);
10321032
}
10331033

1034+
/* Allocate jit binary from bpf_prog_pack allocator.
1035+
* Since the allocated memory is RO+X, the JIT engine cannot write directly
1036+
* to the memory. To solve this problem, a RW buffer is also allocated at
1037+
* the same time. The JIT engine should calculate offsets based on the
1038+
* RO memory address, but write JITed program to the RW buffer. Once the
1039+
* JIT engine finishes, it calls bpf_jit_binary_pack_finalize, which copies
1040+
* the JITed program to the RO memory.
1041+
*/
1042+
struct bpf_binary_header *
1043+
bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
1044+
unsigned int alignment,
1045+
struct bpf_binary_header **rw_header,
1046+
u8 **rw_image,
1047+
bpf_jit_fill_hole_t bpf_fill_ill_insns)
1048+
{
1049+
struct bpf_binary_header *ro_header;
1050+
u32 size, hole, start;
1051+
1052+
WARN_ON_ONCE(!is_power_of_2(alignment) ||
1053+
alignment > BPF_IMAGE_ALIGNMENT);
1054+
1055+
/* add 16 bytes for a random section of illegal instructions */
1056+
size = round_up(proglen + sizeof(*ro_header) + 16, BPF_PROG_CHUNK_SIZE);
1057+
1058+
if (bpf_jit_charge_modmem(size))
1059+
return NULL;
1060+
ro_header = bpf_prog_pack_alloc(size);
1061+
if (!ro_header) {
1062+
bpf_jit_uncharge_modmem(size);
1063+
return NULL;
1064+
}
1065+
1066+
*rw_header = kvmalloc(size, GFP_KERNEL);
1067+
if (!*rw_header) {
1068+
bpf_prog_pack_free(ro_header);
1069+
bpf_jit_uncharge_modmem(size);
1070+
return NULL;
1071+
}
1072+
1073+
/* Fill space with illegal/arch-dep instructions. */
1074+
bpf_fill_ill_insns(*rw_header, size);
1075+
(*rw_header)->size = size;
1076+
1077+
hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)),
1078+
BPF_PROG_CHUNK_SIZE - sizeof(*ro_header));
1079+
start = (get_random_int() % hole) & ~(alignment - 1);
1080+
1081+
*image_ptr = &ro_header->image[start];
1082+
*rw_image = &(*rw_header)->image[start];
1083+
1084+
return ro_header;
1085+
}
1086+
1087+
/* Copy JITed text from rw_header to its final location, the ro_header. */
1088+
int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
1089+
struct bpf_binary_header *ro_header,
1090+
struct bpf_binary_header *rw_header)
1091+
{
1092+
void *ptr;
1093+
1094+
ptr = bpf_arch_text_copy(ro_header, rw_header, rw_header->size);
1095+
1096+
kvfree(rw_header);
1097+
1098+
if (IS_ERR(ptr)) {
1099+
bpf_prog_pack_free(ro_header);
1100+
return PTR_ERR(ptr);
1101+
}
1102+
prog->aux->use_bpf_prog_pack = true;
1103+
return 0;
1104+
}
1105+
1106+
/* bpf_jit_binary_pack_free is called in two different scenarios:
1107+
* 1) when the program is freed after bpf_jit_binary_pack_finalize;
1108+
* 2) when the JIT engine fails (before bpf_jit_binary_pack_finalize).
1109+
* For case 2), we need to free both the RO memory and the RW buffer.
1110+
* Also, ro_header->size in 2) is not properly set yet, so rw_header->size
1111+
* is used for uncharge.
1112+
*/
1113+
void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
1114+
struct bpf_binary_header *rw_header)
1115+
{
1116+
u32 size = rw_header ? rw_header->size : ro_header->size;
1117+
1118+
bpf_prog_pack_free(ro_header);
1119+
kvfree(rw_header);
1120+
bpf_jit_uncharge_modmem(size);
1121+
}
1122+
1123+
static inline struct bpf_binary_header *
1124+
bpf_jit_binary_hdr(const struct bpf_prog *fp)
1125+
{
1126+
unsigned long real_start = (unsigned long)fp->bpf_func;
1127+
unsigned long addr;
1128+
1129+
if (fp->aux->use_bpf_prog_pack)
1130+
addr = real_start & BPF_PROG_CHUNK_MASK;
1131+
else
1132+
addr = real_start & PAGE_MASK;
1133+
1134+
return (void *)addr;
1135+
}
1136+
10341137
/* This symbol is only overridden by archs that have different
10351138
* requirements than the usual eBPF JITs, f.e. when they only
10361139
* implement cBPF JIT, do not set images read-only, etc.
@@ -1040,7 +1143,10 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
10401143
if (fp->jited) {
10411144
struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
10421145

1043-
bpf_jit_binary_free(hdr);
1146+
if (fp->aux->use_bpf_prog_pack)
1147+
bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */);
1148+
else
1149+
bpf_jit_binary_free(hdr);
10441150

10451151
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
10461152
}

0 commit comments

Comments
 (0)