@@ -4153,34 +4153,45 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
 	return 0;
 }
 
-static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
-				   enum xdp_mem_type mem_type, bool release)
+static struct xdp_buff *bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+					       bool tail, bool release)
 {
-	struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
+	struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
+					  xsk_buff_get_head(xdp);
 
 	if (release) {
-		xsk_buff_del_tail(zc_frag);
-		__xdp_return(0, mem_type, false, zc_frag);
+		xsk_buff_del_frag(zc_frag);
 	} else {
-		zc_frag->data_end -= shrink;
+		if (tail)
+			zc_frag->data_end -= shrink;
+		else
+			zc_frag->data += shrink;
 	}
+
+	return zc_frag;
 }
 
 static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
-				int shrink)
+				int shrink, bool tail)
 {
 	enum xdp_mem_type mem_type = xdp->rxq->mem.type;
 	bool release = skb_frag_size(frag) == shrink;
+	netmem_ref netmem = skb_frag_netmem(frag);
+	struct xdp_buff *zc_frag = NULL;
 
 	if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
-		bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
-		goto out;
+		netmem = 0;
+		zc_frag = bpf_xdp_shrink_data_zc(xdp, shrink, tail, release);
 	}
 
-	if (release)
-		__xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
+	if (release) {
+		__xdp_return(netmem, mem_type, false, zc_frag);
+	} else {
+		if (!tail)
+			skb_frag_off_add(frag, shrink);
+		skb_frag_size_sub(frag, shrink);
+	}
 
-out:
 	return release;
 }
 
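Note (not part of the diff): bpf_xdp_shrink_data() now trims a fragment from either end, tail=true for bpf_xdp_frags_shrink_tail() and tail=false for the new bpf_xdp_pull_data() added further down. The standalone sketch below only models the non-XSK offset/size bookkeeping; frag_model is a hypothetical stand-in for skb_frag_t, and the program is an illustration rather than kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for skb_frag_t: just the fields the shrink
 * arithmetic touches. */
struct frag_model {
	unsigned int offset;
	unsigned int size;
};

/* Mirrors the non-XSK branch of bpf_xdp_shrink_data(): a tail shrink only
 * reduces the size, a head shrink also advances the offset, and a shrink
 * equal to the frag size means the frag is released (in the kernel,
 * __xdp_return() then gives the memory back). */
static bool shrink_frag(struct frag_model *frag, unsigned int shrink, bool tail)
{
	bool release = frag->size == shrink;

	if (!release) {
		if (!tail)
			frag->offset += shrink;
		frag->size -= shrink;
	}
	return release;
}

int main(void)
{
	struct frag_model f = { .offset = 0, .size = 100 };
	bool released;

	released = shrink_frag(&f, 30, true);   /* tail: size 100 -> 70 */
	released = shrink_frag(&f, 20, false);  /* head: offset 0 -> 20, size 70 -> 50 */
	printf("offset=%u size=%u released=%d\n", f.offset, f.size, released);

	released = shrink_frag(&f, 50, false);  /* shrink == size: frag released */
	printf("released=%d\n", released);
	return 0;
}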
@@ -4198,18 +4209,15 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
 
 		len_free += shrink;
 		offset -= shrink;
-		if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
+		if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
 			n_frags_free++;
-		} else {
-			skb_frag_size_sub(frag, shrink);
-			break;
-		}
 	}
 	sinfo->nr_frags -= n_frags_free;
 	sinfo->xdp_frags_size -= len_free;
 
 	if (unlikely(!sinfo->nr_frags)) {
 		xdp_buff_clear_frags_flag(xdp);
+		xdp_buff_clear_frag_pfmemalloc(xdp);
 		xdp->data_end -= offset;
 	}
 
@@ -12206,6 +12214,98 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
 	return 0;
 }
 
+/**
+ * bpf_xdp_pull_data() - Pull in non-linear xdp data.
+ * @x: &xdp_md associated with the XDP buffer
+ * @len: length of data to be made directly accessible in the linear part
+ *
+ * Pull in data in case the XDP buffer associated with @x is non-linear and
+ * not all @len are in the linear data area.
+ *
+ * Direct packet access allows reading and writing linear XDP data through
+ * packet pointers (i.e., &xdp_md->data + offsets). The amount of data which
+ * ends up in the linear part of the xdp_buff depends on the NIC and its
+ * configuration. When a frag-capable XDP program wants to directly access
+ * headers that may be in the non-linear area, call this kfunc to make sure
+ * the data is available in the linear area. Alternatively, use dynptr or
+ * bpf_xdp_{load,store}_bytes() to access data without pulling.
+ *
+ * This kfunc can also be used with bpf_xdp_adjust_head() to decapsulate
+ * headers in the non-linear data area.
+ *
+ * A call to this kfunc may reduce headroom. If there is not enough tailroom
+ * in the linear data area, metadata and data will be shifted down.
+ *
+ * A call to this kfunc is susceptible to change the buffer geometry.
+ * Therefore, at load time, all checks on pointers previously done by the
+ * verifier are invalidated and must be performed again, if the kfunc is used
+ * in combination with direct packet access.
+ *
+ * Return:
+ * * %0       - success
+ * * %-EINVAL - invalid len
+ */
+__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
+{
+	struct xdp_buff *xdp = (struct xdp_buff *)x;
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i, delta, shift, headroom, tailroom, n_frags_free = 0;
+	void *data_hard_end = xdp_data_hard_end(xdp);
+	int data_len = xdp->data_end - xdp->data;
+	void *start;
+
+	if (len <= data_len)
+		return 0;
+
+	if (unlikely(len > xdp_get_buff_len(xdp)))
+		return -EINVAL;
+
+	start = xdp_data_meta_unsupported(xdp) ? xdp->data : xdp->data_meta;
+
+	headroom = start - xdp->data_hard_start - sizeof(struct xdp_frame);
+	tailroom = data_hard_end - xdp->data_end;
+
+	delta = len - data_len;
+	if (unlikely(delta > tailroom + headroom))
+		return -EINVAL;
+
+	shift = delta - tailroom;
+	if (shift > 0) {
+		memmove(start - shift, start, xdp->data_end - start);
+
+		xdp->data_meta -= shift;
+		xdp->data -= shift;
+		xdp->data_end -= shift;
+	}
+
+	for (i = 0; i < sinfo->nr_frags && delta; i++) {
+		skb_frag_t *frag = &sinfo->frags[i];
+		u32 shrink = min_t(u32, delta, skb_frag_size(frag));
+
+		memcpy(xdp->data_end, skb_frag_address(frag), shrink);
+
+		xdp->data_end += shrink;
+		sinfo->xdp_frags_size -= shrink;
+		delta -= shrink;
+		if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
+			n_frags_free++;
+	}
+
+	if (unlikely(n_frags_free)) {
+		memmove(sinfo->frags, sinfo->frags + n_frags_free,
+			(sinfo->nr_frags - n_frags_free) * sizeof(skb_frag_t));
+
+		sinfo->nr_frags -= n_frags_free;
+
+		if (!sinfo->nr_frags) {
+			xdp_buff_clear_frags_flag(xdp);
+			xdp_buff_clear_frag_pfmemalloc(xdp);
+		}
+	}
+
+	return 0;
+}
+
 __bpf_kfunc_end_defs();
 
 int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
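Usage sketch (not part of the diff): a frags-aware XDP program might call the new kfunc as below to make the Ethernet/IP/TCP headers directly accessible before parsing them. The extern declaration, section name, and header sizes are illustrative assumptions; the important point, per the kernel-doc above, is that packet pointers are re-derived and re-checked after the call because the buffer geometry may have changed.

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>

/* kfunc declaration; newer kernel/libbpf headers may already provide one */
extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym;

SEC("xdp.frags")
int pull_tcp_headers(struct xdp_md *ctx)
{
	__u32 need = sizeof(struct ethhdr) + sizeof(struct iphdr) +
		     sizeof(struct tcphdr);
	void *data, *data_end;
	struct ethhdr *eth;

	/* Make sure at least the L2/L3/L4 headers sit in the linear area. */
	if (bpf_xdp_pull_data(ctx, need))
		return XDP_PASS;

	/* Re-derive packet pointers: earlier bounds checks are invalidated. */
	data = (void *)(long)ctx->data;
	data_end = (void *)(long)ctx->data_end;
	eth = data;
	if ((void *)(eth + 1) > data_end)
		return XDP_PASS;

	/* ... parse the pulled headers with direct packet access ... */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";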
@@ -12233,6 +12333,7 @@ BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
 
 BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
 BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
+BTF_ID_FLAGS(func, bpf_xdp_pull_data)
 BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
 
 BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)