@@ -671,134 +671,208 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
671671}
672672
673673/**
674- * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
674+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
675675 * @xdp_ring: XDP Tx ring
676- * @budget: max number of frames to xmit
677- *
678- * Returns true if cleanup/transmission is done.
676+ * @tx_buf: Tx buffer to clean
679677 */
680- static bool ice_xmit_zc (struct ice_tx_ring * xdp_ring , int budget )
678+ static void
679+ ice_clean_xdp_tx_buf (struct ice_tx_ring * xdp_ring , struct ice_tx_buf * tx_buf )
681680{
682- struct ice_tx_desc * tx_desc = NULL ;
683- bool work_done = true;
684- struct xdp_desc desc ;
685- dma_addr_t dma ;
686-
687- while (likely (budget -- > 0 )) {
688- struct ice_tx_buf * tx_buf ;
689-
690- if (unlikely (!ICE_DESC_UNUSED (xdp_ring ))) {
691- xdp_ring -> tx_stats .tx_busy ++ ;
692- work_done = false;
693- break ;
694- }
681+ xdp_return_frame ((struct xdp_frame * )tx_buf -> raw_buf );
682+ dma_unmap_single (xdp_ring -> dev , dma_unmap_addr (tx_buf , dma ),
683+ dma_unmap_len (tx_buf , len ), DMA_TO_DEVICE );
684+ dma_unmap_len_set (tx_buf , len , 0 );
685+ }
695686
696- tx_buf = & xdp_ring -> tx_buf [xdp_ring -> next_to_use ];
687+ /**
688+ * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring
689+ * @xdp_ring: XDP ring to clean
690+ * @napi_budget: amount of descriptors that NAPI allows us to clean
691+ *
692+ * Returns count of cleaned descriptors
693+ */
694+ static u16 ice_clean_xdp_irq_zc (struct ice_tx_ring * xdp_ring , int napi_budget )
695+ {
696+ u16 tx_thresh = xdp_ring -> tx_thresh ;
697+ int budget = napi_budget / tx_thresh ;
698+ u16 ntc = xdp_ring -> next_to_clean ;
699+ struct ice_tx_desc * next_dd_desc ;
700+ u16 next_dd = xdp_ring -> next_dd ;
701+ u16 desc_cnt = xdp_ring -> count ;
702+ struct ice_tx_buf * tx_buf ;
703+ u16 cleared_dds = 0 ;
704+ u32 xsk_frames = 0 ;
705+ u16 i ;
697706
698- if (!xsk_tx_peek_desc (xdp_ring -> xsk_pool , & desc ))
707+ do {
708+ next_dd_desc = ICE_TX_DESC (xdp_ring , next_dd );
709+ if (!(next_dd_desc -> cmd_type_offset_bsz &
710+ cpu_to_le64 (ICE_TX_DESC_DTYPE_DESC_DONE )))
699711 break ;
700712
701- dma = xsk_buff_raw_get_dma (xdp_ring -> xsk_pool , desc .addr );
702- xsk_buff_raw_dma_sync_for_device (xdp_ring -> xsk_pool , dma ,
703- desc .len );
713+ cleared_dds ++ ;
714+ xsk_frames = 0 ;
704715
705- tx_buf -> bytecount = desc .len ;
716+ for (i = 0 ; i < tx_thresh ; i ++ ) {
717+ tx_buf = & xdp_ring -> tx_buf [ntc ];
706718
707- tx_desc = ICE_TX_DESC (xdp_ring , xdp_ring -> next_to_use );
708- tx_desc -> buf_addr = cpu_to_le64 (dma );
709- tx_desc -> cmd_type_offset_bsz =
710- ice_build_ctob (ICE_TXD_LAST_DESC_CMD , 0 , desc .len , 0 );
719+ if (tx_buf -> raw_buf ) {
720+ ice_clean_xdp_tx_buf (xdp_ring , tx_buf );
721+ tx_buf -> raw_buf = NULL ;
722+ } else {
723+ xsk_frames ++ ;
724+ }
711725
712- xdp_ring -> next_to_use ++ ;
713- if (xdp_ring -> next_to_use == xdp_ring -> count )
714- xdp_ring -> next_to_use = 0 ;
715- }
726+ ntc ++ ;
727+ if (ntc >= xdp_ring -> count )
728+ ntc = 0 ;
729+ }
730+ if (xsk_frames )
731+ xsk_tx_completed (xdp_ring -> xsk_pool , xsk_frames );
732+ next_dd_desc -> cmd_type_offset_bsz = 0 ;
733+ next_dd = next_dd + tx_thresh ;
734+ if (next_dd >= desc_cnt )
735+ next_dd = tx_thresh - 1 ;
736+ } while (budget -- );
716737
717- if (tx_desc ) {
718- ice_xdp_ring_update_tail (xdp_ring );
719- xsk_tx_release (xdp_ring -> xsk_pool );
720- }
738+ xdp_ring -> next_to_clean = ntc ;
739+ xdp_ring -> next_dd = next_dd ;
721740
722- return budget > 0 && work_done ;
741+ return cleared_dds * tx_thresh ;
723742}
724743
725744/**
726- * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
727- * @xdp_ring: XDP Tx ring
728- * @tx_buf: Tx buffer to clean
745+ * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
746+ * @xdp_ring: XDP ring to produce the HW Tx descriptor on
747+ * @desc: AF_XDP descriptor to pull the DMA address and length from
748+ * @total_bytes: bytes accumulator that will be used for stats update
729749 */
730- static void
731- ice_clean_xdp_tx_buf ( struct ice_tx_ring * xdp_ring , struct ice_tx_buf * tx_buf )
750+ static void ice_xmit_pkt ( struct ice_tx_ring * xdp_ring , struct xdp_desc * desc ,
751+ unsigned int * total_bytes )
732752{
733- xdp_return_frame ((struct xdp_frame * )tx_buf -> raw_buf );
734- dma_unmap_single (xdp_ring -> dev , dma_unmap_addr (tx_buf , dma ),
735- dma_unmap_len (tx_buf , len ), DMA_TO_DEVICE );
736- dma_unmap_len_set (tx_buf , len , 0 );
753+ struct ice_tx_desc * tx_desc ;
754+ dma_addr_t dma ;
755+
756+ dma = xsk_buff_raw_get_dma (xdp_ring -> xsk_pool , desc -> addr );
757+ xsk_buff_raw_dma_sync_for_device (xdp_ring -> xsk_pool , dma , desc -> len );
758+
759+ tx_desc = ICE_TX_DESC (xdp_ring , xdp_ring -> next_to_use ++ );
760+ tx_desc -> buf_addr = cpu_to_le64 (dma );
761+ tx_desc -> cmd_type_offset_bsz = ice_build_ctob (ICE_TX_DESC_CMD_EOP ,
762+ 0 , desc -> len , 0 );
763+
764+ * total_bytes += desc -> len ;
737765}
738766
739767/**
740- * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
741- * @xdp_ring: XDP Tx ring
742- * @budget: NAPI budget
743- *
744- * Returns true if cleanup/tranmission is done.
768+ * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
769+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
770+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
771+ * @total_bytes: bytes accumulator that will be used for stats update
745772 */
746- bool ice_clean_tx_irq_zc (struct ice_tx_ring * xdp_ring , int budget )
773+ static void ice_xmit_pkt_batch (struct ice_tx_ring * xdp_ring , struct xdp_desc * descs ,
774+ unsigned int * total_bytes )
747775{
748- int total_packets = 0 , total_bytes = 0 ;
749- s16 ntc = xdp_ring -> next_to_clean ;
776+ u16 tx_thresh = xdp_ring -> tx_thresh ;
777+ u16 ntu = xdp_ring -> next_to_use ;
750778 struct ice_tx_desc * tx_desc ;
751- struct ice_tx_buf * tx_buf ;
752- u32 xsk_frames = 0 ;
753- bool xmit_done ;
779+ dma_addr_t dma ;
780+ u32 i ;
754781
755- tx_desc = ICE_TX_DESC ( xdp_ring , ntc );
756- tx_buf = & xdp_ring -> tx_buf [ ntc ] ;
757- ntc -= xdp_ring -> count ;
782+ loop_unrolled_for ( i = 0 ; i < PKTS_PER_BATCH ; i ++ ) {
783+ dma = xsk_buff_raw_get_dma ( xdp_ring -> xsk_pool , descs [ i ]. addr ) ;
784+ xsk_buff_raw_dma_sync_for_device ( xdp_ring -> xsk_pool , dma , descs [ i ]. len ) ;
758785
759- do {
760- if (!( tx_desc -> cmd_type_offset_bsz &
761- cpu_to_le64 ( ICE_TX_DESC_DTYPE_DESC_DONE )))
762- break ;
786+ tx_desc = ICE_TX_DESC ( xdp_ring , ntu ++ );
787+ tx_desc -> buf_addr = cpu_to_le64 ( dma );
788+ tx_desc -> cmd_type_offset_bsz = ice_build_ctob ( ICE_TX_DESC_CMD_EOP ,
789+ 0 , descs [ i ]. len , 0 ) ;
763790
764- total_bytes += tx_buf -> bytecount ;
765- total_packets ++ ;
791+ * total_bytes += descs [ i ]. len ;
792+ }
766793
767- if (tx_buf -> raw_buf ) {
768- ice_clean_xdp_tx_buf (xdp_ring , tx_buf );
769- tx_buf -> raw_buf = NULL ;
770- } else {
771- xsk_frames ++ ;
772- }
794+ xdp_ring -> next_to_use = ntu ;
773795
774- tx_desc -> cmd_type_offset_bsz = 0 ;
775- tx_buf ++ ;
776- tx_desc ++ ;
777- ntc ++ ;
796+ if (xdp_ring -> next_to_use > xdp_ring -> next_rs ) {
797+ tx_desc = ICE_TX_DESC (xdp_ring , xdp_ring -> next_rs );
798+ tx_desc -> cmd_type_offset_bsz |=
799+ cpu_to_le64 (ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S );
800+ xdp_ring -> next_rs += tx_thresh ;
801+ }
802+ }
778803
779- if (unlikely (!ntc )) {
780- ntc -= xdp_ring -> count ;
781- tx_buf = xdp_ring -> tx_buf ;
782- tx_desc = ICE_TX_DESC (xdp_ring , 0 );
783- }
804+ /**
805+ * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring
806+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
807+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
808+ * @nb_pkts: count of packets to be send
809+ * @total_bytes: bytes accumulator that will be used for stats update
810+ */
811+ static void ice_fill_tx_hw_ring (struct ice_tx_ring * xdp_ring , struct xdp_desc * descs ,
812+ u32 nb_pkts , unsigned int * total_bytes )
813+ {
814+ u16 tx_thresh = xdp_ring -> tx_thresh ;
815+ struct ice_tx_desc * tx_desc ;
816+ u32 batched , leftover , i ;
817+
818+ batched = nb_pkts & ~(PKTS_PER_BATCH - 1 );
819+ leftover = nb_pkts & (PKTS_PER_BATCH - 1 );
820+ for (i = 0 ; i < batched ; i += PKTS_PER_BATCH )
821+ ice_xmit_pkt_batch (xdp_ring , & descs [i ], total_bytes );
822+ for (i = batched ; i < batched + leftover ; i ++ )
823+ ice_xmit_pkt (xdp_ring , & descs [i ], total_bytes );
824+
825+ if (xdp_ring -> next_to_use > xdp_ring -> next_rs ) {
826+ tx_desc = ICE_TX_DESC (xdp_ring , xdp_ring -> next_rs );
827+ tx_desc -> cmd_type_offset_bsz |=
828+ cpu_to_le64 (ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S );
829+ xdp_ring -> next_rs += tx_thresh ;
830+ }
831+ }
784832
785- prefetch (tx_desc );
833+ /**
834+ * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
835+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
836+ * @budget: number of free descriptors on HW Tx ring that can be used
837+ * @napi_budget: amount of descriptors that NAPI allows us to clean
838+ *
839+ * Returns true if there is no more work that needs to be done, false otherwise
840+ */
841+ bool ice_xmit_zc (struct ice_tx_ring * xdp_ring , u32 budget , int napi_budget )
842+ {
843+ struct xdp_desc * descs = xdp_ring -> xsk_pool -> tx_descs ;
844+ u16 tx_thresh = xdp_ring -> tx_thresh ;
845+ u32 nb_pkts , nb_processed = 0 ;
846+ unsigned int total_bytes = 0 ;
847+ struct ice_tx_desc * tx_desc ;
786848
787- } while (likely (-- budget ));
849+ if (budget < tx_thresh )
850+ budget += ice_clean_xdp_irq_zc (xdp_ring , napi_budget );
851+
852+ nb_pkts = xsk_tx_peek_release_desc_batch (xdp_ring -> xsk_pool , budget );
853+ if (!nb_pkts )
854+ return true;
855+
856+ if (xdp_ring -> next_to_use + nb_pkts >= xdp_ring -> count ) {
857+ nb_processed = xdp_ring -> count - xdp_ring -> next_to_use ;
858+ ice_fill_tx_hw_ring (xdp_ring , descs , nb_processed , & total_bytes );
859+ tx_desc = ICE_TX_DESC (xdp_ring , xdp_ring -> next_rs );
860+ tx_desc -> cmd_type_offset_bsz |=
861+ cpu_to_le64 (ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S );
862+ xdp_ring -> next_rs = tx_thresh - 1 ;
863+ xdp_ring -> next_to_use = 0 ;
864+ }
788865
789- ntc += xdp_ring -> count ;
790- xdp_ring -> next_to_clean = ntc ;
866+ ice_fill_tx_hw_ring ( xdp_ring , & descs [ nb_processed ], nb_pkts - nb_processed ,
867+ & total_bytes ) ;
791868
792- if ( xsk_frames )
793- xsk_tx_completed (xdp_ring -> xsk_pool , xsk_frames );
869+ ice_xdp_ring_update_tail ( xdp_ring );
870+ ice_update_tx_ring_stats (xdp_ring , nb_pkts , total_bytes );
794871
795872 if (xsk_uses_need_wakeup (xdp_ring -> xsk_pool ))
796873 xsk_set_tx_need_wakeup (xdp_ring -> xsk_pool );
797874
798- ice_update_tx_ring_stats (xdp_ring , total_packets , total_bytes );
799- xmit_done = ice_xmit_zc (xdp_ring , ICE_DFLT_IRQ_WORK );
800-
801- return budget > 0 && xmit_done ;
875+ return nb_pkts < budget ;
802876}
803877
804878/**