@@ -242,7 +242,7 @@ struct at_xdmac_lld {
242242 u32 mbr_dus ; /* Destination Microblock Stride Register */
243243};
244244
245-
245+ /* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */
246246struct at_xdmac_desc {
247247 struct at_xdmac_lld lld ;
248248 enum dma_transfer_direction direction ;
@@ -253,7 +253,7 @@ struct at_xdmac_desc {
253253 unsigned int xfer_size ;
254254 struct list_head descs_list ;
255255 struct list_head xfer_node ;
256- };
256+ } __aligned ( sizeof ( u64 )) ;
257257
258258static inline void __iomem * at_xdmac_chan_reg_base (struct at_xdmac * atxdmac , unsigned int chan_nb )
259259{
@@ -1400,6 +1400,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
14001400 u32 cur_nda , check_nda , cur_ubc , mask , value ;
14011401 u8 dwidth = 0 ;
14021402 unsigned long flags ;
1403+ bool initd ;
14031404
14041405 ret = dma_cookie_status (chan , cookie , txstate );
14051406 if (ret == DMA_COMPLETE )
@@ -1424,7 +1425,16 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
14241425 residue = desc -> xfer_size ;
14251426 /*
14261427 * Flush FIFO: only relevant when the transfer is source peripheral
1427- * synchronized.
1428+ * synchronized. Flush is needed before reading CUBC because data in
1429+ * the FIFO are not reported by CUBC. Reporting a residue of the
1430+ * transfer length while we have data in FIFO can cause issues.
1431+ * Use case: the Atmel USART has a timeout which means characters have
1432+ * been received but no more have arrived for a while. On
1433+ * timeout, it requests the residue. If the data are in the DMA FIFO,
1434+ * we will return a residue of the transfer length. It means no data
1435+ * received. If an application is waiting for these data, it will hang
1436+ * since we won't have another USART timeout without receiving new
1437+ * data.
14281438 */
14291439 mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC ;
14301440 value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM ;
@@ -1435,41 +1445,63 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
14351445 }
14361446
14371447 /*
1438- * When processing the residue, we need to read two registers but we
1439- * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where
1440- * we stand in the descriptor list and AT_XDMAC_CUBC is used
1441- * to know how many data are remaining for the current descriptor.
1442- * Since the dma channel is not paused to not loose data, between the
1443- * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of
1444- * descriptor.
1445- * For that reason, after reading AT_XDMAC_CUBC, we check if we are
1446- * still using the same descriptor by reading a second time
1447- * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to
1448- * read again AT_XDMAC_CUBC.
1448+ * The easiest way to compute the residue should be to pause the DMA
1449+ * but doing this can lead to missing some data as some devices don't
1450+ * have a FIFO.
1451+ * We need to read several registers because:
1452+ * - DMA is running therefore a descriptor change is possible while
1453+ * reading these registers
1454+ * - When the block transfer is done, the value of the CUBC register
1455+ * is set to its initial value until the fetch of the next descriptor.
1456+ * This value will corrupt the residue calculation so we have to skip
1457+ * it.
1458+ *
1459+ * INITD -------- ------------
1460+ * |____________________|
1461+ * _______________________ _______________
1462+ * NDA @desc2 \/ @desc3
1463+ * _______________________/\_______________
1464+ * __________ ___________ _______________
1465+ * CUBC 0 \/ MAX desc1 \/ MAX desc2
1466+ * __________/\___________/\_______________
1467+ *
1468+ * Since descriptors are aligned on 64 bits, we can assume that
1469+ * the update of NDA and CUBC is atomic.
14491470 * Memory barriers are used to ensure the read order of the registers.
1450- * A max number of retries is set because unlikely it can never ends if
1451- * we are transferring a lot of data with small buffers.
1471+ * A max number of retries is set because, although unlikely, the loop could otherwise never end.
14521472 */
1453- cur_nda = at_xdmac_chan_read (atchan , AT_XDMAC_CNDA ) & 0xfffffffc ;
1454- rmb ();
1455- cur_ubc = at_xdmac_chan_read (atchan , AT_XDMAC_CUBC );
14561473 for (retry = 0 ; retry < AT_XDMAC_RESIDUE_MAX_RETRIES ; retry ++ ) {
1457- rmb ();
14581474 check_nda = at_xdmac_chan_read (atchan , AT_XDMAC_CNDA ) & 0xfffffffc ;
1459-
1460- if (likely (cur_nda == check_nda ))
1461- break ;
1462-
1463- cur_nda = check_nda ;
1475+ rmb ();
1476+ initd = !!(at_xdmac_chan_read (atchan , AT_XDMAC_CC ) & AT_XDMAC_CC_INITD );
14641477 rmb ();
14651478 cur_ubc = at_xdmac_chan_read (atchan , AT_XDMAC_CUBC );
1479+ rmb ();
1480+ cur_nda = at_xdmac_chan_read (atchan , AT_XDMAC_CNDA ) & 0xfffffffc ;
1481+ rmb ();
1482+
1483+ if ((check_nda == cur_nda ) && initd )
1484+ break ;
14661485 }
14671486
14681487 if (unlikely (retry >= AT_XDMAC_RESIDUE_MAX_RETRIES )) {
14691488 ret = DMA_ERROR ;
14701489 goto spin_unlock ;
14711490 }
14721491
1492+ /*
1493+ * Flush FIFO: only relevant when the transfer is source peripheral
1494+ * synchronized. Another flush is needed here because CUBC is updated
1495+ * when the controller sends the data write command. It can lead to
1496+ * report data that are not written in the memory or the device. The
1497+ * FIFO flush ensures that data are really written.
1498+ */
1499+ if ((desc -> lld .mbr_cfg & mask ) == value ) {
1500+ at_xdmac_write (atxdmac , AT_XDMAC_GSWF , atchan -> mask );
1501+ while (!(at_xdmac_chan_read (atchan , AT_XDMAC_CIS ) & AT_XDMAC_CIS_FIS ))
1502+ cpu_relax ();
1503+ }
1504+
14731505 /*
14741506 * Remove size of all microblocks already transferred and the current
14751507 * one. Then add the remaining size to transfer of the current
0 commit comments