Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support PSRAM in DmaTxBuf #2161

Merged
merged 25 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
306d118
support psram in DmaTxBuf
liebman Sep 15, 2024
008c8cd
add example that sometimes works :-(
liebman Sep 15, 2024
b231c8b
fmt
liebman Sep 15, 2024
07f780c
cleanups
liebman Sep 15, 2024
b1376b3
allow chunk_size upto (including) 4095
liebman Sep 16, 2024
179e926
this test is passing for me now
liebman Sep 16, 2024
1e37d4f
remove chunk_size and compute based on block_size
liebman Sep 16, 2024
5b2b4c9
return error in `prepare_transfer` if psram is found on non-esp32s3
liebman Sep 18, 2024
12acacd
missing parens
liebman Sep 18, 2024
9e2067d
changelog
liebman Sep 18, 2024
9fce2f4
default 4092 for esp32 & fmt
liebman Sep 18, 2024
b264ee2
no errors anymode
liebman Sep 19, 2024
1e62e91
use block_size is_some to flag invalid psram in prepare_transfer
liebman Sep 19, 2024
510b392
drop block_size from macro, the buffer allocation was not being align…
liebman Sep 19, 2024
af58a61
missed macro example
liebman Sep 19, 2024
70e98d7
use defmt::Format that decodes owner like Debug
liebman Sep 19, 2024
fa7cce0
fix typo
liebman Sep 19, 2024
6c9ff4c
DmaTxBuf: its an error if buffer is in psram and block_size is none
liebman Sep 20, 2024
d18fea7
DmaTxBuf: its an error if buffer is in psram and block_size is none
liebman Sep 20, 2024
37f7bff
update for PSRAM feature changes
liebman Sep 20, 2024
9162e63
Merge branch 'main' into dma-psram-tx-buf
JurajSadel Sep 24, 2024
51028c6
address alignment comments
liebman Sep 24, 2024
d452ef6
fmt
liebman Sep 24, 2024
b24f066
better alignment test
liebman Sep 24, 2024
65adcdf
revert alignment test
liebman Sep 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions esp-hal/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Allow handling interrupts while trying to lock critical section on multi-core chips. (#2197)
- Removed the PS-RAM related features, replaced by `quad-psram`/`octal-psram`, `init_psram` takes a configuration parameter, it's now possible to auto-detect PS-RAM size (#2178)
- `EspTwaiFrame` constructors now accept any type that converts into `esp_hal::twai::Id` (#2207)
- Change `DmaTxBuf` to support PSRAM on `esp32s3` (#2161)

### Fixed

Expand Down
195 changes: 182 additions & 13 deletions esp-hal/src/dma/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ where
bitfield::bitfield! {
#[doc(hidden)]
#[derive(Clone, Copy)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
pub struct DmaDescriptorFlags(u32);

u16;
Expand All @@ -226,6 +225,20 @@ impl Debug for DmaDescriptorFlags {
}
}

#[cfg(feature = "defmt")]
impl defmt::Format for DmaDescriptorFlags {
fn format(&self, fmt: defmt::Formatter<'_>) {
defmt::write!(
fmt,
"DmaDescriptorFlags {{ size: {}, length: {}, suc_eof: {}, owner: {} }}",
self.size(),
self.length(),
self.suc_eof(),
if self.owner() { "DMA" } else { "CPU" }
);
}
}

/// A DMA transfer descriptor.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
Expand Down Expand Up @@ -286,6 +299,8 @@ use enumset::{EnumSet, EnumSetType};
pub use self::gdma::*;
#[cfg(pdma)]
pub use self::pdma::*;
#[cfg(esp32s3)]
use crate::soc::is_slice_in_psram;
use crate::{interrupt::InterruptHandler, soc::is_slice_in_dram, Mode};

#[cfg(gdma)]
Expand Down Expand Up @@ -558,7 +573,7 @@ macro_rules! dma_circular_buffers_chunk_size {
macro_rules! dma_descriptors_chunk_size {
liebman marked this conversation as resolved.
Show resolved Hide resolved
($rx_size:expr, $tx_size:expr, $chunk_size:expr) => {{
// these will check for size at compile time
const _: () = ::core::assert!($chunk_size <= 4092, "chunk size must be <= 4092");
const _: () = ::core::assert!($chunk_size <= 4095, "chunk size must be <= 4095");
const _: () = ::core::assert!($chunk_size > 0, "chunk size must be > 0");

static mut RX_DESCRIPTORS: [$crate::dma::DmaDescriptor;
Expand Down Expand Up @@ -593,7 +608,7 @@ macro_rules! dma_descriptors_chunk_size {
macro_rules! dma_circular_descriptors_chunk_size {
($rx_size:expr, $tx_size:expr, $chunk_size:expr) => {{
// these will check for size at compile time
const _: () = ::core::assert!($chunk_size <= 4092, "chunk size must be <= 4092");
const _: () = ::core::assert!($chunk_size <= 4095, "chunk size must be <= 4095");
const _: () = ::core::assert!($chunk_size > 0, "chunk size must be > 0");

const rx_descriptor_len: usize = if $rx_size > $chunk_size * 2 {
Expand All @@ -620,6 +635,33 @@ macro_rules! dma_circular_descriptors_chunk_size {
};
}

/// Convenience macro to create a DmaTxBuf from buffer size. The buffer and
/// descriptors are statically allocated and used to create the `DmaTxBuf`.
///
/// ## Usage
/// ```rust,no_run
#[doc = crate::before_snippet!()]
/// use esp_hal::dma_tx_buffer;
/// use esp_hal::dma::DmaBufBlkSize;
///
/// let tx_buf =
/// dma_tx_buffer!(32000);
/// # }
/// ```
#[macro_export]
macro_rules! dma_tx_buffer {
($tx_size:expr) => {{
const TX_DESCRIPTOR_LEN: usize =
$crate::dma::DmaTxBuf::compute_descriptor_count($tx_size, None);
$crate::declare_aligned_dma_buffer!(TX_BUFFER, $tx_size);
static mut TX_DESCRIPTORS: [$crate::dma::DmaDescriptor; TX_DESCRIPTOR_LEN] =
[$crate::dma::DmaDescriptor::EMPTY; TX_DESCRIPTOR_LEN];
let tx_buffer = $crate::as_mut_byte_array!(TX_BUFFER, $tx_size);
let tx_descriptors = unsafe { &mut TX_DESCRIPTORS };
$crate::dma::DmaTxBuf::new(tx_descriptors, tx_buffer)
liebman marked this conversation as resolved.
Show resolved Hide resolved
}};
}

/// DMA Errors
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
Expand Down Expand Up @@ -1001,6 +1043,16 @@ pub enum DmaExtMemBKSize {
Size64 = 2,
}

impl From<DmaBufBlkSize> for DmaExtMemBKSize {
liebman marked this conversation as resolved.
Show resolved Hide resolved
fn from(size: DmaBufBlkSize) -> Self {
match size {
DmaBufBlkSize::Size16 => DmaExtMemBKSize::Size16,
DmaBufBlkSize::Size32 => DmaExtMemBKSize::Size32,
DmaBufBlkSize::Size64 => DmaExtMemBKSize::Size64,
}
}
}

pub(crate) struct TxCircularState {
write_offset: usize,
write_descr_ptr: *mut DmaDescriptor,
Expand Down Expand Up @@ -1414,7 +1466,6 @@ where
if des.buffer as usize % alignment != 0 && des.size() % alignment != 0 {
return Err(DmaError::InvalidAlignment);
}
// TODO: make this optional?
crate::soc::cache_invalidate_addr(des.buffer as u32, des.size() as u32);
}
}
Expand Down Expand Up @@ -1632,6 +1683,7 @@ where
peri: DmaPeripheral,
chain: &DescriptorChain,
) -> Result<(), DmaError> {
// TODO: based on the ESP32-S3 TRM the alignment check is not needed for TX!
// for esp32s3 we check each descriptor buffer that points to psram for
// alignment and writeback the cache for that buffer
#[cfg(esp32s3)]
Expand All @@ -1657,7 +1709,19 @@ where
buffer: &mut BUF,
) -> Result<(), DmaError> {
let preparation = buffer.prepare();

cfg_if::cfg_if!(
if #[cfg(esp32s3)] {
if let Some(block_size) = preparation.block_size {
self.set_ext_mem_block_size(block_size.into());
}
} else {
// we insure that block_size is some only for PSRAM addresses
if preparation.block_size.is_some() {
return Err(DmaError::UnsupportedMemoryRegion);
}
}
);
// TODO: Get burst mode from DmaBuf.
self.tx_impl
.prepare_transfer_without_start(preparation.start, peri)
}
Expand Down Expand Up @@ -1819,6 +1883,10 @@ where
/// Holds all the information needed to configure a DMA channel for a transfer.
pub struct Preparation {
start: *mut DmaDescriptor,
/// block size for PSRAM transfers (TODO: enable burst mode for non external
/// memory?)
#[cfg_attr(not(esp32s3), allow(dead_code))]
block_size: Option<DmaBufBlkSize>,
// burst_mode, alignment, check_owner, etc.
}

Expand Down Expand Up @@ -1861,22 +1929,41 @@ pub trait DmaRxBuffer {

/// Error returned from Dma[Rx|Tx|RxTx]Buf operations.
#[derive(Debug)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
pub enum DmaBufError {
/// More descriptors are needed for the buffer size
InsufficientDescriptors,
/// Descriptors or buffers are not located in a supported memory region
UnsupportedMemoryRegion,
/// Buffer is not aligned to the required size
InvalidAlignment,
/// Invalid chunk size: must be > 0 and <= 4095
InvalidChunkSize,
}

/// DMA buffer allignments
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
pub enum DmaBufBlkSize {
/// 16 bytes
Size16 = 16,
/// 32 bytes
Size32 = 32,
/// 64 bytes
Size64 = 64,
}

/// DMA transmit buffer
///
/// This is a contiguous buffer linked together by DMA descriptors of length
/// 4092. It can only be used for transmitting data to a peripheral's FIFO.
/// See [DmaRxBuf] for receiving data.
/// 4095 at most. It can only be used for transmitting data to a peripheral's
/// FIFO. See [DmaRxBuf] for receiving data.
#[derive(Debug)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
pub struct DmaTxBuf {
descriptors: &'static mut [DmaDescriptor],
buffer: &'static mut [u8],
block_size: Option<DmaBufBlkSize>,
}

impl DmaTxBuf {
Expand All @@ -1886,23 +1973,87 @@ impl DmaTxBuf {
/// Each descriptor can handle 4092 bytes worth of buffer.
///
/// Both the descriptors and buffer must be in DMA-capable memory.
/// Only DRAM is supported.
/// Only DRAM is supported for descriptors.
pub fn new(
descriptors: &'static mut [DmaDescriptor],
buffer: &'static mut [u8],
) -> Result<Self, DmaBufError> {
let min_descriptors = buffer.len().div_ceil(CHUNK_SIZE);
Self::new_with_block_size(descriptors, buffer, None)
}

/// Compute max chunk size based on block size
pub const fn compute_chunk_size(block_size: Option<DmaBufBlkSize>) -> usize {
match block_size {
Some(size) => 4096 - size as usize,
#[cfg(esp32)]
None => 4092, // esp32 requires 4 byte alignment
#[cfg(not(esp32))]
None => 4095,
}
}

/// Compute the number of descriptors required for a given block size and
/// buffer size
pub const fn compute_descriptor_count(
buffer_size: usize,
block_size: Option<DmaBufBlkSize>,
) -> usize {
buffer_size.div_ceil(Self::compute_chunk_size(block_size))
}

/// Creates a new [DmaTxBuf] from some descriptors and a buffer.
///
/// There must be enough descriptors for the provided buffer.
/// Each descriptor can handle at most 4095 bytes worth of buffer.
/// Optionally, a block size can be provided for PSRAM & Burst transfers.
///
/// Both the descriptors and buffer must be in DMA-capable memory.
/// Only DRAM is supported for descriptors.
pub fn new_with_block_size(
descriptors: &'static mut [DmaDescriptor],
buffer: &'static mut [u8],
block_size: Option<DmaBufBlkSize>,
) -> Result<Self, DmaBufError> {
let chunk_size = Self::compute_chunk_size(block_size);
let min_descriptors = Self::compute_descriptor_count(buffer.len(), block_size);
if descriptors.len() < min_descriptors {
return Err(DmaBufError::InsufficientDescriptors);
}

if !is_slice_in_dram(descriptors) || !is_slice_in_dram(buffer) {
// descriptors are required to be in DRAM
if !is_slice_in_dram(descriptors) {
return Err(DmaBufError::UnsupportedMemoryRegion);
}

cfg_if::cfg_if! {
if #[cfg(esp32s3)] {
// buffer can be either DRAM or PSRAM (if supported)
if !is_slice_in_dram(buffer) && !is_slice_in_psram(buffer) {
return Err(DmaBufError::UnsupportedMemoryRegion);
}
// if its PSRAM, the block_size/alignment must be specified
if is_slice_in_psram(buffer) && block_size.is_none() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we do any validation here, so in theory, could we simplify all this? Could we, instead of erroring if block_size is None, assume the strictest alignment requirement, and use that for our chunk size? If yes, do we even need manually specifying block_size? The difference between 4080 and 4032 byte chunks sounds mostly negligible to me.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we do any validation here

Isn't this the validation? Oh you mean that we don't check that the buffer aligns to the block size? That's because it's TX, fair it could be skipped.

imo the user should be able to choose and the choice should be explicit. Using bigger block sizes has bandwidth consequences for PSRAM.

Would moving the psram support to a different type get out of your way?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would moving the psram support to a different type get out of your way?

This isn't exactly in my way, I was just wondering. But I missed the bandwidth part of the question so I'm fine with the rest :)

return Err(DmaBufError::InvalidAlignment);
}
} else {
#[cfg(any(esp32,esp32s2))]
if buffer.len() % 4 != 0 && buffer.as_ptr() as usize % 4 != 0 {
bugadani marked this conversation as resolved.
Show resolved Hide resolved
// ESP32 requires word alignment for DMA buffers.
// ESP32-S2 technically supports byte-aligned DMA buffers, but the
// transfer ends up writing out of bounds if the buffer's length
// is 2 or 3 (mod 4).
return Err(DmaBufError::InvalidAlignment);
}
// buffer can only be DRAM
if !is_slice_in_dram(buffer) {
return Err(DmaBufError::UnsupportedMemoryRegion);
}
}
}

// Setup size and buffer pointer as these will not change for the remainder of
// this object's lifetime
let chunk_iter = descriptors.iter_mut().zip(buffer.chunks_mut(CHUNK_SIZE));
let chunk_iter = descriptors.iter_mut().zip(buffer.chunks_mut(chunk_size));
for (desc, chunk) in chunk_iter {
desc.set_size(chunk.len());
desc.buffer = chunk.as_mut_ptr();
Expand All @@ -1911,9 +2062,13 @@ impl DmaTxBuf {
let mut buf = Self {
descriptors,
buffer,
block_size,
};
buf.set_length(buf.capacity());

// no need for block size if the buffer is in DRAM
if is_slice_in_dram(buf.buffer) {
buf.block_size = None;
}
liebman marked this conversation as resolved.
Show resolved Hide resolved
Ok(buf)
}

Expand Down Expand Up @@ -1949,7 +2104,7 @@ impl DmaTxBuf {
assert!(len <= self.buffer.len());

// Get the minimum number of descriptors needed for this length of data.
let descriptor_count = len.div_ceil(CHUNK_SIZE).max(1);
let descriptor_count = len.div_ceil(self.descriptors[0].size()).max(1);
let required_descriptors = &mut self.descriptors[0..descriptor_count];

// Link up the relevant descriptors.
Expand Down Expand Up @@ -2010,8 +2165,19 @@ impl DmaTxBuffer for DmaTxBuf {
}
}

#[cfg(esp32s3)]
if crate::soc::is_valid_psram_address(self.buffer.as_ptr() as u32) {
unsafe {
crate::soc::cache_writeback_addr(
self.buffer.as_ptr() as u32,
self.buffer.len() as u32,
)
};
}

Preparation {
start: self.descriptors.as_mut_ptr(),
block_size: self.block_size,
}
}

Expand Down Expand Up @@ -2248,6 +2414,7 @@ impl DmaRxBuffer for DmaRxBuf {

Preparation {
start: self.descriptors.as_mut_ptr(),
block_size: None,
}
}

Expand Down Expand Up @@ -2440,6 +2607,7 @@ impl DmaTxBuffer for DmaRxTxBuf {

Preparation {
start: self.tx_descriptors.as_mut_ptr(),
block_size: None, // TODO: support block size!
}
}

Expand Down Expand Up @@ -2469,6 +2637,7 @@ impl DmaRxBuffer for DmaRxTxBuf {

Preparation {
start: self.rx_descriptors.as_mut_ptr(),
block_size: None, // TODO: support block size!
}
}

Expand Down
7 changes: 7 additions & 0 deletions esp-hal/src/soc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ pub(crate) fn is_valid_psram_address(address: u32) -> bool {
false
}

#[allow(unused)]
pub(crate) fn is_slice_in_psram<T>(slice: &[T]) -> bool {
let start = slice.as_ptr() as u32;
let end = start + slice.len() as u32;
is_valid_psram_address(start) && is_valid_psram_address(end)
}

#[allow(unused)]
pub(crate) fn is_valid_memory_address(address: u32) -> bool {
is_valid_ram_address(address) || is_valid_psram_address(address)
Expand Down
Loading