💾 hal::dma
Direct Memory Access - High Performance Data Transfer
391 lines
~15 functions
Zero-Copy
📖 Overview
DMA (Direct Memory Access), CPU müdahalesi olmadan bellek ve çevre birimleri arasında veri transferi sağlar. ADC, USART, SPI gibi modüllerle yüksek hızlı veri aktarımı için kullanılır.
🔑 Key Features
- Memory-to-memory transfers
- Peripheral-to-memory transfers
- Memory-to-peripheral transfers
- Circular/Normal mode
- Priority levels (4)
- 8/16/32-bit data width
- Transfer complete interrupts
- Multiple channels/streams
🚀 Quick Start
import hal::dma
// Memory-to-memory transfer
// Both buffers are declared as u8 so that one array element equals one
// SIZE_8BIT transfer; a count of 5 then copies the whole array.
let src = [1u8, 2u8, 3u8, 4u8, 5u8]
let dst = [0u8; 5]
let dma_ch = dma.init(dma.DMA1, dma.CH1, do
    direction: dma.M2M, // Memory to Memory
    src_increment: true, // Walk through src
    dst_increment: true, // Walk through dst
    data_size: dma.SIZE_8BIT
end)
// Addresses and count are supplied per transfer rather than in the config
dma.transfer(dma_ch, src.as_ptr(), dst.as_ptr(), 5)
dma.wait(dma_ch) // Wait for completion
💡 Example: ADC with DMA (Continuous Sampling)
import hal::dma, hal::adc
// Number of 16-bit samples held in the circular capture buffer
const SAMPLE_COUNT = 1000
// Destination buffer that the DMA stream keeps overwriting with ADC readings
let adc_buffer = [0u16; SAMPLE_COUNT]
// Configures ADC1 channel 0 for continuous 12-bit conversion and sets up
// DMA2 stream 0 to copy every result into adc_buffer in circular mode.
// The DMA stream is started before the ADC.
function adc_dma_init() do
    // --- ADC side ---
    adc.clock_enable()
    let converter = adc.init(adc.ADC1, do
        dma_enable: true,
        continuous: true,
        resolution: adc.RESOLUTION_12BIT
    end)
    adc.channel_config(converter, adc.CHANNEL_0, do
        sampling_time: adc.SAMPLETIME_480CYCLES
    end)
    // --- DMA side ---
    dma.clock_enable(dma.DMA2)
    let stream = dma.init(dma.DMA2, dma.STREAM0, do
        direction: dma.P2M, // Peripheral to Memory
        priority: dma.PRIORITY_HIGH,
        data_size: dma.SIZE_16BIT,
        data_count: SAMPLE_COUNT,
        circular: true, // Wrap around for continuous sampling
        src_address: adc.get_data_register(converter),
        src_increment: false, // Always read the same ADC register
        dst_address: adc_buffer.as_ptr(),
        dst_increment: true // Step through the buffer
    end)
    dma.start(stream)
    adc.start(converter)
end
// Returns the arithmetic mean of all SAMPLE_COUNT raw readings currently
// stored in adc_buffer, as a float.
function calculate_average() -> float do
    // Integer accumulator; converted to float only for the final division
    let total = 0
    each reading forde adc_buffer for do
        total += reading
    end
    let count = SAMPLE_COUNT.float()
    return total.float() / count
end
// Entry point of the ADC example: starts the capture, then prints the
// averaged reading once per second.
function ana() do
    adc_dma_init()
    loop do
        // Scale the mean raw reading by 3.3 / 4095.0 to express it in volts
        let volts = calculate_average() * 3.3 / 4095.0
        io.println("Average voltage: {:.3f}V".formatla(volts))
        core.delay_ms(1000)
    end
end
💡 Example: USART TX with DMA
import hal::dma, hal::usart
// Completion flag shared between the DMA callback and usart_dma_send():
// cleared before a transfer is started, set by the callback below.
let dma_complete = false
// Callback registered with the DMA channel; it only raises the flag so the
// waiting code in usart_dma_send() can leave its polling loop.
function dma_complete_callback() do
dma_complete = true
end
// Sends `data` over `uart` through DMA1 channel 4 and blocks until the
// completion callback has set dma_complete.
//
// uart: USART handle whose TX register is the transfer destination
// data: string to transmit; one 8-bit transfer per byte
// Returns Ok(None) after the completion flag has been observed.
// NOTE(review): the wait loop has no timeout, so no Err value is ever
// produced here - confirm whether a timeout/error path is wanted.
function usart_dma_send(uart: usart.Handle, data: str) -> Result[None, Error] do
    // Setup DMA for USART TX
    let dma_ch = dma.init(dma.DMA1, dma.CH4, do
        direction: dma.M2P, // Memory to Peripheral
        src_address: data.as_ptr(),
        dst_address: usart.get_tx_register(uart),
        data_count: data.uzunluk(),
        src_increment: true,
        dst_increment: false, // Same USART register
        data_size: dma.SIZE_8BIT,
        priority: dma.PRIORITY_MEDIUM,
        // DMAConfig declares this field as complete_callback (there is no
        // plain `callback` field)
        complete_callback: dma_complete_callback
    end)
    // Clear the flag before the transfer can possibly finish
    dma_complete = false
    dma.start(dma_ch)
    // Enable USART DMA mode
    usart.enable_dma_tx(uart)
    // Wait for completion
    loop dma_complete değilse do
        core.delay_ms(1)
    end
    return Ok(None)
end
// Entry point of the USART TX example: opens USART1 at 115200 baud, sends
// one string through DMA and reports when the call has returned.
function ana() do
    let port = usart.init(usart.USART1, do
        baud_rate: 115200
    end)
    // NOTE(review): `?` is applied here although ana() declares no Result
    // return type - confirm that the language allows this in an entry point.
    usart_dma_send(port, "Hello from DMA!\n")?
    io.println("DMA transfer complete!")
end
💡 Example: Memory-to-Memory Fast Copy
import hal::dma
// Copies `size` bytes from `src` to `dest` with a blocking memory-to-memory
// DMA transfer on DMA2 stream 0.
//
// dest: destination buffer, at least `size` bytes
// src:  source buffer, at least `size` bytes
// size: number of BYTES to copy; 0 is a no-op
//
// data_count is counted in units of data_size (the other examples pass the
// element count of their u16 buffers together with SIZE_16BIT), so a byte
// count must be divided by 4 before it is used with SIZE_32BIT. Passing the
// byte count unchanged would move four times the requested amount.
// NOTE(review): word-wide transfers presumably require 4-byte aligned
// pointers - confirm against the target's DMA controller.
function fast_memcpy(dest: *let u8, src: *u8, size: int) do
    // Nothing to transfer for an empty request
    if size > 0 ise do
        // Default to byte-wide transfers, which work for any size
        let width = dma.SIZE_8BIT
        let count = size
        // Use 32-bit transfers for efficiency when the size is a whole
        // number of words
        if size % 4 == 0 ise do
            width = dma.SIZE_32BIT
            count = size / 4
        end
        let dma_ch = dma.init(dma.DMA2, dma.STREAM0, do
            direction: dma.M2M, // Memory to Memory
            src_address: src,
            dst_address: dest,
            data_count: count,
            src_increment: true,
            dst_increment: true,
            data_size: width,
            priority: dma.PRIORITY_VERY_HIGH
        end)
        dma.start(dma_ch)
        dma.wait(dma_ch)
    end
end
// Entry point of the memory-copy example: fills a 10000-byte source buffer
// with a repeating 0..255 pattern, copies it with fast_memcpy() and prints
// the elapsed time and throughput.
function ana() do
    const SIZE = 10000
    let source = [0u8; SIZE]
    let target = [0u8; SIZE]
    // Test pattern: each byte holds its own index modulo 256
    each idx forde 0..SIZE for do
        source[idx] = (idx % 256).u8()
    end
    // Time only the copy itself
    let t0 = core.uptime_us()
    fast_memcpy(target.as_mut_ptr(), source.as_ptr(), SIZE)
    let took = core.uptime_us() - t0
    io.println("DMA copy {}KB in {}us", SIZE/1024, took)
    // Bytes per microsecond, reported as MB/s
    io.println("Speed: {:.2f}MB/s", SIZE.float() / took.float())
end
💡 Example: Double Buffer (Ping-Pong)
import hal::dma, hal::adc
// Number of 16-bit samples in each of the two capture buffers
const BUFFER_SIZE = 512
// Ping buffer (first DMA destination)
let buffer_a = [0u16; BUFFER_SIZE]
// Pong buffer (second DMA destination)
let buffer_b = [0u16; BUFFER_SIZE]
// Set by the two callbacks below (1 after buffer_a was processed, 0 after
// buffer_b); nothing in this example reads it.
let current_buffer = 0
// Registered as half_complete_callback: processes buffer_a and then records
// 1 in current_buffer.
// NOTE(review): this treats the half-complete event as "buffer A is full".
// On STM32 stream DMA in hardware double-buffer mode the half-transfer event
// refers to half of the current buffer - confirm how this HAL maps it.
function buffer_half_complete() do
// Buffer A is full, start processing while DMA fills B
process_samples(buffer_a)
current_buffer = 1
end
// Registered as complete_callback: processes buffer_b and then records 0 in
// current_buffer.
function buffer_complete() do
// Buffer B is full, start processing while DMA fills A
process_samples(buffer_b)
current_buffer = 0
end
// Scans `buffer` for its largest sample and prints it.
// buffer: slice of raw 16-bit samples; an empty slice prints 0.
function process_samples(buffer: []u16) do
    // Running maximum, starting from the smallest possible u16
    let peak = 0u16
    each value forde buffer for do
        if value > peak ise do
            peak = value
        end
    end
    io.println("Max value: {}", peak)
end
// Configures ADC1 for continuous 12-bit conversion and DMA2 stream 0 in
// circular double-buffer mode, alternating between buffer_a and buffer_b.
// buffer_half_complete / buffer_complete are registered as the half-complete
// and complete callbacks. The DMA stream is started before the ADC.
function adc_double_buffer_init() do
    // Init ADC
    let adc1 = adc.init(adc.ADC1, do
        resolution: adc.RESOLUTION_12BIT,
        continuous: true,
        dma_enable: true
    end)
    // DMA double buffer mode
    let dma_ch = dma.init(dma.DMA2, dma.STREAM0, do
        direction: dma.P2M,
        src_address: adc.get_data_register(adc1),
        // DMAConfig names the first destination dst_address (there is no
        // dst_address_0 field); dst_address_1 is the second buffer.
        dst_address: buffer_a.as_ptr(), // Ping buffer
        dst_address_1: buffer_b.as_ptr(), // Pong buffer
        data_count: BUFFER_SIZE,
        circular: true,
        double_buffer: true,
        src_increment: false, // Always read the same ADC register
        dst_increment: true,
        data_size: dma.SIZE_16BIT,
        half_complete_callback: buffer_half_complete,
        complete_callback: buffer_complete
    end)
    dma.start(dma_ch)
    adc.start(adc1)
end
// Entry point of the double-buffer example: starts the capture and then
// only sleeps; all sample processing happens in the DMA callbacks.
function ana() do
adc_double_buffer_init()
// Process in background via interrupts
loop do
core.sleep() // CPU sleeps while DMA works
end
end
💡 Example: SPI Display with DMA Burst
import hal::dma, hal::spi, hal::gpio
// Display framebuffer (128x64 OLED = 1024 bytes), one bit per pixel
const WIDTH = 128
const HEIGHT = 64
let framebuffer = [0u8; (WIDTH * HEIGHT) / 8]
// Pushes the whole framebuffer to the display: pulls pin 4 of port A low,
// writes one 0x00 byte over SPI, streams the framebuffer with a blocking
// DMA transfer, then drives the pin high again.
// NOTE(review): `spi1` is not defined anywhere in this example - it must be
// initialised by surrounding code.
function display_update() do
    // Select the display (CS low)
    gpio.pin_write(gpio.PORT_A, 4, gpio.LOW)
    // Command: set column/page address
    spi.write_byte(spi1, 0x00) // Command mode
    // One DMA burst for the entire framebuffer
    let burst = dma.init(dma.DMA1, dma.STREAM3, do
        direction: dma.M2P,
        priority: dma.PRIORITY_HIGH,
        data_size: dma.SIZE_8BIT,
        data_count: framebuffer.uzunluk(),
        src_address: framebuffer.as_ptr(),
        src_increment: true,
        dst_address: spi.get_data_register(spi1),
        dst_increment: false
    end)
    dma.start(burst)
    dma.wait(burst)
    // NOTE(review): dma.wait() returning may not mean the SPI has finished
    // shifting out the last byte - confirm before relying on CS timing.
    // Deselect the display (CS high)
    gpio.pin_write(gpio.PORT_A, 4, gpio.HIGH)
end
// Sets or clears one pixel in framebuffer.
// x, y:  pixel coordinates; the byte is selected by x + (y / 8) * WIDTH and
//        the bit within it by y % 8
// color: true sets the bit, false clears it
// No bounds checking is performed on x or y.
function draw_pixel(x: int, y: int, color: bool) do
    // Each byte covers a vertical run of 8 pixels
    let page = y / 8
    let byte_index = x + page * WIDTH
    let mask = 1 << (y % 8)
    if color ise do
        framebuffer[byte_index] |= mask
    end else do
        framebuffer[byte_index] &= ~mask
    end
end
// Entry point of the display example: fills the framebuffer with a
// checkerboard and sends it to the display in one DMA update.
function ana() do
    // Test pattern: a pixel is lit when x + y is even
    each col forde 0..WIDTH for do
        each row forde 0..HEIGHT for do
            let lit = (col + row) % 2 == 0
            draw_pixel(col, row, lit)
        end
    end
    // Fast DMA update (instead of slow SPI byte-by-byte)
    display_update() // ~8ms with DMA vs 100ms+ without
end
💡 Example: USART RX with DMA Circular Buffer
import hal::dma, hal::usart
// Size in bytes of the circular receive buffer
const RX_BUFFER_SIZE = 256
// Ring buffer that the DMA stream fills continuously
let rx_buffer = [0u8; RX_BUFFER_SIZE]
// Index of the next byte the application will consume
let read_pos = 0
// Handle of the RX DMA stream. Kept at module level because available()
// needs it after uart_dma_rx_init() has returned; None until then.
let rx_dma_ch: Option[dma.Handle] = None

// Sets up DMA1 stream 5 to copy every byte received on `uart` into
// rx_buffer in circular mode, enables the USART's DMA receive mode and
// starts the stream. The handle is stored in rx_dma_ch for available().
function uart_dma_rx_init(uart: usart.Handle) do
    // DMA in circular mode for continuous RX
    let dma_ch = dma.init(dma.DMA1, dma.STREAM5, do
        direction: dma.P2M,
        src_address: usart.get_rx_register(uart),
        dst_address: rx_buffer.as_ptr(),
        data_count: RX_BUFFER_SIZE,
        src_increment: false,
        dst_increment: true,
        circular: true, // Never stops, wraps around
        data_size: dma.SIZE_8BIT,
        priority: dma.PRIORITY_MEDIUM
    end)
    usart.enable_dma_rx(uart)
    dma.start(dma_ch)
    rx_dma_ch = Some(dma_ch)
end

// Returns the number of received bytes that have not been consumed yet,
// or 0 when uart_dma_rx_init() has not been called.
function available() -> int do
    // Remaining transfers reported by the DMA; treated as "buffer untouched"
    // when no stream has been set up yet
    let remaining = match rx_dma_ch do
        Some(ch) => dma.get_counter(ch),
        None => RX_BUFFER_SIZE
    end
    // Get DMA write position; the modulo keeps it inside 0..RX_BUFFER_SIZE
    // even when the counter reads 0
    let write_pos = (RX_BUFFER_SIZE - remaining) % RX_BUFFER_SIZE
    if write_pos >= read_pos ise do
        return write_pos - read_pos
    end else do
        // Writer has wrapped around past the end of the buffer
        return RX_BUFFER_SIZE - read_pos + write_pos
    end
end
// Pops the oldest unread byte from rx_buffer.
// Returns Some(byte) and advances read_pos (wrapping at RX_BUFFER_SIZE)
// when data is pending, otherwise None.
function read_byte() -> Option[u8] do
    if available() > 0 ise do
        let value = rx_buffer[read_pos]
        // Step to the next slot of the ring
        read_pos = (read_pos + 1) % RX_BUFFER_SIZE
        return Some(value)
    end
    return None
end
// Entry point of the USART RX example: opens USART1 at 115200 baud, starts
// circular DMA reception and echoes every received byte.
function ana() do
    let uart = usart.init(usart.USART1, do
        baud_rate: 115200
    end)
    uart_dma_rx_init(uart)
    loop do
        // Drain everything that arrived since the last poll. Consuming only
        // one byte per 10 ms pass cannot keep up with a 115200-baud stream
        // and would let the DMA overwrite unread data.
        let pending = available()
        each n forde 0..pending for do
            match read_byte() do
                Some(byte) => io.print("{:c}", byte),
                None => {}
            end
        end
        core.delay_ms(10)
    end
end
⚙️ DMA Yapılandırma Tipleri
// DMA Direction
// Selects source and destination kind of a transfer; the examples pass it
// as the `direction` field of the init configuration (dma.P2M, dma.M2P,
// dma.M2M).
enum Direction {
P2M, // Peripheral to Memory (ADC, USART RX)
M2P, // Memory to Peripheral (USART TX, DAC)
M2M // Memory to Memory (fast copy)
}
// Data Size
// Width of a single transfer; passed as the `data_size` field of the init
// configuration.
enum DataSize {
SIZE_8BIT, // Byte
SIZE_16BIT, // Half-word
SIZE_32BIT // Word
}
// Priority
// Priority level of a channel/stream, lowest to highest.
// NOTE(review): the examples on this page refer to these values as
// dma.PRIORITY_MEDIUM, dma.PRIORITY_HIGH and dma.PRIORITY_VERY_HIGH, while
// the members here carry no PRIORITY_ prefix - confirm which spelling the
// module actually exports.
enum Priority {
LOW,
MEDIUM,
HIGH,
VERY_HIGH
}
// DMA Configuration
// Settings passed to init() as the `do ... end` configuration argument.
struct DMAConfig do
direction: Direction, // P2M, M2P or M2M
src_address: *u8, // Where data is read from (memory or peripheral register)
dst_address: *u8, // Where data is written to; first buffer in double-buffer mode
data_count: int, // Number of transfers, counted in units of data_size
src_increment: bool, // false keeps reading the same address (e.g. a peripheral register)
dst_increment: bool, // false keeps writing the same address (e.g. a peripheral register)
circular: bool, // Wrap around instead of stopping
data_size: DataSize, // Width of one transfer
priority: Priority,
double_buffer: bool, // Optional
dst_address_1: *u8, // For double buffer
half_complete_callback: fn(), // Optional
complete_callback: fn() // Optional
end
📚 DMA Fonksiyonları
// DMA channel initialization
// Takes a controller instance, a channel/stream and a configuration;
// returns the handle that every other function below operates on.
function init(dma: DMAInstance, channel: Channel, config: DMAConfig) -> Handle
// Transfer control
function start(handle: Handle)
function stop(handle: Handle)
function wait(handle: Handle) // Blocking wait
function is_complete(handle: Handle) -> bool
// Status queries
function get_counter(handle: Handle) -> int // Remaining transfers
function get_current_buffer(handle: Handle) -> int // 0 or 1 (double buffer)
// Advanced
// Source, destination and count are given per call instead of in DMAConfig.
// NOTE(review): Quick Start calls transfer() without a preceding start(),
// so it presumably also starts the transfer - confirm.
function transfer(handle: Handle, src: *u8, dst: *u8, count: int)
function circular_transfer(handle: Handle, src: *u8, dst: *u8, count: int)
// Clock control
function clock_enable(dma: DMAInstance)
function clock_disable(dma: DMAInstance)
⚡ Performance Tips
- 32-bit transfers: 4x faster than 8-bit for aligned data
- Circular mode: Zero overhead for continuous streaming (ADC, USART)
- Double buffer: Process one buffer while filling the other (zero data loss)
- Priority: Set VERY_HIGH for time-critical transfers
- Burst mode: Use for large block transfers (framebuffers, files)
📊 DMA Use Cases
| Senaryo | Direction | Mode | Avantaj |
|---|---|---|---|
| ADC Continuous Sampling | P2M | Circular | CPU-free data acquisition |
| USART High-Speed TX | M2P | Normal | Non-blocking transmission |
| SPI Display Update | M2P | Normal | 10-20x faster refresh |
| Audio Streaming | M2P (DAC) | Double Buffer | Glitch-free playback |
| Large Buffer Copy | M2M | Normal | Hardware acceleration |
🖥️ Platform Support
- STM32F1: DMA1 (7 channels), DMA2 (5 channels)
- STM32F4: DMA1/DMA2 (8 streams each, FIFO support)
- STM32L4: DMA1/DMA2 (7 channels each, low-power)
- GD32: Compatible with STM32 DMA architecture
- ESP32: GDMA (general purpose DMA)
⚠️ Important Notes
- Memory Alignment: DMA buffer'ları RAM'de olmalıdır (not flash/peripheral memory)
- Circular Mode: Infinite loop için kullanılır, ADC/USART streaming için ideal
- NVIC Enable: DMA interrupt kullanırken NVIC enable edilmelidir
- Double Buffer: Zero data loss için ping-pong buffering kullanın
- Buffer Safety: DMA transfer sırasında buffer modify edilmemelidir
- Cache Coherency: STM32F7/H7'de cache invalidation gerekir
- Data Width: 32-bit aligned data için SIZE_32BIT kullanın (4x hızlı)
- Priority: Aynı öncelikte multiple DMA kullanımında arbitration olur
🔗 Related Modules
- hal::adc - ADC with DMA (continuous sampling)
- hal::dac - DAC with DMA (waveform generation)
- hal::usart - UART with DMA (high-speed serial)
- hal::spi - SPI with DMA (display, SD card)
- hal::i2c - I2C with DMA (sensor reading)
- hal::int - DMA interrupts (NVIC configuration)
- hal::timer - Timer-triggered DMA
📖 References
- AN4031: Using STM32 DMA Controller
- AN4943: DMA in STM32F7
- Reference Manual: DMA Controller (DMA) chapter