💾 hal::dma

Direct Memory Access - High Performance Data Transfer

391 lines · ~15 functions · Zero-Copy

📖 Overview

DMA (Direct Memory Access) transfers data between memory and peripherals without CPU intervention. It is used for high-speed data transfer with modules such as ADC, USART, and SPI.

🔑 Key Features

Memory-to-memory transfers
Peripheral-to-memory transfers
Memory-to-peripheral transfers
Circular/Normal mode
Priority levels (4)
8/16/32-bit data width
Transfer complete interrupts
Multiple channels/streams

🚀 Quick Start

import hal::dma

// Memory-to-memory transfer.
// Both buffers are declared as u8 so that one array element corresponds to
// one SIZE_8BIT transfer; a count of 5 then covers the whole array.
let src = [1u8, 2u8, 3u8, 4u8, 5u8]
let dst = [0u8; 5]

// No addresses or count are configured here; they are supplied per transfer
// through dma.transfer() below.
let dma_ch = dma.init(dma.DMA1, dma.CH1, do
    direction: dma.M2M,  // Memory to Memory
    src_increment: true,  // step through src
    dst_increment: true,  // step through dst
    data_size: dma.SIZE_8BIT
end)

dma.transfer(dma_ch, src.as_ptr(), dst.as_ptr(), 5)
dma.wait(dma_ch)  // Block until the transfer has completed

💡 Example: ADC with DMA (Continuous Sampling)

import hal::dma, hal::adc

// Number of 16-bit samples held in the DMA destination buffer.
const SAMPLE_COUNT = 1000

// Destination buffer for the ADC stream: adc_dma_init() hands its address to
// the DMA and calculate_average() reads it.
let adc_buffer = [0u16; SAMPLE_COUNT]

// Configure ADC1 channel 0 for continuous 12-bit conversion and set up
// DMA2 stream 0 to copy every result into adc_buffer in circular mode,
// then start both.
function adc_dma_init() do
    // --- ADC: 12-bit, continuous, DMA requests enabled ---
    adc.clock_enable()
    let converter = adc.init(adc.ADC1, do
        resolution: adc.RESOLUTION_12BIT,
        continuous: true,
        dma_enable: true
    end)

    adc.channel_config(converter, adc.CHANNEL_0, do
        sampling_time: adc.SAMPLETIME_480CYCLES
    end)

    // --- DMA: ADC data register -> adc_buffer ---
    dma.clock_enable(dma.DMA2)
    let sample_stream = dma.init(dma.DMA2, dma.STREAM0, do
        direction: dma.P2M,  // Peripheral to Memory
        src_address: adc.get_data_register(converter),
        dst_address: adc_buffer.as_ptr(),
        data_count: SAMPLE_COUNT,  // one transfer per u16 element
        src_increment: false,  // always read the same ADC register
        dst_increment: true,   // advance through the buffer
        circular: true,        // keep sampling continuously
        data_size: dma.SIZE_16BIT,
        priority: dma.PRIORITY_HIGH
    end)

    // The DMA is started before the ADC, as in the original sequence.
    dma.start(sample_stream)
    adc.start(converter)
end

// Return the arithmetic mean of all raw samples currently in adc_buffer.
function calculate_average() -> float do
    let total = 0
    each reading forde adc_buffer for do
        total += reading
    end
    let count = SAMPLE_COUNT.float()
    return total.float() / count
end

// Example driver: start ADC+DMA sampling, then print the averaged reading
// once per second.
function ana() do
    adc_dma_init()

    loop do
        // Scale the mean raw value by 3.3 / 4095.0 to express it in volts.
        let mean_raw = calculate_average()
        let volts = mean_raw * 3.3 / 4095.0
        io.println("Average voltage: {:.3f}V".formatla(volts))
        core.delay_ms(1000)
    end
end

💡 Example: USART TX with DMA

import hal::dma, hal::usart

// Completion flag: cleared by usart_dma_send() before each transfer and set
// by dma_complete_callback().
let dma_complete = false

// Callback handed to dma.init(); records that the transfer has finished.
function dma_complete_callback() do
    dma_complete = true
end

// Send `data` over `uart` using DMA1 channel 4 and block until the
// transfer-complete callback has set dma_complete.
//
// uart: USART handle to transmit on.
// data: bytes to send; an empty string returns immediately.
// Returns Ok(None) once the transfer has completed.
function usart_dma_send(uart: usart.Handle, data: str) -> Result[None, Error] do
    // Nothing to send: skip the DMA setup so the wait loop below cannot spin
    // on a completion that a zero-length transfer may never signal.
    if data.uzunluk() == 0 ise do
        return Ok(None)
    end

    // Setup DMA for USART TX
    let dma_ch = dma.init(dma.DMA1, dma.CH4, do
        direction: dma.M2P,  // Memory to Peripheral
        src_address: data.as_ptr(),
        dst_address: usart.get_tx_register(uart),
        data_count: data.uzunluk(),
        src_increment: true,
        dst_increment: false,  // Same USART register
        data_size: dma.SIZE_8BIT,
        priority: dma.PRIORITY_MEDIUM,
        complete_callback: dma_complete_callback  // field name as declared in DMAConfig
    end)

    // Clear the flag before starting so a stale value from an earlier
    // transfer cannot end the wait early.
    dma_complete = false
    dma.start(dma_ch)

    // Enable USART DMA mode
    usart.enable_dma_tx(uart)

    // Wait for completion
    loop dma_complete değilse do
        core.delay_ms(1)
    end

    return Ok(None)
end

// Example driver: bring up USART1 at 115200 baud and send one string via DMA.
function ana() do
    let uart = usart.init(usart.USART1, do
        baud_rate: 115200
    end)
    
    // NOTE(review): `?` forwards the error case of usart_dma_send(), but this
    // function declares no Result return type — confirm that is accepted here.
    usart_dma_send(uart, "Hello from DMA!\n")?
    io.println("DMA transfer complete!")
end

💡 Example: Memory-to-Memory Fast Copy

import hal::dma

// Copy `size` bytes from `src` to `dest` with a memory-to-memory DMA transfer
// on DMA2 stream 0 and block until it has finished.
//
// dest: destination buffer; must hold at least `size` bytes.
// src:  source buffer.
// size: number of bytes to copy; values <= 0 copy nothing.
function fast_memcpy(dest: *let u8, src: *u8, size: int) do
    if size > 0 ise do
        // data_count is a number of transfers of data_size each, not a byte
        // count. Byte-wide transfers are always correct; switch to word-wide
        // transfers only when the length divides evenly by 4, and scale the
        // count down so exactly `size` bytes are moved.
        let width = dma.SIZE_8BIT
        let count = size
        if size % 4 == 0 ise do
            // NOTE(review): word transfers also assume dest and src are
            // 4-byte aligned — confirm for callers.
            width = dma.SIZE_32BIT
            count = size / 4
        end

        let dma_ch = dma.init(dma.DMA2, dma.STREAM0, do
            direction: dma.M2M,  // Memory to Memory
            src_address: src,
            dst_address: dest,
            data_count: count,
            src_increment: true,
            dst_increment: true,
            data_size: width,
            priority: dma.PRIORITY_VERY_HIGH
        end)

        dma.start(dma_ch)
        dma.wait(dma_ch)
    end
end

// Example driver: fill a byte buffer with a repeating pattern, copy it with
// fast_memcpy() and print the elapsed time and throughput.
function ana() do
    // Two equally sized byte buffers for the copy test
    const SIZE = 10000
    let source = [0u8; SIZE]
    let target = [0u8; SIZE]

    // Source pattern: each element is its index modulo 256
    each idx forde 0..SIZE for do
        source[idx] = (idx % 256).u8()
    end

    // Time a single DMA copy in microseconds
    let t_begin = core.uptime_us()
    fast_memcpy(target.as_mut_ptr(), source.as_ptr(), SIZE)
    let took_us = core.uptime_us() - t_begin

    io.println("DMA copy {}KB in {}us", SIZE/1024, took_us)
    io.println("Speed: {:.2f}MB/s", SIZE.float() / took_us.float())
end

💡 Example: Double Buffer (Ping-Pong)

import hal::dma, hal::adc

// Number of 16-bit samples in each of the two DMA buffers.
const BUFFER_SIZE = 512

// The two destination buffers handed to the DMA in adc_double_buffer_init().
let buffer_a = [0u16; BUFFER_SIZE]
let buffer_b = [0u16; BUFFER_SIZE]
// Set to 1 by buffer_half_complete() and to 0 by buffer_complete().
let current_buffer = 0

// Registered as half_complete_callback: processes buffer_a, then records
// buffer index 1 in current_buffer.
// NOTE(review): current_buffer is written here but never read in this example.
function buffer_half_complete() do
    // Buffer A is full, start processing while DMA fills B
    process_samples(buffer_a)
    current_buffer = 1
end

// Registered as complete_callback: processes buffer_b, then records
// buffer index 0 in current_buffer.
function buffer_complete() do
    // Buffer B is full, start processing while DMA fills A
    process_samples(buffer_b)
    current_buffer = 0
end

// Find the largest sample in `buffer` and print it.
//
// buffer: samples to scan; an empty buffer prints 0.
function process_samples(buffer: []u16) do
    let peak = 0u16
    each value forde buffer for do
        if value > peak ise do
            peak = value
        end
    end
    io.println("Max value: {}", peak)
end

// Configure ADC1 for continuous 12-bit conversion and set up DMA2 stream 0 in
// circular double-buffer mode, writing into buffer_a and buffer_b, then start
// both. buffer_half_complete() and buffer_complete() are registered as the
// DMA callbacks.
function adc_double_buffer_init() do
    // Init ADC
    let adc1 = adc.init(adc.ADC1, do
        resolution: adc.RESOLUTION_12BIT,
        continuous: true,
        dma_enable: true
    end)

    // DMA double buffer mode.
    // The first buffer goes in `dst_address` and the second in
    // `dst_address_1`, matching the field names declared in DMAConfig.
    let dma_ch = dma.init(dma.DMA2, dma.STREAM0, do
        direction: dma.P2M,
        src_address: adc.get_data_register(adc1),
        dst_address: buffer_a.as_ptr(),    // Ping buffer
        dst_address_1: buffer_b.as_ptr(),  // Pong buffer
        data_count: BUFFER_SIZE,
        circular: true,
        double_buffer: true,
        src_increment: false,  // always read the same ADC register
        dst_increment: true,   // advance through the active buffer
        data_size: dma.SIZE_16BIT,
        half_complete_callback: buffer_half_complete,
        complete_callback: buffer_complete
    end)

    // Start the DMA before the ADC begins converting.
    dma.start(dma_ch)
    adc.start(adc1)
end

// Example driver: start double-buffered sampling, then sleep in a loop; all
// sample processing happens in the DMA callbacks.
function ana() do
    adc_double_buffer_init()
    
    // Process in background via interrupts
    loop do
        core.sleep()  // CPU sleeps while DMA works
    end
end

💡 Example: SPI Display with DMA Burst

import hal::dma, hal::spi, hal::gpio

// Display framebuffer (128x64 OLED = 1024 bytes)
// One bit per pixel: draw_pixel() packs 8 vertically adjacent pixels into
// each byte.
const WIDTH = 128
const HEIGHT = 64
let framebuffer = [0u8; (WIDTH * HEIGHT) / 8]

// Push the whole framebuffer to the display: pull pin 4 of port A low, write
// one 0x00 byte over SPI, stream the framebuffer through DMA1 stream 3 and
// wait for it, then raise the pin again.
// NOTE(review): `spi1` is not declared anywhere in this example — it must be
// provided by the surrounding program.
function display_update() do
    // Select the display (CS low)
    gpio.pin_write(gpio.PORT_A, 4, gpio.LOW)

    // Command mode byte ahead of the pixel data
    spi.write_byte(spi1, 0x00)

    // Describe the burst: framebuffer bytes -> SPI data register
    let fb_ptr = framebuffer.as_ptr()
    let spi_reg = spi.get_data_register(spi1)
    let fb_len = framebuffer.uzunluk()

    let fb_stream = dma.init(dma.DMA1, dma.STREAM3, do
        direction: dma.M2P,
        src_address: fb_ptr,
        dst_address: spi_reg,
        data_count: fb_len,
        src_increment: true,   // advance through the framebuffer
        dst_increment: false,  // always write the same SPI register
        data_size: dma.SIZE_8BIT,
        priority: dma.PRIORITY_HIGH
    end)

    dma.start(fb_stream)
    dma.wait(fb_stream)

    // Deselect the display (CS high)
    gpio.pin_write(gpio.PORT_A, 4, gpio.HIGH)
end

// Set or clear the framebuffer bit for pixel (x, y).
//
// x, y:  pixel coordinates; each byte holds 8 vertically adjacent pixels.
// color: true sets the bit, false clears it.
// NOTE(review): coordinates are not range-checked before indexing.
function draw_pixel(x: int, y: int, color: bool) do
    let page = y / 8
    let slot = x + page * WIDTH
    let bit = y % 8

    if color ise do
        framebuffer[slot] |= (1 << bit)
    end else do
        framebuffer[slot] &= ~(1 << bit)
    end
end

// Example driver: fill the framebuffer with a checkerboard and push it to the
// display in one DMA transfer.
function ana() do
    // Checkerboard: a pixel is lit when column + row is even
    each col forde 0..WIDTH for do
        each row forde 0..HEIGHT for do
            let lit = (col + row) % 2 == 0
            draw_pixel(col, row, lit)
        end
    end

    // Fast DMA update (instead of slow SPI byte-by-byte)
    display_update()  // ~8ms with DMA vs 100ms+ without
end

💡 Example: USART RX with DMA Circular Buffer

import hal::dma, hal::usart

// Size in bytes of the circular receive buffer.
const RX_BUFFER_SIZE = 256
// Ring buffer that the DMA fills with received bytes.
let rx_buffer = [0u8; RX_BUFFER_SIZE]
// Index of the next unread byte; advanced by read_byte().
let read_pos = 0

// Set up DMA1 stream 5 to copy every byte received on `uart` into rx_buffer
// in circular mode, enable DMA reception on the USART and start the stream.
//
// uart: USART handle to receive from.
function uart_dma_rx_init(uart: usart.Handle) do
    // Endpoints of the transfer: USART receive register -> ring buffer
    let rx_reg = usart.get_rx_register(uart)
    let ring_ptr = rx_buffer.as_ptr()

    let dma_ch = dma.init(dma.DMA1, dma.STREAM5, do
        direction: dma.P2M,
        src_address: rx_reg,
        dst_address: ring_ptr,
        data_count: RX_BUFFER_SIZE,
        src_increment: false,  // always read the same USART register
        dst_increment: true,   // advance through the ring buffer
        circular: true,  // Never stops, wraps around
        data_size: dma.SIZE_8BIT,
        priority: dma.PRIORITY_MEDIUM
    end)

    // Enable the USART's DMA requests first, then start the stream.
    usart.enable_dma_rx(uart)
    dma.start(dma_ch)
end

// Return how many received bytes are waiting between read_pos and the
// position derived from the DMA's remaining-transfer counter.
// NOTE(review): `dma_ch` is not declared in this function or at module level
// in this example; the handle returned by dma.init() inside
// uart_dma_rx_init() is local to that function. It needs to be stored where
// this function can reach it.
function available() -> int do
    // Get DMA write position
    let write_pos = RX_BUFFER_SIZE - dma.get_counter(dma_ch)
    
    if write_pos >= read_pos ise do
        // No wrap between reader and writer
        return write_pos - read_pos
    end else do
        // Writer has wrapped: bytes up to the buffer end plus bytes from the start
        return RX_BUFFER_SIZE - read_pos + write_pos
    end
end

// Take the next unread byte out of rx_buffer.
//
// Returns Some(byte) and advances read_pos (wrapping at RX_BUFFER_SIZE) when
// available() reports data, otherwise None.
function read_byte() -> Option[u8] do
    if available() > 0 ise do
        let value = rx_buffer[read_pos]
        let next_pos = (read_pos + 1) % RX_BUFFER_SIZE
        read_pos = next_pos
        return Some(value)
    end
    return None
end

// Example driver: receive on USART1 through the circular DMA buffer and echo
// every received byte as a character.
function ana() do
    let uart = usart.init(usart.USART1, do
        baud_rate: 115200
    end)

    uart_dma_rx_init(uart)

    loop do
        // Drain everything that arrived since the last poll. At 115200 baud
        // roughly a hundred bytes can arrive per 10 ms poll, so taking a
        // single byte per poll would let the DMA overrun the 256-byte ring.
        let pending = available()
        each i forde 0..pending for do
            match read_byte() do
                Some(byte) => io.print("{:c}", byte),
                None => {}
            end
        end
        core.delay_ms(10)
    end
end

⚙️ DMA Configuration Types

// DMA Direction: the value passed as `direction` in the DMA configuration.
enum Direction {
    P2M,  // Peripheral to Memory (ADC, USART RX)
    M2P,  // Memory to Peripheral (USART TX, DAC)
    M2M   // Memory to Memory (fast copy)
}

// Data Size: width of a single transfer, passed as `data_size`.
enum DataSize {
    SIZE_8BIT,   // Byte
    SIZE_16BIT,  // Half-word
    SIZE_32BIT   // Word
}

// Priority: the four priority levels, passed as `priority`.
// NOTE(review): the examples on this page spell these dma.PRIORITY_MEDIUM,
// dma.PRIORITY_HIGH and dma.PRIORITY_VERY_HIGH — confirm which spelling the
// module actually exports.
enum Priority {
    LOW,
    MEDIUM,
    HIGH,
    VERY_HIGH
}

// DMA Configuration: the settings block passed as the third argument of init().
// The examples on this page supply only a subset of the fields.
struct DMAConfig do
    direction: Direction,        // P2M, M2P or M2M
    src_address: *u8,            // Where data is read from
    dst_address: *u8,            // Where data is written to
    data_count: int,             // Number of transfers, each data_size wide
    src_increment: bool,         // Advance the source address after each transfer
    dst_increment: bool,         // Advance the destination address after each transfer
    circular: bool,              // Wrap around instead of stopping
    data_size: DataSize,         // Width of one transfer
    priority: Priority,          // Priority level
    double_buffer: bool,         // Optional
    dst_address_1: *u8,          // For double buffer
    half_complete_callback: fn(), // Optional
    complete_callback: fn()       // Optional
end

📚 DMA Functions

// DMA channel initialization: takes the controller, the channel/stream and a
// DMAConfig, and returns the handle used by every function below.
function init(dma: DMAInstance, channel: Channel, config: DMAConfig) -> Handle

// Transfer control
function start(handle: Handle)
function stop(handle: Handle)
function wait(handle: Handle)  // Blocking wait
function is_complete(handle: Handle) -> bool

// Status queries
function get_counter(handle: Handle) -> int  // Remaining transfers
function get_current_buffer(handle: Handle) -> int  // 0 or 1 (double buffer)

// Advanced: supply source, destination and count per call instead of in the
// configuration (transfer() is used this way in the Quick Start).
// NOTE(review): circular_transfer is presumably the circular-mode variant — confirm.
function transfer(handle: Handle, src: *u8, dst: *u8, count: int)
function circular_transfer(handle: Handle, src: *u8, dst: *u8, count: int)

// Clock control
function clock_enable(dma: DMAInstance)
function clock_disable(dma: DMAInstance)

⚡ Performance Tips

32-bit transfers: 4x faster than 8-bit for aligned data
Circular mode: Zero overhead for continuous streaming (ADC, USART)
Double buffer: Process one buffer while filling the other (zero data loss)
Priority: Set VERY_HIGH for time-critical transfers
Burst mode: Use for large block transfers (framebuffers, files)

📊 DMA Use Cases

Scenario	Direction	Mode	Advantage
ADC Continuous Sampling	P2M	Circular	CPU-free data acquisition
USART High-Speed TX	M2P	Normal	Non-blocking transmission
SPI Display Update	M2P	Normal	10-20x faster refresh
Audio Streaming	M2P (DAC)	Double Buffer	Glitch-free playback
Large Buffer Copy	M2M	Normal	Hardware acceleration

🖥️ Platform Support

STM32F1: DMA1 (7 channels), DMA2 (5 channels)
STM32F4: DMA1/DMA2 (8 streams each, FIFO support)
STM32L4: DMA1/DMA2 (7 channels each, low-power)
GD32: Compatible with STM32 DMA architecture
ESP32: GDMA (general purpose DMA)

⚠️ Important Notes

Memory Alignment: DMA buffers must reside in RAM (not in flash or peripheral memory)
Circular Mode: Used for endless, wrap-around transfers; ideal for ADC/USART streaming
NVIC Enable: When DMA interrupts are used, they must also be enabled in the NVIC
Double Buffer: Use ping-pong buffering for zero data loss
Buffer Safety: A buffer must not be modified while a DMA transfer on it is in progress
Cache Coherency: On STM32F7/H7, cache invalidation is required
Data Width: Use SIZE_32BIT for 32-bit aligned data (4x faster)
Priority: When several DMA channels share the same priority, arbitration takes place between them

🔗 Related Modules

hal::adc - ADC with DMA (continuous sampling)
hal::dac - DAC with DMA (waveform generation)
hal::usart - UART with DMA (high-speed serial)
hal::spi - SPI with DMA (display, SD card)
hal::i2c - I2C with DMA (sensor reading)
hal::int - DMA interrupts (NVIC configuration)
hal::timer - Timer-triggered DMA

📖 References

AN4031: Using STM32 DMA Controller
AN4943: DMA in STM32F7
Reference Manual: DMA Controller (DMA) chapter

← HAL Modules