-
Notifications
You must be signed in to change notification settings - Fork 49
Example: DDR3 Memory Buffer UART Loopback
Julian Kemmerer edited this page Jun 12, 2022
·
9 revisions
This is a breakdown of a UART loopback design example that uses DDR3 memory to buffer messages.
This example is from a series of examples designed for the Arty Board; see those instructions for building the project.
The PipelineC source for this design can be found here. A C test program for exercising the loopback can be found here.
#include "compiler.h"
#include "wire.h"
#include "../leds/led0_3.c"
#include "../uart/uart_msg_fifos.c"
#include "../ddr3/xil_mig.c"
#include "test.h" // Constants shared with software
// Write a stream of messages from uart to DDR3 and, once done,
// read those same messages back from DDR3 and stream them out over uart
// State machine that waits for incoming message over uart (async fifos)
// Then writes it to DDR memory at a specific address
// States of the uart_to_mem FSM (uart receive -> DDR3 write)
typedef enum uart_to_mem_state_t
{
RESET, // Idle: FSM reports ready and waits for start
WAIT_MSG, // Waiting for a complete message from the uart rx msg fifo
SER_MSG, // Serializing the buffered message out to DDR3
}uart_to_mem_state_t;
// Outputs of the uart_to_mem FSM
typedef struct uart_to_mem_t
{
uint1_t ready; // Set while the FSM is in its RESET state (able to accept start)
xil_app_to_mig_t to_mem; // Signals to drive into the memory controller
uint1_t done; // Set when the DDR serializer reports the message write is done
}uart_to_mem_t;
// FSM that receives one message from uart and writes it to DDR3 memory.
//   start     - when set while in RESET, the FSM advances to WAIT_MSG
//   msg_index - message slot; the DDR byte address used is msg_index * UART_MSG_SIZE
//   from_mem  - signals from the memory controller, forwarded to mig_write_256
// Returns: ready (set in RESET), to_mem (null value unless serializing),
//          done (set when the serializer reports done).
uart_to_mem_t uart_to_mem(uint1_t start, test_count_t msg_index, xil_mig_to_app_t from_mem)
{
// Registers
static uart_to_mem_state_t state; // FSM state
static uart_msg_t msg; // Message from uart / memory deserializer buffer
// Outputs
// Defaults: nothing driven to memory, not done, not ready
uart_to_mem_t o;
o.to_mem = XIL_APP_TO_MIG_T_NULL();
o.done = 0;
o.ready = 0;
if(state==RESET)
{
// Idle: signal ready and wait for the caller to start a transfer
o.ready = 1;
if(start)
{
state = WAIT_MSG;
}
}
else if(state==WAIT_MSG)
{
// Wait for valid message from uart
// (receiver is called with a constant 1 argument - presumably a ready/enable flag, confirm in uart_msg_fifos.c)
uart_rx_msg_fifo_receiver_t msg_rx = uart_rx_msg_fifo_receiver(1);
if(msg_rx.done)
{
// Then begin serializing it
msg = msg_rx.msg;
state = SER_MSG;
}
}
else if(state==SER_MSG)
{
// Begin ddr serializer
xil_mig_addr_t byte_addr = msg_index * UART_MSG_SIZE; // TODO shifts?
mig_write_256_t ser = mig_write_256(1, byte_addr, msg.data, from_mem);
o.to_mem = ser.to_mem;
// The serializer hands back the data buffer, which is stored again in the msg register
msg.data = ser.data;
// Wait until serializer done
if(ser.done)
{
// Then all the way done, back to start
o.done = 1;
state = RESET;
}
}
return o;
}
// State machine controlling memory to read a message from a specific address
// and then waits for the message to be outgoing over uart (async fifo)
// States of the mem_to_uart FSM (DDR3 read -> uart transmit)
typedef enum mem_to_uart_state_t
{
RESET, // Idle: FSM reports ready and waits for start
DESER_MSG, // Deserializing a message out of DDR3 into the message buffer
WAIT_MSG // Waiting for the message to be sent out over uart
}mem_to_uart_state_t;
// Outputs of the mem_to_uart FSM
typedef struct mem_to_uart_t
{
uint1_t ready; // Set while the FSM is in its RESET state (able to accept start)
xil_app_to_mig_t to_mem; // Signals to drive into the memory controller
uint1_t done; // Set when the uart tx sender reports the message is done
}mem_to_uart_t;
// FSM that reads one message from DDR3 memory and sends it out over uart.
//   start     - when set while in RESET, the FSM advances to DESER_MSG
//   msg_index - message slot; the DDR byte address used is msg_index * UART_MSG_SIZE
//   from_mem  - signals from the memory controller, forwarded to mig_read_256
// Returns: ready (set in RESET), to_mem (null value unless deserializing),
//          done (set when the uart sender reports done).
// Also drives the led1 wire while waiting for the uart send to finish.
mem_to_uart_t mem_to_uart(uint1_t start, test_count_t msg_index, xil_mig_to_app_t from_mem)
{
// Registers
static mem_to_uart_state_t state; // FSM state
static uart_msg_t msg; // Message from memory deserializer buffer / into uart
// Drive leds
// led1 is on while in the WAIT_MSG state
WIRE_WRITE(uint1_t, led1, state==WAIT_MSG)
// Outputs
// Defaults: nothing driven to memory, not done, not ready
mem_to_uart_t o;
o.to_mem = XIL_APP_TO_MIG_T_NULL();
o.done = 0;
o.ready = 0;
if(state==RESET)
{
// Idle: signal ready and wait for the caller to start a transfer
o.ready = 1;
if(start)
{
state = DESER_MSG;
}
}
else if(state==DESER_MSG)
{
// Begin ddr deserializer
xil_mig_addr_t byte_addr = msg_index * UART_MSG_SIZE; // TODO shifts?
mig_read_256_t deser = mig_read_256(1, byte_addr, msg.data, from_mem);
// The deserializer takes the current buffer and returns the updated one
msg.data = deser.data;
o.to_mem = deser.to_mem;
// Wait until deserializer done and we have full message
if(deser.done)
{
// Then wait until message goes out over uart
state = WAIT_MSG;
}
}
else if(state==WAIT_MSG)
{
// Begin trying to send msg out
uart_tx_msg_fifo_sender_t msg_tx = uart_tx_msg_fifo_sender(1, msg);
// Wait for message to go out over uart
if(msg_tx.done)
{
// Then all the way done, back to start
o.done = 1;
state = RESET;
}
}
return o;
}
// Uses above state machines to transfer messages to/from DDR memory
// States of the top level message controller in app()
typedef enum msg_ctrl_state_t
{
WAIT_RESET, // Waiting for the memory controller reset/calibration to finish
UART_TO_MEM, // N messages into memory
MEM_TO_UART // N messages out of memory
}msg_ctrl_state_t;
// The main process, same clock as generated memory interface
// Sequence: wait for DDR reset/calibration, write NUM_MSGS_TEST uart messages
// into memory, read the same number back out over uart, then repeat.
// Takes no arguments and returns nothing: communicates through the
// xil_mig_to_app / xil_app_to_mig wires and the led0 wire.
#pragma MAIN_MHZ app xil_mig_module
void app()
{
// Input port: read outputs wires from memory controller
xil_mig_to_app_t from_mem;
WIRE_READ(xil_mig_to_app_t, from_mem, xil_mig_to_app)
// Output port wire: into memory controller
// Defaults to the null value; overridden below by whichever FSM is active
xil_app_to_mig_t to_mem = XIL_APP_TO_MIG_T_NULL();
// Registers
static msg_ctrl_state_t state;
static test_count_t num_msgs; // Index of the message currently being written/read
// Drive leds
// led0 is on while messages are being read back out of memory
WIRE_WRITE(uint1_t, led0, state==MEM_TO_UART)
// MEM CTRL FSM
if(state==WAIT_RESET)
{
// Wait for DDR reset to be done
// Done once the sync reset is released and calibration is complete
uint1_t mem_rst_done = !from_mem.ui_clk_sync_rst & from_mem.init_calib_complete;
if(mem_rst_done)
{
// Start things with writes first
state = UART_TO_MEM;
}
// Message index is held at zero for as long as this state is active
num_msgs = 0;
}
else if(state==UART_TO_MEM)
{
// Keep starting the uart_to_mem fsm until N messages have been written to mem
// (start is tied to 1, so the writer restarts itself as soon as it is back in its RESET state)
uart_to_mem_t writer = uart_to_mem(1, num_msgs, from_mem);
to_mem = writer.to_mem;
if(writer.done)
{
// next message ?
if(num_msgs<(NUM_MSGS_TEST-1))
{
// Do next message
num_msgs += 1;
}
else
{
// Done writing messages, onto reads
state = MEM_TO_UART;
num_msgs = 0;
}
}
}
else if(state==MEM_TO_UART)
{
// Keep starting the mem_to_uart fsm until N messages have been read from mem
// (start is tied to 1, so the reader restarts itself as soon as it is back in its RESET state)
mem_to_uart_t reader = mem_to_uart(1, num_msgs, from_mem);
to_mem = reader.to_mem;
if(reader.done)
{
// next message ?
if(num_msgs<(NUM_MSGS_TEST-1))
{
// Do next message
num_msgs += 1;
}
else
{
// Done read messages, repeat from reset
// (num_msgs is cleared again by the WAIT_RESET state)
state = WAIT_RESET;
}
}
}
// Resets
// Placed after the FSM so the reset assignment overrides any state change made above
if(from_mem.ui_clk_sync_rst)
{
state = WAIT_RESET;
}
// Drive wires into memory controller
WIRE_WRITE(xil_app_to_mig_t, xil_app_to_mig, to_mem)
}
In more advanced examples you will want to use the auto-pipelining features of PipelineC. This stateful function example cannot be further pipelined: no further latency can be traded off for throughput. Luckily, as written, it is expected to meet the UART and DDR clock timings as needed, so no further design changes are required.
================== Beginning Throughput Sweep ================================
Function: led0_module Target MHz: 83.33
Function: led1_module Target MHz: 83.33
Function: led2_module Target MHz: 83.33
Function: led3_module Target MHz: 83.33
Function: uart_module Target MHz: 25.0
Function: uart_rx_mac Target MHz: 25.0
Function: uart_tx_mac Target MHz: 25.0
Function: uart_rx_msg Target MHz: 25.0
Function: uart_tx_msg Target MHz: 25.0
Function: uart_rx_msg_fifo_module Target MHz: 25.0
Function: uart_tx_msg_fifo_module Target MHz: 25.0
Function: xil_mig_module Target MHz: 83.33
Function: app Target MHz: 83.33
Function: app_tieoff Target MHz: 25.0
WARNING: uart_tx_msg_fifo async fifo depth increased to minimum allowed = 16
WARNING: uart_rx_msg_fifo async fifo depth increased to minimum allowed = 16
Starting with blank sweep state...
...determining slicing information for each main function...
led0_module : 0 clocks latency, sliced coarsely...
led1_module : 0 clocks latency, sliced coarsely...
led2_module : 0 clocks latency, sliced coarsely...
led3_module : 0 clocks latency, sliced coarsely...
uart_module : 0 clocks latency, sliced coarsely...
uart_rx_mac : 0 clocks latency, sliced coarsely...
uart_tx_mac : 0 clocks latency, sliced coarsely...
uart_rx_msg : 0 clocks latency, sliced coarsely...
uart_tx_msg : 0 clocks latency, sliced coarsely...
uart_rx_msg_fifo_module : 0 clocks latency, sliced coarsely...
uart_tx_msg_fifo_module : 0 clocks latency, sliced coarsely...
xil_mig_module : 0 clocks latency, sliced coarsely...
app : 0 clocks latency, sliced coarsely...
app_tieoff : 0 clocks latency, sliced coarsely...
Running: /media/1TB/Programs/Linux/Xilinx/Vivado/2019.2/bin/vivado -journal /home/julian/pipelinec_syn_output/top/vivado.jou -log /home/julian/pipelinec_syn_output/top/vivado_2784.log -mode batch -source "/home/julian/pipelinec_syn_output/top/top_2784.tcl"
Clock Goal (MHz): 25.0 , Current MHz: 159.6169193934557 ( 6.265000000000001 ns)
Clock Goal (MHz): 83.33333333333333 , Current MHz: 129.87012987012986 ( 7.7 ns)
Found maximum pipeline latencies...
================== Writing Results of Throughput Sweep ================================
Done.
-- Top level file connecting board to PipelineC generated code
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;
library UNISIM;
use UNISIM.VCOMPONENTS.ALL;
-- PipelineC packages
use work.c_structs_pkg.all;
-- Connections to the board, see xdc files, un/comment things as needed
-- Top level ports: 100MHz clock input, switches, LEDs, UART pins and DDR3 pins
entity board is
port (
CLK100MHZ : in std_logic;
sw : in std_logic_vector(3 downto 0);
led : out std_logic_vector(3 downto 0);
-- NOTE: uart_rxd_out is an output and uart_txd_in is an input of this design
uart_rxd_out : out std_logic;
uart_txd_in : in std_logic;
-- DDR3 memory pins, connected straight through to the ddr3_0 controller
ddr3_dq : inout std_logic_vector(15 downto 0);
ddr3_dqs_p : inout std_logic_vector(1 downto 0);
ddr3_dqs_n : inout std_logic_vector(1 downto 0);
ddr3_addr : out std_logic_vector(13 downto 0);
ddr3_ba : out std_logic_vector(2 downto 0);
ddr3_ras_n : out std_logic;
ddr3_cas_n : out std_logic;
ddr3_we_n : out std_logic;
ddr3_reset_n : out std_logic;
ddr3_ck_p : out std_logic_vector(0 downto 0); -- Uncomment to use DDR3
ddr3_ck_n : out std_logic_vector(0 downto 0); -- Uncomment to use DDR3
ddr3_cke : out std_logic_vector(0 downto 0);
ddr3_cs_n : out std_logic_vector(0 downto 0);
ddr3_dm : out std_logic_vector(1 downto 0);
ddr3_odt : out std_logic_vector(0 downto 0)
);
end board;
architecture arch of board is
-- General clocks based off of the board's CLK100MHZ
-- In this file only clk_25 (PipelineC clk_25p0) and clk_200 (DDR reference clock)
-- are connected to anything; the other clocks are generated but left unused.
signal clk_25, clk_50, clk_100, clk_200, clk_400 : std_logic;
signal clks_ready: std_logic; -- Driven by the 'locked' output of clks_sys_clk_100
signal rst : std_logic; -- Active high, asserted while clks_ready is low
-- Component declaration for the general clock generator
component clks_sys_clk_100
port
(
-- Clock out ports
clk_25 : out std_logic;
clk_50 : out std_logic;
clk_100 : out std_logic;
clk_200 : out std_logic;
clk_400 : out std_logic;
-- Status and control signals
locked : out std_logic;
-- Clock in ports
sys_clk_100 : in std_logic
);
end component;
-- DDR clocks based off of the board's CLK100MHZ
signal ddr_sys_clk : std_logic; -- 166.66MHz
signal clk_166p66 : std_logic; -- Copy of ddr_sys_clk named after its frequency
signal ddr_clks_ready: std_logic; -- Driven by the 'locked' output of ddr_clks_sys_clk_100
signal ddr_sys_rst_n : std_logic; -- Active low version of ddr_sys_rst, fed to the DDR3 controller
signal ddr_sys_rst : std_logic; -- Active high, asserted until both clock generators are ready
-- Component declaration for the DDR system clock generator
component ddr_clks_sys_clk_100
port
(
-- Clock out ports
ddr_sys_clk : out std_logic;
-- Status and control signals
locked : out std_logic;
-- Clock in ports
sys_clk_100 : in std_logic
);
end component;
-- The board's DDR3 controller
-- The app_* signals below are the flattened application interface of the controller;
-- they are converted to/from the PipelineC record types in the un/pack process.
signal app_addr : std_logic_vector(27 downto 0);
signal app_cmd : std_logic_vector(2 downto 0);
signal app_en : std_logic;
signal app_wdf_data : std_logic_vector(127 downto 0);
signal app_wdf_end : std_logic;
signal app_wdf_mask : std_logic_vector(15 downto 0);
signal app_wdf_wren : std_logic;
signal app_rd_data : std_logic_vector(127 downto 0);
signal app_rd_data_end : std_logic;
signal app_rd_data_valid : std_logic;
signal app_rdy : std_logic;
signal app_wdf_rdy : std_logic;
signal app_sr_req : std_logic;
signal app_ref_req : std_logic;
signal app_zq_req : std_logic;
signal app_sr_active : std_logic;
signal app_ref_ack : std_logic;
signal app_zq_ack : std_logic;
signal ui_clk : std_logic; -- 83.33MHz
signal clk_83p33 : std_logic; -- Copy of ui_clk named after its frequency
signal ui_clk_sync_rst : std_logic;
signal init_calib_complete : std_logic;
-- Component declaration for the DDR3 memory controller
component ddr3_0
port (
-- Memory interface ports (to the DDR3 pins of the board)
ddr3_dq : inout std_logic_vector(15 downto 0);
ddr3_dqs_p : inout std_logic_vector(1 downto 0);
ddr3_dqs_n : inout std_logic_vector(1 downto 0);
ddr3_addr : out std_logic_vector(13 downto 0);
ddr3_ba : out std_logic_vector(2 downto 0);
ddr3_ras_n : out std_logic;
ddr3_cas_n : out std_logic;
ddr3_we_n : out std_logic;
ddr3_reset_n : out std_logic;
ddr3_ck_p : out std_logic_vector(0 downto 0);
ddr3_ck_n : out std_logic_vector(0 downto 0);
ddr3_cke : out std_logic_vector(0 downto 0);
ddr3_cs_n : out std_logic_vector(0 downto 0);
ddr3_dm : out std_logic_vector(1 downto 0);
ddr3_odt : out std_logic_vector(0 downto 0);
-- Application interface ports
app_addr : in std_logic_vector(27 downto 0);
app_cmd : in std_logic_vector(2 downto 0);
app_en : in std_logic;
app_wdf_data : in std_logic_vector(127 downto 0);
app_wdf_end : in std_logic;
app_wdf_mask : in std_logic_vector(15 downto 0);
app_wdf_wren : in std_logic;
app_rd_data : out std_logic_vector(127 downto 0);
app_rd_data_end : out std_logic;
app_rd_data_valid : out std_logic;
app_rdy : out std_logic;
app_wdf_rdy : out std_logic;
app_sr_req : in std_logic;
app_ref_req : in std_logic;
app_zq_req : in std_logic;
app_sr_active : out std_logic;
app_ref_ack : out std_logic;
app_zq_ack : out std_logic;
ui_clk : out std_logic;
ui_clk_sync_rst : out std_logic;
init_calib_complete : out std_logic;
-- System Clock Ports
sys_clk_i : in std_logic;
-- Reference Clock Ports
clk_ref_i : in std_logic;
sys_rst : in std_logic -- ACTIVE LOW - PORT NAME IS INCORRECT
);
end component ddr3_0;
-- Internal signals
-- These carry board pin values in the types used by the PipelineC generated 'top' entity
-- Clocks
signal sys_clk_100 : std_logic; -- CLK100MHZ after the global clock buffer
-- Switches
signal switches_wire : unsigned(3 downto 0);
-- LEDs
signal leds_wire : unsigned(3 downto 0);
-- UART
signal uart_data_in : unsigned(0 downto 0);
signal uart_data_out : unsigned(0 downto 0);
-- DDR3
signal mig_to_app : xil_mig_to_app_t; -- Memory controller outputs, into PipelineC code
signal app_to_mig : xil_app_to_mig_t; -- PipelineC code outputs, into memory controller
begin
-- Connect board's CLK100MHZ pin to internal global clock buffer network
-- The buffered clock sys_clk_100 is the input to both clock generators below
CLK100MHZ_bufg_inst: BUFG
port map (
I => CLK100MHZ,
O => sys_clk_100
);
-- General clocks based off of the board's CLK100MHZ
-- Generates the 25/50/100/200/400MHz clocks from sys_clk_100
clks_sys_clk_100_inst : clks_sys_clk_100
port map (
-- Clock out ports
clk_25 => clk_25,
clk_50 => clk_50,
clk_100 => clk_100,
clk_200 => clk_200,
clk_400 => clk_400,
-- Status and control signals
locked => clks_ready,
-- Clock in ports
sys_clk_100 => sys_clk_100
);
-- Hold in reset until clocks are ready
rst <= not clks_ready;
-- DDR clocks based off of the board's CLK100MHZ
-- Generates the system clock for the DDR3 controller from sys_clk_100
ddr_clks_sys_clk_100_inst : ddr_clks_sys_clk_100
port map (
ddr_sys_clk => ddr_sys_clk, -- 166.66MHz
locked => ddr_clks_ready,
sys_clk_100 => sys_clk_100
);
clk_166p66 <= ddr_sys_clk;
-- Hold in reset until clocks are ready
-- (reset stays asserted until both the general and the DDR clock generators are ready)
ddr_sys_rst <= rst or not ddr_clks_ready;
ddr_sys_rst_n <= not ddr_sys_rst;
-- The board's DDR3 controller
-- Memory side connects directly to the board pins; application side connects to
-- the app_* signals that the un/pack process converts to/from PipelineC records.
ddr3_0_inst : ddr3_0
port map (
-- Memory interface ports
ddr3_addr => ddr3_addr,
ddr3_ba => ddr3_ba,
ddr3_cas_n => ddr3_cas_n,
ddr3_ck_n => ddr3_ck_n,
ddr3_ck_p => ddr3_ck_p,
ddr3_cke => ddr3_cke,
ddr3_ras_n => ddr3_ras_n,
ddr3_reset_n => ddr3_reset_n,
ddr3_we_n => ddr3_we_n,
ddr3_dq => ddr3_dq,
ddr3_dqs_n => ddr3_dqs_n,
ddr3_dqs_p => ddr3_dqs_p,
init_calib_complete => init_calib_complete,
ddr3_cs_n => ddr3_cs_n,
ddr3_dm => ddr3_dm,
ddr3_odt => ddr3_odt,
-- Application interface ports
app_addr => app_addr,
app_cmd => app_cmd,
app_en => app_en,
app_wdf_data => app_wdf_data,
app_wdf_end => app_wdf_end,
app_wdf_wren => app_wdf_wren,
app_rd_data => app_rd_data,
app_rd_data_end => app_rd_data_end,
app_rd_data_valid => app_rd_data_valid,
app_rdy => app_rdy,
app_wdf_rdy => app_wdf_rdy,
app_sr_req => app_sr_req,
app_ref_req => app_ref_req,
app_zq_req => app_zq_req,
app_sr_active => app_sr_active,
app_ref_ack => app_ref_ack,
app_zq_ack => app_zq_ack,
ui_clk => ui_clk, -- 83.33MHz
ui_clk_sync_rst => ui_clk_sync_rst,
app_wdf_mask => app_wdf_mask,
-- System Clock Ports
sys_clk_i => ddr_sys_clk, -- 166.66MHz
-- Reference Clock Ports
clk_ref_i => clk_200, -- Ref always 200MHz
sys_rst => ddr_sys_rst_n -- ACTIVE LOW - PORT NAME IS INCORRECT
);
-- The controller's ui_clk output is the clock used for the PipelineC clk_83p33 domain
clk_83p33 <= ui_clk;
-- Un/pack IO struct types to/from flattened SLV board pins
-- TODO Code gen this...
-- Commented out wires as necessary
-- Purely combinatorial conversions (process sensitive to all signals read)
process(all) begin
-- LEDs
led <= std_logic_vector(leds_wire);
-- Switches
switches_wire <= unsigned(sw);
-- UART
uart_data_in(0) <= uart_txd_in;
uart_rxd_out <= uart_data_out(0);
-- DDR3
-- Outputs of the PipelineC code, into the memory controller
app_addr <= std_logic_vector(app_to_mig.addr);
app_cmd <= std_logic_vector(app_to_mig.cmd);
app_en <= std_logic(app_to_mig.en(0));
-- Write data is a per-byte array in the record; pack it into the flat vector, byte 0 in bits 7 downto 0
for byte_i in 0 to app_wdf_mask'length-1 loop
app_wdf_data(((byte_i+1)*8)-1 downto (byte_i*8)) <= std_logic_vector(app_to_mig.wdf_data(byte_i));
end loop;
app_wdf_end <= std_logic(app_to_mig.wdf_end(0));
-- One mask bit per data byte
for byte_i in 0 to app_wdf_mask'length-1 loop
app_wdf_mask(byte_i) <= std_logic(app_to_mig.wdf_mask(byte_i)(0));
end loop;
app_wdf_wren <= std_logic(app_to_mig.wdf_wren(0));
-- Outputs of the memory controller, into the PipelineC code
-- Read data is unpacked into per-byte elements; app_wdf_mask'length is reused as the byte count
for byte_i in 0 to app_wdf_mask'length-1 loop
mig_to_app.rd_data(byte_i) <= unsigned(app_rd_data(((byte_i+1)*8)-1 downto (byte_i*8)));
end loop;
mig_to_app.rd_data_end(0) <= app_rd_data_end;
mig_to_app.rd_data_valid(0) <= app_rd_data_valid;
mig_to_app.rdy(0) <= app_rdy;
mig_to_app.wdf_rdy(0) <= app_wdf_rdy;
app_sr_req <= std_logic(app_to_mig.sr_req(0));
app_ref_req <= std_logic(app_to_mig.ref_req(0));
app_zq_req <= std_logic(app_to_mig.zq_req(0));
mig_to_app.sr_active(0) <= app_sr_active;
mig_to_app.ref_ack(0) <= app_ref_ack;
mig_to_app.zq_ack(0) <= app_zq_ack;
mig_to_app.ui_clk_sync_rst(0) <= ui_clk_sync_rst;
mig_to_app.init_calib_complete(0) <= init_calib_complete;
end process;
-- The PipelineC generated entity
-- Only the 25MHz and 83.33MHz clock ports are connected for this design;
-- the other clock ports are left commented out.
top_inst : entity work.top port map (
-- Main function clocks
clk_25p0 => clk_25,
--clk_50p0 => clk_50,
clk_83p33 => clk_83p33,
--clk_100p0 => clk_100,
--clk_166p66 => clk_166p66,
--clk_200p0 => clk_200,
--clk_400p0 => clk_400,
-- Each main function's inputs and outputs
-- LEDs
led0_module_return_output(0) => leds_wire(0),
led1_module_return_output(0) => leds_wire(1),
led2_module_return_output(0) => leds_wire(2),
led3_module_return_output(0) => leds_wire(3),
-- Switches
--switches_module_sw => switches_wire
-- UART
uart_module_data_in => uart_data_in,
uart_module_return_output => uart_data_out,
-- DDR3
xil_mig_module_mig_to_app => mig_to_app,
xil_mig_module_return_output => app_to_mig
);
end arch;
Resource usage: