-
Notifications
You must be signed in to change notification settings - Fork 49
Example: DDR3 Memory Buffer UART Loopback
This is a breakdown of a UART loopback design example that uses DDR3 memory to buffer messages.
This example is from a series of examples designed for the Arty Board.
The PipelineC source for this design can be found here. A C test program for exercising the loopback can be found here.
#include "compiler.h"
#include "wire.h"
#include "../leds/led0_3.c"
#include "../uart/uart_msg_fifos.c"
#include "../ddr3/xil_mig.c"
#include "test.h" // Constants shared with software
// Write stream of messages from uart to DDR3, and once done
// read those same messages back from DDR3 stream out over uart
// State machine that waits for incoming message over uart (async fifos)
// Then writes it to DDR memory at a specific address
// States of the uart_to_mem() state machine
typedef enum uart_to_mem_state_t
{
RESET, // Idle: 'ready' is asserted and the FSM waits for 'start'
WAIT_MSG, // Waiting for the uart rx message fifo receiver to report a complete message
SER_MSG, // Writing the buffered message to DDR3 via mig_write_256
}uart_to_mem_state_t;
// Outputs of uart_to_mem()
typedef struct uart_to_mem_t
{
uint1_t ready; // 1 while the FSM is in RESET (able to accept 'start'), else 0
xil_app_to_mig_t to_mem; // Signals toward the memory controller; null value unless the write is in progress
uint1_t done; // 1 in the iteration where the DDR3 write reports done, else 0
}uart_to_mem_t;
uart_to_mem_t uart_to_mem(uint1_t start, test_count_t msg_index, xil_mig_to_app_t from_mem)
{
  // Persistent FSM registers:
  //   state - current step of the receive-then-write sequence
  //   msg   - holds the received message, then doubles as the buffer
  //           handed to (and updated by) the DDR3 write helper
  static uart_to_mem_state_t state;
  static uart_msg_t msg;

  // Default outputs: not ready, not done, nothing driven toward memory
  uart_to_mem_t o;
  o.ready = 0;
  o.done = 0;
  o.to_mem = XIL_APP_TO_MIG_T_NULL();

  if(state==RESET)
  {
    // Idle: advertise readiness and leave only when told to start
    o.ready = 1;
    if(start)
    {
      state = WAIT_MSG;
    }
  }
  else if(state==WAIT_MSG)
  {
    // Ask the uart rx fifo receiver for a message (always ready for one)
    uart_rx_msg_fifo_receiver_t rx_result = uart_rx_msg_fifo_receiver(1);
    if(rx_result.done)
    {
      // Got a full message: latch it and move on to writing it out
      state = SER_MSG;
      msg = rx_result.msg;
    }
  }
  else if(state==SER_MSG)
  {
    // Messages are stored back to back, one UART_MSG_SIZE slot per index
    xil_mig_addr_t wr_addr = msg_index * UART_MSG_SIZE; // TODO shifts?
    mig_write_256_t wr_result = mig_write_256(1, wr_addr, msg.data, from_mem);
    // The write helper hands back the data buffer to keep for next iteration
    msg.data = wr_result.data;
    // Forward the helper's memory controller signals
    o.to_mem = wr_result.to_mem;
    if(wr_result.done)
    {
      // Whole message written: report completion and return to idle
      o.done = 1;
      state = RESET;
    }
  }

  return o;
}
// State machine controlling memory to read a message from a specific address
// and then waits for the message to be outgoing over uart (async fifo)
// States of the mem_to_uart() state machine
typedef enum mem_to_uart_state_t
{
RESET, // Idle: 'ready' is asserted and the FSM waits for 'start'
DESER_MSG, // Reading the message back from DDR3 via mig_read_256
WAIT_MSG // Waiting for the uart tx message fifo sender to report done
}mem_to_uart_state_t;
// Outputs of mem_to_uart()
typedef struct mem_to_uart_t
{
uint1_t ready; // 1 while the FSM is in RESET (able to accept 'start'), else 0
xil_app_to_mig_t to_mem; // Signals toward the memory controller; null value unless the read is in progress
uint1_t done; // 1 in the iteration where the uart tx sender reports done, else 0
}mem_to_uart_t;
mem_to_uart_t mem_to_uart(uint1_t start, test_count_t msg_index, xil_mig_to_app_t from_mem)
{
  // Persistent FSM registers:
  //   state - current step of the read-then-transmit sequence
  //   msg   - buffer filled by the DDR3 read helper, then sent out over uart
  static mem_to_uart_state_t state;
  static uart_msg_t msg;

  // led1 is lit while a message is waiting to go out over uart
  WIRE_WRITE(uint1_t, led1, state==WAIT_MSG)

  // Default outputs: not ready, not done, nothing driven toward memory
  mem_to_uart_t o;
  o.ready = 0;
  o.done = 0;
  o.to_mem = XIL_APP_TO_MIG_T_NULL();

  if(state==RESET)
  {
    // Idle: advertise readiness and leave only when told to start
    o.ready = 1;
    if(start)
    {
      state = DESER_MSG;
    }
  }
  else if(state==DESER_MSG)
  {
    // Messages are stored back to back, one UART_MSG_SIZE slot per index
    xil_mig_addr_t rd_addr = msg_index * UART_MSG_SIZE; // TODO shifts?
    mig_read_256_t rd_result = mig_read_256(1, rd_addr, msg.data, from_mem);
    // Forward the helper's memory controller signals
    o.to_mem = rd_result.to_mem;
    // The read helper hands back the data buffer to keep for next iteration
    msg.data = rd_result.data;
    if(rd_result.done)
    {
      // Full message is now buffered: go send it
      state = WAIT_MSG;
    }
  }
  else if(state==WAIT_MSG)
  {
    // Keep offering the buffered message to the uart tx fifo sender
    uart_tx_msg_fifo_sender_t tx_result = uart_tx_msg_fifo_sender(1, msg);
    if(tx_result.done)
    {
      // Sender is finished with the message: report completion, return to idle
      o.done = 1;
      state = RESET;
    }
  }

  return o;
}
// Uses above state machines to transfer messages to/from DDR memory
// Top-level phases of app()
typedef enum msg_ctrl_state_t
{
WAIT_RESET, // Holding until the memory controller is out of reset and calibrated
UART_TO_MEM, // N messages into memory
MEM_TO_UART // N messages out of memory
}msg_ctrl_state_t;
// The main process, same clock as generated memory interface
#pragma MAIN_MHZ app xil_mig_module
void app()
{
  // Registers: which phase we are in, and the index of the message
  // currently being written to / read from memory
  static msg_ctrl_state_t state;
  static test_count_t num_msgs;

  // Sample the signals coming back from the memory controller
  xil_mig_to_app_t from_mem;
  WIRE_READ(xil_mig_to_app_t, from_mem, xil_mig_to_app)

  // Signals toward the memory controller; null value unless a sub-FSM drives them
  xil_app_to_mig_t to_mem = XIL_APP_TO_MIG_T_NULL();

  // led0 is lit during the read-back phase
  WIRE_WRITE(uint1_t, led0, state==MEM_TO_UART)

  if(state==WAIT_RESET)
  {
    // Message index is held at zero for as long as we wait here
    num_msgs = 0;
    // Memory is usable once its reset is released and calibration completed
    uint1_t ddr_ready = !from_mem.ui_clk_sync_rst & from_mem.init_calib_complete;
    if(ddr_ready)
    {
      // Writes come first
      state = UART_TO_MEM;
    }
  }
  else if(state==UART_TO_MEM)
  {
    // Run the uart->memory FSM (start held high) for the current index
    uart_to_mem_t wr_fsm = uart_to_mem(1, num_msgs, from_mem);
    to_mem = wr_fsm.to_mem;
    if(wr_fsm.done)
    {
      if(num_msgs<(NUM_MSGS_TEST-1))
      {
        // More messages still to store
        num_msgs += 1;
      }
      else
      {
        // Last message stored: restart the index and switch to reading
        num_msgs = 0;
        state = MEM_TO_UART;
      }
    }
  }
  else if(state==MEM_TO_UART)
  {
    // Run the memory->uart FSM (start held high) for the current index
    mem_to_uart_t rd_fsm = mem_to_uart(1, num_msgs, from_mem);
    to_mem = rd_fsm.to_mem;
    if(rd_fsm.done)
    {
      if(num_msgs<(NUM_MSGS_TEST-1))
      {
        // More messages still to send back
        num_msgs += 1;
      }
      else
      {
        // Everything sent back: start the whole test over
        state = WAIT_RESET;
      }
    }
  }

  // Memory controller reset overrides whatever the FSM decided above
  if(from_mem.ui_clk_sync_rst)
  {
    state = WAIT_RESET;
  }

  // Hand the selected signals to the memory controller
  WIRE_WRITE(xil_app_to_mig_t, xil_app_to_mig, to_mem)
}
// Separate leds module for now since some clock cross types still TODO
#pragma MAIN app_tieoff
uint1_t app_tieoff()
{
// Input port: overflow flag from the uart rx mac
uint1_t rx_overflow;
WIRE_READ(uint1_t, rx_overflow, uart_rx_mac_overflow)
// Registers
static uint1_t overflow;
// Drive leds
//WIRE_WRITE(uint1_t, led3, !overflow)
uint1_t rv = overflow;
// Record overflow in register
overflow |= rx_overflow; // sticky or equals
return rv;
}
The compiler produces a text representation of which operations occur at which point during each function's pipeline (i.e. how long each operation takes and when). Inputs flow from top to bottom. Functions listed on the same line occur in parallel.
app
Pipeline Map:
In more advanced examples you will want to use the auto-pipelining features of PipelineC. This entirely global-variable-using example cannot be further pipelined: no further latency can be traded off for throughput. Luckily, as written it is expected to meet the UART and DDR clock timings, so no further design changes are needed.
================== Beginning Throughput Sweep ================================
================== Writing Results of Throughput Sweep ================================
Done.
-- Top level file connecting board to PipelineC generated code
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;
library UNISIM;
use UNISIM.VCOMPONENTS.ALL;
-- PipelineC packages
use work.c_structs_pkg.all;
-- Connections to the board, see xdc files, un/comment things as needed
entity board is
port (
-- Board clock input
CLK100MHZ : in std_logic;
-- Switches (inputs) and LEDs (outputs), four of each
sw : in std_logic_vector(3 downto 0);
led : out std_logic_vector(3 downto 0);
-- UART: uart_rxd_out is driven by this design, uart_txd_in is its serial input
uart_rxd_out : out std_logic;
uart_txd_in : in std_logic;
-- DDR3 memory pins, passed straight through to the ddr3_0 controller
ddr3_dq : inout std_logic_vector(15 downto 0);
ddr3_dqs_p : inout std_logic_vector(1 downto 0);
ddr3_dqs_n : inout std_logic_vector(1 downto 0);
ddr3_addr : out std_logic_vector(13 downto 0);
ddr3_ba : out std_logic_vector(2 downto 0);
ddr3_ras_n : out std_logic;
ddr3_cas_n : out std_logic;
ddr3_we_n : out std_logic;
ddr3_reset_n : out std_logic;
ddr3_ck_p : out std_logic_vector(0 downto 0); -- DDR3 clock pin, enabled in this design
ddr3_ck_n : out std_logic_vector(0 downto 0); -- DDR3 clock pin, enabled in this design
ddr3_cke : out std_logic_vector(0 downto 0);
ddr3_cs_n : out std_logic_vector(0 downto 0);
ddr3_dm : out std_logic_vector(1 downto 0);
ddr3_odt : out std_logic_vector(0 downto 0)
);
end board;
-- Structural top level: clock generation, the DDR3 memory controller (ddr3_0),
-- and the PipelineC generated entity (work.top), plus the glue process that
-- converts between flat board pins / controller ports and the PipelineC record types.
architecture arch of board is
-- General clocks based off of the board's CLK100MHZ
-- In this file clk_25 clocks top_inst and clk_200 is the memory controller's
-- reference clock; clk_50, clk_100 and clk_400 are only referenced by
-- commented-out port map lines in top_inst.
signal clk_25, clk_50, clk_100, clk_200, clk_400 : std_logic;
signal clks_ready: std_logic;
signal rst : std_logic;
component clks_sys_clk_100
port
(
-- Clock out ports
clk_25 : out std_logic;
clk_50 : out std_logic;
clk_100 : out std_logic;
clk_200 : out std_logic;
clk_400 : out std_logic;
-- Status and control signals
locked : out std_logic;
-- Clock in ports
sys_clk_100 : in std_logic
);
end component;
-- DDR clocks based off of the board's CLK100MHZ
signal ddr_sys_clk : std_logic; -- 166.66MHz
-- Copy of ddr_sys_clk; its only consumer is a commented-out top_inst port map line
signal clk_166p66 : std_logic;
signal ddr_clks_ready: std_logic;
signal ddr_sys_rst_n : std_logic;
signal ddr_sys_rst : std_logic;
component ddr_clks_sys_clk_100
port
(
-- Clock out ports
ddr_sys_clk : out std_logic;
-- Status and control signals
locked : out std_logic;
-- Clock in ports
sys_clk_100 : in std_logic
);
end component;
-- The board's DDR3 controller
-- Flat application-interface signals; the process below packs/unpacks them
-- to/from the app_to_mig / mig_to_app records.
signal app_addr : std_logic_vector(27 downto 0);
signal app_cmd : std_logic_vector(2 downto 0);
signal app_en : std_logic;
signal app_wdf_data : std_logic_vector(127 downto 0);
signal app_wdf_end : std_logic;
signal app_wdf_mask : std_logic_vector(15 downto 0);
signal app_wdf_wren : std_logic;
signal app_rd_data : std_logic_vector(127 downto 0);
signal app_rd_data_end : std_logic;
signal app_rd_data_valid : std_logic;
signal app_rdy : std_logic;
signal app_wdf_rdy : std_logic;
signal app_sr_req : std_logic;
signal app_ref_req : std_logic;
signal app_zq_req : std_logic;
signal app_sr_active : std_logic;
signal app_ref_ack : std_logic;
signal app_zq_ack : std_logic;
signal ui_clk : std_logic; -- 83.33MHz
-- Copy of ui_clk; this is the clock handed to top_inst as clk_83p33
signal clk_83p33 : std_logic;
signal ui_clk_sync_rst : std_logic;
signal init_calib_complete : std_logic;
component ddr3_0
port (
ddr3_dq : inout std_logic_vector(15 downto 0);
ddr3_dqs_p : inout std_logic_vector(1 downto 0);
ddr3_dqs_n : inout std_logic_vector(1 downto 0);
ddr3_addr : out std_logic_vector(13 downto 0);
ddr3_ba : out std_logic_vector(2 downto 0);
ddr3_ras_n : out std_logic;
ddr3_cas_n : out std_logic;
ddr3_we_n : out std_logic;
ddr3_reset_n : out std_logic;
ddr3_ck_p : out std_logic_vector(0 downto 0);
ddr3_ck_n : out std_logic_vector(0 downto 0);
ddr3_cke : out std_logic_vector(0 downto 0);
ddr3_cs_n : out std_logic_vector(0 downto 0);
ddr3_dm : out std_logic_vector(1 downto 0);
ddr3_odt : out std_logic_vector(0 downto 0);
app_addr : in std_logic_vector(27 downto 0);
app_cmd : in std_logic_vector(2 downto 0);
app_en : in std_logic;
app_wdf_data : in std_logic_vector(127 downto 0);
app_wdf_end : in std_logic;
app_wdf_mask : in std_logic_vector(15 downto 0);
app_wdf_wren : in std_logic;
app_rd_data : out std_logic_vector(127 downto 0);
app_rd_data_end : out std_logic;
app_rd_data_valid : out std_logic;
app_rdy : out std_logic;
app_wdf_rdy : out std_logic;
app_sr_req : in std_logic;
app_ref_req : in std_logic;
app_zq_req : in std_logic;
app_sr_active : out std_logic;
app_ref_ack : out std_logic;
app_zq_ack : out std_logic;
ui_clk : out std_logic;
ui_clk_sync_rst : out std_logic;
init_calib_complete : out std_logic;
-- System Clock Ports
sys_clk_i : in std_logic;
-- Reference Clock Ports
clk_ref_i : in std_logic;
sys_rst : in std_logic -- ACTIVE LOW - PORT NAME IS INCORRECT
);
end component ddr3_0;
-- Internal signals
-- Clocks
signal sys_clk_100 : std_logic;
-- Switches
-- (assigned from 'sw' below; the top_inst port that would consume it is commented out)
signal switches_wire : unsigned(3 downto 0);
-- LEDs
signal leds_wire : unsigned(3 downto 0);
-- UART
signal uart_data_in : unsigned(0 downto 0);
signal uart_data_out : unsigned(0 downto 0);
-- DDR3
-- Record-typed views of the controller's application interface (types from c_structs_pkg)
signal mig_to_app : xil_mig_to_app_t;
signal app_to_mig : xil_app_to_mig_t;
begin
-- Connect board's CLK100MHZ pin to internal global clock buffer network
CLK100MHZ_bufg_inst: BUFG
port map (
I => CLK100MHZ,
O => sys_clk_100
);
-- General clocks based off of the board's CLK100MHZ
clks_sys_clk_100_inst : clks_sys_clk_100
port map (
-- Clock out ports
clk_25 => clk_25,
clk_50 => clk_50,
clk_100 => clk_100,
clk_200 => clk_200,
clk_400 => clk_400,
-- Status and control signals
locked => clks_ready,
-- Clock in ports
sys_clk_100 => sys_clk_100
);
-- Hold in reset until clocks are ready
rst <= not clks_ready;
-- DDR clocks based off of the board's CLK100MHZ
ddr_clks_sys_clk_100_inst : ddr_clks_sys_clk_100
port map (
ddr_sys_clk => ddr_sys_clk, -- 166.66MHz
locked => ddr_clks_ready,
sys_clk_100 => sys_clk_100
);
clk_166p66 <= ddr_sys_clk;
-- Hold in reset until clocks are ready
-- (both clock generators must report locked; the controller takes the active-low version)
ddr_sys_rst <= rst or not ddr_clks_ready;
ddr_sys_rst_n <= not ddr_sys_rst;
-- The board's DDR3 controller
ddr3_0_inst : ddr3_0
port map (
-- Memory interface ports
ddr3_addr => ddr3_addr,
ddr3_ba => ddr3_ba,
ddr3_cas_n => ddr3_cas_n,
ddr3_ck_n => ddr3_ck_n,
ddr3_ck_p => ddr3_ck_p,
ddr3_cke => ddr3_cke,
ddr3_ras_n => ddr3_ras_n,
ddr3_reset_n => ddr3_reset_n,
ddr3_we_n => ddr3_we_n,
ddr3_dq => ddr3_dq,
ddr3_dqs_n => ddr3_dqs_n,
ddr3_dqs_p => ddr3_dqs_p,
init_calib_complete => init_calib_complete,
ddr3_cs_n => ddr3_cs_n,
ddr3_dm => ddr3_dm,
ddr3_odt => ddr3_odt,
-- Application interface ports
app_addr => app_addr,
app_cmd => app_cmd,
app_en => app_en,
app_wdf_data => app_wdf_data,
app_wdf_end => app_wdf_end,
app_wdf_wren => app_wdf_wren,
app_rd_data => app_rd_data,
app_rd_data_end => app_rd_data_end,
app_rd_data_valid => app_rd_data_valid,
app_rdy => app_rdy,
app_wdf_rdy => app_wdf_rdy,
app_sr_req => app_sr_req,
app_ref_req => app_ref_req,
app_zq_req => app_zq_req,
app_sr_active => app_sr_active,
app_ref_ack => app_ref_ack,
app_zq_ack => app_zq_ack,
ui_clk => ui_clk, -- 83.33MHz
ui_clk_sync_rst => ui_clk_sync_rst,
app_wdf_mask => app_wdf_mask,
-- System Clock Ports
sys_clk_i => ddr_sys_clk, -- 166.66MHz
-- Reference Clock Ports
clk_ref_i => clk_200, -- Ref always 200MHz
sys_rst => ddr_sys_rst_n -- ACTIVE LOW - PORT NAME IS INCORRECT
);
clk_83p33 <= ui_clk;
-- Un/pack IO struct types to/from flattened SLV board pins
-- TODO Code gen this...
-- Comment out wires as necessary
process(all) begin
-- LEDs
led <= std_logic_vector(leds_wire);
-- Switches
switches_wire <= unsigned(sw);
-- UART
uart_data_in(0) <= uart_txd_in;
uart_rxd_out <= uart_data_out(0);
-- DDR3
-- Record -> controller: the record holds write data and mask as per-byte arrays,
-- so each loop below walks one byte lane per mask bit (app_wdf_mask'length lanes).
app_addr <= std_logic_vector(app_to_mig.addr);
app_cmd <= std_logic_vector(app_to_mig.cmd);
app_en <= std_logic(app_to_mig.en(0));
for byte_i in 0 to app_wdf_mask'length-1 loop
app_wdf_data(((byte_i+1)*8)-1 downto (byte_i*8)) <= std_logic_vector(app_to_mig.wdf_data(byte_i));
end loop;
app_wdf_end <= std_logic(app_to_mig.wdf_end(0));
for byte_i in 0 to app_wdf_mask'length-1 loop
app_wdf_mask(byte_i) <= std_logic(app_to_mig.wdf_mask(byte_i)(0));
end loop;
app_wdf_wren <= std_logic(app_to_mig.wdf_wren(0));
-- Controller -> record: split the flat read data vector into bytes
for byte_i in 0 to app_wdf_mask'length-1 loop
mig_to_app.rd_data(byte_i) <= unsigned(app_rd_data(((byte_i+1)*8)-1 downto (byte_i*8)));
end loop;
mig_to_app.rd_data_end(0) <= app_rd_data_end;
mig_to_app.rd_data_valid(0) <= app_rd_data_valid;
mig_to_app.rdy(0) <= app_rdy;
mig_to_app.wdf_rdy(0) <= app_wdf_rdy;
app_sr_req <= std_logic(app_to_mig.sr_req(0));
app_ref_req <= std_logic(app_to_mig.ref_req(0));
app_zq_req <= std_logic(app_to_mig.zq_req(0));
mig_to_app.sr_active(0) <= app_sr_active;
mig_to_app.ref_ack(0) <= app_ref_ack;
mig_to_app.zq_ack(0) <= app_zq_ack;
mig_to_app.ui_clk_sync_rst(0) <= ui_clk_sync_rst;
mig_to_app.init_calib_complete(0) <= init_calib_complete;
end process;
-- The PipelineC generated entity
top_inst : entity work.top port map (
-- Main function clocks
-- Only the 25MHz and 83.33MHz clocks are connected here; the rest are commented out
clk_25p0 => clk_25,
--clk_50p0 => clk_50,
clk_83p33 => clk_83p33,
--clk_100p0 => clk_100,
--clk_166p66 => clk_166p66,
--clk_200p0 => clk_200,
--clk_400p0 => clk_400,
-- Each main function's inputs and outputs
-- LEDs
led0_module_return_output(0) => leds_wire(0),
led1_module_return_output(0) => leds_wire(1),
led2_module_return_output(0) => leds_wire(2),
led3_module_return_output(0) => leds_wire(3),
-- Switches
--switches_module_sw => switches_wire
-- UART
uart_module_data_in => uart_data_in,
uart_module_return_output => uart_data_out,
-- DDR3
xil_mig_module_mig_to_app => mig_to_app,
xil_mig_module_return_output => app_to_mig
);
end arch;
Resource usage: