Skip to content

Example: DDR3 Memory Buffer UART Loopback

Julian Kemmerer edited this page Nov 4, 2020 · 9 revisions

This is a breakdown of a UART loopback design example that uses DDR3 memory to buffer messages.

This example is from a series of examples designed for the Arty Board.

The PipelineC source for this design can be found here. A C test program for exercising the loopback can be found here.

Source

#include "compiler.h"
#include "wire.h"
#include "../leds/led0_3.c"
#include "../uart/uart_msg_fifos.c"
#include "../ddr3/xil_mig.c"
#include "test.h" // Constants shared with software

// Write a stream of messages from uart to DDR3 and, once done,
// read those same messages back from DDR3 and stream them out over uart

// State machine that waits for incoming message over uart (async fifos)
// Then writes it to DDR memory at a specific address
// States of the uart_to_mem FSM, in the order they are visited
typedef enum uart_to_mem_state_t {
  RESET,    // Idle: reports ready and waits for start
  WAIT_MSG, // Waiting for a complete message from the uart rx fifo
  SER_MSG,  // Serializing the buffered message into DDR3
} uart_to_mem_state_t;
// Outputs produced by each evaluation of uart_to_mem()
typedef struct uart_to_mem_t {
  uint1_t ready;           // Set while the FSM is in RESET
  xil_app_to_mig_t to_mem; // Signals to drive into the memory controller
  uint1_t done;            // Set when the DDR3 write serializer reports done
} uart_to_mem_t;
// Receives one message from the uart rx fifo and writes it to DDR3.
//   start     - only checked in RESET; nonzero moves the FSM to WAIT_MSG
//   msg_index - message slot; the DDR3 byte address is msg_index * UART_MSG_SIZE.
//               It is read while in SER_MSG (not captured at start), so the
//               caller presumably needs to hold it stable - TODO confirm
//   from_mem  - memory controller outputs, handed to mig_write_256
// Returns: ready = 1 while in RESET, to_mem = memory controller inputs
// (the null value outside SER_MSG), done = 1 when mig_write_256 reports done.
uart_to_mem_t uart_to_mem(uint1_t start, test_count_t msg_index, xil_mig_to_app_t from_mem)
{
  // Registers
  static uart_to_mem_state_t state; // FSM state
  static uart_msg_t msg; // Message from uart / memory serializer buffer
  
  // Outputs: default to a null memory command, not done, not ready
  uart_to_mem_t o;
  o.to_mem = XIL_APP_TO_MIG_T_NULL();
  o.done = 0;
  o.ready = 0;
  
  if(state==RESET)
  {
    // Idle: advertise ready until told to start
    o.ready = 1;
    if(start)
    {
      state = WAIT_MSG;
    }
  }
  else if(state==WAIT_MSG)
  {
    // Wait for valid message from uart
    // NOTE(review): the constant 1 argument is presumably a ready/enable flag - confirm in uart_msg_fifos.c
    uart_rx_msg_fifo_receiver_t msg_rx = uart_rx_msg_fifo_receiver(1);
    if(msg_rx.done)
    {
      // Then begin serializing it
      msg = msg_rx.msg;
      state = SER_MSG;
    }
  }
  else if(state==SER_MSG)
  {
    // Run the ddr write serializer on the buffered message
    xil_mig_addr_t byte_addr = msg_index * UART_MSG_SIZE; // TODO shifts?
    mig_write_256_t ser = mig_write_256(1, byte_addr, msg.data, from_mem);
    o.to_mem = ser.to_mem;
    // The serializer hands back the data buffer, which is stored for the next iteration
    msg.data = ser.data;
    // Wait until serializer done
    if(ser.done)
    {
      // Then all the way done, back to start
      o.done = 1;
      state = RESET;
    }
  }
  
  return o;
}

// State machine that drives the memory to read a message from a specific address
// and then waits for that message to go out over uart (async fifo)
// States of the mem_to_uart FSM, in the order they are visited
typedef enum mem_to_uart_state_t {
  RESET,     // Idle: reports ready and waits for start
  DESER_MSG, // Deserializing a message out of DDR3
  WAIT_MSG   // Waiting for the message to be sent through the uart tx fifo
} mem_to_uart_state_t;
// Outputs produced by each evaluation of mem_to_uart()
typedef struct mem_to_uart_t {
  uint1_t ready;           // Set while the FSM is in RESET
  xil_app_to_mig_t to_mem; // Signals to drive into the memory controller
  uint1_t done;            // Set when the uart tx fifo sender reports done
} mem_to_uart_t;
// Reads one message from DDR3 and sends it out through the uart tx fifo.
//   start     - only checked in RESET; nonzero moves the FSM to DESER_MSG
//   msg_index - message slot; the DDR3 byte address is msg_index * UART_MSG_SIZE.
//               It is read while in DESER_MSG (not captured at start), so the
//               caller presumably needs to hold it stable - TODO confirm
//   from_mem  - memory controller outputs, handed to mig_read_256
// Returns: ready = 1 while in RESET, to_mem = memory controller inputs
// (the null value outside DESER_MSG), done = 1 when the uart sender reports done.
// Also drives led1 with whether the FSM is in WAIT_MSG.
mem_to_uart_t mem_to_uart(uint1_t start, test_count_t msg_index, xil_mig_to_app_t from_mem)
{
  // Registers
  static mem_to_uart_state_t state; // FSM state
  static uart_msg_t msg; // Message from memory deserializer buffer / into uart
  
  // Drive leds: led1 is on while waiting for the message to go out over uart
  WIRE_WRITE(uint1_t, led1, state==WAIT_MSG)
  
  // Outputs: default to a null memory command, not done, not ready
  mem_to_uart_t o;
  o.to_mem = XIL_APP_TO_MIG_T_NULL();
  o.done = 0;
  o.ready = 0;
  
  if(state==RESET)
  {
    // Idle: advertise ready until told to start
    o.ready = 1;
    if(start)
    {
      state = DESER_MSG;
    }
  }
  else if(state==DESER_MSG)
  {
    // Run the ddr read deserializer into the message buffer
    xil_mig_addr_t byte_addr = msg_index * UART_MSG_SIZE; // TODO shifts?
    mig_read_256_t deser = mig_read_256(1, byte_addr, msg.data, from_mem);
    // The deserializer hands back the data buffer, which is stored for the next iteration
    msg.data = deser.data;
    o.to_mem = deser.to_mem;
    // Wait until deserializer done and we have full message
    if(deser.done)
    {
      // Then wait until message goes out over uart
      state = WAIT_MSG;
    }
  }
  else if(state==WAIT_MSG)
  {
    // Begin trying to send msg out
    // NOTE(review): the constant 1 argument is presumably a valid/enable flag - confirm in uart_msg_fifos.c
    uart_tx_msg_fifo_sender_t msg_tx = uart_tx_msg_fifo_sender(1, msg);
    // Wait for message to go out over uart
    if(msg_tx.done)
    {
      // Then all the way done, back to start
      o.done = 1;
      state = RESET;
    }
  }
  
  return o;
}

// Uses above state machines to transfer messages to/from DDR memory
// States of the top level message controller in app()
typedef enum msg_ctrl_state_t {
  WAIT_RESET,  // Waiting for the DDR reset / calibration to finish
  UART_TO_MEM, // N messages into memory
  MEM_TO_UART  // N messages out of memory
} msg_ctrl_state_t;
// The main process, same clock as generated memory interface.
// Sequence: wait for the memory controller to leave reset and finish
// calibration, write NUM_MSGS_TEST uart messages into DDR3 (one per message
// slot), read the same slots back out over uart, then start over.
// Takes no arguments; communicates through the xil_mig_to_app / xil_app_to_mig
// wires and drives led0 while in the read-back phase.
#pragma MAIN_MHZ app xil_mig_module
void app()
{
  // Input port: read outputs wires from memory controller
  xil_mig_to_app_t from_mem;
  WIRE_READ(xil_mig_to_app_t, from_mem, xil_mig_to_app)
  
  // Output port wire: into memory controller
  // Defaults to the null value; only overridden by the active sub state machine
  xil_app_to_mig_t to_mem = XIL_APP_TO_MIG_T_NULL();
  
  // Registers
  static msg_ctrl_state_t state;
  static test_count_t num_msgs; // Index of the message currently being written / read
  
  // Drive leds: led0 is on while messages are being read back out of memory
  WIRE_WRITE(uint1_t, led0, state==MEM_TO_UART)

  // MEM CTRL FSM
  if(state==WAIT_RESET)
  {
    // Wait for DDR reset to be done
    uint1_t mem_rst_done = !from_mem.ui_clk_sync_rst & from_mem.init_calib_complete;
    if(mem_rst_done)
    {
      // Start things with writes first
      state = UART_TO_MEM;
    }
    // Message index is held at zero for as long as we are in this state
    num_msgs = 0;
  }
  else if(state==UART_TO_MEM)
  {
    // Keep starting the uart_to_mem fsm until N messages have been written to mem
    uart_to_mem_t writer = uart_to_mem(1, num_msgs, from_mem);
    to_mem = writer.to_mem;   
    if(writer.done)
    {
      // next message ?
      if(num_msgs<(NUM_MSGS_TEST-1))
      {
        // Do next message
        num_msgs += 1;
      }
      else
      {
        // Done writing messages, onto reads
        state = MEM_TO_UART;
        num_msgs = 0;
      }
    }
  }
  else if(state==MEM_TO_UART)
  {
    // Keep starting the mem_to_uart fsm until N messages have been read from mem
    mem_to_uart_t reader = mem_to_uart(1, num_msgs, from_mem);
    to_mem = reader.to_mem;
    if(reader.done)
    {
      // next message ?
      if(num_msgs<(NUM_MSGS_TEST-1))
      {
        // Do next message
        num_msgs += 1;
      }
      else
      {
        // Done reading messages, repeat from reset (WAIT_RESET clears num_msgs)
        state = WAIT_RESET;
      }
    }
  }
 
  // Resets: placed after the FSM so the memory controller reset overrides
  // whatever next state was chosen above
  if(from_mem.ui_clk_sync_rst)
  {
    state = WAIT_RESET;
  }
   
  // Drive wires into memory controller
  WIRE_WRITE(xil_app_to_mig_t, xil_app_to_mig, to_mem)  
}


// Separate leds module for now since some clock cross types still TODO
#pragma MAIN app_tieoff
uint1_t app_tieoff()
{
  // Input port: overflow flag from the uart rx mac
  uint1_t rx_overflow;
  WIRE_READ(uint1_t, rx_overflow, uart_rx_mac_overflow)
  
  // Registers
  static uint1_t overflow;
  
  // Drive leds
  //WIRE_WRITE(uint1_t, led3, !overflow)
  uint1_t rv = overflow;
  
  // Record overflow in register
  overflow |= rx_overflow; // sticky or equals
  
  return rv;
}

PipelineC Tool Output

Pipeline Map

The compiler produces a text representation of which operations occur at which point during each function's pipeline (i.e. how long each operation takes and when). Inputs flow from top to bottom. Functions listed on the same line occur in parallel.

app
Pipeline Map:

Throughput Sweep (Uneventful for simple state machines...)

In more advanced examples you will want to use the auto-pipelining features of PipelineC. This example relies entirely on global variables, so it cannot be pipelined further: no additional latency can be traded for throughput. Luckily, as written it is expected to meet the UART and DDR clock timings, so no further design changes are needed.

================== Beginning Throughput Sweep ================================

================== Writing Results of Throughput Sweep ================================
Done.

VHDL wrapper for Vivado

-- Top level file connecting board to PipelineC generated code

library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;
library UNISIM;
use UNISIM.VCOMPONENTS.ALL;

-- PipelineC packages
use work.c_structs_pkg.all;

-- Connections to the board; see the xdc files and comment/uncomment ports as needed
entity board is
  port (
    -- Board clock input
    CLK100MHZ    : in    std_logic;
    -- Switches and LEDs
    sw           : in    std_logic_vector(3 downto 0);
    led          : out   std_logic_vector(3 downto 0);
    -- UART pins
    uart_rxd_out : out   std_logic;
    uart_txd_in  : in    std_logic;
    -- DDR3 memory pins
    ddr3_dq      : inout std_logic_vector(15 downto 0);
    ddr3_dqs_p   : inout std_logic_vector(1 downto 0);
    ddr3_dqs_n   : inout std_logic_vector(1 downto 0);
    ddr3_addr    : out   std_logic_vector(13 downto 0);
    ddr3_ba      : out   std_logic_vector(2 downto 0);
    ddr3_ras_n   : out   std_logic;
    ddr3_cas_n   : out   std_logic;
    ddr3_we_n    : out   std_logic;
    ddr3_reset_n : out   std_logic;
    ddr3_ck_p    : out   std_logic_vector(0 downto 0);
    ddr3_ck_n    : out   std_logic_vector(0 downto 0);
    ddr3_cke     : out   std_logic_vector(0 downto 0);
    ddr3_cs_n    : out   std_logic_vector(0 downto 0);
    ddr3_dm      : out   std_logic_vector(1 downto 0);
    ddr3_odt     : out   std_logic_vector(0 downto 0)
  );
end board;

-- Connects the board pins, the clock generators and the DDR3 controller
-- to the PipelineC generated 'top' entity
architecture arch of board is

-- General clocks based off of the board's CLK100MHZ
signal clk_25, clk_50, clk_100, clk_200, clk_400 : std_logic;
signal clks_ready: std_logic;
signal rst : std_logic;
component clks_sys_clk_100
port
 (
  -- Clock out ports
  clk_25          : out    std_logic;
  clk_50          : out    std_logic;
  clk_100         : out    std_logic;
  clk_200         : out    std_logic;
  clk_400         : out    std_logic;
  -- Status and control signals
  locked          : out    std_logic;
  -- Clock in ports
  sys_clk_100     : in     std_logic
 );
end component;

-- DDR clocks based off of the board's CLK100MHZ
signal ddr_sys_clk : std_logic; -- 166.66MHz 
signal clk_166p66 : std_logic;
signal ddr_clks_ready: std_logic;
signal ddr_sys_rst_n : std_logic;
signal ddr_sys_rst : std_logic;
component ddr_clks_sys_clk_100
port
 (
  -- Clock out ports
  ddr_sys_clk          : out    std_logic;
  -- Status and control signals
  locked            : out    std_logic;
  -- Clock in ports
  sys_clk_100           : in     std_logic
 );
end component;

-- The board's DDR3 controller
-- Application (user interface) side signals of the controller
signal app_addr                  :     std_logic_vector(27 downto 0);
signal app_cmd                   :     std_logic_vector(2 downto 0);
signal app_en                    :     std_logic;
signal app_wdf_data              :     std_logic_vector(127 downto 0);
signal app_wdf_end               :     std_logic;
signal app_wdf_mask              :     std_logic_vector(15 downto 0);
signal app_wdf_wren              :     std_logic;
signal app_rd_data               :    std_logic_vector(127 downto 0);
signal app_rd_data_end           :    std_logic;
signal app_rd_data_valid         :    std_logic;
signal app_rdy                   :    std_logic;
signal app_wdf_rdy               :    std_logic;
signal app_sr_req                :     std_logic;
signal app_ref_req               :     std_logic;
signal app_zq_req                :     std_logic;
signal app_sr_active             :    std_logic;
signal app_ref_ack               :    std_logic;
signal app_zq_ack                :    std_logic;
signal ui_clk                    :    std_logic; -- 83.33MHz 
signal clk_83p33                 :    std_logic;
signal ui_clk_sync_rst           :    std_logic;
signal init_calib_complete       :    std_logic;
component ddr3_0
  port (
      -- Memory device pins
      ddr3_dq       : inout std_logic_vector(15 downto 0);
      ddr3_dqs_p    : inout std_logic_vector(1 downto 0);
      ddr3_dqs_n    : inout std_logic_vector(1 downto 0);
      ddr3_addr     : out   std_logic_vector(13 downto 0);
      ddr3_ba       : out   std_logic_vector(2 downto 0);
      ddr3_ras_n    : out   std_logic;
      ddr3_cas_n    : out   std_logic;
      ddr3_we_n     : out   std_logic;
      ddr3_reset_n  : out   std_logic;
      ddr3_ck_p     : out   std_logic_vector(0 downto 0);
      ddr3_ck_n     : out   std_logic_vector(0 downto 0);
      ddr3_cke      : out   std_logic_vector(0 downto 0);
	  ddr3_cs_n     : out   std_logic_vector(0 downto 0);
      ddr3_dm       : out   std_logic_vector(1 downto 0);
      ddr3_odt      : out   std_logic_vector(0 downto 0);
      -- Application interface ports
      app_addr                  : in    std_logic_vector(27 downto 0);
      app_cmd                   : in    std_logic_vector(2 downto 0);
      app_en                    : in    std_logic;
      app_wdf_data              : in    std_logic_vector(127 downto 0);
      app_wdf_end               : in    std_logic;
      app_wdf_mask              : in    std_logic_vector(15 downto 0);
      app_wdf_wren              : in    std_logic;
      app_rd_data               : out   std_logic_vector(127 downto 0);
      app_rd_data_end           : out   std_logic;
      app_rd_data_valid         : out   std_logic;
      app_rdy                   : out   std_logic;
      app_wdf_rdy               : out   std_logic;
      app_sr_req                : in    std_logic;
      app_ref_req               : in    std_logic;
      app_zq_req                : in    std_logic;
      app_sr_active             : out   std_logic;
      app_ref_ack               : out   std_logic;
      app_zq_ack                : out   std_logic;
      ui_clk                    : out   std_logic;
      ui_clk_sync_rst           : out   std_logic;
      init_calib_complete       : out   std_logic;
      -- System Clock Ports
      sys_clk_i                 : in    std_logic;
      -- Reference Clock Ports
      clk_ref_i                 : in    std_logic;
      sys_rst                   : in    std_logic -- ACTIVE LOW - PORT NAME IS INCORRECT
  );
end component ddr3_0;
 
-- Internal signals
-- Clocks
signal sys_clk_100 : std_logic;
-- Switches
signal switches_wire : unsigned(3 downto 0);
-- LEDs
signal leds_wire : unsigned(3 downto 0);
-- UART
signal uart_data_in : unsigned(0 downto 0);
signal uart_data_out : unsigned(0 downto 0);
-- DDR3: struct typed views of the controller's application interface
signal mig_to_app : xil_mig_to_app_t;
signal app_to_mig : xil_app_to_mig_t;

begin

-- Connect board's CLK100MHZ pin to internal global clock buffer network
CLK100MHZ_bufg_inst: BUFG 
port map (
    I => CLK100MHZ, 
    O => sys_clk_100
);

-- General clocks based off of the board's CLK100MHZ
clks_sys_clk_100_inst : clks_sys_clk_100
   port map ( 
  -- Clock out ports  
   clk_25 => clk_25,
   clk_50 => clk_50,
   clk_100 => clk_100,
   clk_200 => clk_200,
   clk_400 => clk_400,
  -- Status and control signals                
   locked => clks_ready,
   -- Clock in ports
   sys_clk_100 => sys_clk_100
 );
-- Hold in reset until clocks are ready
rst <= not clks_ready;

-- DDR clocks based off of the board's CLK100MHZ 
ddr_clks_sys_clk_100_inst : ddr_clks_sys_clk_100
   port map ( 
   ddr_sys_clk => ddr_sys_clk, -- 166.66MHz 
   locked => ddr_clks_ready,
   sys_clk_100 => sys_clk_100
 );
clk_166p66 <= ddr_sys_clk;
-- Hold in reset until both the general and the DDR clocks are ready
ddr_sys_rst <= rst or not ddr_clks_ready;
-- Active low version, as expected by the controller's sys_rst port
ddr_sys_rst_n <= not ddr_sys_rst;
 
-- The board's DDR3 controller
 ddr3_0_inst : ddr3_0
     port map (
        -- Memory interface ports
        ddr3_addr                      => ddr3_addr,
        ddr3_ba                        => ddr3_ba,
        ddr3_cas_n                     => ddr3_cas_n,
        ddr3_ck_n                      => ddr3_ck_n,
        ddr3_ck_p                      => ddr3_ck_p,
        ddr3_cke                       => ddr3_cke,
        ddr3_ras_n                     => ddr3_ras_n,
        ddr3_reset_n                   => ddr3_reset_n,
        ddr3_we_n                      => ddr3_we_n,
        ddr3_dq                        => ddr3_dq,
        ddr3_dqs_n                     => ddr3_dqs_n,
        ddr3_dqs_p                     => ddr3_dqs_p,
        init_calib_complete            => init_calib_complete,
 	   ddr3_cs_n                      => ddr3_cs_n,
        ddr3_dm                        => ddr3_dm,
        ddr3_odt                       => ddr3_odt,
        -- Application interface ports
        app_addr                       => app_addr,
        app_cmd                        => app_cmd,
        app_en                         => app_en,
        app_wdf_data                   => app_wdf_data,
        app_wdf_end                    => app_wdf_end,
        app_wdf_wren                   => app_wdf_wren,
        app_rd_data                    => app_rd_data,
        app_rd_data_end                => app_rd_data_end,
        app_rd_data_valid              => app_rd_data_valid,
        app_rdy                        => app_rdy,
        app_wdf_rdy                    => app_wdf_rdy,
        app_sr_req                     => app_sr_req,
        app_ref_req                    => app_ref_req,
        app_zq_req                     => app_zq_req,
        app_sr_active                  => app_sr_active,
        app_ref_ack                    => app_ref_ack,
        app_zq_ack                     => app_zq_ack,
        ui_clk                         => ui_clk, -- 83.33MHz
        ui_clk_sync_rst                => ui_clk_sync_rst,
        app_wdf_mask                   => app_wdf_mask,
        -- System Clock Ports
        sys_clk_i                      => ddr_sys_clk, -- 166.66MHz 
        -- Reference Clock Ports
        clk_ref_i                      => clk_200, -- Ref always 200MHz
        sys_rst                        => ddr_sys_rst_n -- ACTIVE LOW - PORT NAME IS INCORRECT
     );
-- The controller's user interface clock is what clocks the PipelineC memory logic
clk_83p33 <= ui_clk;

-- Un/pack IO struct types to/from flattened SLV board pins
-- TODO Code gen this...
-- Comment out wires as necessary
process(all) begin
    -- LEDs
    led <= std_logic_vector(leds_wire);       
    -- Switches
    switches_wire <= unsigned(sw);
    -- UART
    uart_data_in(0) <= uart_txd_in;
    uart_rxd_out <= uart_data_out(0);
    -- DDR3
    app_addr <= std_logic_vector(app_to_mig.addr);
    app_cmd  <= std_logic_vector(app_to_mig.cmd);
    app_en  <= std_logic(app_to_mig.en(0));
    -- Write data: struct holds an array of bytes, flatten into the 128b vector
    -- (app_wdf_mask'length, one mask bit per byte, is used as the byte count)
    for byte_i in 0 to app_wdf_mask'length-1 loop
		app_wdf_data(((byte_i+1)*8)-1 downto (byte_i*8)) <= std_logic_vector(app_to_mig.wdf_data(byte_i));
	end loop;
    app_wdf_end  <= std_logic(app_to_mig.wdf_end(0));
    -- Write mask: one single-bit element per byte
    for byte_i in 0 to app_wdf_mask'length-1 loop
		app_wdf_mask(byte_i) <= std_logic(app_to_mig.wdf_mask(byte_i)(0));
	end loop;
    app_wdf_wren <= std_logic(app_to_mig.wdf_wren(0));
    -- Read data: slice the 128b vector into the struct's array of bytes
    for byte_i in 0 to app_wdf_mask'length-1 loop
        mig_to_app.rd_data(byte_i) <= unsigned(app_rd_data(((byte_i+1)*8)-1 downto (byte_i*8)));
	end loop;
    mig_to_app.rd_data_end(0) <= app_rd_data_end; 
    mig_to_app.rd_data_valid(0) <= app_rd_data_valid;
    mig_to_app.rdy(0) <= app_rdy;
    mig_to_app.wdf_rdy(0) <= app_wdf_rdy;
    app_sr_req   <= std_logic(app_to_mig.sr_req(0));
    app_ref_req  <= std_logic(app_to_mig.ref_req(0));
    app_zq_req   <= std_logic(app_to_mig.zq_req(0));
    mig_to_app.sr_active(0) <= app_sr_active;
    mig_to_app.ref_ack(0) <= app_ref_ack;
    mig_to_app.zq_ack(0)  <= app_zq_ack;
    mig_to_app.ui_clk_sync_rst(0) <= ui_clk_sync_rst;
    mig_to_app.init_calib_complete(0) <= init_calib_complete;
end process;
    
-- The PipelineC generated entity
top_inst : entity work.top port map (    
    -- Main function clocks (only the clocks used by this design are connected)
    clk_25p0 => clk_25,
    --clk_50p0 => clk_50,
    clk_83p33 => clk_83p33,
    --clk_100p0 => clk_100,
    --clk_166p66 => clk_166p66,
    --clk_200p0 => clk_200,
    --clk_400p0 => clk_400,
        
    -- Each main function's inputs and outputs
    
    -- LEDs
    led0_module_return_output(0) => leds_wire(0),
    led1_module_return_output(0) => leds_wire(1),
    led2_module_return_output(0) => leds_wire(2),
    led3_module_return_output(0) => leds_wire(3),
    
    -- Switches
    --switches_module_sw => switches_wire
    
    -- UART
    uart_module_data_in => uart_data_in,
    uart_module_return_output => uart_data_out,
    
    -- DDR3
    xil_mig_module_mig_to_app => mig_to_app,
    xil_mig_module_return_output => app_to_mig
);

end arch;

Vivado Results

Resource usage: