diff --git a/software/glasgow/gateware/hyperram.py b/software/glasgow/gateware/hyperram.py
new file mode 100644
index 000000000..ec68b03b1
--- /dev/null
+++ b/software/glasgow/gateware/hyperram.py
@@ -0,0 +1,459 @@
+# Ref: HyperBus™ Specification
+# Ref: https://www.mouser.com/pdfdocs/cypress_hyperbus_specification.pdf
+# Document Number: 001-99253
+# Accession: ?
+
+# This module is intended to be imported as `from glasgow.gateware import hyperram` and then used
+# as `hyperram.PHYx1`, etc.
+
+from amaranth import *
+from amaranth.utils import log2_int
+from amaranth.lib import enum, data, wiring
+from amaranth.lib.wiring import Signature, In, Out
+
+
+__all__ = ["PHYx1", "Operation", "AddressSpace", "BurstType", "CommandAddress", "Sequencer"]
+
+
+class IOBufferWithEn(wiring.Component):
+    """Bank of iCE40 ``SB_IO`` primitives, one per bit of ``pins``.
+
+    Every buffer shares the same clock enable (``en``) and output enable (``oe``); bit ``n`` of
+    ``o`` and ``i`` belongs to the buffer instantiated for bit ``n`` of ``pins``.
+    """
+
+    def __init__(self, pins):
+        self.pins = pins
+
+        super().__init__()
+
+    @property
+    def signature(self):
+        return Signature({
+            "en" : In(1, reset=1),        # clock enable (for both input and output)
+            "oe" : In(1),                 # output enable
+            "o"  : In(len(self.pins)),    # output; value to be driven onto the pins
+            "i"  : Out(len(self.pins)),   # input; value captured from the pins
+        })
+
+    def elaborate(self, platform):
+        m = Module()
+        for index, pin in enumerate(self.pins):
+            m.submodules += Instance("SB_IO",
+                p_PIN_TYPE=C(0b1101_00, 6),
+                i_INPUT_CLK=ClockSignal(),
+                i_OUTPUT_CLK=ClockSignal(),
+                i_CLOCK_ENABLE=self.en,
+                i_OUTPUT_ENABLE=self.oe,
+                # Each buffer handles exactly one bit of the `o`/`i` vectors.
+                i_D_OUT_0=self.o[index],
+                o_D_IN_0=self.i[index],
+                io_PACKAGE_PIN=pin,
+            )
+        return m
+
+
+class PHYx1(wiring.Component):
+    """Non-geared (one octet per cycle) HyperBus PHY.
+
+    This PHY connects to a HyperBus memory device without deriving any new clocks or using any
+    delay line primitives. The "x1" in the name refers to the lack of gearing; since HyperBus
+    uses a DDR bus, this PHY runs the bus at one half the frequency of its clock domain,
+    transferring 1 octet per cycle. This implementation allows DDR output buffers to be used
+    to provide the required 90° phase offset for the memory clock, eliminating the need for a PLL.
+
+    All of the inputs and outputs are registered, with one cycle of latency.
+    """
+
+    gearing = 1
+
+    def __init__(self, resource, *, cs_count=1):
+        if not isinstance(cs_count, int) or not cs_count >= 1:
+            raise ValueError(f"CS# count must be a positive integer, not {cs_count!r}")
+
+        self.resource = resource
+        self.cs_count = cs_count
+
+        super().__init__()
+
+    @property
+    def signature(self):
+        # All of the PHY inputs below are synchronous to the `sync` clock.
+        return Signature({
+            # valid signal; if strobed when `ready`, the gearbox will generate a rising edge on
+            # CK/CK# on the next `sync` clock cycle and a falling edge on CK/CK# on the `sync`
+            # clock cycle after that, transferring data inputs and outputs in the process
+            "valid" : In(1),
+            # ready signal; asserted whenever the data output is valid and the gearbox will accept
+            # the data input if `valid` is strobed
+            "ready" : Out(1),
+            # select signal, linked to the `ready` and `valid` strobes; if non-zero, CS#[sel-1] is
+            # driven low during the next CK/CK# cycle
+            "sel"   : In(range(self.cs_count + 1)),
+            # data signals, linked to the `ready` and `valid` strobes
+            "rwds"  : Out(Signature({
+                "oe" : In(1),   # data strobe output enable signal; enables RWDS driver
+                "o"  : In(2),   # data strobe output signal; drives RWDS if `oe` is asserted
+                "i"  : Out(2),  # data strobe input signal; is captured from RWDS
+            })),
+            "data"  : Out(Signature({
+                "oe" : In(1),   # data output enable signal; enables DQ[7:0] drivers
+                "o"  : In(16),  # data output signal; drives DQ[7:0] if `oe` is asserted
+                "i"  : Out(16), # data input signal; is captured from DQ[7:0]
+            })),
+        })
+
+    def elaborate(self, platform):
+        pins = platform.request(*self.resource, xdr={
+            "reset": 0,
+            "cs":    1,
+            "ck_p":  2,
+            "ck_n":  2,
+        }, dir={
+            "rwds":  "-",
+            "dq":    "-",
+        })
+
+        m = Module()
+
+        # RESET# is an unregistered output pin driven by `sync` domain reset. (It is not registered
+        # to allow for configurations with asynchronous reset.)
+        m.d.comb += [
+            pins.reset.o.eq(ResetSignal(allow_reset_less=True)),
+        ]
+
+        # CK/CK# is a pseudo-differential pair (depending on HyperBus register configuration only
+        # the positive polarity may be used), driven as source synchronous DDR output by the state
+        # machine below. This clock idles low and toggles whenever a transfer is accepted (`valid`
+        # strobed while `ready`), at 90° phase offset referenced to DQ[7:0]. The I/O buffer is
+        # configured as DDR to introduce the phase offset.
+        m.d.comb += [
+            pins.ck_p.o_clk.eq(ClockSignal()),
+            pins.ck_n.o_clk.eq(ClockSignal()),
+            # continued below...
+        ]
+
+        # CS#[`cs_count`-1:0] are a set of output pins driven by a one-hot encoding of `self.sel`.
+        m.submodules.pins_cs = pins_cs = IOBufferWithEn(pins.cs)
+        reg_cs_o = Signal.like(pins_cs.o)
+
+        # RWDS is an input/output pin driven by `self.rwds`. It has three distinct functions:
+        # - During command/address phase, RWDS is a memory output and FPGA input indicating whether
+        #   the memory controller must introduce additional latency. It is essentially a strap with
+        #   no particular timing relationship to other signals; it is enough to
+        #   sample it somewhere during the C/A phase.
+        # - During write transactions, RWDS is a memory input and FPGA output that is edge aligned
+        #   with data output by the FPGA, masking off bytes that should not be written.
+        # - During read transactions, RWDS is a memory output and FPGA input that is edge aligned
+        #   with data output by the memory, indicating a pause in data transfer when the address
+        #   crosses page boundaries.
+        m.submodules.pins_rwds = pins_rwds = IOBufferWithEn(pins.rwds)
+        reg_rwds_oe = Signal.like(pins_rwds.oe)
+        reg_rwds_o  = Signal.like(self.rwds.o)
+        reg_rwds_i  = Signal.like(pins_rwds.i)
+
+        # DQ[7:0] are a set of input/output pins driven by `self.data`. They are used for command,
+        # address, and data transfer. When used as an output, DQ[7:0] changes state 90° before
+        # the transition on CK/CK#; when used as an input, DQ[7:0] must be sampled 90° after
+        # the transition on CK/CK#.
+        m.submodules.pins_dq = pins_dq = IOBufferWithEn(pins.dq)
+        reg_dq_oe = Signal.like(pins_dq.oe)
+        reg_dq_o  = Signal.like(self.data.o)
+        reg_dq_i  = Signal.like(pins_dq.i)
+
+        # This state machine implements a soft gearbox. Each time `valid` is strobed, it captures
+        # word-wide inputs, and outputs the two halves of the word using a fixed two-cycle sequence.
+        with m.FSM():
+            # Idle.
+            m.d.comb += [
+                pins.ck_p.o0.eq(0),
+                pins.ck_p.o1.eq(0),
+                pins.ck_n.o0.eq(1),
+                pins.ck_n.o1.eq(1),
+            ]
+
+            with m.State("Rising"):
+                m.d.comb += [
+                    pins_cs.o.eq(Cat(self.sel == index for index in range(1, self.cs_count + 1))),
+                    pins_rwds.oe.eq(self.rwds.oe),
+                    pins_rwds.o.eq(self.rwds.o[1]),
+                    self.rwds.i.eq(Cat(pins_rwds.i, reg_rwds_i)),
+                    pins_dq.oe.eq(self.data.oe),
+                    pins_dq.o.eq(self.data.o[8:]),
+                    self.data.i.eq(Cat(pins_dq.i, reg_dq_i)),
+                ]
+                # Capture the full word-wide outputs so that the "Falling" state can drive the
+                # low half after the inputs have moved on.
+                m.d.sync += [
+                    reg_cs_o.eq(pins_cs.o),
+                    reg_rwds_oe.eq(pins_rwds.oe),
+                    reg_rwds_o.eq(self.rwds.o),
+                    reg_dq_oe.eq(pins_dq.oe),
+                    reg_dq_o.eq(self.data.o),
+                ]
+                m.d.comb += [
+                    self.ready.eq(1),
+                ]
+                with m.If(self.valid):
+                    m.d.comb += [
+                        pins_rwds.en.eq(1),
+                        pins_dq.en.eq(1),
+                        pins.ck_p.o0.eq(0),
+                        pins.ck_p.o1.eq(1),
+                        pins.ck_n.o0.eq(1),
+                        pins.ck_n.o1.eq(0),
+                    ]
+                    m.next = "Falling"
+
+            with m.State("Falling"):
+                m.d.comb += [
+                    pins_cs.o.eq(reg_cs_o),
+                    pins_rwds.oe.eq(reg_rwds_oe),
+                    pins_rwds.o.eq(reg_rwds_o[0]),
+                    self.rwds.i.eq(Cat(pins_rwds.i, reg_rwds_i)),
+                    pins_dq.oe.eq(reg_dq_oe),
+                    pins_dq.o.eq(reg_dq_o[:8]),
+                    self.data.i.eq(Cat(pins_dq.i, reg_dq_i)),
+                ]
+                m.d.sync += [
+                    reg_rwds_i.eq(pins_rwds.i),
+                    reg_dq_i.eq(pins_dq.i),
+                ]
+                # The HyperBus specification forbids pausing or stopping the clock in the non-idle
+                # state, so after the rising edge is generated, this state machine unconditionally
+                # falls through to generating the falling edge.
+                m.d.comb += [
+                    pins_rwds.en.eq(1),
+                    pins_dq.en.eq(1),
+                    pins.ck_p.o0.eq(1),
+                    pins.ck_p.o1.eq(0),
+                    pins.ck_n.o0.eq(0),
+                    pins.ck_n.o1.eq(1),
+                ]
+                m.next = "Rising"
+
+        return m
+
+
+class Operation(enum.Enum, shape=1):
+    """The R/W# field in the Command/Address information."""
+    Write = 0
+    Read  = 1
+
+
+class AddressSpace(enum.Enum, shape=1):
+    """The AS field in the Command/Address information."""
+    Memory   = 0
+    Register = 1
+
+
+class BurstType(enum.Enum, shape=1):
+    """The Burst Type field in the Command/Address information."""
+    Wrapped = 0
+    Linear  = 1
+
+
+class CommandAddress(data.Struct):
+    """The Command/Address information at the beginning of a HyperBus transaction."""
+    address_low   : 3
+    _reserved     : 13
+    address_high  : 29
+    burst_type    : BurstType
+    address_space : AddressSpace
+    operation     : Operation
+
+
+class Sequencer(wiring.Component):
+    """HyperBus transaction sequencer.
+
+    While idle, latches `cmd_addr` and `latency` and forwards `sel` to the PHY; once `sel` is
+    non-zero it shifts the Command/Address word out through the PHY and then proceeds to the
+    latency, write or read phase. Asserting `rst` resets the whole state machine.
+
+    NOTE(review): `PHYx1.signature` declares `valid`/`ready` but no `en`, while this state
+    machine still drives `phy.en` and shifts `cmd_addr` on every `sync` cycle; the two need to
+    be reconciled before this component can be used with `PHYx1`.
+    """
+
+    def __init__(self, phy):
+        self.phy = phy
+
+        super().__init__()
+
+    @property
+    def signature(self):
+        return Signature({
+            "rst"      : In(1),
+            "sel"      : In(range(self.phy.cs_count + 1)),
+            "cmd_addr" : In(CommandAddress),
+            "latency"  : In(range(17)),
+            "write"    : In(Signature({
+                "payload" : Out(data.StructLayout({
+                    "data" : 16,
+                    "mask" : 2,
+                })),
+                "valid"   : Out(1),
+                "ready"   : In(1),
+            })),
+            "read"     : Out(Signature({
+                "payload" : Out(data.StructLayout({
+                    "data" : 16,
+                })),
+                "valid"   : Out(1),
+                "ready"   : In(1),
+            }))
+        })
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.phy = phy = self.phy
+
+        # The command inputs are latched into these registers whenever `sel` changes and then
+        # the latched version is used. In addition, the data in the latched `cmd_addr` is shifted
+        # during the Command/Address cycle (rather than using a selection mux) to improve delay.
+        cmd_addr = Signal.like(self.cmd_addr)
+        latency  = Signal.like(self.latency)
+        is_write = Signal()
+        use_mask = Signal() # for writes only
+
+        # `timer` must hold the longest count loaded below, `latency << (gear_shift + 1)`.
+        gear_shift = log2_int(phy.gearing, need_pow2=True)
+        timer = Signal(len(latency) + gear_shift + 1)
+
+        with m.FSM():
+            with m.State("Idle"):
+                m.d.sync += [
+                    cmd_addr.eq(self.cmd_addr),
+                    latency.eq(self.latency),
+                    is_write.eq(self.cmd_addr.operation == Operation.Write),
+                    use_mask.eq(self.latency > 0),
+                    phy.sel.eq(self.sel),
+                    timer.eq(5),
+                ]
+                with m.If(self.sel != 0):
+                    m.next = "Command/Address"
+
+            with m.State("Command/Address"):
+                m.d.comb += [
+                    phy.en.eq(1),
+                    phy.data.oe.eq(1),
+                    phy.data.o.eq(cmd_addr[40:]),
+                ]
+                m.d.sync += [
+                    cmd_addr.eq(Cat(C(0, phy.gearing * 8), cmd_addr)),
+                ]
+                with m.If(timer != 0):
+                    m.d.sync += timer.eq(timer - 1)
+                with m.Else():
+                    with m.If(phy.rwds.i): # latency count x2
+                        m.d.sync += timer.eq(latency << (gear_shift + 1))
+                    with m.Else(): # latency count x1
+                        m.d.sync += timer.eq(latency << gear_shift)
+                    with m.If(is_write & (latency == 0)):
+                        m.next = "Write"
+                    with m.Else():
+                        m.next = "Latency"
+
+            with m.State("Latency"):
+                # NOTE(review): `timer` is loaded with the latency count above but is not counted
+                # down here; this state lasts a single cycle.
+                with m.If(is_write):
+                    m.next = "Write"
+                with m.Else():
+                    m.next = "Read"
+
+            with m.State("Write"):
+                m.d.comb += [
+                    phy.en.eq(self.write.valid),
+                    phy.data.oe.eq(1),
+                    phy.data.o.eq(self.write.payload.data),
+                    phy.rwds.oe.eq(use_mask),
+                    phy.rwds.o.eq(self.write.payload.mask),
+                    self.write.ready.eq(1),
+                ]
+
+            with m.State("Read"):
+                m.d.comb += [
+                    phy.en.eq(self.read.ready),
+                    self.read.payload.data.eq(phy.data.i),
+                    self.read.valid.eq(1), # FIXME
+                    # self.read.valid.eq(phy.rwds),
+                ]
+
+        return ResetInserter(self.rst)(m)
+
+
+# class HyperRAMController(wiring.Component):
+#     cmd_addr : In(48)
+#     out_en   : Out(1)
+#     out_data : Out(8)
+#
+#     def __init__(self, *, phy):
+#         self.phy = phy
+#
+#         super().__init__()
+#
+#     def elaborate(self, platform):
+#         m = Module()
+#         m.submodules.phy = phy = self.phy
+#
+#         timer    = Signal(8)
+#         cmd_addr = Signal(48)
+#
+#         with m.FSM():
+#             with m.State("Idle"):
+#                 m.d.sync += cmd_addr.eq(self.cmd_addr)
+#                 m.next = "Select"
+#
+#             with m.State("Select"):
+#                 m.d.comb += [
+#                     phy.sel.eq(1)
+#                 ]
+#                 m.d.sync += timer.eq(6) # 6 Command/Address cycles
+#                 m.next = "Command/Address"
+#
+#             with m.State("Command/Address"):
+#                 m.d.comb += [
+#                     phy.sel.eq(1),
+#                     phy.en.eq(1),
+#                     phy.data.oe.eq(1),
+#                     phy.data.o.eq(cmd_addr[40:]),
+#                 ]
+#                 with m.If(timer == 1):
+#                     m.d.sync += [
+#                         timer.eq(7 << 2), # 28 latency cycles
+#                     ]
+#                     m.next = "Latency"
+#                 with m.Else():
+#                     m.d.sync += [
+#                         timer.eq(timer - 1),
+#                         cmd_addr.eq(Cat(C(0, 8), cmd_addr[:40])),
+#                     ]
+#
+#             with m.State("Latency"):
+#                 m.d.comb += [
+#                     phy.sel.eq(1),
+#                     phy.en.eq(1),
+#                 ]
+#                 with m.If(timer == 1):
+#                     m.d.sync += timer.eq(32)
+#                     m.next = "Read"
+#                 with m.Else():
+#                     m.d.sync += timer.eq(timer - 1)
+#
+#             with m.State("Read"):
+#                 m.d.comb += [
+#                     phy.sel.eq(1),
+#                     phy.en.eq(1),
+#                     self.out_en.eq(1),
+#                     self.out_data.eq(phy.data.i)
+#                 ]
+#                 with m.If(timer == 1):
+#                     m.next = "End"
+#                 with m.Else():
+#                     m.d.sync += timer.eq(timer - 1)
+#
+#             with m.State("End"):
+#                 pass
+#
+#         return m
\ No newline at end of file
diff --git a/software/tests/gateware/test_hyperram.py b/software/tests/gateware/test_hyperram.py
new file mode 100644
index 000000000..389cebf02
--- /dev/null
+++ b/software/tests/gateware/test_hyperram.py
@@ -0,0 +1,66 @@
+import asyncio
+import logging
+from amaranth import *
+
+from glasgow.gateware.hyperram import *
+
+
+class HyperRAMHardwareTestbench(Elaboratable):
+    def __init__(self, *, in_fifo, cmd_addr):
+        self.in_fifo  = in_fifo
+        self.cmd_addr = cmd_addr
+
+    def elaborate(self, platform):
+        m = Module()
+
+        platform.add_ram_pak_resources()
+        # NOTE(review): `HyperRAMController` is commented out in the gateware module for now, so
+        # this testbench cannot be elaborated until it is restored.
+        m.submodules.ctl = ctl = HyperRAMController(phy=PHYx1(resource=("hyperram", 0)))
+
+        m.d.comb += [
+            self.in_fifo.w_en.eq(ctl.out_en),
+            self.in_fifo.w_data.eq(ctl.out_data),
+            ctl.cmd_addr.eq(self.cmd_addr),
+        ]
+
+        return m
+
+
+async def main():
+    from glasgow.target.hardware import GlasgowHardwareTarget
+    from glasgow.device.hardware import GlasgowHardwareDevice
+
+    logging.getLogger().setLevel(logging.DEBUG)
+    logging.getLogger().addHandler(loggingHandler := logging.StreamHandler())
+    loggingHandler.setFormatter(
+        logging.Formatter(style="{", fmt="{levelname[0]:s}: {name:s}: {message:s}"))
+
+    device = GlasgowHardwareDevice()
+    target = GlasgowHardwareTarget(revision=device.revision)
+    reset, reset_addr = target.registers.add_rw(1, reset=1)
+    cmd_addr, cmd_addr_addr = target.registers.add_rw(48)
+    target.add_submodule(ResetInserter(reset)(
+        HyperRAMHardwareTestbench(
+            in_fifo=target.fx2_crossbar.get_in_fifo(0),
+            cmd_addr=cmd_addr)))
+    await device.download_target(target.build_plan())
+
+    print("Running...")
+    device.usb_handle.claimInterface(0)
+    device.usb_handle.setInterfaceAltSetting(0, 1)
+
+    async def run_cmd(cmd_addr):
+        await device.write_register(reset_addr, 1)
+        await device.write_register(cmd_addr_addr, cmd_addr, width=6)
+        await device.write_register(reset_addr, 0)
+        return await device.bulk_read(0x86, 512)
+
+    print((await run_cmd(0x800000000000)).hex())
+    print((await run_cmd(0x800000000001)).hex())
+    print((await run_cmd(0x800000000002)).hex())
+    print((await run_cmd(0x800000000003)).hex())
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
\ No newline at end of file