From d0c9f88c43be320c34b418bfd3bf5644b7182130 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Sun, 9 Jul 2023 21:45:47 +0300
Subject: [PATCH 01/20] Add copies of opcodes.py (and its tests) as starting
 point for opcodes_s2

Currently the logic in these copies is identical to the original.
This will make it easier to see the changes we're making in
subsequent commits.
---
 esp32_ulp/opcodes_s2.py | 739 ++++++++++++++++++++++++++++++++++++++++
 tests/00_unit_tests.sh  |   2 +-
 tests/opcodes_s2.py     | 185 ++++++++++
 3 files changed, 925 insertions(+), 1 deletion(-)
 create mode 100644 esp32_ulp/opcodes_s2.py
 create mode 100644 tests/opcodes_s2.py

diff --git a/esp32_ulp/opcodes_s2.py b/esp32_ulp/opcodes_s2.py
new file mode 100644
index 0000000..6910081
--- /dev/null
+++ b/esp32_ulp/opcodes_s2.py
@@ -0,0 +1,739 @@
+"""
+ESP32 ULP Co-Processor Instructions
+"""
+
+from ucollections import namedtuple
+from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN
+
+from .soc import *
+from .util import split_tokens, validate_expression
+
+# XXX dirty hack: use a global for the symbol table
+symbols = None
+
+# Opcodes, Sub-Opcodes, Modes, ...
+
+OPCODE_WR_REG = 1
+OPCODE_RD_REG = 2
+
+DR_REG_MAX_DIRECT = 0x3ff
+RD_REG_PERIPH_RTC_CNTL = 0
+RD_REG_PERIPH_RTC_IO = 1
+RD_REG_PERIPH_SENS = 2
+RD_REG_PERIPH_RTC_I2C = 3
+
+OPCODE_I2C = 3
+
+OPCODE_DELAY = 4
+
+OPCODE_ADC = 5
+
+OPCODE_ST = 6
+SUB_OPCODE_ST = 4
+
+OPCODE_ALU = 7
+SUB_OPCODE_ALU_REG = 0
+SUB_OPCODE_ALU_IMM = 1
+ALU_SEL_ADD = 0
+ALU_SEL_SUB = 1
+ALU_SEL_AND = 2
+ALU_SEL_OR = 3
+ALU_SEL_MOV = 4
+ALU_SEL_LSH = 5
+ALU_SEL_RSH = 6
+SUB_OPCODE_ALU_CNT = 2
+ALU_SEL_INC = 0
+ALU_SEL_DEC = 1
+ALU_SEL_RST = 2
+
+OPCODE_BRANCH = 8
+# https://github.com/espressif/binutils-esp32ulp/blob/d61f86f97eda43fc118df30d019fc062aaa6bc8d/include/opcode/esp32ulp_esp32.h#L85
+SUB_OPCODE_BX = 0
+SUB_OPCODE_BR = 1
+SUB_OPCODE_BS = 2
+BX_JUMP_TYPE_DIRECT = 0
+BX_JUMP_TYPE_ZERO = 1
+BX_JUMP_TYPE_OVF = 2
+# https://github.com/espressif/binutils-esp32ulp/blob/d61f86f97eda43fc118df30d019fc062aaa6bc8d/gas/config/tc-esp32ulp.h#L91
+BRCOND_LT = 0
+BRCOND_GE = 1
+BRCOND_LE = 2
+BRCOND_EQ = 3
+BRCOND_GT = 4
+
+OPCODE_END = 9
+SUB_OPCODE_END = 0
+SUB_OPCODE_SLEEP = 1
+
+OPCODE_TSENS = 10
+
+OPCODE_HALT = 11
+
+OPCODE_LD = 13
+
+
+def make_ins_struct_def(layout):
+    lines = layout.strip().splitlines()
+    pos = 0  # bitfield definitions start from lsb
+    struct_def = {}
+    for line in lines:
+        bitfield = line.split('#', 1)[0]  # get rid of comment
+        name, width = bitfield.split(':', 1)
+        name = name.strip()
+        width = int(width.strip())
+        struct_def[name] = BFUINT32 | pos << BF_POS | width << BF_LEN
+        pos += width
+    if pos != 32:
+        raise ValueError('make_ins: bit field widths must sum up to 32. [%s]' % layout)
+    struct_def['all'] = UINT32
+    return struct_def
+
+
+def make_ins(layout):
+    """
+    transform textual instruction layout description into a ready-to-use uctypes struct
+    """
+    struct_def = make_ins_struct_def(layout)
+    instruction = bytearray(4)
+    return struct(addressof(instruction), struct_def, LITTLE_ENDIAN)
+
+
+# instruction structure definitions
+
+_wr_reg = make_ins("""
+    addr : 8        # Address within either RTC_CNTL, RTC_IO, or SARADC
+    periph_sel : 2  # Select peripheral: RTC_CNTL (0), RTC_IO(1), SARADC(2)
+    data : 8        # 8 bits of data to write
+    low : 5         # Low bit
+    high : 5        # High bit
+    opcode : 4      # Opcode (OPCODE_WR_REG)
+""")
+
+
+_rd_reg = make_ins("""
+    addr : 8        # Address within either RTC_CNTL, RTC_IO, or SARADC
+    periph_sel : 2  # Select peripheral: RTC_CNTL (0), RTC_IO(1), SARADC(2)
+    unused : 8      # Unused
+    low : 5         # Low bit
+    high : 5        # High bit
+    opcode : 4      # Opcode (OPCODE_RD_REG)
+""")
+
+
+_i2c = make_ins("""
+    sub_addr : 8    # address within I2C slave
+    data : 8        # Data to write (not used for read)
+    low : 3         # low bit
+    high : 3        # high bit
+    i2c_sel : 4     # select i2c slave via SENS_I2C_SLAVE_ADDRx
+    unused : 1      # Unused
+    rw : 1          # Write (1) or read (0)
+    opcode : 4      # Opcode (OPCODE_I2C)
+""")
+
+
+_delay = make_ins("""
+    cycles : 16     # Number of cycles to sleep
+    unused : 12     # Unused
+    opcode : 4      # Opcode (OPCODE_DELAY)
+""")
+
+
+_tsens = make_ins("""
+    dreg : 2        # Register where to store TSENS result
+    delay : 14      # Number of cycles needed to obtain a measurement
+    unused : 12     # Unused
+    opcode : 4      # Opcode (OPCODE_TSENS)
+""")
+
+
+_adc = make_ins("""
+    dreg : 2        # Register where to store ADC result
+    mux : 4         # Select SARADC pad (mux + 1)
+    sar_sel : 1     # Select SARADC0 (0) or SARADC1 (1)
+    unused1 : 1     # Unused
+    cycles : 16     # TBD, cycles used for measurement
+    unused2 : 4     # Unused
+    opcode: 4       # Opcode (OPCODE_ADC)
+""")
+
+
+_st = make_ins("""
+    sreg : 2        # Register which contains data to store
+    dreg : 2        # Register which contains address in RTC memory (expressed in words)
+    unused1 : 6     # Unused
+    offset : 11     # Offset to add to dreg
+    unused2 : 4     # Unused
+    sub_opcode : 3  # Sub opcode (SUB_OPCODE_ST)
+    opcode : 4      # Opcode (OPCODE_ST)
+""")
+
+
+_alu_reg = make_ins("""
+    dreg : 2        # Destination register
+    sreg : 2        # Register with operand A
+    treg : 2        # Register with operand B
+    unused : 15     # Unused
+    sel : 4         # Operation to perform, one of ALU_SEL_xxx
+    sub_opcode : 3  # Sub opcode (SUB_OPCODE_ALU_REG)
+    opcode : 4      # Opcode (OPCODE_ALU)
+""")
+
+
+_alu_imm = make_ins("""
+    dreg : 2        # Destination register
+    sreg : 2        # Register with operand A
+    imm : 16        # Immediate value of operand B
+    unused : 1      # Unused
+    sel : 4         # Operation to perform, one of ALU_SEL_xxx
+    sub_opcode : 3  # Sub opcode (SUB_OPCODE_ALU_IMM)
+    opcode : 4      # Opcode (OPCODE_ALU)
+""")
+
+
+_alu_cnt = make_ins("""
+    unused1 : 4     # Unused
+    imm : 8         # Immediate value (to inc / dec stage counter)
+    unused2 : 9     # Unused
+    sel : 4         # Operation to perform, one of ALU_SEL_xxx
+    sub_opcode : 3  # Sub opcode (SUB_OPCODE_ALU_CNT)
+    opcode : 4      # Opcode (OPCODE_ALU)
+""")
+
+
+_bx = make_ins("""
+    dreg : 2        # Register which contains target PC, expressed in words (used if .reg == 1)
+    addr : 11       # Target PC, expressed in words (used if .reg == 0)
+    unused : 8      # Unused
+    reg : 1         # Target PC in register (1) or immediate (0)
+    type : 3        # Jump condition (BX_JUMP_TYPE_xxx)
+    sub_opcode : 3  # Sub opcode (SUB_OPCODE_BX)
+    opcode : 4      # Opcode (OPCODE_BRANCH)
+""")
+
+
+_br = make_ins("""
+    imm : 16        # Immediate value to compare against
+    cmp : 1         # Comparison to perform: BRCOND_LT or BRCOND_GE
+    offset : 7      # Absolute value of target PC offset w.r.t. current PC, expressed in words
+    sign : 1        # Sign of target PC offset: 0: positive, 1: negative
+    sub_opcode : 3  # Sub opcode (SUB_OPCODE_BR)
+    opcode : 4      # Opcode (OPCODE_BRANCH)
+""")
+
+
+_bs = make_ins("""
+    imm : 8         # Immediate value to compare against
+    unused : 7      # Unused
+    cmp : 2         # Comparison to perform: BRCOND_LT, GT or EQ
+    offset : 7      # Absolute value of target PC offset w.r.t. current PC, expressed in words
+    sign : 1        # Sign of target PC offset: 0: positive, 1: negative
+    sub_opcode : 3  # Sub opcode (SUB_OPCODE_BS)
+    opcode : 4      # Opcode (OPCODE_BRANCH)
+""")
+
+
+_end = make_ins("""
+    wakeup : 1      # Set to 1 to wake up chip
+    unused : 24     # Unused
+    sub_opcode : 3  # Sub opcode (SUB_OPCODE_END)
+    opcode : 4      # Opcode (OPCODE_END)
+""")
+
+
+_sleep = make_ins("""
+    cycle_sel : 4   # Select which one of SARADC_ULP_CP_SLEEP_CYCx_REG to get the sleep duration from
+    unused : 21     # Unused
+    sub_opcode : 3  # Sub opcode (SUB_OPCODE_SLEEP)
+    opcode : 4      # Opcode (OPCODE_END)
+""")
+
+
+_halt = make_ins("""
+    unused : 28     # Unused
+    opcode : 4      # Opcode (OPCODE_HALT)
+""")
+
+
+_ld = make_ins("""
+    dreg : 2        # Register where the data should be loaded to
+    sreg : 2        # Register which contains address in RTC memory (expressed in words)
+    unused1 : 6     # Unused
+    offset : 11     # Offset to add to sreg
+    unused2 : 7     # Unused
+    opcode : 4      # Opcode (OPCODE_LD)
+""")
+
+
+# assembler opcode definitions
+
+REG, IMM, COND, SYM = 0, 1, 2, 3
+ARG = namedtuple('ARG', ('type', 'value', 'raw'))
+
+
+def eval_arg(arg):
+    parts = []
+    for token in split_tokens(arg):
+        if symbols.has_sym(token):
+            _, _, sym_value = symbols.get_sym(token)
+            parts.append(str(sym_value))
+        else:
+            parts.append(token)
+    parts = "".join(parts)
+    if not validate_expression(parts):
+        raise ValueError('Unsupported expression: %s' % parts)
+    return eval(parts)
+
+
+def arg_qualify(arg):
+    """
+    look at arg and qualify its type:
+    REG(ister), IMM(ediate) value, COND(ition) or SYM(bol)
+
+    then convert REG/IMM args into an int value, e.g. 'R1' -> 1 or '0x20' -> 32.
+
+    return result as ARG namedtuple
+    """
+    arg_lower = arg.lower()
+    if len(arg) == 2:
+        if arg_lower[0] == 'r' and arg[1] in '0123456789':
+            reg = int(arg[1])
+            if 0 <= reg <= 3:
+                return ARG(REG, reg, arg)
+            raise ValueError('arg_qualify: valid registers are r0, r1, r2, r3. Given: %s' % arg)
+        if arg_lower in ['--', 'eq', 'ov', 'lt', 'gt', 'ge', 'le']:
+            return ARG(COND, arg_lower, arg)
+    try:
+        return ARG(IMM, int(arg), arg)
+    except ValueError:
+        pass
+    try:
+        entry = symbols.get_sym(arg)
+    except KeyError:
+        return ARG(IMM, int(eval_arg(arg)), arg)
+    else:
+        return ARG(SYM, entry, arg)
+
+
+def get_reg(arg):
+    if isinstance(arg, str):
+        arg = arg_qualify(arg)
+    if arg.type == REG:
+        return arg.value
+    raise TypeError('wanted: register, got: %s' % arg.raw)
+
+
+def get_imm(arg):
+    if isinstance(arg, str):
+        arg = arg_qualify(arg)
+    if arg.type == IMM:
+        return arg.value
+    if arg.type == SYM:
+        return symbols.resolve_absolute(arg.value)
+    raise TypeError('wanted: immediate, got: %s' % arg.raw)
+
+
+get_abs = get_imm
+
+
+def get_rel(arg):
+    if isinstance(arg, str):
+        arg = arg_qualify(arg)
+    if arg.type == IMM:
+        if arg.value & 3 != 0:  # bitwise version of: arg.value % 4 != 0
+            raise ValueError('Relative offset must be a multiple of 4')
+        return IMM, arg.value >> 2  # bitwise version of: arg.value // 4
+    if arg.type == SYM:
+        return SYM, symbols.resolve_relative(arg.value)
+    raise TypeError('wanted: immediate, got: %s' % arg.raw)
+
+
+def get_cond(arg):
+    if isinstance(arg, str):
+        arg = arg_qualify(arg)
+    if arg.type == COND:
+        return arg.value
+    raise TypeError('wanted: condition, got: %s' % arg.raw)
+
+
+def _soc_reg_to_ulp_periph_sel(reg):
+    # Map SoC peripheral register to periph_sel field of RD_REG and WR_REG instructions.
+    if reg < DR_REG_RTCCNTL_BASE:
+        raise ValueError("invalid register base")
+    elif reg < DR_REG_RTCIO_BASE:
+        ret = RD_REG_PERIPH_RTC_CNTL
+    elif reg < DR_REG_SENS_BASE:
+        ret = RD_REG_PERIPH_RTC_IO
+    elif reg < DR_REG_RTC_I2C_BASE:
+        ret = RD_REG_PERIPH_SENS
+    elif reg < DR_REG_IO_MUX_BASE:
+        ret = RD_REG_PERIPH_RTC_I2C
+    else:
+        raise ValueError("invalid register base")
+    return ret
+
+
+def i_reg_wr(reg, high_bit, low_bit, val):
+    reg = get_imm(reg)
+    if reg <= DR_REG_MAX_DIRECT:  # see https://github.com/espressif/binutils-esp32ulp/blob/master/gas/config/tc-esp32ulp_esp32.c
+        _wr_reg.addr = reg & 0xff
+        _wr_reg.periph_sel = (reg & 0x300) >> 8
+    else:
+        _wr_reg.addr = (reg & 0xff) >> 2
+        _wr_reg.periph_sel = _soc_reg_to_ulp_periph_sel(reg)
+    _wr_reg.data = get_imm(val)
+    _wr_reg.low = get_imm(low_bit)
+    _wr_reg.high = get_imm(high_bit)
+    _wr_reg.opcode = OPCODE_WR_REG
+    return _wr_reg.all
+
+
+def i_reg_rd(reg, high_bit, low_bit):
+    reg = get_imm(reg)
+    if reg <= DR_REG_MAX_DIRECT:  # see https://github.com/espressif/binutils-esp32ulp/blob/master/gas/config/tc-esp32ulp_esp32.c
+        _rd_reg.addr = reg & 0xff
+        _rd_reg.periph_sel = (reg & 0x300) >> 8
+    else:
+        _rd_reg.addr = (reg & 0xff) >> 2
+        _rd_reg.periph_sel = _soc_reg_to_ulp_periph_sel(reg)
+    _rd_reg.unused = 0
+    _rd_reg.low = get_imm(low_bit)
+    _rd_reg.high = get_imm(high_bit)
+    _rd_reg.opcode = OPCODE_RD_REG
+    return _rd_reg.all
+
+
+def i_i2c_rd(sub_addr, high_bit, low_bit, slave_sel):
+    _i2c.sub_addr = get_imm(sub_addr)
+    _i2c.data = 0
+    _i2c.low = get_imm(low_bit)
+    _i2c.high = get_imm(high_bit)
+    _i2c.i2c_sel = get_imm(slave_sel)
+    _i2c.unused = 0
+    _i2c.rw = 0
+    _i2c.opcode = OPCODE_I2C
+    return _i2c.all
+
+
+def i_i2c_wr(sub_addr, value, high_bit, low_bit, slave_sel):
+    _i2c.sub_addr = get_imm(sub_addr)
+    _i2c.data = get_imm(value)
+    _i2c.low = get_imm(low_bit)
+    _i2c.high = get_imm(high_bit)
+    _i2c.i2c_sel = get_imm(slave_sel)
+    _i2c.unused = 0
+    _i2c.rw = 1
+    _i2c.opcode = OPCODE_I2C
+    return _i2c.all
+
+
+def i_nop():
+    _delay.cycles = 0
+    _delay.unused = 0
+    _delay.opcode = OPCODE_DELAY
+    return _delay.all
+
+
+def i_wait(cycles):
+    _delay.cycles = get_imm(cycles)
+    _delay.unused = 0
+    _delay.opcode = OPCODE_DELAY
+    return _delay.all
+
+
+def i_tsens(reg_dest, delay):
+    _tsens.dreg = get_reg(reg_dest)
+    _tsens.delay = get_imm(delay)
+    _tsens.unused = 0
+    _tsens.opcode = OPCODE_TSENS
+    return _tsens.all
+
+
+def i_adc(reg_dest, adc_idx, mux, _not_used=None):
+    _adc.dreg = get_reg(reg_dest)
+    _adc.mux = get_imm(mux)
+    _adc.sar_sel = get_imm(adc_idx)
+    _adc.unused1 = 0
+    _adc.cycles = 0
+    _adc.unused2 = 0
+    _adc.opcode = OPCODE_ADC
+    return _adc.all
+
+
+def i_st(reg_val, reg_addr, offset):
+    _st.dreg = get_reg(reg_addr)
+    _st.sreg = get_reg(reg_val)
+    _st.unused1 = 0
+    _st.offset = get_imm(offset) // 4
+    _st.unused2 = 0
+    _st.sub_opcode = SUB_OPCODE_ST
+    _st.opcode = OPCODE_ST
+    return _st.all
+
+
+def i_halt():
+    _halt.unused = 0
+    _halt.opcode = OPCODE_HALT
+    return _halt.all
+
+
+def i_ld(reg_dest, reg_addr, offset):
+    _ld.dreg = get_reg(reg_dest)
+    _ld.sreg = get_reg(reg_addr)
+    _ld.unused1 = 0
+    _ld.offset = get_imm(offset) // 4
+    _ld.unused2 = 0
+    _ld.opcode = OPCODE_LD
+    return _ld.all
+
+
+def i_move(reg_dest, reg_imm_src):
+    # this is the only ALU instruction with 2 args: move r0, r1
+    dest = get_reg(reg_dest)
+    src = arg_qualify(reg_imm_src)
+    if src.type == REG:
+        _alu_reg.dreg = dest
+        _alu_reg.sreg = src.value
+        _alu_reg.treg = src.value  # XXX undocumented, this is the value binutils-esp32 uses
+        _alu_reg.unused = 0
+        _alu_reg.sel = ALU_SEL_MOV
+        _alu_reg.sub_opcode = SUB_OPCODE_ALU_REG
+        _alu_reg.opcode = OPCODE_ALU
+        return _alu_reg.all
+    if src.type == IMM or src.type == SYM:
+        _alu_imm.dreg = dest
+        _alu_imm.sreg = 0
+        _alu_imm.imm = get_abs(src)
+        _alu_imm.unused = 0
+        _alu_imm.sel = ALU_SEL_MOV
+        _alu_imm.sub_opcode = SUB_OPCODE_ALU_IMM
+        _alu_imm.opcode = OPCODE_ALU
+        return _alu_imm.all
+    raise TypeError('unsupported operand: %s' % src.raw)
+
+
+def _alu3(reg_dest, reg_src1, reg_imm_src2, alu_sel):
+    """
+    ALU instructions with 3 args, like e.g. add r1, r2, r3
+    """
+    dest = get_reg(reg_dest)
+    src1 = get_reg(reg_src1)
+    src2 = arg_qualify(reg_imm_src2)
+    if src2.type == REG:
+        _alu_reg.dreg = dest
+        _alu_reg.sreg = src1
+        _alu_reg.treg = src2.value
+        _alu_reg.unused = 0
+        _alu_reg.sel = alu_sel
+        _alu_reg.sub_opcode = SUB_OPCODE_ALU_REG
+        _alu_reg.opcode = OPCODE_ALU
+        return _alu_reg.all
+    if src2.type == IMM or src2.type == SYM:
+        _alu_imm.dreg = dest
+        _alu_imm.sreg = src1
+        _alu_imm.imm = get_abs(src2)
+        _alu_imm.unused = 0
+        _alu_imm.sel = alu_sel
+        _alu_imm.sub_opcode = SUB_OPCODE_ALU_IMM
+        _alu_imm.opcode = OPCODE_ALU
+        return _alu_imm.all
+    raise TypeError('unsupported operand: %s' % src2.raw)
+
+
+def i_add(reg_dest, reg_src1, reg_imm_src2):
+    return _alu3(reg_dest, reg_src1, reg_imm_src2, ALU_SEL_ADD)
+
+
+def i_sub(reg_dest, reg_src1, reg_imm_src2):
+    return _alu3(reg_dest, reg_src1, reg_imm_src2, ALU_SEL_SUB)
+
+
+def i_and(reg_dest, reg_src1, reg_imm_src2):
+    return _alu3(reg_dest, reg_src1, reg_imm_src2, ALU_SEL_AND)
+
+
+def i_or(reg_dest, reg_src1, reg_imm_src2):
+    return _alu3(reg_dest, reg_src1, reg_imm_src2, ALU_SEL_OR)
+
+
+def i_lsh(reg_dest, reg_src1, reg_imm_src2):
+    return _alu3(reg_dest, reg_src1, reg_imm_src2, ALU_SEL_LSH)
+
+
+def i_rsh(reg_dest, reg_src1, reg_imm_src2):
+    return _alu3(reg_dest, reg_src1, reg_imm_src2, ALU_SEL_RSH)
+
+
+def _alu_stage(imm, alu_sel):
+    """
+    Stage counter instructions: stage_inc / stage_dec (1 arg), stage_rst (imm 0)
+    """
+    imm = get_imm(imm)
+    _alu_cnt.unused1 = 0
+    _alu_cnt.imm = imm
+    _alu_cnt.unused2 = 0
+    _alu_cnt.sel = alu_sel
+    _alu_cnt.sub_opcode = SUB_OPCODE_ALU_CNT
+    _alu_cnt.opcode = OPCODE_ALU
+    return _alu_cnt.all
+
+
+def i_stage_inc(imm):
+    return _alu_stage(imm, ALU_SEL_INC)
+
+
+def i_stage_dec(imm):
+    return _alu_stage(imm, ALU_SEL_DEC)
+
+
+def i_stage_rst():
+    return _alu_stage('0', ALU_SEL_RST)
+
+
+def i_wake():
+    _end.wakeup = 1
+    _end.unused = 0
+    _end.sub_opcode = SUB_OPCODE_END
+    _end.opcode = OPCODE_END
+    return _end.all
+
+
+def i_sleep(timer_idx):
+    _sleep.cycle_sel = get_imm(timer_idx)
+    _sleep.unused = 0
+    _sleep.sub_opcode = SUB_OPCODE_SLEEP
+    _sleep.opcode = OPCODE_END
+    return _sleep.all
+
+
+def i_jump(target, condition='--'):
+    target = arg_qualify(target)
+    condition = get_cond(condition)
+    if condition == 'eq':
+        jump_type = BX_JUMP_TYPE_ZERO
+    elif condition == 'ov':
+        jump_type = BX_JUMP_TYPE_OVF
+    elif condition == '--':  # means unconditional
+        jump_type = BX_JUMP_TYPE_DIRECT
+    else:
+        raise ValueError("invalid flags condition")
+    if target.type == IMM or target.type == SYM:
+        _bx.dreg = 0
+        # we track label addresses in 32bit words, but immediate values are in bytes and need to get divided by 4.
+        _bx.addr = get_abs(target) if target.type == SYM else get_abs(target) >> 2  # bitwise version of "// 4"
+        _bx.unused = 0
+        _bx.reg = 0
+        _bx.type = jump_type
+        _bx.sub_opcode = SUB_OPCODE_BX
+        _bx.opcode = OPCODE_BRANCH
+        return _bx.all
+    if target.type == REG:
+        _bx.dreg = target.value
+        _bx.addr = 0
+        _bx.unused = 0
+        _bx.reg = 1
+        _bx.type = jump_type
+        _bx.sub_opcode = SUB_OPCODE_BX
+        _bx.opcode = OPCODE_BRANCH
+        return _bx.all
+    raise TypeError('unsupported operand: %s' % target.raw)
+
+
+def _jump_relr(threshold, cond, offset):
+    """
+    Equivalent of I_JUMP_RELR macro in binutils-esp32ulp
+    """
+    _br.imm = threshold
+    _br.cmp = cond
+    _br.offset = abs(offset)
+    _br.sign = 0 if offset >= 0 else 1
+    _br.sub_opcode = SUB_OPCODE_BR
+    _br.opcode = OPCODE_BRANCH
+    return _br.all
+
+
+def i_jumpr(offset, threshold, condition):
+    offset_type, offset = get_rel(offset)
+    threshold = get_imm(threshold)
+    condition = get_cond(condition)
+    if condition == 'lt':
+        cmp_op = BRCOND_LT
+    elif condition == 'ge':
+        cmp_op = BRCOND_GE
+    elif condition == 'le':  # le == lt(threshold+1)
+        threshold += 1
+        cmp_op = BRCOND_LT
+    elif condition == 'gt':  # gt == ge(threshold+1)
+        threshold += 1
+        cmp_op = BRCOND_GE
+    elif condition == 'eq':  # eq == ge(threshold) but not ge(threshold+1)
+        # jump over next JUMPR
+        skip_ins = _jump_relr(threshold + 1, BRCOND_GE, 2)
+        # jump to target
+        if (offset_type == IMM and offset < 0) or offset_type == SYM:
+            # adjust for the additional JUMPR instruction
+            # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
+            # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
+            offset -= 1
+        jump_ins = _jump_relr(threshold, BRCOND_GE, offset)
+        return (skip_ins, jump_ins)
+    else:
+        raise ValueError("invalid comparison condition")
+    return _jump_relr(threshold, cmp_op, offset)
+
+
+def _jump_rels(threshold, cond, offset):
+    """
+    Equivalent of I_JUMP_RELS macro in binutils-esp32ulp
+    """
+    _bs.imm = threshold
+    _bs.cmp = cond
+    _bs.offset = abs(offset)
+    _bs.sign = 0 if offset >= 0 else 1
+    _bs.sub_opcode = SUB_OPCODE_BS
+    _bs.opcode = OPCODE_BRANCH
+    return _bs.all
+
+
+def i_jumps(offset, threshold, condition):
+    offset_type, offset = get_rel(offset)
+    threshold = get_imm(threshold)
+    condition = get_cond(condition)
+    if condition == 'lt':
+        cmp_op = BRCOND_LT
+    elif condition == 'le':
+        cmp_op = BRCOND_LE
+    elif condition == 'ge':
+        cmp_op = BRCOND_GE
+    elif condition in ('eq', 'gt'):
+        if condition == 'eq':  # eq == le but not lt
+            skip_cond = BRCOND_LT
+            jump_cond = BRCOND_LE
+        elif condition == 'gt':  # gt == ge but not le
+            skip_cond = BRCOND_LE
+            jump_cond = BRCOND_GE
+
+        # jump over next JUMPS
+        skip_ins = _jump_rels(threshold, skip_cond, 2)
+        # jump to target
+        if (offset_type == IMM and offset < 0) or offset_type == SYM:
+            # adjust for the additional JUMPS instruction
+            # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
+            # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
+            offset -= 1
+        jump_ins = _jump_rels(threshold, jump_cond, offset)
+
+        return (skip_ins, jump_ins)
+    else:
+        raise ValueError("invalid comparison condition")
+    return _jump_rels(threshold, cmp_op, offset)
+
+
+def no_of_instr(opcode, args):
+    if opcode == 'jumpr' and get_cond(args[2]) == 'eq':
+        return 2
+
+    if opcode == 'jumps' and get_cond(args[2]) in ('eq', 'gt'):
+        return 2
+
+    return 1
diff --git a/tests/00_unit_tests.sh b/tests/00_unit_tests.sh
index 1dc05e9..a570553 100755
--- a/tests/00_unit_tests.sh
+++ b/tests/00_unit_tests.sh
@@ -4,7 +4,7 @@
 
 set -e
 
-LIST=${1:-opcodes assemble link util preprocess definesdb disassemble}
+LIST=${1:-opcodes opcodes_s2 assemble link util preprocess definesdb disassemble}
 
 for file in $LIST; do
     echo testing $file...
diff --git a/tests/opcodes_s2.py b/tests/opcodes_s2.py
new file mode 100644
index 0000000..6e64e50
--- /dev/null
+++ b/tests/opcodes_s2.py
@@ -0,0 +1,185 @@
+from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN
+from esp32_ulp.opcodes_s2 import make_ins, make_ins_struct_def
+from esp32_ulp.opcodes_s2 import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND
+from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT
+import esp32_ulp.opcodes_s2 as opcodes
+
+OPCODE_DELAY = 4
+LAYOUT_DELAY = """
+    cycles : 16     # Number of cycles to sleep
+    unused : 12     # Unused
+    opcode : 4      # Opcode (OPCODE_DELAY)
+"""
+
+
+def test_make_ins_struct_def():
+    sd = make_ins_struct_def(LAYOUT_DELAY)
+    assert set(sd) == {'cycles', 'unused', 'opcode', 'all'}
+    assert sd['cycles'] == BFUINT32 | 0 << BF_POS | 16 << BF_LEN
+    assert sd['unused'] == BFUINT32 | 16 << BF_POS | 12 << BF_LEN
+    assert sd['opcode'] == BFUINT32 | 28 << BF_POS | 4 << BF_LEN
+    assert sd['all'] == UINT32
+
+
+def test_make_ins():
+    _delay = make_ins(LAYOUT_DELAY)
+    _delay.cycles = 0x23
+    _delay.unused = 0
+    _delay.opcode = OPCODE_DELAY
+    assert _delay.cycles == 0x23
+    assert _delay.unused == 0
+    assert _delay.opcode == OPCODE_DELAY
+    assert _delay.all == 0x40000023
+
+
+def test_arg_qualify():
+    assert arg_qualify('r0') == ARG(REG, 0, 'r0')
+    assert arg_qualify('R3') == ARG(REG, 3, 'R3')
+    assert arg_qualify('0') == ARG(IMM, 0, '0')
+    assert arg_qualify('-1') == ARG(IMM, -1, '-1')
+    assert arg_qualify('1') == ARG(IMM, 1, '1')
+    assert arg_qualify('0x20') == ARG(IMM, 32, '0x20')
+    assert arg_qualify('0o100') == ARG(IMM, 64, '0o100')
+    assert arg_qualify('0b1000') == ARG(IMM, 8, '0b1000')
+    assert arg_qualify('eq') == ARG(COND, 'eq', 'eq')
+    assert arg_qualify('Eq') == ARG(COND, 'eq', 'Eq')
+    assert arg_qualify('EQ') == ARG(COND, 'eq', 'EQ')
+
+    # for the next tests, ensure the opcodes module has a SymbolTable
+    opcodes.symbols = SymbolTable({}, {}, {})
+    opcodes.symbols.set_sym('const', ABS, None, 42)  # constant as defined by .set
+    opcodes.symbols.set_sym('entry', REL, TEXT, 4)  # label pointing to code
+
+    assert arg_qualify('1+1') == ARG(IMM, 2, '1+1')
+    assert arg_qualify('const >> 1') == ARG(IMM, 21, 'const >> 1')
+    assert arg_qualify('entry') == ARG(SYM, (REL, TEXT, 4), 'entry')  # symbols should not (yet) be evaluated
+    assert arg_qualify('entry + const') == ARG(IMM, 46, 'entry + const')
+
+    # clean up
+    opcodes.symbols = None
+
+
+def test_get_reg():
+    assert get_reg('r0') == 0
+    assert get_reg('R3') == 3
+
+
+def test_get_imm():
+    assert get_imm('42') == 42
+
+
+def test_get_cond():
+    assert get_cond('Eq') == 'eq'
+
+
+def test_eval_arg():
+    opcodes.symbols = SymbolTable({}, {}, {})
+    opcodes.symbols.set_sym('const', ABS, None, 42)  # constant
+    opcodes.symbols.set_sym('raise', ABS, None, 99)  # constant using a python keyword as name (is allowed)
+
+    assert eval_arg('1+1') == 2
+    assert eval_arg('1+const') == 43
+    assert eval_arg('raise*2/3') == 66
+    assert eval_arg('raise-const') == 57
+    assert eval_arg('(raise-const)*2') == 114
+    assert eval_arg('const    % 5') == 2
+    assert eval_arg('const + 0x19af') == 0x19af + 42
+    assert eval_arg('const & ~2') == 40
+    assert eval_arg('const << 3') == 336
+    assert eval_arg('const >> 1') == 21
+    assert eval_arg('(const|4)&0xf') == 0xe
+
+    assert_raises(ValueError, eval_arg, 'evil()')
+    assert_raises(ValueError, eval_arg, 'def cafe()')
+    assert_raises(ValueError, eval_arg, '1 ^ 2')
+    assert_raises(ValueError, eval_arg, '!100')
+
+    # clean up
+    opcodes.symbols = None
+
+
+def assert_raises(exception, func, *args):
+    try:
+        func(*args)
+    except exception:
+        raised = True
+    else:
+        raised = False
+    assert raised
+
+
+def test_reg_direct_ulp_addressing():
+    """
+    Test direct ULP addressing of peripheral registers
+    input must be <= 0x3ff (10 bits)
+    periph_sel == high 2 bits from input
+    addr == low 8 bits from input
+    """
+
+    ins = make_ins("""
+    addr : 8        # Address within either RTC_CNTL, RTC_IO, or SARADC
+    periph_sel : 2  # Select peripheral: RTC_CNTL (0), RTC_IO(1), SARADC(2)
+    unused : 8      # Unused
+    low : 5         # Low bit
+    high : 5        # High bit
+    opcode : 4      # Opcode (OPCODE_RD_REG)
+    """)
+
+    ins.all = opcodes.i_reg_rd("0x0", "0", "0")
+    assert ins.periph_sel == 0
+    assert ins.addr == 0x0
+
+    ins.all = opcodes.i_reg_rd("0x012", "0", "0")
+    assert ins.periph_sel == 0
+    assert ins.addr == 0x12
+
+    ins.all = opcodes.i_reg_rd("0x123", "0", "0")
+    assert ins.periph_sel == 1
+    assert ins.addr == 0x23
+
+    ins.all = opcodes.i_reg_rd("0x2ee", "0", "0")
+    assert ins.periph_sel == 2
+    assert ins.addr == 0xee
+
+    ins.all = opcodes.i_reg_rd("0x3ff", "0", "0")
+    assert ins.periph_sel == 3
+    assert ins.addr == 0xff
+
+    # anything bigger than 0x3ff must be a valid full address
+    assert_raises(ValueError, opcodes.i_reg_rd, "0x400", "0", "0")
+
+
+def test_reg_address_translations():
+    """
+    Test addressing of peripheral registers using full DPORT bus addresses
+    """
+
+    ins = make_ins("""
+    addr : 8        # Address within either RTC_CNTL, RTC_IO, or SARADC
+    periph_sel : 2  # Select peripheral: RTC_CNTL (0), RTC_IO(1), SARADC(2)
+    unused : 8      # Unused
+    low : 5         # Low bit
+    high : 5        # High bit
+    opcode : 4      # Opcode (OPCODE_RD_REG)
+    """)
+
+    # direct ULP address is derived from full address as follows:
+    # full:0x3ff484a8 == ulp:(0x3ff484a8-DR_REG_RTCCNTL_BASE) / 4
+    # full:0x3ff484a8 == ulp:(0x3ff484a8-0x3ff48000) / 4
+    # full:0x3ff484a8 == ulp:0x4a8 / 4
+    # full:0x3ff484a8 == ulp:0x12a
+    # see: https://github.com/espressif/binutils-esp32ulp/blob/249ec34/gas/config/tc-esp32ulp_esp32.c#L149
+    ins.all = opcodes.i_reg_rd("0x3ff484a8", "0", "0")
+    assert ins.periph_sel == 1  # high 2 bits of 0x12a
+    assert ins.addr == 0x2a  # low 8 bits of 0x12a
+
+
+test_make_ins_struct_def()
+test_make_ins()
+test_arg_qualify()
+test_get_reg()
+test_get_imm()
+test_get_cond()
+test_eval_arg()
+test_reg_direct_ulp_addressing()
+test_reg_address_translations()

From 9f04bd7f02f9d063b9fa3982cc49ce64e72096a3 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Sun, 9 Jul 2023 22:14:15 +0300
Subject: [PATCH 02/20] Allow selecting cpu to assemble for from cmdline

Select cpu with -c when running the assembler (--mcpu as used by
Espressif's esp32ulp-elf-as also works). The possible values are
'esp32' and 'esp32s2'. (Note esp32s2 also works for the ESP32-S3,
because those two MCUs share the same ULP-FSM binary format).

If no cpu is specified the original 'esp32' will be used as before.
---
 docs/index.rst        |  9 ++++++++-
 esp32_ulp/__init__.py |  8 ++++----
 esp32_ulp/__main__.py | 14 +++++++++++---
 esp32_ulp/assemble.py | 21 +++++++++++++++------
 examples/blink.py     |  2 +-
 examples/counter.py   |  2 +-
 examples/readgpio.py  |  2 +-
 7 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index 314bc68..16d18dc 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -58,6 +58,13 @@ follows:
    cd micropython-esp32-ulp
    micropython -m esp32_ulp path/to/code.S  # this results in path/to/code.ulp
 
+The assembler supports selecting a CPU to assemble for using the ``-c`` option
+(valid CPUs are ``esp32`` and ``esp32s2``):
+
+.. code-block:: shell
+
+   micropython -m esp32_ulp -c esp32s2 path/to/code.S  # assemble for an ESP32-S2
+
 
 More examples
 +++++++++++++
@@ -91,7 +98,7 @@ That file can then be loaded directly without assembling the source again.
    .. code-block:: python
 
       import esp32_ulp
-      esp32_ulp.assemble_file('code.S')  # this results in code.ulp
+      esp32_ulp.assemble_file('code.S', cpu='esp32')  # this results in code.ulp
 
 2. The above prints out the offsets of all global symbols/labels. For the next
    step, you will need to note down the offset of the label, which represents
diff --git a/esp32_ulp/__init__.py b/esp32_ulp/__init__.py
index dddafc8..1536c16 100644
--- a/esp32_ulp/__init__.py
+++ b/esp32_ulp/__init__.py
@@ -6,8 +6,8 @@
 garbage_collect('after import')
 
 
-def src_to_binary(src):
-    assembler = Assembler()
+def src_to_binary(src, cpu):
+    assembler = Assembler(cpu)
     src = preprocess(src)
     assembler.assemble(src, remove_comments=False)  # comments already removed by preprocessor
     garbage_collect('before symbols export')
@@ -19,11 +19,11 @@ def src_to_binary(src):
     return make_binary(text, data, bss_len)
 
 
-def assemble_file(filename):
+def assemble_file(filename, cpu):
     with open(filename) as f:
         src = f.read()
 
-    binary = src_to_binary(src)
+    binary = src_to_binary(src, cpu)
 
     if filename.endswith('.s') or filename.endswith('.S'):
         filename = filename[:-2]
diff --git a/esp32_ulp/__main__.py b/esp32_ulp/__main__.py
index 6f69bea..b28ea78 100644
--- a/esp32_ulp/__main__.py
+++ b/esp32_ulp/__main__.py
@@ -2,10 +2,18 @@
 from . import assemble_file
 
 
-def main(fn):
-    assemble_file(fn)
+def main(fn, cpu):
+    assemble_file(fn, cpu)
 
 
 if __name__ == '__main__':
-    main(sys.argv[1])
+    cpu = 'esp32'
+    filename = sys.argv[1]
+    if len(sys.argv) > 3:
+        if sys.argv[1] in ('-c', '--mcpu'):
+            cpu = sys.argv[2].lower()
+            if cpu not in ('esp32', 'esp32s2'):
+                raise ValueError('Invalid cpu')
+            filename = sys.argv[3]
+    main(filename, cpu)
 
diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py
index 0ec11ec..7d1101f 100644
--- a/esp32_ulp/assemble.py
+++ b/esp32_ulp/assemble.py
@@ -3,7 +3,6 @@
 """
 
 import re
-from . import opcodes
 from .nocomment import remove_comments as do_remove_comments
 from .util import garbage_collect
 
@@ -88,9 +87,19 @@ def set_global(self, symbol):
 
 class Assembler:
 
-    def __init__(self, symbols=None, bases=None, globals=None):
+    def __init__(self, cpu='esp32', symbols=None, bases=None, globals=None):
+        if cpu == 'esp32':
+            opcode_module = 'opcodes'
+        elif cpu == 'esp32s2':
+            opcode_module = 'opcodes_s2'
+        else:
+            raise ValueError('Invalid cpu')
+
+        relative_import = 1 if '/' in __file__ else 0
+        self.opcodes = __import__(opcode_module, None, None, [], relative_import)
+
         self.symbols = SymbolTable(symbols or {}, bases or {}, globals or {})
-        opcodes.symbols = self.symbols  # XXX dirty hack
+        self.opcodes.symbols = self.symbols  # XXX dirty hack
 
         # regex for parsing assembly lines
         # format: [[whitespace]label:][whitespace][opcode[whitespace arg[,arg...]]]
@@ -223,7 +232,7 @@ def d_align(self, align=4, fill=None):
             self.fill(self.section, amount, fill)
 
     def d_set(self, symbol, expr):
-        value = int(opcodes.eval_arg(expr))
+        value = int(self.opcodes.eval_arg(expr))
         self.symbols.set_sym(symbol, ABS, None, value)
 
     def d_global(self, symbol):
@@ -264,13 +273,13 @@ def assembler_pass(self, lines):
                 else:
                     # machine instruction
                     opcode_lower = opcode.lower()
-                    func = getattr(opcodes, 'i_' + opcode_lower, None)
+                    func = getattr(self.opcodes, 'i_' + opcode_lower, None)
                     if func is not None:
                         if self.a_pass == 1:
                             # during the first pass, symbols are not all known yet.
                             # so we add empty instructions to the section, to determine
                             # section sizes and symbol offsets for pass 2.
-                            result = (0,) * opcodes.no_of_instr(opcode_lower, args)
+                            result = (0,) * self.opcodes.no_of_instr(opcode_lower, args)
                         else:
                             result = func(*args)
 
diff --git a/examples/blink.py b/examples/blink.py
index 165f4e9..1350bc2 100644
--- a/examples/blink.py
+++ b/examples/blink.py
@@ -93,7 +93,7 @@
   halt  # go back to sleep until next wakeup period
 """
 
-binary = src_to_binary(source)
+binary = src_to_binary(source, cpu="esp32")  # cpu is esp32 or esp32s2
 
 load_addr, entry_addr = 0, 8
 
diff --git a/examples/counter.py b/examples/counter.py
index 77fb146..057e66d 100644
--- a/examples/counter.py
+++ b/examples/counter.py
@@ -25,7 +25,7 @@
             halt             # halt ULP co-prozessor (until it gets waked up again)
 """
 
-binary = src_to_binary(source)
+binary = src_to_binary(source, cpu="esp32")  # cpu is esp32 or esp32s2
 
 load_addr, entry_addr = 0, 4
 
diff --git a/examples/readgpio.py b/examples/readgpio.py
index 66c9b05..8ac9436 100644
--- a/examples/readgpio.py
+++ b/examples/readgpio.py
@@ -52,7 +52,7 @@
             halt
 """
 
-binary = src_to_binary(source)
+binary = src_to_binary(source, cpu="esp32")  # cpu is esp32 or esp32s2
 
 load_addr, entry_addr = 0, 4
 

From 4bf6389d2da4a4f992bee67b70bc428d57ec7223 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Sun, 9 Jul 2023 22:14:15 +0300
Subject: [PATCH 03/20] Implement ESP32-S2 opcodes

In summary, these are the most important changes:
* the `sub_opcode` field on all opcodes except ST has been shrunk
  from 3 bits down to 2 bits
* the LD and ST opcodes support more variations (new instructions)
* branching instructions have changed. The JUMPR instruction uses
  different comparisons and the JUMPS instruction now implements all
  comparisons in hardware, without requiring multiple instructions
  to emulate any comparison.
* The SLEEP instruction, which selected a sleep timer, no longer exists.
  Since Espressif chose to simply convert SLEEP instructions into WAIT
  instructions, we do the same.

Update integration tests to run for both the ESP32 and ESP32-S2.
---
 esp32_ulp/opcodes_s2.py      | 219 +++++++++++++++++++----------------
 tests/01_compat_tests.sh     |  79 +++++++------
 tests/02_compat_rtc_tests.sh | 115 +++++++++---------
 3 files changed, 224 insertions(+), 189 deletions(-)

diff --git a/esp32_ulp/opcodes_s2.py b/esp32_ulp/opcodes_s2.py
index 6910081..dbaeb22 100644
--- a/esp32_ulp/opcodes_s2.py
+++ b/esp32_ulp/opcodes_s2.py
@@ -23,12 +23,16 @@
 RD_REG_PERIPH_RTC_I2C = 3
 
 OPCODE_I2C = 3
+SUB_OPCODE_I2C_RD = 0
+SUB_OPCODE_I2C_WR = 1
 
 OPCODE_DELAY = 4
 
 OPCODE_ADC = 5
 
 OPCODE_ST = 6
+SUB_OPCODE_ST_AUTO = 1
+SUB_OPCODE_ST_OFFSET = 3
 SUB_OPCODE_ST = 4
 
 OPCODE_ALU = 7
@@ -42,24 +46,27 @@
 ALU_SEL_LSH = 5
 ALU_SEL_RSH = 6
 SUB_OPCODE_ALU_CNT = 2
-ALU_SEL_INC = 0
-ALU_SEL_DEC = 1
-ALU_SEL_RST = 2
+ALU_SEL_STAGE_INC = 0
+ALU_SEL_STAGE_DEC = 1
+ALU_SEL_STAGE_RST = 2
 
 OPCODE_BRANCH = 8
 # https://github.com/espressif/binutils-esp32ulp/blob/d61f86f97eda43fc118df30d019fc062aaa6bc8d/include/opcode/esp32ulp_esp32.h#L85
-SUB_OPCODE_BX = 0
-SUB_OPCODE_BR = 1
+SUB_OPCODE_B = 0
+SUB_OPCODE_BX = 1
 SUB_OPCODE_BS = 2
 BX_JUMP_TYPE_DIRECT = 0
 BX_JUMP_TYPE_ZERO = 1
 BX_JUMP_TYPE_OVF = 2
 # https://github.com/espressif/binutils-esp32ulp/blob/d61f86f97eda43fc118df30d019fc062aaa6bc8d/gas/config/tc-esp32ulp.h#L91
-BRCOND_LT = 0
-BRCOND_GE = 1
-BRCOND_LE = 2
-BRCOND_EQ = 3
-BRCOND_GT = 4
+B_CMP_L = 0
+B_CMP_G = 1
+B_CMP_E = 2
+JUMPS_EQ = 4
+JUMPS_GT = 3
+JUMPS_LT = 1
+JUMPS_LE = 5
+JUMPS_GE = 7
 
 OPCODE_END = 9
 SUB_OPCODE_END = 0
@@ -161,7 +168,10 @@ def make_ins(layout):
 _st = make_ins("""
     sreg : 2        # Register which contains data to store
     dreg : 2        # Register which contains address in RTC memory (expressed in words)
-    unused1 : 6     # Unused
+    label : 2       # Data label
+    upper : 1       # Write low (0) or high (1) half-word
+    wr_way : 2      # Write the (0) full-word or with label (1) or without label (3)
+    unused1 : 1     # Unused
     offset : 11     # Offset to add to dreg
     unused2 : 4     # Unused
     sub_opcode : 3  # Sub opcode (SUB_OPCODE_ST)
@@ -173,9 +183,10 @@ def make_ins(layout):
     dreg : 2        # Destination register
     sreg : 2        # Register with operand A
     treg : 2        # Register with operand B
-    unused : 15     # Unused
+    unused1 : 15    # Unused
     sel : 4         # Operation to perform, one of ALU_SEL_xxx
-    sub_opcode : 3  # Sub opcode (SUB_OPCODE_ALU_REG)
+    unused2 : 1     # Unused
+    sub_opcode : 2  # Sub opcode (SUB_OPCODE_ALU_REG)
     opcode : 4      # Opcode (OPCODE_ALU)
 """)
 
@@ -184,9 +195,10 @@ def make_ins(layout):
     dreg : 2        # Destination register
     sreg : 2        # Register with operand A
     imm : 16        # Immediate value of operand B
-    unused : 1      # Unused
+    unused1 : 1     # Unused
     sel : 4         # Operation to perform, one of ALU_SEL_xxx
-    sub_opcode : 3  # Sub opcode (SUB_OPCODE_ALU_IMM)
+    unused2 : 1     # Unused
+    sub_opcode : 2  # Sub opcode (SUB_OPCODE_ALU_IMM)
     opcode : 4      # Opcode (OPCODE_ALU)
 """)
 
@@ -196,7 +208,8 @@ def make_ins(layout):
     imm : 8         # Immediate value (to inc / dec stage counter)
     unused2 : 9     # Unused
     sel : 4         # Operation to perform, one of ALU_SEL_xxx
-    sub_opcode : 3  # Sub opcode (SUB_OPCODE_ALU_CNT)
+    unused3 : 1     # Unused
+    sub_opcode : 2  # Sub opcode (SUB_OPCODE_ALU_CNT)
     opcode : 4      # Opcode (OPCODE_ALU)
 """)
 
@@ -204,20 +217,21 @@ def make_ins(layout):
 _bx = make_ins("""
     dreg : 2        # Register which contains target PC, expressed in words (used if .reg == 1)
     addr : 11       # Target PC, expressed in words (used if .reg == 0)
-    unused : 8      # Unused
+    unused1 : 8     # Unused
     reg : 1         # Target PC in register (1) or immediate (0)
     type : 3        # Jump condition (BX_JUMP_TYPE_xxx)
-    sub_opcode : 3  # Sub opcode (SUB_OPCODE_BX)
+    unused2 : 1     # Unused
+    sub_opcode : 2  # Sub opcode (SUB_OPCODE_BX)
     opcode : 4      # Opcode (OPCODE_BRANCH)
 """)
 
 
-_br = make_ins("""
+_b = make_ins("""
     imm : 16        # Immediate value to compare against
-    cmp : 1         # Comparison to perform: BRCOND_LT or BRCOND_GE
+    cmp : 2         # Comparison to perform: BRCOND_LT or BRCOND_GE
     offset : 7      # Absolute value of target PC offset w.r.t. current PC, expressed in words
     sign : 1        # Sign of target PC offset: 0: positive, 1: negative
-    sub_opcode : 3  # Sub opcode (SUB_OPCODE_BR)
+    sub_opcode : 2  # Sub opcode (SUB_OPCODE_B)
     opcode : 4      # Opcode (OPCODE_BRANCH)
 """)
 
@@ -225,26 +239,18 @@ def make_ins(layout):
 _bs = make_ins("""
     imm : 8         # Immediate value to compare against
     unused : 7      # Unused
-    cmp : 2         # Comparison to perform: BRCOND_LT, GT or EQ
+    cmp : 3         # Comparison to perform: BRCOND_LT, GT or EQ
     offset : 7      # Absolute value of target PC offset w.r.t. current PC, expressed in words
     sign : 1        # Sign of target PC offset: 0: positive, 1: negative
-    sub_opcode : 3  # Sub opcode (SUB_OPCODE_BS)
+    sub_opcode : 2  # Sub opcode (SUB_OPCODE_BS)
     opcode : 4      # Opcode (OPCODE_BRANCH)
 """)
 
 
 _end = make_ins("""
     wakeup : 1      # Set to 1 to wake up chip
-    unused : 24     # Unused
-    sub_opcode : 3  # Sub opcode (SUB_OPCODE_END)
-    opcode : 4      # Opcode (OPCODE_END)
-""")
-
-
-_sleep = make_ins("""
-    cycle_sel : 4   # Select which one of SARADC_ULP_CP_SLEEP_CYCx_REG to get the sleep duration from
-    unused : 21     # Unused
-    sub_opcode : 3  # Sub opcode (SUB_OPCODE_SLEEP)
+    unused : 25     # Unused
+    sub_opcode : 2  # Sub opcode (SUB_OPCODE_END)
     opcode : 4      # Opcode (OPCODE_END)
 """)
 
@@ -260,7 +266,8 @@ def make_ins(layout):
     sreg : 2        # Register which contains address in RTC memory (expressed in words)
     unused1 : 6     # Unused
     offset : 11     # Offset to add to sreg
-    unused2 : 7     # Unused
+    unused2 : 6     # Unused
+    rd_upper : 1    # Read low (0) or high (1) half-word
     opcode : 4      # Opcode (OPCODE_LD)
 """)
 
@@ -460,9 +467,12 @@ def i_adc(reg_dest, adc_idx, mux, _not_used=None):
     return _adc.all
 
 
-def i_st(reg_val, reg_addr, offset):
+def i_st(reg_val, reg_addr, offset): ## FIXME do via i_st_manual
     _st.dreg = get_reg(reg_addr)
     _st.sreg = get_reg(reg_val)
+    _st.label = 0
+    _st.upper = 0
+    _st.wr_way = 3
     _st.unused1 = 0
     _st.offset = get_imm(offset) // 4
     _st.unused2 = 0
@@ -477,12 +487,13 @@ def i_halt():
     return _halt.all
 
 
-def i_ld(reg_dest, reg_addr, offset):
+def i_ld(reg_dest, reg_addr, offset): ## FIXME do via i_ld_manual
     _ld.dreg = get_reg(reg_dest)
     _ld.sreg = get_reg(reg_addr)
     _ld.unused1 = 0
     _ld.offset = get_imm(offset) // 4
     _ld.unused2 = 0
+    _ld.rd_upper = 0
     _ld.opcode = OPCODE_LD
     return _ld.all
 
@@ -495,8 +506,9 @@ def i_move(reg_dest, reg_imm_src):
         _alu_reg.dreg = dest
         _alu_reg.sreg = src.value
         _alu_reg.treg = src.value  # XXX undocumented, this is the value binutils-esp32 uses
-        _alu_reg.unused = 0
+        _alu_reg.unused1 = 0
         _alu_reg.sel = ALU_SEL_MOV
+        _alu_reg.unused2 = 0
         _alu_reg.sub_opcode = SUB_OPCODE_ALU_REG
         _alu_reg.opcode = OPCODE_ALU
         return _alu_reg.all
@@ -504,8 +516,9 @@ def i_move(reg_dest, reg_imm_src):
         _alu_imm.dreg = dest
         _alu_imm.sreg = 0
         _alu_imm.imm = get_abs(src)
-        _alu_imm.unused = 0
+        _alu_imm.unused1 = 0
         _alu_imm.sel = ALU_SEL_MOV
+        _alu_imm.unused2 = 0
         _alu_imm.sub_opcode = SUB_OPCODE_ALU_IMM
         _alu_imm.opcode = OPCODE_ALU
         return _alu_imm.all
@@ -523,8 +536,9 @@ def _alu3(reg_dest, reg_src1, reg_imm_src2, alu_sel):
         _alu_reg.dreg = dest
         _alu_reg.sreg = src1
         _alu_reg.treg = src2.value
-        _alu_reg.unused = 0
+        _alu_reg.unused1 = 0
         _alu_reg.sel = alu_sel
+        _alu_reg.unused2 = 0
         _alu_reg.sub_opcode = SUB_OPCODE_ALU_REG
         _alu_reg.opcode = OPCODE_ALU
         return _alu_reg.all
@@ -532,8 +546,9 @@ def _alu3(reg_dest, reg_src1, reg_imm_src2, alu_sel):
         _alu_imm.dreg = dest
         _alu_imm.sreg = src1
         _alu_imm.imm = get_abs(src2)
-        _alu_imm.unused = 0
+        _alu_imm.unused1 = 0
         _alu_imm.sel = alu_sel
+        _alu_imm.unused2 = 0
         _alu_imm.sub_opcode = SUB_OPCODE_ALU_IMM
         _alu_imm.opcode = OPCODE_ALU
         return _alu_imm.all
@@ -579,15 +594,15 @@ def _alu_stage(imm, alu_sel):
 
 
 def i_stage_inc(imm):
-    return _alu_stage(imm, ALU_SEL_INC)
+    return _alu_stage(imm, ALU_SEL_STAGE_INC)
 
 
 def i_stage_dec(imm):
-    return _alu_stage(imm, ALU_SEL_DEC)
+    return _alu_stage(imm, ALU_SEL_STAGE_DEC)
 
 
 def i_stage_rst():
-    return _alu_stage('0', ALU_SEL_RST)
+    return _alu_stage('0', ALU_SEL_STAGE_RST)
 
 
 def i_wake():
@@ -598,12 +613,11 @@ def i_wake():
     return _end.all
 
 
-def i_sleep(timer_idx):
-    _sleep.cycle_sel = get_imm(timer_idx)
-    _sleep.unused = 0
-    _sleep.sub_opcode = SUB_OPCODE_SLEEP
-    _sleep.opcode = OPCODE_END
-    return _sleep.all
+# NOTE: Technically the S2 no longer has the SLEEP instruction, but
+# we're keeping it, since esp32ulp-elf-as happily assembles it.
+# It's now emitted as a WAIT so we'll do the same.
+def i_sleep(cycles):
+    return i_wait(cycles)
 
 
 def i_jump(target, condition='--'):
@@ -621,18 +635,20 @@ def i_jump(target, condition='--'):
         _bx.dreg = 0
         # we track label addresses in 32bit words, but immediate values are in bytes and need to get divided by 4.
         _bx.addr = get_abs(target) if target.type == SYM else get_abs(target) >> 2  # bitwise version of "// 4"
-        _bx.unused = 0
+        _bx.unused1 = 0
         _bx.reg = 0
         _bx.type = jump_type
         _bx.sub_opcode = SUB_OPCODE_BX
+        _bx.unused2 = 0
         _bx.opcode = OPCODE_BRANCH
         return _bx.all
     if target.type == REG:
         _bx.dreg = target.value
         _bx.addr = 0
-        _bx.unused = 0
+        _bx.unused1 = 0
         _bx.reg = 1
         _bx.type = jump_type
+        _bx.unused2 = 0
         _bx.sub_opcode = SUB_OPCODE_BX
         _bx.opcode = OPCODE_BRANCH
         return _bx.all
@@ -641,42 +657,45 @@ def i_jump(target, condition='--'):
 
 def _jump_relr(threshold, cond, offset):
     """
-    Equivalent of I_JUMP_RELR macro in binutils-esp32ulp
+    Equivalent of I_JUMP_RELR macro in binutils-gdb esp32ulp
     """
-    _br.imm = threshold
-    _br.cmp = cond
-    _br.offset = abs(offset)
-    _br.sign = 0 if offset >= 0 else 1
-    _br.sub_opcode = SUB_OPCODE_BR
-    _br.opcode = OPCODE_BRANCH
-    return _br.all
+    _b.imm = threshold
+    _b.cmp = cond
+    _b.offset = abs(offset)
+    _b.sign = 0 if offset >= 0 else 1
+    _b.sub_opcode = SUB_OPCODE_B
+    _b.opcode = OPCODE_BRANCH
+    return _b.all
 
 
 def i_jumpr(offset, threshold, condition):
     offset_type, offset = get_rel(offset)
     threshold = get_imm(threshold)
     condition = get_cond(condition)
-    if condition == 'lt':
-        cmp_op = BRCOND_LT
-    elif condition == 'ge':
-        cmp_op = BRCOND_GE
-    elif condition == 'le':  # le == lt(threshold+1)
-        threshold += 1
-        cmp_op = BRCOND_LT
-    elif condition == 'gt':  # gt == ge(threshold+1)
-        threshold += 1
-        cmp_op = BRCOND_GE
-    elif condition == 'eq':  # eq == ge(threshold) but not ge(threshold+1)
-        # jump over next JUMPR
-        skip_ins = _jump_relr(threshold + 1, BRCOND_GE, 2)
+    if condition in ('le', 'ge'):
+        if condition == 'le':
+            cmp_op = B_CMP_L
+        elif condition == 'ge':
+            cmp_op = B_CMP_G
+
         # jump to target
+        first_ins = _jump_relr(threshold, cmp_op, offset)
+
+        # 2nd JUMPR: also jump to target on equality (le = lt|eq, ge = gt|eq)
         if (offset_type == IMM and offset < 0) or offset_type == SYM:
             # adjust for the additional JUMPR instruction
             # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
             # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
             offset -= 1
-        jump_ins = _jump_relr(threshold, BRCOND_GE, offset)
-        return (skip_ins, jump_ins)
+        second_ins = _jump_relr(threshold, B_CMP_E, offset)
+        return (first_ins, second_ins)
+
+    elif condition == 'lt':
+        cmp_op = B_CMP_L
+    elif condition == 'gt':
+        cmp_op = B_CMP_G
+    elif condition == 'eq':
+        cmp_op = B_CMP_E
     else:
         raise ValueError("invalid comparison condition")
     return _jump_relr(threshold, cmp_op, offset)
@@ -684,7 +703,7 @@ def i_jumpr(offset, threshold, condition):
 
 def _jump_rels(threshold, cond, offset):
     """
-    Equivalent of I_JUMP_RELS macro in binutils-esp32ulp
+    Equivalent of I_JUMP_RELS macro in binutils-gdb esp32ulp
     """
     _bs.imm = threshold
     _bs.cmp = cond
@@ -697,43 +716,41 @@ def _jump_rels(threshold, cond, offset):
 
 def i_jumps(offset, threshold, condition):
     offset_type, offset = get_rel(offset)
+    if (offset_type == IMM):
+        # This makes our assembler behave exactly like binutils-gdb, even
+        # though its behaviour is incorrect. binutils-gdb does not divide
+        # immediate offsets by 4 (i.e. it does not convert bytes to words)
+        # for JUMPS instructions, even though it does so for all other JUMP
+        # instructions, and even though the assembler for the original
+        # ESP32 divides immediate offsets by 4 for JUMPS instructions.
+        #
+        # The issue is reported as a pull-request with a fix here:
+        # https://github.com/espressif/binutils-gdb/pull/1
+        #
+        # Once the issue is fixed in binutils-gdb, this code here should be
+        # removed.
+        offset = offset << 2  # bug in binutils-gdb
+
     threshold = get_imm(threshold)
     condition = get_cond(condition)
     if condition == 'lt':
-        cmp_op = BRCOND_LT
+        cmp_op = JUMPS_LT
     elif condition == 'le':
-        cmp_op = BRCOND_LE
+        cmp_op = JUMPS_LE
     elif condition == 'ge':
-        cmp_op = BRCOND_GE
-    elif condition in ('eq', 'gt'):
-        if condition == 'eq':  # eq == le but not lt
-            skip_cond = BRCOND_LT
-            jump_cond = BRCOND_LE
-        elif condition == 'gt':  # gt == ge but not le
-            skip_cond = BRCOND_LE
-            jump_cond = BRCOND_GE
-
-        # jump over next JUMPS
-        skip_ins = _jump_rels(threshold, skip_cond, 2)
-        # jump to target
-        if (offset_type == IMM and offset < 0) or offset_type == SYM:
-            # adjust for the additional JUMPS instruction
-            # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
-            # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
-            offset -= 1
-        jump_ins = _jump_rels(threshold, jump_cond, offset)
-
-        return (skip_ins, jump_ins)
+        cmp_op = JUMPS_GE
+    elif condition == 'eq':
+        cmp_op = JUMPS_EQ
+    elif condition == 'gt':
+        cmp_op = JUMPS_GT
     else:
         raise ValueError("invalid comparison condition")
+
     return _jump_rels(threshold, cmp_op, offset)
 
 
 def no_of_instr(opcode, args):
-    if opcode == 'jumpr' and get_cond(args[2]) == 'eq':
-        return 2
-
-    if opcode == 'jumps' and get_cond(args[2]) in ('eq', 'gt'):
+    if opcode == 'jumpr' and get_cond(args[2]) in ('le', 'ge'):
         return 2
 
     return 1
diff --git a/tests/01_compat_tests.sh b/tests/01_compat_tests.sh
index 4beb2b1..745e8e0 100755
--- a/tests/01_compat_tests.sh
+++ b/tests/01_compat_tests.sh
@@ -10,38 +10,47 @@ calc_file_hash() {
     shasum < $1 | cut -d' ' -f1
 }
 
-for src_file in $(ls -1 compat/*.S); do
-    src_name="${src_file%.S}"
-    
-    echo "Testing $src_file"
-    echo -e "\tBuilding using micropython-esp32-ulp"
-    ulp_file="${src_name}.ulp"
-    log_file="${src_name}.log"
-    micropython -m esp32_ulp $src_file 1>$log_file   # generates $ulp_file
-
-    pre_file="${src_name}.pre"
-    obj_file="${src_name}.o"
-    elf_file="${src_name}.elf"
-    bin_file="${src_name}.bin"
-
-    echo -e "\tBuilding using binutils"
-    gcc -E -o ${pre_file} $src_file
-    esp32ulp-elf-as --mcpu=esp32 -o $obj_file ${pre_file}
-    esp32ulp-elf-ld -T esp32.ulp.ld -o $elf_file $obj_file
-    esp32ulp-elf-objcopy -O binary $elf_file $bin_file
-
-    if ! diff $ulp_file $bin_file 1>/dev/null; then
-        echo -e "\tBuild outputs differ!"
-        echo ""
-        echo "Compatibility test failed for $src_file"
-        echo "micropython-esp32-ulp log:"
-        cat $log_file
-        echo "micropython-esp32-ulp output:"
-        xxd $ulp_file
-        echo "binutils output:"
-        xxd $bin_file
-        exit 1
-    else
-        echo -e "\tBuild outputs match (sha1: $(calc_file_hash $ulp_file))"
-    fi
-done
+run_tests_for_cpu() {
+    local cpu=$1
+    echo "Testing for CPU: $cpu"
+
+    for src_file in $(ls -1 compat/*.S); do
+        src_name="${src_file%.S}"
+
+        echo "Testing $src_file"
+        echo -e "\tBuilding using micropython-esp32-ulp ($cpu)"
+        ulp_file="${src_name}.ulp"
+        log_file="${src_name}.log"
+        micropython -m esp32_ulp -c $cpu $src_file 1>$log_file   # generates $ulp_file
+
+        pre_file="${src_name}.pre"
+        obj_file="${src_name}.o"
+        elf_file="${src_name}.elf"
+        bin_file="${src_name}.bin"
+
+        echo -e "\tBuilding using binutils ($cpu)"
+        gcc -E -o ${pre_file} $src_file
+        esp32ulp-elf-as --mcpu=$cpu -o $obj_file ${pre_file}
+        esp32ulp-elf-ld -T esp32.ulp.ld -o $elf_file $obj_file
+        esp32ulp-elf-objcopy -O binary $elf_file $bin_file
+
+        if ! diff $ulp_file $bin_file 1>/dev/null; then
+            echo -e "\tBuild outputs differ!"
+            echo ""
+            echo "Compatibility test failed for $src_file"
+            echo "micropython-esp32-ulp log:"
+            cat $log_file
+            echo "micropython-esp32-ulp output:"
+            xxd $ulp_file
+            echo "binutils output:"
+            xxd $bin_file
+            exit 1
+        else
+            echo -e "\tBuild outputs match (sha1: $(calc_file_hash $ulp_file))"
+        fi
+    done
+    echo ""
+}
+
+run_tests_for_cpu esp32
+run_tests_for_cpu esp32s2
diff --git a/tests/02_compat_rtc_tests.sh b/tests/02_compat_rtc_tests.sh
index 1cfbe7c..9ad24b1 100755
--- a/tests/02_compat_rtc_tests.sh
+++ b/tests/02_compat_rtc_tests.sh
@@ -100,63 +100,72 @@ fetch_ulptool_examples
 fetch_binutils_esp32ulp_examples
 build_defines_db $1
 
-for src_file in ulptool/src/ulp_examples/*/*.s binutils-gdb/gas/testsuite/gas/esp32ulp/esp32/*.s; do
+run_tests_for_cpu() {
+    local cpu=$1
+    echo "Testing for CPU: $cpu"
 
-    src_name="${src_file%.s}"
-    src_dir="${src_name%/*}"
+    for src_file in ulptool/src/ulp_examples/*/*.s binutils-gdb/gas/testsuite/gas/esp32ulp/esp32/*.s; do
 
-    echo "Testing $src_file"
+        src_name="${src_file%.s}"
+        src_dir="${src_name%/*}"
 
-    test_name="${src_name##*/}"
+        echo "Testing $src_file"
 
-    # for now, skip files that contain unsupported things (macros)
-    for I in i2c i2c_dev stack i2c_wr test1 test_jumpr test_macro; do
-        if [ "${test_name}" = "$I" ]; then
-            echo -e "\tSkipping... not yet supported"
-            continue 2
+        test_name="${src_name##*/}"
+
+        # for now, skip files that contain unsupported things (macros)
+        for I in i2c i2c_dev stack i2c_wr test1 test_jumpr test_macro; do
+            if [ "${test_name}" = "$I" ]; then
+                echo -e "\tSkipping... not yet supported"
+                continue 2
+            fi
+        done
+
+        # BEGIN: work around known issues with binutils-gdb (esp32ulp)
+        ulp_file="${src_name}.ulp"
+
+        if patch_test ${test_name}; then
+            # switch to the patched file instead of original one
+            src_file="${src_dir}/${test_name}.tmp"
+            src_name="${src_file%.tmp}"
+            ulp_file="${src_name}.tmp.ulp"  # when extension is not .s, micropython-esp32-ulp doesn't remove original extension
+        fi
+        # END: work around known issues with binutils-gdb (esp32ulp)
+
+        echo -e "\tBuilding using micropython-esp32-ulp ($cpu)"
+        log_file="${src_name}.log"
+        micropython -m esp32_ulp -c $cpu $src_file 1>$log_file   # generates $ulp_file
+
+        pre_file="${src_name}.pre"
+        obj_file="${src_name}.o"
+        elf_file="${src_name}.elf"
+        bin_file="${src_name}.bin"
+
+        echo -e "\tBuilding using binutils ($cpu)"
+        gcc -I esp-idf/components/soc/esp32/include -I esp-idf/components/esp_common/include \
+            -x assembler-with-cpp \
+            -E -o ${pre_file} $src_file
+        esp32ulp-elf-as --mcpu=$cpu -o $obj_file ${pre_file}
+        esp32ulp-elf-ld -T esp32.ulp.ld -o $elf_file $obj_file
+        esp32ulp-elf-objcopy -O binary $elf_file $bin_file
+
+        if ! diff $ulp_file $bin_file 1>/dev/null; then
+            echo -e "\tBuild outputs differ!"
+            echo ""
+            echo "Compatibility test failed for $src_file"
+            echo "micropython-esp32-ulp log:"
+            cat $log_file
+            echo "micropython-esp32-ulp output:"
+            xxd $ulp_file
+            echo "binutils output:"
+            xxd $bin_file
+            exit 1
+        else
+            echo -e "\tBuild outputs match (sha1: $(calc_file_hash $ulp_file))"
         fi
     done
+    echo ""
+}
 
-    # BEGIN: work around known issues with binutils-gdb (esp32ulp)
-    ulp_file="${src_name}.ulp"
-
-    if patch_test ${test_name}; then
-        # switch to the patched file instead of original one
-        src_file="${src_dir}/${test_name}.tmp"
-        src_name="${src_file%.tmp}"
-        ulp_file="${src_name}.tmp.ulp"  # when extension is not .s, micropython-esp32-ulp doesn't remove original extension
-    fi
-    # END: work around known issues with binutils-gdb (esp32ulp)
-
-    echo -e "\tBuilding using micropython-esp32-ulp"
-    log_file="${src_name}.log"
-    micropython -m esp32_ulp $src_file 1>$log_file   # generates $ulp_file
-
-    pre_file="${src_name}.pre"
-    obj_file="${src_name}.o"
-    elf_file="${src_name}.elf"
-    bin_file="${src_name}.bin"
-
-    echo -e "\tBuilding using binutils"
-    gcc -I esp-idf/components/soc/esp32/include -I esp-idf/components/esp_common/include \
-        -x assembler-with-cpp \
-        -E -o ${pre_file} $src_file
-    esp32ulp-elf-as --mcpu=esp32 -o $obj_file ${pre_file}
-    esp32ulp-elf-ld -T esp32.ulp.ld -o $elf_file $obj_file
-    esp32ulp-elf-objcopy -O binary $elf_file $bin_file
-
-    if ! diff $ulp_file $bin_file 1>/dev/null; then
-        echo -e "\tBuild outputs differ!"
-        echo ""
-        echo "Compatibility test failed for $src_file"
-        echo "micropython-esp32-ulp log:"
-        cat $log_file
-        echo "micropython-esp32-ulp output:"
-        xxd $ulp_file
-        echo "binutils output:"
-        xxd $bin_file
-        exit 1
-    else
-        echo -e "\tBuild outputs match (sha1: $(calc_file_hash $ulp_file))"
-    fi
-done
+run_tests_for_cpu esp32
+run_tests_for_cpu esp32s2

From 2e69c12a5e977910fc95e6342e5267de7d39ec9d Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 11 Jul 2023 09:13:51 +0300
Subject: [PATCH 04/20] Move instruction decoding out of disassembler

The disassembler is now mainly the command line tool, which deals
with interpreting user input, formatting output, etc.

This allows us to add decoding logic for new CPUs (such as the S2), and the
disassembler can then dynamically load the correct decoding module.
---
 tests/00_unit_tests.sh              |   2 +-
 tests/03_disassembler_tests.sh      |   4 +-
 tests/{disassemble.py => decode.py} |   2 +-
 tools/decode.py                     | 149 +++++++++++++++++++++++++++
 tools/disassemble.py                | 153 +---------------------------
 5 files changed, 154 insertions(+), 156 deletions(-)
 rename tests/{disassemble.py => decode.py} (99%)
 create mode 100644 tools/decode.py

diff --git a/tests/00_unit_tests.sh b/tests/00_unit_tests.sh
index a570553..0ae2bad 100755
--- a/tests/00_unit_tests.sh
+++ b/tests/00_unit_tests.sh
@@ -4,7 +4,7 @@
 
 set -e
 
-LIST=${1:-opcodes opcodes_s2 assemble link util preprocess definesdb disassemble}
+LIST=${1:-opcodes opcodes_s2 assemble link util preprocess definesdb decode}
 
 for file in $LIST; do
     echo testing $file...
diff --git a/tests/03_disassembler_tests.sh b/tests/03_disassembler_tests.sh
index 7c76f11..579909e 100755
--- a/tests/03_disassembler_tests.sh
+++ b/tests/03_disassembler_tests.sh
@@ -22,7 +22,7 @@ test_disassembling_a_file() {
     lst_file="${testname}.lst"
     lst_file_fixture=fixtures/${testname}${verbose}.lst
     echo -e "\tDisassembling $ulp_file using micropython-esp32-ulp disassembler"
-    micropython tools/disassemble.py $verbose $ulp_file > $lst_file
+    micropython -m tools.disassemble $verbose $ulp_file > $lst_file
 
     if ! diff $lst_file_fixture $lst_file 1>/dev/null; then
         echo -e "\tDisassembled output differs from expected output!"
@@ -49,7 +49,7 @@ test_disassembling_a_manual_sequence() {
     lst_file="manual_bytes.lst"
     lst_file_fixture=fixtures/manual_bytes${verbose}.lst
     echo -e "\tDisassembling manual byte sequence using micropython-esp32-ulp disassembler"
-    micropython tools/disassemble.py $verbose -m $sequence > $lst_file
+    micropython -m tools.disassemble $verbose -m $sequence > $lst_file
 
     if ! diff $lst_file_fixture $lst_file 1>/dev/null; then
         echo -e "\tDisassembled output differs from expected output!"
diff --git a/tests/disassemble.py b/tests/decode.py
similarity index 99%
rename from tests/disassemble.py
rename to tests/decode.py
index 7e7f6df..916bb95 100644
--- a/tests/disassemble.py
+++ b/tests/decode.py
@@ -1,4 +1,4 @@
-from tools.disassemble import decode_instruction, get_instruction_fields
+from tools.decode import decode_instruction, get_instruction_fields
 import esp32_ulp.opcodes as opcodes
 import ubinascii
 
diff --git a/tools/decode.py b/tools/decode.py
new file mode 100644
index 0000000..3aaf095
--- /dev/null
+++ b/tools/decode.py
@@ -0,0 +1,149 @@
+import esp32_ulp.opcodes as opcodes
+
+
+alu_cnt_ops = ('STAGE_INC', 'STAGE_DEC', 'STAGE_RST')
+alu_ops = ('ADD', 'SUB', 'AND', 'OR', 'MOVE', 'LSH', 'RSH')
+jump_types = ('--', 'EQ', 'OV')
+cmp_ops = ('LT', 'GE', 'LE', 'EQ', 'GT')
+
+lookup = {
+    opcodes.OPCODE_ADC: ('ADC', opcodes._adc, lambda op: 'ADC r%s, %s, %s' % (op.dreg, op.mux, op.sar_sel)),
+    opcodes.OPCODE_ALU: ('ALU', opcodes._alu_imm, {
+        opcodes.SUB_OPCODE_ALU_CNT: (
+            'ALU_CNT',
+            opcodes._alu_cnt,
+            lambda op: '%s%s' % (alu_cnt_ops[op.sel], '' if op.sel == opcodes.ALU_SEL_RST else ' %s' % op.imm)
+        ),
+        opcodes.SUB_OPCODE_ALU_IMM: (
+            'ALU_IMM',
+            opcodes._alu_imm,
+            lambda op: '%s r%s, %s' % (alu_ops[op.sel], op.dreg, op.imm) if op.sel == opcodes.ALU_SEL_MOV
+                else '%s r%s, r%s, %s' % (alu_ops[op.sel], op.dreg, op.sreg, op.imm)
+        ),
+        opcodes.SUB_OPCODE_ALU_REG: (
+            'ALU_REG',
+            opcodes._alu_reg,
+            lambda op: '%s r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg) if op.sel == opcodes.ALU_SEL_MOV
+                else '%s r%s, r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg, op.treg)
+        ),
+    }),
+    opcodes.OPCODE_BRANCH: ('BRANCH', opcodes._bx, {
+        opcodes.SUB_OPCODE_BX: (
+            'BX',
+            opcodes._bx,
+            lambda op: 'JUMP %s%s' % (op.addr if op.reg == 0 else 'r%s' % op.dreg, ', %s' % jump_types[op.type]
+                if op.type != 0 else '')
+        ),
+        opcodes.SUB_OPCODE_BR: (
+            'BR',
+            opcodes._br,
+            lambda op: 'JUMPR %s, %s, %s' % ('%s%s' % ('-' if op.sign == 1 else '', op.offset), op.imm, cmp_ops[op.cmp])
+        ),
+        opcodes.SUB_OPCODE_BS: (
+            'BS',
+            opcodes._bs,
+            lambda op: 'JUMPS %s, %s, %s' % ('%s%s' % ('-' if op.sign == 1 else '', op.offset), op.imm, cmp_ops[op.cmp])
+        ),
+    }),
+    opcodes.OPCODE_DELAY: (
+        'DELAY',
+        opcodes._delay,
+        lambda op: 'NOP' if op.cycles == 0 else 'WAIT %s' % op.cycles
+    ),
+    opcodes.OPCODE_END: ('END', opcodes._end, {
+        opcodes.SUB_OPCODE_END: (
+            'WAKE',
+            opcodes._end
+        ),
+        opcodes.SUB_OPCODE_SLEEP: (
+            'SLEEP',
+            opcodes._sleep,
+            lambda op: 'SLEEP %s' % op.cycle_sel
+        ),
+    }),
+    opcodes.OPCODE_HALT: ('HALT', opcodes._halt),
+    opcodes.OPCODE_I2C: (
+        'I2C',
+        opcodes._i2c,
+        lambda op: 'I2C_%s %s, %s, %s, %s' % ('RD' if op.rw == 0 else 'WR', op.sub_addr, op.high, op.low, op.i2c_sel)
+    ),
+    opcodes.OPCODE_LD: ('LD', opcodes._ld, lambda op: 'LD r%s, r%s, %s' % (op.dreg, op.sreg, op.offset)),
+    opcodes.OPCODE_ST: ('ST', opcodes._st, lambda op: 'ST r%s, r%s, %s' % (op.sreg, op.dreg, op.offset)),
+    opcodes.OPCODE_RD_REG: (
+        'RD_REG',
+        opcodes._rd_reg,
+        lambda op: 'REG_RD 0x%x, %s, %s' % (op.periph_sel << 8 | op.addr, op.high, op.low)
+    ),
+    opcodes.OPCODE_WR_REG: (
+        'WR_REG',
+        opcodes._wr_reg,
+        lambda op: 'REG_WR 0x%x, %s, %s, %s' % (op.periph_sel << 8 | op.addr, op.high, op.low, op.data)
+    ),
+    opcodes.OPCODE_TSENS: ('TSENS', opcodes._tsens, lambda op: 'TSENS r%s, %s' % (op.dreg, op.delay)),
+}
+
+
+def decode_instruction(i):
+    if i == 0:
+        raise Exception('<empty>')
+
+    ins = opcodes._end
+    ins.all = i  # abuse a struct to get opcode
+
+    params = lookup.get(ins.opcode, None)
+
+    if not params:
+        raise Exception('Unknown instruction')
+
+    if len(params) == 3:
+        name, ins, third = params
+        ins.all = i
+
+        if callable(third):
+            params = (third(ins), ins)
+        else:
+            params = third.get(ins.sub_opcode, ())
+
+    if len(params) == 3:
+        name, ins, pretty = params
+        ins.all = i
+        name = pretty(ins)
+    else:
+        name, ins = params
+        ins.all = i
+
+    return ins, name
+
+
+def get_instruction_fields(ins):
+    possible_fields = (
+        'addr', 'cmp', 'cycle_sel', 'cycles', 'data', 'delay', 'dreg',
+        'high', 'i2c_sel', 'imm', 'low', 'mux', 'offset', 'opcode',
+        'periph_sel', 'reg', 'rw', 'sar_sel', 'sel', 'sign', 'sreg',
+        'sub_addr', 'sub_opcode', 'treg', 'type', 'unused', 'unused1',
+        'unused2', 'wakeup'
+    )
+    field_details = []
+    for field in possible_fields:
+        extra = ''
+        try:
+            # eval is ugly but constrained to possible_fields and variable ins
+            val = eval('i.%s' % field, {}, {'i': ins})
+            if (val>9):
+                extra = ' (0x%02x)' % val
+        except KeyError:
+            continue
+
+        if field == 'sel':  # ALU
+            if ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT:
+                extra = ' (%s)' % alu_cnt_ops[val]
+            else:
+                extra = ' (%s)' % alu_ops[val]
+        elif field == 'type':  # JUMP
+            extra = ' (%s)' % jump_types[val]
+        elif field == 'cmp':  # JUMPR/JUMPS
+            extra = ' (%s)' % cmp_ops[val]
+
+        field_details.append((field, val, extra))
+
+    return field_details
diff --git a/tools/disassemble.py b/tools/disassemble.py
index f647576..c791b3a 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -1,160 +1,9 @@
 from uctypes import struct, addressof, LITTLE_ENDIAN, UINT16, UINT32
-from esp32_ulp.opcodes import RD_REG_PERIPH_RTC_CNTL, RD_REG_PERIPH_RTC_IO, RD_REG_PERIPH_RTC_I2C, \
-    RD_REG_PERIPH_SENS, DR_REG_MAX_DIRECT
-import esp32_ulp.opcodes as opcodes
-import esp32_ulp.soc as soc
+from .decode import decode_instruction, get_instruction_fields
 import ubinascii
 import sys
 
 
-alu_cnt_ops = ('STAGE_INC', 'STAGE_DEC', 'STAGE_RST')
-alu_ops = ('ADD', 'SUB', 'AND', 'OR', 'MOVE', 'LSH', 'RSH')
-jump_types = ('--', 'EQ', 'OV')
-cmp_ops = ('LT', 'GE', 'LE', 'EQ', 'GT')
-
-lookup = {
-    opcodes.OPCODE_ADC: ('ADC', opcodes._adc, lambda op: 'ADC r%s, %s, %s' % (op.dreg, op.mux, op.sar_sel)),
-    opcodes.OPCODE_ALU: ('ALU', opcodes._alu_imm, {
-        opcodes.SUB_OPCODE_ALU_CNT: (
-            'ALU_CNT',
-            opcodes._alu_cnt,
-            lambda op: '%s%s' % (alu_cnt_ops[op.sel], '' if op.sel == opcodes.ALU_SEL_RST else ' %s' % op.imm)
-        ),
-        opcodes.SUB_OPCODE_ALU_IMM: (
-            'ALU_IMM',
-            opcodes._alu_imm,
-            lambda op: '%s r%s, %s' % (alu_ops[op.sel], op.dreg, op.imm) if op.sel == opcodes.ALU_SEL_MOV
-                else '%s r%s, r%s, %s' % (alu_ops[op.sel], op.dreg, op.sreg, op.imm)
-        ),
-        opcodes.SUB_OPCODE_ALU_REG: (
-            'ALU_REG',
-            opcodes._alu_reg,
-            lambda op: '%s r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg) if op.sel == opcodes.ALU_SEL_MOV
-                else '%s r%s, r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg, op.treg)
-        ),
-    }),
-    opcodes.OPCODE_BRANCH: ('BRANCH', opcodes._bx, {
-        opcodes.SUB_OPCODE_BX: (
-            'BX',
-            opcodes._bx,
-            lambda op: 'JUMP %s%s' % (op.addr if op.reg == 0 else 'r%s' % op.dreg, ', %s' % jump_types[op.type]
-                if op.type != 0 else '')
-        ),
-        opcodes.SUB_OPCODE_BR: (
-            'BR',
-            opcodes._br,
-            lambda op: 'JUMPR %s, %s, %s' % ('%s%s' % ('-' if op.sign == 1 else '', op.offset), op.imm, cmp_ops[op.cmp])
-        ),
-        opcodes.SUB_OPCODE_BS: (
-            'BS',
-            opcodes._bs,
-            lambda op: 'JUMPS %s, %s, %s' % ('%s%s' % ('-' if op.sign == 1 else '', op.offset), op.imm, cmp_ops[op.cmp])
-        ),
-    }),
-    opcodes.OPCODE_DELAY: (
-        'DELAY',
-        opcodes._delay,
-        lambda op: 'NOP' if op.cycles == 0 else 'WAIT %s' % op.cycles
-    ),
-    opcodes.OPCODE_END: ('END', opcodes._end, {
-        opcodes.SUB_OPCODE_END: (
-            'WAKE',
-            opcodes._end
-        ),
-        opcodes.SUB_OPCODE_SLEEP: (
-            'SLEEP',
-            opcodes._sleep,
-            lambda op: 'SLEEP %s' % op.cycle_sel
-        ),
-    }),
-    opcodes.OPCODE_HALT: ('HALT', opcodes._halt),
-    opcodes.OPCODE_I2C: (
-        'I2C',
-        opcodes._i2c,
-        lambda op: 'I2C_%s %s, %s, %s, %s' % ('RD' if op.rw == 0 else 'WR', op.sub_addr, op.high, op.low, op.i2c_sel)
-    ),
-    opcodes.OPCODE_LD: ('LD', opcodes._ld, lambda op: 'LD r%s, r%s, %s' % (op.dreg, op.sreg, op.offset)),
-    opcodes.OPCODE_ST: ('ST', opcodes._st, lambda op: 'ST r%s, r%s, %s' % (op.sreg, op.dreg, op.offset)),
-    opcodes.OPCODE_RD_REG: (
-        'RD_REG',
-        opcodes._rd_reg,
-        lambda op: 'REG_RD 0x%x, %s, %s' % (op.periph_sel << 8 | op.addr, op.high, op.low)
-    ),
-    opcodes.OPCODE_WR_REG: (
-        'WR_REG',
-        opcodes._wr_reg,
-        lambda op: 'REG_WR 0x%x, %s, %s, %s' % (op.periph_sel << 8 | op.addr, op.high, op.low, op.data)
-    ),
-    opcodes.OPCODE_TSENS: ('TSENS', opcodes._tsens, lambda op: 'TSENS r%s, %s' % (op.dreg, op.delay)),
-}
-
-
-def decode_instruction(i):
-    if i == 0:
-        raise Exception('<empty>')
-
-    ins = opcodes._end
-    ins.all = i  # abuse a struct to get opcode
-
-    params = lookup.get(ins.opcode, None)
-
-    if not params:
-        raise Exception('Unknown instruction')
-
-    if len(params) == 3:
-        name, ins, third = params
-        ins.all = i
-
-        if callable(third):
-            params = (third(ins), ins)
-        else:
-            params = third.get(ins.sub_opcode, ())
-
-    if len(params) == 3:
-        name, ins, pretty = params
-        ins.all = i
-        name = pretty(ins)
-    else:
-        name, ins = params
-        ins.all = i
-
-    return ins, name
-
-
-def get_instruction_fields(ins):
-    possible_fields = (
-        'addr', 'cmp', 'cycle_sel', 'cycles', 'data', 'delay', 'dreg',
-        'high', 'i2c_sel', 'imm', 'low', 'mux', 'offset', 'opcode',
-        'periph_sel', 'reg', 'rw', 'sar_sel', 'sel', 'sign', 'sreg',
-        'sub_addr', 'sub_opcode', 'treg', 'type', 'unused', 'unused1',
-        'unused2', 'wakeup'
-    )
-    field_details = []
-    for field in possible_fields:
-        extra = ''
-        try:
-            # eval is ugly but constrained to possible_fields and variable ins
-            val = eval('i.%s' % field, {}, {'i': ins})
-            if (val>9):
-                extra = ' (0x%02x)' % val
-        except KeyError:
-            continue
-
-        if field == 'sel':  # ALU
-            if ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT:
-                extra = ' (%s)' % alu_cnt_ops[val]
-            else:
-                extra = ' (%s)' % alu_ops[val]
-        elif field == 'type':  # JUMP
-            extra = ' (%s)' % jump_types[val]
-        elif field == 'cmp':  # JUMPR/JUMPS
-            extra = ' (%s)' % cmp_ops[val]
-
-        field_details.append((field, val, extra))
-
-    return field_details
-
-
 def chunk_into_words(code, bytes_per_word, byteorder):
     chunks = [
         ubinascii.hexlify(code[i:i + bytes_per_word])

From b83b73d84e4e355fc97d0c6deb9cfdd9cc8423ae Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 11 Jul 2023 09:24:33 +0300
Subject: [PATCH 05/20] Allow selecting cpu to disassemble for

Currently only the esp32 is implemented (esp32s2 soon to follow).

This commit adds the -c command line option to the disassembler, as
well as updates the integration tests to supply the cpu parameter,
and test fixtures are renamed to include the cpu type in their name,
so that we can have separate fixture files for other cpus.
---
 tests/03_disassembler_tests.sh                | 36 ++++++++++--------
 ..._opcodes-v.lst => all_opcodes-v.esp32.lst} |  0
 ...{all_opcodes.lst => all_opcodes.esp32.lst} |  0
 ...l_bytes-v.lst => manual_bytes-v.esp32.lst} |  0
 ...anual_bytes.lst => manual_bytes.esp32.lst} |  0
 tools/disassemble.py                          | 38 ++++++++++++++++---
 6 files changed, 53 insertions(+), 21 deletions(-)
 rename tests/fixtures/{all_opcodes-v.lst => all_opcodes-v.esp32.lst} (100%)
 rename tests/fixtures/{all_opcodes.lst => all_opcodes.esp32.lst} (100%)
 rename tests/fixtures/{manual_bytes-v.lst => manual_bytes-v.esp32.lst} (100%)
 rename tests/fixtures/{manual_bytes.lst => manual_bytes.esp32.lst} (100%)

diff --git a/tests/03_disassembler_tests.sh b/tests/03_disassembler_tests.sh
index 579909e..49a174e 100755
--- a/tests/03_disassembler_tests.sh
+++ b/tests/03_disassembler_tests.sh
@@ -3,8 +3,9 @@
 set -e
 
 test_disassembling_a_file() {
+    local cpu=$1
     local verbose
-    if [ "$1" == verbose ]; then
+    if [ "$2" == verbose ]; then
         verbose=-v
         echo -e "Testing disassembling a file in VERBOSE mode"
     else
@@ -13,16 +14,16 @@ test_disassembling_a_file() {
 
     testname=all_opcodes
     fixture=fixtures/${testname}.S
-    echo -e "\tBuilding $fixture using micropython-esp32-ulp"
+    echo -e "\tBuilding $fixture using micropython-esp32-ulp ($cpu)"
 
     log_file="${testname}.log"
     ulp_file="fixtures/${testname}.ulp"
-    micropython -m esp32_ulp $fixture 1>$log_file   # generates $ulp_file
+    micropython -m esp32_ulp -c $cpu $fixture 1>$log_file   # generates $ulp_file
 
-    lst_file="${testname}.lst"
-    lst_file_fixture=fixtures/${testname}${verbose}.lst
-    echo -e "\tDisassembling $ulp_file using micropython-esp32-ulp disassembler"
-    micropython -m tools.disassemble $verbose $ulp_file > $lst_file
+    lst_file="${testname}.$cpu.lst"
+    lst_file_fixture=fixtures/${testname}${verbose}.$cpu.lst
+    echo -e "\tDisassembling $ulp_file using micropython-esp32-ulp disassembler ($cpu)"
+    micropython -m tools.disassemble -c $cpu $verbose $ulp_file > $lst_file
 
     if ! diff $lst_file_fixture $lst_file 1>/dev/null; then
         echo -e "\tDisassembled output differs from expected output!"
@@ -36,8 +37,9 @@ test_disassembling_a_file() {
 }
 
 test_disassembling_a_manual_sequence() {
+    local cpu=$1
     local verbose
-    if [ "$1" == verbose ]; then
+    if [ "$2" == verbose ]; then
         verbose=-v
         echo -e "Testing disassembling a manual byte sequence in VERBOSE mode"
     else
@@ -46,10 +48,10 @@ test_disassembling_a_manual_sequence() {
 
     sequence="e1af 8c72 0100 0068 2705 cc19 0005 681d 0000 00a0 0000 0074"
 
-    lst_file="manual_bytes.lst"
-    lst_file_fixture=fixtures/manual_bytes${verbose}.lst
-    echo -e "\tDisassembling manual byte sequence using micropython-esp32-ulp disassembler"
-    micropython -m tools.disassemble $verbose -m $sequence > $lst_file
+    lst_file="manual_bytes.$cpu.lst"
+    lst_file_fixture=fixtures/manual_bytes${verbose}.$cpu.lst
+    echo -e "\tDisassembling manual byte sequence using micropython-esp32-ulp disassembler ($cpu)"
+    micropython -m tools.disassemble -c $cpu $verbose -m $sequence> $lst_file
 
     if ! diff $lst_file_fixture $lst_file 1>/dev/null; then
         echo -e "\tDisassembled output differs from expected output!"
@@ -60,8 +62,10 @@ test_disassembling_a_manual_sequence() {
     fi
 }
 
-test_disassembling_a_file
-test_disassembling_a_file verbose
+# esp32
+echo "Testing for CPU: esp32"
+test_disassembling_a_file esp32
+test_disassembling_a_file esp32 verbose
 
-test_disassembling_a_manual_sequence
-test_disassembling_a_manual_sequence verbose
+test_disassembling_a_manual_sequence esp32
+test_disassembling_a_manual_sequence esp32 verbose
diff --git a/tests/fixtures/all_opcodes-v.lst b/tests/fixtures/all_opcodes-v.esp32.lst
similarity index 100%
rename from tests/fixtures/all_opcodes-v.lst
rename to tests/fixtures/all_opcodes-v.esp32.lst
diff --git a/tests/fixtures/all_opcodes.lst b/tests/fixtures/all_opcodes.esp32.lst
similarity index 100%
rename from tests/fixtures/all_opcodes.lst
rename to tests/fixtures/all_opcodes.esp32.lst
diff --git a/tests/fixtures/manual_bytes-v.lst b/tests/fixtures/manual_bytes-v.esp32.lst
similarity index 100%
rename from tests/fixtures/manual_bytes-v.lst
rename to tests/fixtures/manual_bytes-v.esp32.lst
diff --git a/tests/fixtures/manual_bytes.lst b/tests/fixtures/manual_bytes.esp32.lst
similarity index 100%
rename from tests/fixtures/manual_bytes.lst
rename to tests/fixtures/manual_bytes.esp32.lst
diff --git a/tools/disassemble.py b/tools/disassemble.py
index c791b3a..cbf8f4c 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -1,9 +1,27 @@
 from uctypes import struct, addressof, LITTLE_ENDIAN, UINT16, UINT32
-from .decode import decode_instruction, get_instruction_fields
 import ubinascii
 import sys
 
 
+# placeholders:
+# these functions will be dynamically loaded later based on the chosen cpu
+decode_instruction, get_instruction_fields = None, None
+
+
+def load_decoder(cpu):
+    if cpu == 'esp32':
+        mod = 'decode'
+    else:
+        raise ValueError('Invalid cpu')
+
+    relative_import = 1 if '/' in __file__ else 0
+    decode = __import__(mod, globals(), locals(), [], relative_import)
+
+    global decode_instruction, get_instruction_fields
+    decode_instruction = decode.decode_instruction
+    get_instruction_fields = decode.get_instruction_fields
+
+
 def chunk_into_words(code, bytes_per_word, byteorder):
     chunks = [
         ubinascii.hexlify(code[i:i + bytes_per_word])
@@ -65,7 +83,9 @@ def print_data_section(data_offset, code):
         print_code_line(data_offset + (idx << 2), i, asm)
 
 
-def disassemble_manually(byte_sequence_string, verbose=False):
+def disassemble_manually(byte_sequence_string, cpu, verbose=False):
+    load_decoder(cpu)
+
     sequence = byte_sequence_string.strip().replace(' ','')
     chars_per_instruction = 8
     list = [
@@ -79,7 +99,9 @@ def disassemble_manually(byte_sequence_string, verbose=False):
         decode_instruction_and_print(idx << 2, i, verbose)
 
 
-def disassemble_file(filename, verbose=False):
+def disassemble_file(filename, cpu, verbose=False):
+    load_decoder(cpu)
+
     with open(filename, 'rb') as f:
         data = f.read()
 
@@ -114,6 +136,7 @@ def print_help():
     print('Usage: disassemble.py [<options>] [-m <byte_sequence> | <filename>]')
     print('')
     print('Options:')
+    print('  -c                  Choose ULP variant: only esp32 supported for now')
     print('  -h                  Show this help text')
     print('  -m <byte_sequence>  Sequence of hex bytes (8 per instruction)')
     print('  -v                  Verbose mode. Show ULP header and fields of each instruction')
@@ -122,6 +145,7 @@ def print_help():
 
 
 def handle_cmdline(params):
+    cpu = 'esp32'
     verbose = False
     filename = None
     byte_sequence = None
@@ -130,6 +154,9 @@ def handle_cmdline(params):
         if params[0] == '-h':
             print_help()
             sys.exit(0)
+        elif params[0] == '-c':
+            cpu = params[1]
+            params = params[1:]  # remove first param from list
         elif params[0] == '-m':
             if len(params) == 1:
                 print_help()
@@ -159,10 +186,11 @@ def handle_cmdline(params):
 
         params = params[1:]  # remove first param from list
 
+
     if byte_sequence:
-        disassemble_manually(byte_sequence, verbose)
+        disassemble_manually(byte_sequence, cpu, verbose)
     elif filename:
-        disassemble_file(filename, verbose)
+        disassemble_file(filename, cpu, verbose)
 
 
 if sys.argv: # if run from cmdline

From 88803de660e8aa7a5af29640c8bdc953c493e535 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 11 Jul 2023 09:29:57 +0300
Subject: [PATCH 06/20] Add support for disassembling ESP32-S2 ULP binaries

To disassemble ESP32-S2 ULP binaries use the `-c esp32s2` option
when running the disassembler.

Update documentation to mention support for the ESP32-S2.
---
 docs/disassembler.rst                     |  35 ++-
 tests/00_unit_tests.sh                    |   2 +-
 tests/03_disassembler_tests.sh            |  16 +-
 tests/decode_s2.py                        | 156 ++++++++++
 tests/fixtures/all_opcodes-v.esp32s2.lst  | 339 ++++++++++++++++++++++
 tests/fixtures/all_opcodes.esp32s2.lst    |  46 +++
 tests/fixtures/manual_bytes-v.esp32s2.lst |  43 +++
 tests/fixtures/manual_bytes.esp32s2.lst   |   6 +
 tools/decode_s2.py                        | 148 ++++++++++
 tools/disassemble.py                      |   4 +-
 10 files changed, 783 insertions(+), 12 deletions(-)
 create mode 100644 tests/decode_s2.py
 create mode 100644 tests/fixtures/all_opcodes-v.esp32s2.lst
 create mode 100644 tests/fixtures/all_opcodes.esp32s2.lst
 create mode 100644 tests/fixtures/manual_bytes-v.esp32s2.lst
 create mode 100644 tests/fixtures/manual_bytes.esp32s2.lst
 create mode 100644 tools/decode_s2.py

diff --git a/docs/disassembler.rst b/docs/disassembler.rst
index b92a19e..ee733e9 100644
--- a/docs/disassembler.rst
+++ b/docs/disassembler.rst
@@ -25,6 +25,8 @@ You can also specify additional options to ``disassemble.py`` as follows:
 +--------------------------+----------------------------------------------------------------+
 | Option                   | Description                                                    |
 +==========================+================================================================+
+| ``-c`` or ``--mcpu``     | Choose ULP variant: either esp32 or esp32s2                    |
++--------------------------+----------------------------------------------------------------+
 | ``-h``                   | Show help text                                                 |
 +--------------------------+----------------------------------------------------------------+
 || ``-m <bytes sequence>`` || Disassemble a provided sequence of hex bytes                  |
@@ -43,18 +45,31 @@ specified file.
 Note that the ULP header is validates and files with unknown magic bytes will be
 rejected. The correct 4 magic bytes at the start of a ULP binary are ``ulp\x00``.
 
-Example:
+Example disassembling an ESP32 ULP binary:
 
 .. code-block:: shell
 
    $ micropython -m tools.disassemble path/to/binary.ulp
    .text
    0000  040000d0  LD r0, r1, 0
-   0004  0e0400d0  LD r2, r3, 1
+   0004  0e0000d0  LD r2, r3, 0
+   0008  04000068  ST r0, r1, 0
+   000c  0b000068  ST r3, r2, 0
+   .data
+   0010  00000000  <empty>
+
+Example disassembling an ESP32-S2 ULP binary:
+
+.. code-block:: shell
+
+   $ micropython -m tools.disassemble -c esp32s2 path/to/binary.ulp
+   .text
+   0000  040000d0  LD r0, r1, 0
+   0004  0e0000d0  LD r2, r3, 0
    0008  84010068  ST r0, r1, 0
-   000c  8b090068  ST r3, r2, 2
+   000c  8b010068  ST r3, r2, 0
    .data
-   0000  00000000  <empty>
+   0010  00000000  <empty>
 
 
 Disassembling a byte sequence
@@ -129,18 +144,20 @@ For example:
 Disassembling on device
 -----------------------------
 
-The disassembler also works when used on an ESP32.
+The disassembler also works when used on an ESP32 device.
 
 To use the disassembler on a real device:
 
 * ensure ``micropython-esp32-ulp`` is installed on the device (see `docs/index.rst </docs/index.rst>`_).
-* upload ``tools/disassemble.py`` to the device (any directory will do)
-* run the following:
+* upload ``tools/disassemble.py``, ``tools/decode.py`` and ``tools/decode_s2.py`` to the device
+  (any directory will do, as long as those 3 files are in the same directory)
+* the following example code assumes you placed the 3 files into the device's "root" directory
+* run the following (note, we must specify which cpu the binary is for):
 
   .. code-block:: python
 
      from disassemble import disassemble_file
      # then either:
-     disassemble_file('path/to/file.ulp')  # normal mode
+     disassemble_file('path/to/file.ulp', cpu='esp32s2')  # normal mode
      # or:
-     disassemble_file('path/to/file.ulp', True)  # verbose mode
+     disassemble_file('path/to/file.ulp', cpu='esp32s2', verbose=True)  # verbose mode
diff --git a/tests/00_unit_tests.sh b/tests/00_unit_tests.sh
index 0ae2bad..262b759 100755
--- a/tests/00_unit_tests.sh
+++ b/tests/00_unit_tests.sh
@@ -4,7 +4,7 @@
 
 set -e
 
-LIST=${1:-opcodes opcodes_s2 assemble link util preprocess definesdb decode}
+LIST=${1:-opcodes opcodes_s2 assemble link util preprocess definesdb decode decode_s2}
 
 for file in $LIST; do
     echo testing $file...
diff --git a/tests/03_disassembler_tests.sh b/tests/03_disassembler_tests.sh
index 49a174e..b3740f4 100755
--- a/tests/03_disassembler_tests.sh
+++ b/tests/03_disassembler_tests.sh
@@ -46,7 +46,11 @@ test_disassembling_a_manual_sequence() {
         echo -e "Testing disassembling a manual byte sequence in NORMAL mode"
     fi
 
-    sequence="e1af 8c72 0100 0068 2705 cc19 0005 681d 0000 00a0 0000 0074"
+    if [ "$cpu" == "esp32s2" ]; then
+        sequence="e1af 8c74 8101 0068 2705 cc19 0005 681d 0000 00a0 0000 0078"
+    else
+        sequence="e1af 8c72 0100 0068 2705 cc19 0005 681d 0000 00a0 0000 0074"
+    fi
 
     lst_file="manual_bytes.$cpu.lst"
     lst_file_fixture=fixtures/manual_bytes${verbose}.$cpu.lst
@@ -69,3 +73,13 @@ test_disassembling_a_file esp32 verbose
 
 test_disassembling_a_manual_sequence esp32
 test_disassembling_a_manual_sequence esp32 verbose
+
+echo ""
+
+# esp32s2
+echo "Testing for CPU: esp32s2"
+test_disassembling_a_file esp32s2
+test_disassembling_a_file esp32s2 verbose
+
+test_disassembling_a_manual_sequence esp32s2
+test_disassembling_a_manual_sequence esp32s2 verbose
diff --git a/tests/decode_s2.py b/tests/decode_s2.py
new file mode 100644
index 0000000..ae46263
--- /dev/null
+++ b/tests/decode_s2.py
@@ -0,0 +1,156 @@
+from tools.decode_s2 import decode_instruction, get_instruction_fields
+import esp32_ulp.opcodes_s2 as opcodes
+import ubinascii
+
+tests = []
+
+
+def test(param):
+    tests.append(param)
+
+
+def hex_to_int(sequence):
+    byte_sequence = ubinascii.unhexlify(sequence)
+    return int.from_bytes(byte_sequence, 'little')
+
+
+def assert_decode(sequence, expected_struct, expected_name):
+    i = hex_to_int(sequence)
+
+    ins, name = decode_instruction(i)
+
+    assert name == expected_name, '%s != %s' % (name, expected_name)
+    assert ins is expected_struct, 'incorrect instruction struct (%s, %s)' % (sequence, name)
+
+
+def assert_decode_exception(sequence, expected_message):
+    i = hex_to_int(sequence)
+
+    try:
+        decode_instruction(i)
+    except Exception as e:
+        assert str(e) == expected_message, str(e)
+        raised = True
+    else:
+        raised = False
+
+    assert raised, 'Exception not raised'
+
+
+def assert_decode_fields(sequence, expected_field_details):
+    i = hex_to_int(sequence)
+
+    ins, _ = decode_instruction(i)
+
+    actual_field_details = get_instruction_fields(ins)
+
+    assert actual_field_details == expected_field_details, '\n- %s \n+ %s' % (actual_field_details, expected_field_details)
+
+
+@test
+def test_unknown_instruction():
+    assert_decode_exception("10000001", 'Unknown instruction')
+
+
+@test
+def test_empty_instruction():
+    assert_decode_exception("00000000", '<empty>')
+
+
+# All hex sequences were generated using our assembler.
+# Note: disassembled instructions always show field values according
+# to what is actually encoded into the binary instruction, not as per
+# original assembly code.
+# For example in JUMP instructions in the source code one would
+# specify jump offsets in bytes (e.g. 4 bytes) but in the actual
+# instruction offset encoded in the binary instruction will be in
+# words (1 word = 4 bytes).
+# The disassembled instructions would therefore show as "JUMP 1"
+# for what was originally "JUMP 4" in the source code.
+@test
+def test_all_instructions():
+    # OPCODE_WR_REG = 1
+    assert_decode("00000010", opcodes._wr_reg, 'REG_WR 0x0, 0, 0, 0')
+
+    # OPCODE_RD_REG = 2
+    assert_decode("00000020", opcodes._rd_reg, 'REG_RD 0x0, 0, 0')
+
+    # OPCODE_I2C = 3
+    assert_decode("00000030", opcodes._i2c, 'I2C_RD 0, 0, 0, 0')
+    assert_decode("00000038", opcodes._i2c, 'I2C_WR 0, 0, 0, 0')
+
+    # OPCODE_DELAY = 4
+    assert_decode("00000040", opcodes._delay, 'NOP')
+    assert_decode("01000040", opcodes._delay, 'WAIT 1')
+
+    # OPCODE_ADC = 5
+    assert_decode("00000050", opcodes._adc, 'ADC r0, 0, 0')
+
+    # OPCODE_ST = 6
+    assert_decode("80010068", opcodes._st, 'ST r0, r0, 0')
+
+    # OPCODE_ALU = 7, SUB_OPCODE_ALU_REG
+    assert_decode("00000070", opcodes._alu_reg, 'ADD r0, r0, r0')
+    assert_decode("00002070", opcodes._alu_reg, 'SUB r0, r0, r0')
+    assert_decode("00004070", opcodes._alu_reg, 'AND r0, r0, r0')
+    assert_decode("00006070", opcodes._alu_reg, 'OR r0, r0, r0')
+    assert_decode("00008070", opcodes._alu_reg, "MOVE r0, r0")
+    assert_decode("0000a070", opcodes._alu_reg, 'LSH r0, r0, r0')
+    assert_decode("0000c070", opcodes._alu_reg, 'RSH r0, r0, r0')
+
+    # OPCODE_ALU = 7, SUB_OPCODE_ALU_IMM
+    assert_decode("00000074", opcodes._alu_imm, 'ADD r0, r0, 0')
+    assert_decode("00002074", opcodes._alu_imm, 'SUB r0, r0, 0')
+    assert_decode("00004074", opcodes._alu_imm, 'AND r0, r0, 0')
+    assert_decode("00006074", opcodes._alu_imm, 'OR r0, r0, 0')
+    assert_decode("00008074", opcodes._alu_imm, "MOVE r0, 0")
+    assert_decode("0000a074", opcodes._alu_imm, 'LSH r0, r0, 0')
+    assert_decode("0000c074", opcodes._alu_imm, 'RSH r0, r0, 0')
+
+    # OPCODE_ALU = 7, SUB_OPCODE_ALU_CNT
+    assert_decode("00004078", opcodes._alu_cnt, 'STAGE_RST')
+    assert_decode("00000078", opcodes._alu_cnt, 'STAGE_INC 0')
+    assert_decode("00002078", opcodes._alu_cnt, 'STAGE_DEC 0')
+
+    # OPCODE_BRANCH = 8, SUB_OPCODE_BX (IMM)
+    assert_decode("00000084", opcodes._bx, 'JUMP 0')
+    assert_decode("00004084", opcodes._bx, 'JUMP 0, EQ')
+    assert_decode("00008084", opcodes._bx, 'JUMP 0, OV')
+
+    # OPCODE_BRANCH = 8, SUB_OPCODE_BX (REG)
+    assert_decode("00002084", opcodes._bx, 'JUMP r0')
+    assert_decode("00006084", opcodes._bx, 'JUMP r0, EQ')
+    assert_decode("0000a084", opcodes._bx, 'JUMP r0, OV')
+
+    # OPCODE_BRANCH = 8, SUB_OPCODE_BR
+    assert_decode("00000080", opcodes._b, 'JUMPR 0, 0, LT')
+    assert_decode("00000180", opcodes._b, 'JUMPR 0, 0, GT')
+    assert_decode("00000280", opcodes._b, 'JUMPR 0, 0, EQ')
+
+    # OPCODE_BRANCH = 8, SUB_OPCODE_BS
+    assert_decode("00800088", opcodes._bs, 'JUMPS 0, 0, LT')
+    assert_decode("00800188", opcodes._bs, 'JUMPS 0, 0, GT')
+    assert_decode("00000288", opcodes._bs, 'JUMPS 0, 0, EQ')
+    assert_decode("00800288", opcodes._bs, 'JUMPS 0, 0, LE')
+    assert_decode("00800388", opcodes._bs, 'JUMPS 0, 0, GE')
+
+    # OPCODE_END = 9, SUB_OPCODE_END
+    assert_decode("01000090", opcodes._end, 'WAKE')
+
+    # OPCODE_END = 9, SUB_OPCODE_SLEEP
+    ###assert_decode("01000040", opcodes._end, 'SLEEP 1') ##TODO
+
+    # OPCODE_TSENS = 10
+    assert_decode("000000a0", opcodes._tsens, 'TSENS r0, 0')
+
+    # OPCODE_HALT = 11
+    assert_decode("000000b0", opcodes._halt, 'HALT')
+
+    # OPCODE_LD = 13
+    assert_decode("000000d0", opcodes._ld, 'LD r0, r0, 0')
+
+
+if __name__ == '__main__':
+    # run all methods marked with @test
+    for t in tests:
+        t()
diff --git a/tests/fixtures/all_opcodes-v.esp32s2.lst b/tests/fixtures/all_opcodes-v.esp32s2.lst
new file mode 100644
index 0000000..dc2a63f
--- /dev/null
+++ b/tests/fixtures/all_opcodes-v.esp32s2.lst
@@ -0,0 +1,339 @@
+header
+ULP magic    : b'ulp\x00' (0x00706c75)
+.text offset : 12 (0x0c)
+.text size   : 168 (0xa8)
+.data offset : 180 (0xb4)
+.data size   : 8 (0x08)
+.bss size    : 0 (0x00)
+----------------------------------------
+.text
+0000  230d8810  REG_WR 0x123, 1, 2, 3
+                 addr       =  35 (0x23)
+                 data       =   3
+                 high       =   1
+                 low        =   2
+                 opcode     =   1
+                 periph_sel =   1
+0004  21030421  REG_RD 0x321, 2, 1
+                 addr       =  33 (0x21)
+                 high       =   2
+                 low        =   1
+                 opcode     =   2
+                 periph_sel =   3
+                 unused     =   0
+0008  03001130  I2C_RD 3, 2, 1, 0
+                 data       =   0
+                 high       =   2
+                 i2c_sel    =   0
+                 low        =   1
+                 opcode     =   3
+                 rw         =   0
+                 sub_addr   =   3
+                 unused     =   0
+000c  00011339  I2C_WR 0, 2, 3, 4
+                 data       =   1
+                 high       =   2
+                 i2c_sel    =   4
+                 low        =   3
+                 opcode     =   3
+                 rw         =   1
+                 sub_addr   =   0
+                 unused     =   0
+0010  00000040  NOP
+                 cycles     =   0
+                 opcode     =   4
+                 unused     =   0
+0014  07000040  WAIT 7
+                 cycles     =   7
+                 opcode     =   4
+                 unused     =   0
+0018  07000050  ADC r3, 1, 0
+                 cycles     =   0
+                 dreg       =   3
+                 mux        =   1
+                 opcode     =   5
+                 sar_sel    =   0
+                 unused1    =   0
+                 unused2    =   0
+001c  8b010068  ST r3, r2, 0
+                 dreg       =   2
+                 offset     =   0
+                 opcode     =   6
+                 sreg       =   3
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+0020  06000070  ADD r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   0 (ADD)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused1    =   0
+                 unused2    =   0
+0024  06002070  SUB r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   1 (SUB)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused1    =   0
+                 unused2    =   0
+0028  06004070  AND r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   2 (AND)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused1    =   0
+                 unused2    =   0
+002c  06006070  OR r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   3 (OR)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused1    =   0
+                 unused2    =   0
+0030  16008070  MOVE r2, r1
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   4 (MOVE)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   1
+                 unused1    =   0
+                 unused2    =   0
+0034  0600a070  LSH r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   5 (LSH)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused1    =   0
+                 unused2    =   0
+0038  0600c070  RSH r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   6 (RSH)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused1    =   0
+                 unused2    =   0
+003c  06000074  ADD r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   0 (ADD)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+0040  06002074  SUB r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   1 (SUB)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+0044  06004074  AND r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   2 (AND)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+0048  06006074  OR r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   3 (OR)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+004c  01008074  MOVE r1, 0
+                 dreg       =   1
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   4 (MOVE)
+                 sreg       =   0
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+0050  0600a074  LSH r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   5 (LSH)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+0054  0600c074  RSH r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   6 (RSH)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+0058  00004078  STAGE_RST
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   2 (STAGE_RST)
+                 sub_opcode =   2
+                 unused1    =   0
+                 unused2    =   0
+005c  70000078  STAGE_INC 7
+                 imm        =   7
+                 opcode     =   7
+                 sel        =   0 (STAGE_INC)
+                 sub_opcode =   2
+                 unused1    =   0
+                 unused2    =   0
+0060  30002078  STAGE_DEC 3
+                 imm        =   3
+                 opcode     =   7
+                 sel        =   1 (STAGE_DEC)
+                 sub_opcode =   2
+                 unused1    =   0
+                 unused2    =   0
+0064  00002084  JUMP r0
+                 addr       =   0
+                 dreg       =   0
+                 opcode     =   8
+                 reg        =   1
+                 sub_opcode =   1
+                 type       =   0 (--)
+                 unused1    =   0
+                 unused2    =   0
+0068  01006084  JUMP r1, EQ
+                 addr       =   0
+                 dreg       =   1
+                 opcode     =   8
+                 reg        =   1
+                 sub_opcode =   1
+                 type       =   1 (EQ)
+                 unused1    =   0
+                 unused2    =   0
+006c  0200a084  JUMP r2, OV
+                 addr       =   0
+                 dreg       =   2
+                 opcode     =   8
+                 reg        =   1
+                 sub_opcode =   1
+                 type       =   2 (OV)
+                 unused1    =   0
+                 unused2    =   0
+0070  00000084  JUMP 0
+                 addr       =   0
+                 dreg       =   0
+                 opcode     =   8
+                 reg        =   0
+                 sub_opcode =   1
+                 type       =   0 (--)
+                 unused1    =   0
+                 unused2    =   0
+0074  00004084  JUMP 0, EQ
+                 addr       =   0
+                 dreg       =   0
+                 opcode     =   8
+                 reg        =   0
+                 sub_opcode =   1
+                 type       =   1 (EQ)
+                 unused1    =   0
+                 unused2    =   0
+0078  00008084  JUMP 0, OV
+                 addr       =   0
+                 dreg       =   0
+                 opcode     =   8
+                 reg        =   0
+                 sub_opcode =   1
+                 type       =   2 (OV)
+                 unused1    =   0
+                 unused2    =   0
+007c  01000080  JUMPR 0, 1, LT
+                 cmp        =   0 (LT)
+                 imm        =   1
+                 offset     =   0
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   0
+0080  05000580  JUMPR 1, 5, GT
+                 cmp        =   1 (GT)
+                 imm        =   5
+                 offset     =   1
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   0
+0084  05000680  JUMPR 1, 5, EQ
+                 cmp        =   2 (EQ)
+                 imm        =   5
+                 offset     =   1
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   0
+0088  01800088  JUMPS 0, 1, LT
+                 cmp        =   1 (LT)
+                 imm        =   1
+                 offset     =   0
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+008c  05801388  JUMPS 4, 5, GE
+                 cmp        =   7 (GE)
+                 imm        =   5
+                 offset     =   4
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+0090  09802288  JUMPS 8, 9, LE
+                 cmp        =   5 (LE)
+                 imm        =   9
+                 offset     =   8
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+0094  01000090  WAKE
+                 opcode     =   9
+                 sub_opcode =   0
+                 unused     =   0
+                 wakeup     =   1
+0098  07000040  WAIT 7
+                 cycles     =   7
+                 opcode     =   4
+                 unused     =   0
+009c  090000a0  TSENS r1, 2
+                 delay      =   2
+                 dreg       =   1
+                 opcode     =  10 (0x0a)
+                 unused     =   0
+00a0  000000b0  HALT
+                 opcode     =  11 (0x0b)
+                 unused     =   0
+00a4  060000d0  LD r2, r1, 0
+                 dreg       =   2
+                 offset     =   0
+                 opcode     =  13 (0x0d)
+                 sreg       =   1
+                 unused1    =   0
+                 unused2    =   0
+----------------------------------------
+.data
+00a8  00000000  <empty>
+00ac  fecadec0  <non-empty>
diff --git a/tests/fixtures/all_opcodes.esp32s2.lst b/tests/fixtures/all_opcodes.esp32s2.lst
new file mode 100644
index 0000000..97040c5
--- /dev/null
+++ b/tests/fixtures/all_opcodes.esp32s2.lst
@@ -0,0 +1,46 @@
+.text
+0000  230d8810  REG_WR 0x123, 1, 2, 3
+0004  21030421  REG_RD 0x321, 2, 1
+0008  03001130  I2C_RD 3, 2, 1, 0
+000c  00011339  I2C_WR 0, 2, 3, 4
+0010  00000040  NOP
+0014  07000040  WAIT 7
+0018  07000050  ADC r3, 1, 0
+001c  8b010068  ST r3, r2, 0
+0020  06000070  ADD r2, r1, r0
+0024  06002070  SUB r2, r1, r0
+0028  06004070  AND r2, r1, r0
+002c  06006070  OR r2, r1, r0
+0030  16008070  MOVE r2, r1
+0034  0600a070  LSH r2, r1, r0
+0038  0600c070  RSH r2, r1, r0
+003c  06000074  ADD r2, r1, 0
+0040  06002074  SUB r2, r1, 0
+0044  06004074  AND r2, r1, 0
+0048  06006074  OR r2, r1, 0
+004c  01008074  MOVE r1, 0
+0050  0600a074  LSH r2, r1, 0
+0054  0600c074  RSH r2, r1, 0
+0058  00004078  STAGE_RST
+005c  70000078  STAGE_INC 7
+0060  30002078  STAGE_DEC 3
+0064  00002084  JUMP r0
+0068  01006084  JUMP r1, EQ
+006c  0200a084  JUMP r2, OV
+0070  00000084  JUMP 0
+0074  00004084  JUMP 0, EQ
+0078  00008084  JUMP 0, OV
+007c  01000080  JUMPR 0, 1, LT
+0080  05000580  JUMPR 1, 5, GT
+0084  05000680  JUMPR 1, 5, EQ
+0088  01800088  JUMPS 0, 1, LT
+008c  05801388  JUMPS 4, 5, GE
+0090  09802288  JUMPS 8, 9, LE
+0094  01000090  WAKE
+0098  07000040  WAIT 7
+009c  090000a0  TSENS r1, 2
+00a0  000000b0  HALT
+00a4  060000d0  LD r2, r1, 0
+.data
+00a8  00000000  <empty>
+00ac  fecadec0  <non-empty>
diff --git a/tests/fixtures/manual_bytes-v.esp32s2.lst b/tests/fixtures/manual_bytes-v.esp32s2.lst
new file mode 100644
index 0000000..bcd452e
--- /dev/null
+++ b/tests/fixtures/manual_bytes-v.esp32s2.lst
@@ -0,0 +1,43 @@
+0000  e1af8c74  MOVE r1, 51966
+                 dreg       =   1
+                 imm        = 51966 (0xcafe)
+                 opcode     =   7
+                 sel        =   4 (MOVE)
+                 sreg       =   0
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+0004  81010068  ST r1, r0, 0
+                 dreg       =   0
+                 offset     =   0
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+0008  2705cc19  REG_WR 0x127, 19, 19, 1
+                 addr       =  39 (0x27)
+                 data       =   1
+                 high       =  19 (0x13)
+                 low        =  19 (0x13)
+                 opcode     =   1
+                 periph_sel =   1
+000c  0005681d  REG_WR 0x100, 26, 26, 1
+                 addr       =   0
+                 data       =   1
+                 high       =  26 (0x1a)
+                 low        =  26 (0x1a)
+                 opcode     =   1
+                 periph_sel =   1
+0010  000000a0  TSENS r0, 0
+                 delay      =   0
+                 dreg       =   0
+                 opcode     =  10 (0x0a)
+                 unused     =   0
+0014  00000078  STAGE_INC 0
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   0 (STAGE_INC)
+                 sub_opcode =   2
+                 unused1    =   0
+                 unused2    =   0
diff --git a/tests/fixtures/manual_bytes.esp32s2.lst b/tests/fixtures/manual_bytes.esp32s2.lst
new file mode 100644
index 0000000..52799af
--- /dev/null
+++ b/tests/fixtures/manual_bytes.esp32s2.lst
@@ -0,0 +1,6 @@
+0000  e1af8c74  MOVE r1, 51966
+0004  81010068  ST r1, r0, 0
+0008  2705cc19  REG_WR 0x127, 19, 19, 1
+000c  0005681d  REG_WR 0x100, 26, 26, 1
+0010  000000a0  TSENS r0, 0
+0014  00000078  STAGE_INC 0
diff --git a/tools/decode_s2.py b/tools/decode_s2.py
new file mode 100644
index 0000000..f40db5d
--- /dev/null
+++ b/tools/decode_s2.py
@@ -0,0 +1,148 @@
+import esp32_ulp.opcodes_s2 as opcodes
+
+
+alu_cnt_ops = ('STAGE_INC', 'STAGE_DEC', 'STAGE_RST')  # indexed by `sel` of ALU_CNT instructions
+alu_ops = ('ADD', 'SUB', 'AND', 'OR', 'MOVE', 'LSH', 'RSH')  # indexed by `sel` of ALU_REG/ALU_IMM
+jump_types = ('--', 'EQ', 'OV')  # indexed by `type` of JUMP; type 0 ('--') is left out of the JUMP text
+cmp_ops = ('LT', 'GT', 'EQ')  # indexed by `cmp` of JUMPR
+bs_cmp_ops = ('??', 'LT', '??', 'GT', 'EQ', 'LE', '??', 'GE')  # indexed by `cmp` of JUMPS; '??' presumably marks unassigned encodings
+
+lookup = {  # opcode -> (name, struct[, formatter or {sub_opcode: (name, struct[, formatter])}])
+    opcodes.OPCODE_ADC: ('ADC', opcodes._adc, lambda op: 'ADC r%s, %s, %s' % (op.dreg, op.mux, op.sar_sel)),
+    opcodes.OPCODE_ALU: ('ALU', opcodes._alu_imm, {
+        opcodes.SUB_OPCODE_ALU_CNT: (
+            'ALU_CNT',
+            opcodes._alu_cnt,
+            lambda op: '%s%s' % (alu_cnt_ops[op.sel], '' if op.sel == opcodes.ALU_SEL_STAGE_RST else ' %s' % op.imm)
+        ),
+        opcodes.SUB_OPCODE_ALU_IMM: (
+            'ALU_IMM',
+            opcodes._alu_imm,
+            lambda op: '%s r%s, %s' % (alu_ops[op.sel], op.dreg, op.imm) if op.sel == opcodes.ALU_SEL_MOV
+                else '%s r%s, r%s, %s' % (alu_ops[op.sel], op.dreg, op.sreg, op.imm)
+        ),
+        opcodes.SUB_OPCODE_ALU_REG: (
+            'ALU_REG',
+            opcodes._alu_reg,
+            lambda op: '%s r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg) if op.sel == opcodes.ALU_SEL_MOV
+                else '%s r%s, r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg, op.treg)
+        ),
+    }),
+    opcodes.OPCODE_BRANCH: ('BRANCH', opcodes._bx, {
+        opcodes.SUB_OPCODE_BX: (
+            'BX',
+            opcodes._bx,
+            lambda op: 'JUMP %s%s' % (op.addr if op.reg == 0 else 'r%s' % op.dreg, ', %s' % jump_types[op.type]
+                if op.type != 0 else '')
+        ),
+        opcodes.SUB_OPCODE_B: (
+            'BR',
+            opcodes._b,
+            lambda op: 'JUMPR %s, %s, %s' % ('%s%s' % ('-' if op.sign == 1 else '', op.offset), op.imm, cmp_ops[op.cmp])
+        ),
+        opcodes.SUB_OPCODE_BS: (
+            'BS',
+            opcodes._bs,
+            lambda op: 'JUMPS %s, %s, %s' % ('%s%s' % ('-' if op.sign == 1 else '', op.offset), op.imm, bs_cmp_ops[op.cmp])
+        ),
+    }),
+    opcodes.OPCODE_DELAY: (
+        'DELAY',
+        opcodes._delay,
+        lambda op: 'NOP' if op.cycles == 0 else 'WAIT %s' % op.cycles
+    ),
+    opcodes.OPCODE_END: ('END', opcodes._end, {
+        opcodes.SUB_OPCODE_END: (
+            'WAKE',
+            opcodes._end
+        ),
+    }),
+    opcodes.OPCODE_HALT: ('HALT', opcodes._halt),
+    opcodes.OPCODE_I2C: (
+        'I2C',
+        opcodes._i2c,
+        lambda op: 'I2C_%s %s, %s, %s, %s' % ('RD' if op.rw == 0 else 'WR', op.sub_addr, op.high, op.low, op.i2c_sel)
+    ),
+    opcodes.OPCODE_LD: ('LD', opcodes._ld, lambda op: 'LD r%s, r%s, %s' % (op.dreg, op.sreg, op.offset)),
+    opcodes.OPCODE_ST: ('ST', opcodes._st, lambda op: 'ST r%s, r%s, %s' % (op.sreg, op.dreg, op.offset)),
+    opcodes.OPCODE_RD_REG: (
+        'RD_REG',
+        opcodes._rd_reg,
+        lambda op: 'REG_RD 0x%x, %s, %s' % (op.periph_sel << 8 | op.addr, op.high, op.low)
+    ),
+    opcodes.OPCODE_WR_REG: (
+        'WR_REG',
+        opcodes._wr_reg,
+        lambda op: 'REG_WR 0x%x, %s, %s, %s' % (op.periph_sel << 8 | op.addr, op.high, op.low, op.data)
+    ),
+    opcodes.OPCODE_TSENS: ('TSENS', opcodes._tsens, lambda op: 'TSENS r%s, %s' % (op.dreg, op.delay)),
+}
+
+
+def decode_instruction(i):
+    """Decode the 32-bit instruction word `i` and return `(ins, name)`.
+
+    `ins` is the opcodes struct loaded with `i`; `name` is the disassembled text.
+    Raises Exception for a zero word, an unknown opcode or unknown sub-opcode.
+    """
+    if i == 0:
+        raise Exception('<empty>')
+
+    ins = opcodes._end
+    ins.all = i  # abuse a struct to get opcode
+    params = lookup.get(ins.opcode, None)
+    if not params:
+        raise Exception('Unknown instruction')
+
+    if len(params) == 3:
+        name, ins, third = params
+        ins.all = i
+        if callable(third):  # no sub-opcodes: third formats the text directly
+            return ins, third(ins)
+        # third maps sub-opcodes to entries; reject unmapped ones explicitly
+        params = third.get(ins.sub_opcode, None)
+        if not params:
+            raise Exception('Unknown instruction')
+
+    name, ins = params[0], params[1]
+    ins.all = i
+    if len(params) == 3:
+        name = params[2](ins)
+    return ins, name
+
+
+def get_instruction_fields(ins):  # -> list of (field, value, extra) for the fields readable on ins
+    possible_fields = (  # candidate field names; each one is probed on `ins` below
+        'addr', 'cmp', 'cycle_sel', 'cycles', 'data', 'delay', 'dreg',
+        'high', 'i2c_sel', 'imm', 'low', 'mux', 'offset', 'opcode',
+        'periph_sel', 'reg', 'rw', 'sar_sel', 'sel', 'sign', 'sreg',
+        'sub_addr', 'sub_opcode', 'treg', 'type', 'unused', 'unused1',
+        'unused2', 'wakeup'
+    )
+    field_details = []
+    for field in possible_fields:
+        extra = ''
+        try:
+            # eval is ugly but constrained to possible_fields and variable ins
+            val = eval('i.%s' % field, {}, {'i': ins})
+            if (val>9):  # multi-digit values additionally get a hex rendering
+                extra = ' (0x%02x)' % val
+        except KeyError:  # presumably raised when `ins` lacks this field -> leave it out
+            continue
+
+        # symbolic names below replace any hex rendering chosen above
+        if field == 'sel':  # ALU
+            if ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT:
+                extra = ' (%s)' % alu_cnt_ops[val]
+            else:
+                extra = ' (%s)' % alu_ops[val]
+        elif field == 'type':  # JUMP
+            extra = ' (%s)' % jump_types[val]
+        elif field == 'cmp':  # JUMPR/JUMPS
+            if ins.sub_opcode == opcodes.SUB_OPCODE_BS:
+                extra = ' (%s)' % bs_cmp_ops[val]
+            else:
+                extra = ' (%s)' % cmp_ops[val]
+        field_details.append((field, val, extra))
+
+    return field_details
diff --git a/tools/disassemble.py b/tools/disassemble.py
index cbf8f4c..442c8f3 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -11,6 +11,8 @@
 def load_decoder(cpu):
     if cpu == 'esp32':
         mod = 'decode'
+    elif cpu == 'esp32s2':
+        mod = 'decode_s2'
     else:
         raise ValueError('Invalid cpu')
 
@@ -136,7 +138,7 @@ def print_help():
     print('Usage: disassemble.py [<options>] [-m <byte_sequence> | <filename>]')
     print('')
     print('Options:')
-    print('  -c                  Choose ULP variant: only esp32 supported for now')
+    print('  -c                  Choose ULP variant: either esp32 or esp32s2')
     print('  -h                  Show this help text')
     print('  -m <byte_sequence>  Sequence of hex bytes (8 per instruction)')
     print('  -v                  Verbose mode. Show ULP header and fields of each instruction')

From 0db6f6082ac7c70f61d03002e9ec1b998d3e0bc2 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 11 Jul 2023 19:13:02 +0300
Subject: [PATCH 07/20] Add integration tests for disassembling all JUMPR/JUMPS
 conditions supported by the S2

The ESP32-S2 changes which comparison operators are natively
supported by the CPU for the JUMPR and JUMPS instructions.

For the JUMPS instruction, all comparison operators are now natively
supported in hardware.
---
 tests/fixtures/all_opcodes-v.esp32.lst   | 76 +++++++++++++++++++-----
 tests/fixtures/all_opcodes-v.esp32s2.lst | 48 ++++++++++-----
 tests/fixtures/all_opcodes.S             |  9 ++-
 tests/fixtures/all_opcodes.esp32.lst     | 28 +++++----
 tests/fixtures/all_opcodes.esp32s2.lst   | 22 +++----
 5 files changed, 128 insertions(+), 55 deletions(-)

diff --git a/tests/fixtures/all_opcodes-v.esp32.lst b/tests/fixtures/all_opcodes-v.esp32.lst
index 4bc7975..1e2ebf7 100644
--- a/tests/fixtures/all_opcodes-v.esp32.lst
+++ b/tests/fixtures/all_opcodes-v.esp32.lst
@@ -1,8 +1,8 @@
 header
 ULP magic    : b'ulp\x00' (0x00706c75)
 .text offset : 12 (0x0c)
-.text size   : 164 (0xa4)
-.data offset : 176 (0xb0)
+.text size   : 188 (0xbc)
+.data offset : 200 (0xc8)
 .data size   : 8 (0x08)
 .bss size    : 0 (0x00)
 ----------------------------------------
@@ -251,14 +251,28 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  opcode     =   8
                  sign       =   0
                  sub_opcode =   1
-0080  05000382  JUMPR 1, 5, GE
+0080  06000382  JUMPR 1, 6, GE
                  cmp        =   1 (GE)
-                 imm        =   5
+                 imm        =   6
                  offset     =   1
                  opcode     =   8
                  sign       =   0
                  sub_opcode =   1
-0084  01000084  JUMPS 0, 1, LT
+0084  08000582  JUMPR 2, 8, GE
+                 cmp        =   1 (GE)
+                 imm        =   8
+                 offset     =   2
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   1
+0088  07000582  JUMPR 2, 7, GE
+                 cmp        =   1 (GE)
+                 imm        =   7
+                 offset     =   2
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   1
+008c  01000084  JUMPS 0, 1, LT
                  cmp        =   0 (LT)
                  imm        =   1
                  offset     =   0
@@ -266,7 +280,15 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  sign       =   0
                  sub_opcode =   2
                  unused     =   0
-0088  05800284  JUMPS 1, 5, GE
+0090  05000584  JUMPS 2, 5, LE
+                 cmp        =   2 (LE)
+                 imm        =   5
+                 offset     =   2
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+0094  05800284  JUMPS 1, 5, GE
                  cmp        =   1 (GE)
                  imm        =   5
                  offset     =   1
@@ -274,33 +296,57 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  sign       =   0
                  sub_opcode =   2
                  unused     =   0
-008c  09000584  JUMPS 2, 9, LE
+0098  07000484  JUMPS 2, 7, LT
+                 cmp        =   0 (LT)
+                 imm        =   7
+                 offset     =   2
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+009c  07000584  JUMPS 2, 7, LE
                  cmp        =   2 (LE)
-                 imm        =   9
+                 imm        =   7
                  offset     =   2
                  opcode     =   8
                  sign       =   0
                  sub_opcode =   2
                  unused     =   0
-0090  01000090  WAKE
+00a0  09000784  JUMPS 3, 9, LE
+                 cmp        =   2 (LE)
+                 imm        =   9
+                 offset     =   3
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+00a4  0b800884  JUMPS 4, 11, GE
+                 cmp        =   1 (GE)
+                 imm        =  11 (0x0b)
+                 offset     =   4
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+00a8  01000090  WAKE
                  opcode     =   9
                  sub_opcode =   0
                  unused     =   0
                  wakeup     =   1
-0094  07000092  SLEEP 7
+00ac  07000092  SLEEP 7
                  cycle_sel  =   7
                  opcode     =   9
                  sub_opcode =   1
                  unused     =   0
-0098  090000a0  TSENS r1, 2
+00b0  090000a0  TSENS r1, 2
                  delay      =   2
                  dreg       =   1
                  opcode     =  10 (0x0a)
                  unused     =   0
-009c  000000b0  HALT
+00b4  000000b0  HALT
                  opcode     =  11 (0x0b)
                  unused     =   0
-00a0  060000d0  LD r2, r1, 0
+00b8  060000d0  LD r2, r1, 0
                  dreg       =   2
                  offset     =   0
                  opcode     =  13 (0x0d)
@@ -309,5 +355,5 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  unused2    =   0
 ----------------------------------------
 .data
-00a4  00000000  <empty>
-00a8  fecadec0  <non-empty>
+00bc  00000000  <empty>
+00c0  fecadec0  <non-empty>
diff --git a/tests/fixtures/all_opcodes-v.esp32s2.lst b/tests/fixtures/all_opcodes-v.esp32s2.lst
index dc2a63f..bdc8855 100644
--- a/tests/fixtures/all_opcodes-v.esp32s2.lst
+++ b/tests/fixtures/all_opcodes-v.esp32s2.lst
@@ -1,8 +1,8 @@
 header
 ULP magic    : b'ulp\x00' (0x00706c75)
 .text offset : 12 (0x0c)
-.text size   : 168 (0xa8)
-.data offset : 180 (0xb4)
+.text size   : 176 (0xb0)
+.data offset : 188 (0xbc)
 .data size   : 8 (0x08)
 .bss size    : 0 (0x00)
 ----------------------------------------
@@ -278,10 +278,10 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  opcode     =   8
                  sign       =   0
                  sub_opcode =   0
-0084  05000680  JUMPR 1, 5, EQ
+0084  07000a80  JUMPR 2, 7, EQ
                  cmp        =   2 (EQ)
-                 imm        =   5
-                 offset     =   1
+                 imm        =   7
+                 offset     =   2
                  opcode     =   8
                  sign       =   0
                  sub_opcode =   0
@@ -293,40 +293,56 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  sign       =   0
                  sub_opcode =   2
                  unused     =   0
-008c  05801388  JUMPS 4, 5, GE
-                 cmp        =   7 (GE)
+008c  05801188  JUMPS 4, 5, GT
+                 cmp        =   3 (GT)
                  imm        =   5
                  offset     =   4
                  opcode     =   8
                  sign       =   0
                  sub_opcode =   2
                  unused     =   0
-0090  09802288  JUMPS 8, 9, LE
+0090  07002288  JUMPS 8, 7, EQ
+                 cmp        =   4 (EQ)
+                 imm        =   7
+                 offset     =   8
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+0094  09803288  JUMPS 12, 9, LE
                  cmp        =   5 (LE)
                  imm        =   9
-                 offset     =   8
+                 offset     =  12 (0x0c)
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+0098  0b804388  JUMPS 16, 11, GE
+                 cmp        =   7 (GE)
+                 imm        =  11 (0x0b)
+                 offset     =  16 (0x10)
                  opcode     =   8
                  sign       =   0
                  sub_opcode =   2
                  unused     =   0
-0094  01000090  WAKE
+009c  01000090  WAKE
                  opcode     =   9
                  sub_opcode =   0
                  unused     =   0
                  wakeup     =   1
-0098  07000040  WAIT 7
+00a0  07000040  WAIT 7
                  cycles     =   7
                  opcode     =   4
                  unused     =   0
-009c  090000a0  TSENS r1, 2
+00a4  090000a0  TSENS r1, 2
                  delay      =   2
                  dreg       =   1
                  opcode     =  10 (0x0a)
                  unused     =   0
-00a0  000000b0  HALT
+00a8  000000b0  HALT
                  opcode     =  11 (0x0b)
                  unused     =   0
-00a4  060000d0  LD r2, r1, 0
+00ac  060000d0  LD r2, r1, 0
                  dreg       =   2
                  offset     =   0
                  opcode     =  13 (0x0d)
@@ -335,5 +351,5 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  unused2    =   0
 ----------------------------------------
 .data
-00a8  00000000  <empty>
-00ac  fecadec0  <non-empty>
+00b0  00000000  <empty>
+00b4  fecadec0  <non-empty>
diff --git a/tests/fixtures/all_opcodes.S b/tests/fixtures/all_opcodes.S
index 7f8c916..1710bc9 100644
--- a/tests/fixtures/all_opcodes.S
+++ b/tests/fixtures/all_opcodes.S
@@ -46,11 +46,14 @@ JUMP 0, EQ
 JUMP 0, OV
 
 JUMPR 0, 1, LT
-JUMPR 4, 5, GE
+JUMPR 4, 5, GT
+JUMPR 8, 7, EQ
 
 JUMPS 0, 1, LT
-JUMPS 4, 5, GE
-JUMPS 8, 9, LE
+JUMPS 4, 5, GT
+JUMPS 8, 7, EQ
+JUMPS 12, 9, LE
+JUMPS 16, 11, GE
 
 WAKE
 SLEEP 7
diff --git a/tests/fixtures/all_opcodes.esp32.lst b/tests/fixtures/all_opcodes.esp32.lst
index 2ef1bd7..b882a3e 100644
--- a/tests/fixtures/all_opcodes.esp32.lst
+++ b/tests/fixtures/all_opcodes.esp32.lst
@@ -31,15 +31,21 @@
 0074  00004080  JUMP 0, EQ
 0078  00008080  JUMP 0, OV
 007c  01000082  JUMPR 0, 1, LT
-0080  05000382  JUMPR 1, 5, GE
-0084  01000084  JUMPS 0, 1, LT
-0088  05800284  JUMPS 1, 5, GE
-008c  09000584  JUMPS 2, 9, LE
-0090  01000090  WAKE
-0094  07000092  SLEEP 7
-0098  090000a0  TSENS r1, 2
-009c  000000b0  HALT
-00a0  060000d0  LD r2, r1, 0
+0080  06000382  JUMPR 1, 6, GE
+0084  08000582  JUMPR 2, 8, GE
+0088  07000582  JUMPR 2, 7, GE
+008c  01000084  JUMPS 0, 1, LT
+0090  05000584  JUMPS 2, 5, LE
+0094  05800284  JUMPS 1, 5, GE
+0098  07000484  JUMPS 2, 7, LT
+009c  07000584  JUMPS 2, 7, LE
+00a0  09000784  JUMPS 3, 9, LE
+00a4  0b800884  JUMPS 4, 11, GE
+00a8  01000090  WAKE
+00ac  07000092  SLEEP 7
+00b0  090000a0  TSENS r1, 2
+00b4  000000b0  HALT
+00b8  060000d0  LD r2, r1, 0
 .data
-00a4  00000000  <empty>
-00a8  fecadec0  <non-empty>
+00bc  00000000  <empty>
+00c0  fecadec0  <non-empty>
diff --git a/tests/fixtures/all_opcodes.esp32s2.lst b/tests/fixtures/all_opcodes.esp32s2.lst
index 97040c5..3140f84 100644
--- a/tests/fixtures/all_opcodes.esp32s2.lst
+++ b/tests/fixtures/all_opcodes.esp32s2.lst
@@ -32,15 +32,17 @@
 0078  00008084  JUMP 0, OV
 007c  01000080  JUMPR 0, 1, LT
 0080  05000580  JUMPR 1, 5, GT
-0084  05000680  JUMPR 1, 5, EQ
+0084  07000a80  JUMPR 2, 7, EQ
 0088  01800088  JUMPS 0, 1, LT
-008c  05801388  JUMPS 4, 5, GE
-0090  09802288  JUMPS 8, 9, LE
-0094  01000090  WAKE
-0098  07000040  WAIT 7
-009c  090000a0  TSENS r1, 2
-00a0  000000b0  HALT
-00a4  060000d0  LD r2, r1, 0
+008c  05801188  JUMPS 4, 5, GT
+0090  07002288  JUMPS 8, 7, EQ
+0094  09803288  JUMPS 12, 9, LE
+0098  0b804388  JUMPS 16, 11, GE
+009c  01000090  WAKE
+00a0  07000040  WAIT 7
+00a4  090000a0  TSENS r1, 2
+00a8  000000b0  HALT
+00ac  060000d0  LD r2, r1, 0
 .data
-00a8  00000000  <empty>
-00ac  fecadec0  <non-empty>
+00b0  00000000  <empty>
+00b4  fecadec0  <non-empty>

From 93b18e4cc8a8dd88e5d9409e2560cd4a2a40a814 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 11 Jul 2023 22:54:48 +0300
Subject: [PATCH 08/20] Add support for assembling new ST and LD instructions
 of the ESP32-S2/S3

For example, STL and STH can be used to store to the lower or
upper 16 bits of the 32-bit word at a memory address,
respectively. (The original ST instruction could only store to
the lower 16 bits of the word at a memory address.)

The following new instructions are now supported:
* LDL, LDH
* STL, STH, ST32, STI32, STI, STO
---
 esp32_ulp/opcodes_s2.py          | 86 +++++++++++++++++++++++++++++---
 tests/01_compat_tests.sh         |  4 ++
 tests/compat/loadstore.esp32s2.S | 31 ++++++++++++
 3 files changed, 115 insertions(+), 6 deletions(-)
 create mode 100644 tests/compat/loadstore.esp32s2.S

diff --git a/esp32_ulp/opcodes_s2.py b/esp32_ulp/opcodes_s2.py
index dbaeb22..22fa879 100644
--- a/esp32_ulp/opcodes_s2.py
+++ b/esp32_ulp/opcodes_s2.py
@@ -32,7 +32,17 @@
 
 OPCODE_ST = 6
 SUB_OPCODE_ST_AUTO = 1
-SUB_OPCODE_ST_OFFSET = 3
+# Note: SUB_OPCODE_ST_OFFSET should be 3
+# But in binutils-gdb they hardcoded the value to 2
+# This appears to be a bug, if one looks at the Technical
+# Reference Manual of the ESP32-S2.
+#
+# This issue is reported as a pull-request with fix:
+# https://github.com/espressif/binutils-gdb/pull/2
+#
+# We'll hard code this to 2 for now, until this is resolved in
+# binutils-gdb or the Technical Reference Manual is updated.
+SUB_OPCODE_ST_OFFSET = 2  # should be 3
 SUB_OPCODE_ST = 4
 
 OPCODE_ALU = 7
@@ -467,37 +477,101 @@ def i_adc(reg_dest, adc_idx, mux, _not_used=None):
     return _adc.all
 
 
-def i_st(reg_val, reg_addr, offset): ## FIXME do via i_st_manual
+def i_st_manual(reg_val, reg_addr, offset, label, upper, wr_way):
     _st.dreg = get_reg(reg_addr)
     _st.sreg = get_reg(reg_val)
+    _st.label = get_imm(label)
+    _st.upper = upper
+    _st.wr_way = wr_way
+    _st.unused1 = 0
+    _st.offset = get_imm(offset) // 4
+    _st.unused2 = 0
+    _st.sub_opcode = SUB_OPCODE_ST
+    _st.opcode = OPCODE_ST
+    return _st.all
+
+
+def i_stl(reg_val, reg_addr, offset, label="0"):
+    return i_st_manual(reg_val, reg_addr, offset, label, 0, 3 if label=="0" else 1)
+
+
+def i_sth(reg_val, reg_addr, offset, label="0"):
+    return i_st_manual(reg_val, reg_addr, offset, label, 1, 3 if label=="0" else 1)
+
+
+def i_st(reg_val, reg_addr, offset):
+    return i_stl(reg_val, reg_addr, offset)
+
+
+def i_st32(reg_val, reg_addr, offset, label):
+    return i_st_manual(reg_val, reg_addr, offset, label, 0, 0)
+
+
+def i_st_auto(reg_val, reg_addr, label, wr_way):
+    _st.dreg = get_reg(reg_addr)
+    _st.sreg = get_reg(reg_val)
+    _st.label = get_imm(label)
+    _st.upper = 0
+    _st.wr_way = wr_way
+    _st.unused1 = 0
+    _st.offset = 0
+    _st.unused2 = 0
+    _st.sub_opcode = SUB_OPCODE_ST_AUTO
+    _st.opcode = OPCODE_ST
+    return _st.all
+
+
+def i_sto(offset):
+    _st.dreg = 0
+    _st.sreg = 0
     _st.label = 0
     _st.upper = 0
-    _st.wr_way = 3
+    _st.wr_way = 0
     _st.unused1 = 0
     _st.offset = get_imm(offset) // 4
     _st.unused2 = 0
-    _st.sub_opcode = SUB_OPCODE_ST
+    _st.sub_opcode = SUB_OPCODE_ST_OFFSET
     _st.opcode = OPCODE_ST
     return _st.all
 
 
+def i_sti(reg_val, reg_addr, label="0"):
+    return i_st_auto(reg_val, reg_addr, label, 3 if label=="0" else 1)
+
+
+def i_sti32(reg_val, reg_addr, label):
+    return i_st_auto(reg_val, reg_addr, label, 0)
+
+
 def i_halt():
     _halt.unused = 0
     _halt.opcode = OPCODE_HALT
     return _halt.all
 
 
-def i_ld(reg_dest, reg_addr, offset): ## FIXME do via i_ld_manual
+def i_ld_manual(reg_dest, reg_addr, offset, rd_upper):
     _ld.dreg = get_reg(reg_dest)
     _ld.sreg = get_reg(reg_addr)
     _ld.unused1 = 0
     _ld.offset = get_imm(offset) // 4
     _ld.unused2 = 0
-    _ld.rd_upper = 0
+    _ld.rd_upper = rd_upper
     _ld.opcode = OPCODE_LD
     return _ld.all
 
 
+def i_ldl(reg_dest, reg_addr, offset):
+    return i_ld_manual(reg_dest, reg_addr, offset, 0)
+
+
+def i_ldh(reg_dest, reg_addr, offset):
+    return i_ld_manual(reg_dest, reg_addr, offset, 1)
+
+
+def i_ld(reg_dest, reg_addr, offset):
+    return i_ldl(reg_dest, reg_addr, offset)
+
+
 def i_move(reg_dest, reg_imm_src):
     # this is the only ALU instruction with 2 args: move r0, r1
     dest = get_reg(reg_dest)
diff --git a/tests/01_compat_tests.sh b/tests/01_compat_tests.sh
index 745e8e0..12328cc 100755
--- a/tests/01_compat_tests.sh
+++ b/tests/01_compat_tests.sh
@@ -17,6 +17,10 @@ run_tests_for_cpu() {
     for src_file in $(ls -1 compat/*.S); do
         src_name="${src_file%.S}"
 
+        # files with a cpu encoded into their name are only run for that cpu
+        if [[ $src_file =~ \.esp32\. && $cpu != esp32 ]] || [[ $src_file =~ \.esp32s2?\. && $cpu != esp32s2 ]]; then
+            continue
+        fi
         echo "Testing $src_file"
         echo -e "\tBuilding using micropython-esp32-ulp ($cpu)"
         ulp_file="${src_name}.ulp"
diff --git a/tests/compat/loadstore.esp32s2.S b/tests/compat/loadstore.esp32s2.S
new file mode 100644
index 0000000..d0c032f
--- /dev/null
+++ b/tests/compat/loadstore.esp32s2.S
@@ -0,0 +1,31 @@
+.set  offs, 0x20
+.set  lab1, 0x01
+
+.text
+LDL   R1, R2, 0x20
+LDL   R1, R2, offs
+LDH   R1, R2, 0x20
+LDH   R1, R2, offs
+
+STL   R1, R2, 0x20
+STL   R1, R2, offs
+STL   R1, R2, offs, 1
+STL   R1, R2, offs, lab1
+
+STH   R1, R2, 0x20
+STH   R1, R2, offs
+STH   R1, R2, offs, 1
+STH   R1, R2, offs, lab1
+
+ST32  R1, R2, 0x10, 1
+ST32  R1, R2, offs, lab1
+
+STI32 R1, R2, 1
+STI32 R1, R2, lab1
+
+STI   R1, R2
+STI   R1, R2, 1
+STI   R1, R2, lab1
+
+STO   0x20
+STO   offs

From 0111dc51446a9fe6a9f158f925127c9f083a6a07 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Wed, 12 Jul 2023 09:36:57 +0300
Subject: [PATCH 09/20] Add support for new ST and LD instructions to the
 disassembler

The following new instructions are now supported:
* LDL, LDH
* STL, STH, ST32, STI32, STI, STO

Note: The disassembler will return LD instead of LDL and
ST instead of STL, because LD/LDL and ST/STL are synonyms
that encode to the same instruction. The disassembler can
only emit one name per encoding, so we picked the keyword
that exists on both the ESP32 and the ESP32-S2/S3.
---
 tests/03_disassembler_tests.sh                |   4 +-
 tests/decode_s2.py                            |  17 +-
 tests/fixtures/all_opcodes-v.esp32s2.lst      | 186 +++++++++++++++++-
 .../{all_opcodes.S => all_opcodes.esp32.S}    |   0
 tests/fixtures/all_opcodes.esp32s2.S          |  91 +++++++++
 tests/fixtures/all_opcodes.esp32s2.lst        |  21 +-
 tests/fixtures/manual_bytes-v.esp32s2.lst     |   3 +
 tools/decode_s2.py                            |  31 ++-
 8 files changed, 341 insertions(+), 12 deletions(-)
 rename tests/fixtures/{all_opcodes.S => all_opcodes.esp32.S} (100%)
 create mode 100644 tests/fixtures/all_opcodes.esp32s2.S

diff --git a/tests/03_disassembler_tests.sh b/tests/03_disassembler_tests.sh
index b3740f4..2ecc7a0 100755
--- a/tests/03_disassembler_tests.sh
+++ b/tests/03_disassembler_tests.sh
@@ -13,11 +13,11 @@ test_disassembling_a_file() {
     fi
 
     testname=all_opcodes
-    fixture=fixtures/${testname}.S
+    fixture=fixtures/${testname}.${cpu}.S
     echo -e "\tBuilding $fixture using micropython-esp32-ulp ($cpu)"
 
     log_file="${testname}.log"
-    ulp_file="fixtures/${testname}.ulp"
+    ulp_file="fixtures/${testname}.${cpu}.ulp"
     micropython -m esp32_ulp -c $cpu $fixture 1>$log_file   # generates $ulp_file
 
     lst_file="${testname}.$cpu.lst"
diff --git a/tests/decode_s2.py b/tests/decode_s2.py
index ae46263..5fc2da6 100644
--- a/tests/decode_s2.py
+++ b/tests/decode_s2.py
@@ -86,8 +86,23 @@ def test_all_instructions():
     # OPCODE_ADC = 5
     assert_decode("00000050", opcodes._adc, 'ADC r0, 0, 0')
 
-    # OPCODE_ST = 6
+    # OPCODE_ST = 6, SUB_OPCODE_ST
     assert_decode("80010068", opcodes._st, 'ST r0, r0, 0')
+    assert_decode("c0010068", opcodes._st, 'STH r0, r0, 0')
+    assert_decode("90000068", opcodes._st, 'STL r0, r0, 0, 1')
+    assert_decode("d0000068", opcodes._st, 'STH r0, r0, 0, 1')
+    assert_decode("00000068", opcodes._st, 'ST32 r0, r0, 0, 0')
+    assert_decode("10000068", opcodes._st, 'ST32 r0, r0, 0, 1')
+
+    # OPCODE_ST = 6, SUB_OPCODE_ST_AUTO
+    assert_decode("80010062", opcodes._st, 'STI r0, r0')
+    assert_decode("90000062", opcodes._st, 'STI r0, r0, 1')
+    assert_decode("00000062", opcodes._st, 'STI32 r0, r0, 0')
+    assert_decode("10000062", opcodes._st, 'STI32 r0, r0, 1')
+
+    # OPCODE_ST = 6, SUB_OPCODE_ST_OFFSET
+    assert_decode("00000064", opcodes._st, 'STO 0')
+    assert_decode("00040064", opcodes._st, 'STO 1')
 
     # OPCODE_ALU = 7, SUB_OPCODE_ALU_REG
     assert_decode("00000070", opcodes._alu_reg, 'ADD r0, r0, r0')
diff --git a/tests/fixtures/all_opcodes-v.esp32s2.lst b/tests/fixtures/all_opcodes-v.esp32s2.lst
index bdc8855..50d440b 100644
--- a/tests/fixtures/all_opcodes-v.esp32s2.lst
+++ b/tests/fixtures/all_opcodes-v.esp32s2.lst
@@ -1,8 +1,8 @@
 header
 ULP magic    : b'ulp\x00' (0x00706c75)
 .text offset : 12 (0x0c)
-.text size   : 176 (0xb0)
-.data offset : 188 (0xbc)
+.text size   : 244 (0xf4)
+.data offset : 256 (0x100)
 .data size   : 8 (0x08)
 .bss size    : 0 (0x00)
 ----------------------------------------
@@ -63,6 +63,9 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  sub_opcode =   4
                  unused1    =   0
                  unused2    =   0
+                 label      =   0
+                 upper      =   0
+                 wr_way     =   3
 0020  06000070  ADD r2, r1, r0
                  dreg       =   2
                  opcode     =   7
@@ -349,7 +352,182 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  sreg       =   1
                  unused1    =   0
                  unused2    =   0
+                 rd_upper   =   0
+00b0  00000040  NOP
+                 cycles     =   0
+                 opcode     =   4
+                 unused     =   0
+00b4  092000d0  LD r1, r2, 8
+                 dreg       =   1
+                 offset     =   8
+                 opcode     =  13 (0x0d)
+                 sreg       =   2
+                 unused1    =   0
+                 unused2    =   0
+                 rd_upper   =   0
+00b8  092000d8  LDH r1, r2, 8
+                 dreg       =   1
+                 offset     =   8
+                 opcode     =  13 (0x0d)
+                 sreg       =   2
+                 unused1    =   0
+                 unused2    =   0
+                 rd_upper   =   1
+00bc  89210068  ST r1, r2, 8
+                 dreg       =   2
+                 offset     =   8
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   0
+                 upper      =   0
+                 wr_way     =   3
+00c0  89210068  ST r1, r2, 8
+                 dreg       =   2
+                 offset     =   8
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   0
+                 upper      =   0
+                 wr_way     =   3
+00c4  99200068  STL r1, r2, 8, 1
+                 dreg       =   2
+                 offset     =   8
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   1
+                 upper      =   0
+                 wr_way     =   1
+00c8  c9210068  STH r1, r2, 8
+                 dreg       =   2
+                 offset     =   8
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   0
+                 upper      =   1
+                 wr_way     =   3
+00cc  c9210068  STH r1, r2, 8
+                 dreg       =   2
+                 offset     =   8
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   0
+                 upper      =   1
+                 wr_way     =   3
+00d0  d9200068  STH r1, r2, 8, 1
+                 dreg       =   2
+                 offset     =   8
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   1
+                 upper      =   1
+                 wr_way     =   1
+00d4  09200068  ST32 r1, r2, 8, 0
+                 dreg       =   2
+                 offset     =   8
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   0
+                 upper      =   0
+                 wr_way     =   0
+00d8  19200068  ST32 r1, r2, 8, 1
+                 dreg       =   2
+                 offset     =   8
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   1
+                 upper      =   0
+                 wr_way     =   0
+00dc  89010062  STI r1, r2
+                 dreg       =   2
+                 offset     =   0
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   0
+                 upper      =   0
+                 wr_way     =   3
+00e0  89010062  STI r1, r2
+                 dreg       =   2
+                 offset     =   0
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   0
+                 upper      =   0
+                 wr_way     =   3
+00e4  99000062  STI r1, r2, 1
+                 dreg       =   2
+                 offset     =   0
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   1
+                 upper      =   0
+                 wr_way     =   1
+00e8  09000062  STI32 r1, r2, 0
+                 dreg       =   2
+                 offset     =   0
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   0
+                 upper      =   0
+                 wr_way     =   0
+00ec  19000062  STI32 r1, r2, 1
+                 dreg       =   2
+                 offset     =   0
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   1
+                 upper      =   0
+                 wr_way     =   0
+00f0  00200064  STO 8
+                 dreg       =   0
+                 offset     =   8
+                 opcode     =   6
+                 sreg       =   0
+                 sub_opcode =   2
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   0
+                 upper      =   0
+                 wr_way     =   0
 ----------------------------------------
 .data
-00b0  00000000  <empty>
-00b4  fecadec0  <non-empty>
+00f4  00000000  <empty>
+00f8  fecadec0  <non-empty>
diff --git a/tests/fixtures/all_opcodes.S b/tests/fixtures/all_opcodes.esp32.S
similarity index 100%
rename from tests/fixtures/all_opcodes.S
rename to tests/fixtures/all_opcodes.esp32.S
diff --git a/tests/fixtures/all_opcodes.esp32s2.S b/tests/fixtures/all_opcodes.esp32s2.S
new file mode 100644
index 0000000..5e5dea0
--- /dev/null
+++ b/tests/fixtures/all_opcodes.esp32s2.S
@@ -0,0 +1,91 @@
+.data
+empty: .long 0
+magic: .long 0xc0decafe
+
+.text
+REG_WR 0x123, 1, 2, 3
+
+REG_RD 0x321, 2, 1
+
+I2C_RD 3, 2, 1, 0
+I2C_WR 0, 1, 2, 3, 4
+
+NOP
+WAIT 7
+
+ADC r3, 2, 1
+
+ST r3, r2, 1
+
+ADD r2, r1, r0
+SUB r2, r1, r0
+AND r2, r1, r0
+OR r2, r1, r0
+MOVE r2, r1
+LSH r2, r1, r0
+RSH r2, r1, r0
+
+ADD r2, r1, 0
+SUB r2, r1, 0
+AND r2, r1, 0
+OR r2, r1, 0
+MOVE r1, 0
+LSH r2, r1, 0
+RSH r2, r1, 0
+
+STAGE_RST
+STAGE_INC 7
+STAGE_DEC 3
+
+JUMP r0
+JUMP r1, EQ
+JUMP r2, OV
+
+JUMP 0
+JUMP 0, EQ
+JUMP 0, OV
+
+JUMPR 0, 1, LT
+JUMPR 4, 5, GT
+JUMPR 8, 7, EQ
+
+JUMPS 0, 1, LT
+JUMPS 4, 5, GT
+JUMPS 8, 7, EQ
+JUMPS 12, 9, LE
+JUMPS 16, 11, GE
+
+WAKE
+SLEEP 7
+
+TSENS r1, 2
+
+HALT
+
+LD r2, r1, 0
+
+# ESP32-S2 specific instructions
+NOP  # marker
+
+LDL R1, R2, 0x20
+LDH R1, R2, 0x20
+
+STL R1, R2, 0x20
+STL R1, R2, 0x20, 0
+STL R1, R2, 0x20, 1
+
+STH R1, R2, 0x20
+STH R1, R2, 0x20, 0
+STH R1, R2, 0x20, 1
+
+ST32 R1, R2, 0x20, 0
+ST32 R1, R2, 0x20, 1
+
+STI R1, R2
+STI R1, R2, 0
+STI R1, R2, 1
+
+STI32 R1, R2, 0
+STI32 R1, R2, 1
+
+STO   0x20
diff --git a/tests/fixtures/all_opcodes.esp32s2.lst b/tests/fixtures/all_opcodes.esp32s2.lst
index 3140f84..213390e 100644
--- a/tests/fixtures/all_opcodes.esp32s2.lst
+++ b/tests/fixtures/all_opcodes.esp32s2.lst
@@ -43,6 +43,23 @@
 00a4  090000a0  TSENS r1, 2
 00a8  000000b0  HALT
 00ac  060000d0  LD r2, r1, 0
+00b0  00000040  NOP
+00b4  092000d0  LD r1, r2, 8
+00b8  092000d8  LDH r1, r2, 8
+00bc  89210068  ST r1, r2, 8
+00c0  89210068  ST r1, r2, 8
+00c4  99200068  STL r1, r2, 8, 1
+00c8  c9210068  STH r1, r2, 8
+00cc  c9210068  STH r1, r2, 8
+00d0  d9200068  STH r1, r2, 8, 1
+00d4  09200068  ST32 r1, r2, 8, 0
+00d8  19200068  ST32 r1, r2, 8, 1
+00dc  89010062  STI r1, r2
+00e0  89010062  STI r1, r2
+00e4  99000062  STI r1, r2, 1
+00e8  09000062  STI32 r1, r2, 0
+00ec  19000062  STI32 r1, r2, 1
+00f0  00200064  STO 8
 .data
-00b0  00000000  <empty>
-00b4  fecadec0  <non-empty>
+00f4  00000000  <empty>
+00f8  fecadec0  <non-empty>
diff --git a/tests/fixtures/manual_bytes-v.esp32s2.lst b/tests/fixtures/manual_bytes-v.esp32s2.lst
index bcd452e..7d91dda 100644
--- a/tests/fixtures/manual_bytes-v.esp32s2.lst
+++ b/tests/fixtures/manual_bytes-v.esp32s2.lst
@@ -15,6 +15,9 @@
                  sub_opcode =   4
                  unused1    =   0
                  unused2    =   0
+                 label      =   0
+                 upper      =   0
+                 wr_way     =   3
 0008  2705cc19  REG_WR 0x127, 19, 19, 1
                  addr       =  39 (0x27)
                  data       =   1
diff --git a/tools/decode_s2.py b/tools/decode_s2.py
index f40db5d..2b6d440 100644
--- a/tools/decode_s2.py
+++ b/tools/decode_s2.py
@@ -63,8 +63,32 @@
         opcodes._i2c,
         lambda op: 'I2C_%s %s, %s, %s, %s' % ('RD' if op.rw == 0 else 'WR', op.sub_addr, op.high, op.low, op.i2c_sel)
     ),
-    opcodes.OPCODE_LD: ('LD', opcodes._ld, lambda op: 'LD r%s, r%s, %s' % (op.dreg, op.sreg, op.offset)),
-    opcodes.OPCODE_ST: ('ST', opcodes._st, lambda op: 'ST r%s, r%s, %s' % (op.sreg, op.dreg, op.offset)),
+    opcodes.OPCODE_LD: (
+        'LD/LDH',
+        opcodes._ld,
+        lambda op: '%s r%s, r%s, %s' % ('LDH' if op.rd_upper else 'LD', op.dreg, op.sreg, op.offset)
+    ),
+    opcodes.OPCODE_ST: ('ST', opcodes._st, {
+        opcodes.SUB_OPCODE_ST_AUTO: (
+            'STI/STI32',
+            opcodes._st,
+            lambda op: 'STI32 r%s, r%s, %s' % (op.sreg, op.dreg, op.label) if op.wr_way == 0
+                else 'STI r%s, r%s, %s' % (op.sreg, op.dreg, op.label) if op.label
+                else 'STI r%s, r%s' % (op.sreg, op.dreg)
+        ),
+        opcodes.SUB_OPCODE_ST_OFFSET: (
+            'STO',
+            opcodes._st,
+            lambda op: 'STO %s' % op.offset
+        ),
+        opcodes.SUB_OPCODE_ST: (
+            'ST/STH/ST32',
+            opcodes._st,
+            lambda op: '%s r%s, r%s, %s, %s' % ('STH' if op.upper else 'STL', op.sreg, op.dreg, op.offset, op.label) if op.wr_way and op.label
+                else '%s r%s, r%s, %s' % ('STH' if op.upper else 'ST', op.sreg, op.dreg, op.offset) if op.wr_way
+                else 'ST32 r%s, r%s, %s, %s' % (op.sreg, op.dreg, op.offset, op.label)
+        )
+    }),
     opcodes.OPCODE_RD_REG: (
         'RD_REG',
         opcodes._rd_reg,
@@ -117,7 +141,8 @@ def get_instruction_fields(ins):
         'high', 'i2c_sel', 'imm', 'low', 'mux', 'offset', 'opcode',
         'periph_sel', 'reg', 'rw', 'sar_sel', 'sel', 'sign', 'sreg',
         'sub_addr', 'sub_opcode', 'treg', 'type', 'unused', 'unused1',
-        'unused2', 'wakeup'
+        'unused2', 'wakeup',
+        'rd_upper', 'label', 'upper', 'wr_way',
     )
     field_details = []
     for field in possible_fields:

From 78275c2084ee4e09eda2e50c04f5d7b7c73a5a91 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 11 Jul 2023 20:44:41 +0300
Subject: [PATCH 10/20] Update documentation to reflect the new ESP32-S2/S3
 support

---
 README.rst     | 16 ++++++++++++----
 docs/index.rst |  7 ++++---
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/README.rst b/README.rst
index e29d841..ea8c2ca 100644
--- a/README.rst
+++ b/README.rst
@@ -15,7 +15,7 @@ micropython-esp32-ulp is an assembler toolchain for the ESP32 ULP (Ultra Low-Pow
 Co-Processor, written in MicroPython.
 
 It can translate small assembly language programs to a loadable/executable
-ULP machine code binary, directly on the ESP32 microcontroller.
+ULP-FSM (not RISC-V) machine code binary, directly on a ESP32 microcontroller.
 
 This is intended as an alternative approach to assembling such programs using
 the `binutils-gdb toolchain <https://github.com/espressif/binutils-gdb/tree/esp32ulp-elf-2.35>`_
@@ -30,6 +30,8 @@ Features
 The following features are supported:
 
 * the entire `ESP32 ULP instruction set <https://docs.espressif.com/projects/esp-idf/en/latest/esp32/api-reference/system/ulp_instruction_set.html>`_
+* the entire `ESP32-S2 ULP instruction set <https://docs.espressif.com/projects/esp-idf/en/latest/esp32s2/api-reference/system/ulp_instruction_set.html>`_
+  (this also covers the ESP32-S3) [#f1]_
 * constants defined with ``.set``
 * constants defined with ``#define``
 * expressions in assembly code and constant definitions
@@ -37,6 +39,10 @@ The following features are supported:
 * many ESP32 ULP code examples found on the web will work unmodified
 * a simple disassembler is also provided
 
+.. [#f1] Note: the ESP32-S2 and ESP32-S3 have the same ULP binary format between each other
+         but the binary format is different than that of the original ESP32 ULP. You need to
+         select the ``esp32s2`` cpu (`see docs </docs/disassembler.rst>`_) when assembling code
+         for use on an ESP32-S2/S3.
 
 Quick start
 -----------
@@ -66,10 +72,12 @@ See `docs/index.rst </docs/index.rst>`_.
 Requirements
 ------------
 
-The minimum supported version of MicroPython is v1.12.
+The minimum supported version of MicroPython is v1.12. (For ESP32-S2 and S3
+devices, a version greater than v1.20 is required as versions before that
+did not enable the ``esp32.ULP`` class).
 
-An ESP32 is required to run the ULP machine code binary produced by micropython-esp32-ulp
-(the ESP32-S2 will not work as it is not binary compatible with the ESP32).
+An ESP32 device is required to run the ULP machine code binary produced by
+micropython-esp32-ulp.
 
 
 License
diff --git a/docs/index.rst b/docs/index.rst
index 16d18dc..e9ae38d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -93,7 +93,8 @@ assembly source file into a machine code binary file with a ``.ulp`` extension.
 That file can then be loaded directly without assembling the source again.
 
 1. Create/upload an assembly source file and run the following to get a
-   loadable ULP binary as a ``.ulp`` file:
+   loadable ULP binary as a ``.ulp`` file (specify ``cpu='esp32s2'`` if you
+   have an ESP32-S2 or ESP32-S3 device):
 
    .. code-block:: python
 
@@ -160,7 +161,6 @@ Currently the following are not supported:
 * assembler macros using ``.macro``
 * preprocessor macros using ``#define A(x,y) ...``
 * including files using ``#include``
-* ESP32-S2 (not binary compatible with the ESP32)
 
 
 Testing
@@ -171,7 +171,8 @@ output is identical with what Espressif's esp32-elf-as (from their `binutils-gdb
 <https://github.com/espressif/binutils-gdb/tree/esp32ulp-elf-2.35>`_) produces.
 
 micropython-esp32-ulp has been tested on the Unix port of MicroPython and on real ESP32
-devices with the chip type ESP32D0WDQ6 (revision 1) without SPIRAM.
+devices with the chip type ESP32D0WDQ6 (revision 1) without SPIRAM as well as ESP32-S2
+(ESP32-S2FH4) and ESP32-S3 (ESP32-S3R8) devices.
 
 Consult the Github Actions `workflow definition file </.github/workflows/run_tests.yaml>`_
 for how to run the different tests.

From 7c04c45d14e4df1140f10e2a53a80c013d6fc819 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Thu, 3 Aug 2023 09:52:07 +0300
Subject: [PATCH 11/20] fix link in documentation

---
 README.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index ea8c2ca..c9b8dc3 100644
--- a/README.rst
+++ b/README.rst
@@ -41,8 +41,8 @@ The following features are supported:
 
 .. [#f1] Note: the ESP32-S2 and ESP32-S3 have the same ULP binary format between each other
          but the binary format is different than that of the original ESP32 ULP. You need to
-         select the ``esp32s2`` cpu (`see docs </docs/disassembler.rst>`_) when assembling code
-         for use on an ESP32-S2/S3.
+         select the ``esp32s2`` cpu (`see docs </docs/index.rst>`_) when assembling code for
+         use on an ESP32-S2/S3.
 
 Quick start
 -----------

From 2aff8a103c821e525b00bd3ca5298ee495441095 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Mon, 31 Jul 2023 21:32:17 +0300
Subject: [PATCH 12/20] Fix esp32s2 ST instructions with label field

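Specifying 0 as the label is different from not specifying a
label at all: an explicit label (even 0) must be encoded with
wr_way=1, whereas omitting the label is encoded with wr_way=3.
This commit corrects the behaviour when label 0 is used.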

Also run the all_opcodes fixture as integration test to ensure
the same result as binutils-gdb/esp32ulp-as (which is how this
bug was found).
---
 esp32_ulp/opcodes_s2.py                  | 12 ++++++------
 tests/01_compat_tests.sh                 |  2 +-
 tests/fixtures/all_opcodes-v.esp32s2.lst | 12 ++++++------
 tests/fixtures/all_opcodes.esp32s2.lst   |  6 +++---
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/esp32_ulp/opcodes_s2.py b/esp32_ulp/opcodes_s2.py
index 22fa879..f2c1bd8 100644
--- a/esp32_ulp/opcodes_s2.py
+++ b/esp32_ulp/opcodes_s2.py
@@ -491,12 +491,12 @@ def i_st_manual(reg_val, reg_addr, offset, label, upper, wr_way):
     return _st.all
 
 
-def i_stl(reg_val, reg_addr, offset, label="0"):
-    return i_st_manual(reg_val, reg_addr, offset, label, 0, 3 if label=="0" else 1)
+def i_stl(reg_val, reg_addr, offset, label=None):
+    return i_st_manual(reg_val, reg_addr, offset, label if label else "0", 0, 1 if label else 3)
 
 
-def i_sth(reg_val, reg_addr, offset, label="0"):
-    return i_st_manual(reg_val, reg_addr, offset, label, 1, 3 if label=="0" else 1)
+def i_sth(reg_val, reg_addr, offset, label=None):
+    return i_st_manual(reg_val, reg_addr, offset, label if label else "0", 1, 1 if label else 3)
 
 
 def i_st(reg_val, reg_addr, offset):
@@ -535,8 +535,8 @@ def i_sto(offset):
     return _st.all
 
 
-def i_sti(reg_val, reg_addr, label="0"):
-    return i_st_auto(reg_val, reg_addr, label, 3 if label=="0" else 1)
+def i_sti(reg_val, reg_addr, label=None):
+    return i_st_auto(reg_val, reg_addr, label if label else "0", 1 if label else 3)
 
 
 def i_sti32(reg_val, reg_addr, label):
diff --git a/tests/01_compat_tests.sh b/tests/01_compat_tests.sh
index 12328cc..ebbd5f6 100755
--- a/tests/01_compat_tests.sh
+++ b/tests/01_compat_tests.sh
@@ -14,7 +14,7 @@ run_tests_for_cpu() {
     local cpu=$1
     echo "Testing for CPU: $cpu"
 
-    for src_file in $(ls -1 compat/*.S); do
+    for src_file in $(ls -1 compat/*.S fixtures/*.S); do
         src_name="${src_file%.S}"
 
         # files with a cpu encoded into their name are only run for that cpu
diff --git a/tests/fixtures/all_opcodes-v.esp32s2.lst b/tests/fixtures/all_opcodes-v.esp32s2.lst
index 50d440b..43126de 100644
--- a/tests/fixtures/all_opcodes-v.esp32s2.lst
+++ b/tests/fixtures/all_opcodes-v.esp32s2.lst
@@ -384,7 +384,7 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  label      =   0
                  upper      =   0
                  wr_way     =   3
-00c0  89210068  ST r1, r2, 8
+00c0  89200068  ST r1, r2, 8
                  dreg       =   2
                  offset     =   8
                  opcode     =   6
@@ -394,7 +394,7 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  unused2    =   0
                  label      =   0
                  upper      =   0
-                 wr_way     =   3
+                 wr_way     =   1
 00c4  99200068  STL r1, r2, 8, 1
                  dreg       =   2
                  offset     =   8
@@ -417,7 +417,7 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  label      =   0
                  upper      =   1
                  wr_way     =   3
-00cc  c9210068  STH r1, r2, 8
+00cc  c9200068  STH r1, r2, 8
                  dreg       =   2
                  offset     =   8
                  opcode     =   6
@@ -427,7 +427,7 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  unused2    =   0
                  label      =   0
                  upper      =   1
-                 wr_way     =   3
+                 wr_way     =   1
 00d0  d9200068  STH r1, r2, 8, 1
                  dreg       =   2
                  offset     =   8
@@ -472,7 +472,7 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  label      =   0
                  upper      =   0
                  wr_way     =   3
-00e0  89010062  STI r1, r2
+00e0  89000062  STI r1, r2
                  dreg       =   2
                  offset     =   0
                  opcode     =   6
@@ -482,7 +482,7 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  unused2    =   0
                  label      =   0
                  upper      =   0
-                 wr_way     =   3
+                 wr_way     =   1
 00e4  99000062  STI r1, r2, 1
                  dreg       =   2
                  offset     =   0
diff --git a/tests/fixtures/all_opcodes.esp32s2.lst b/tests/fixtures/all_opcodes.esp32s2.lst
index 213390e..d3d6d10 100644
--- a/tests/fixtures/all_opcodes.esp32s2.lst
+++ b/tests/fixtures/all_opcodes.esp32s2.lst
@@ -47,15 +47,15 @@
 00b4  092000d0  LD r1, r2, 8
 00b8  092000d8  LDH r1, r2, 8
 00bc  89210068  ST r1, r2, 8
-00c0  89210068  ST r1, r2, 8
+00c0  89200068  ST r1, r2, 8
 00c4  99200068  STL r1, r2, 8, 1
 00c8  c9210068  STH r1, r2, 8
-00cc  c9210068  STH r1, r2, 8
+00cc  c9200068  STH r1, r2, 8
 00d0  d9200068  STH r1, r2, 8, 1
 00d4  09200068  ST32 r1, r2, 8, 0
 00d8  19200068  ST32 r1, r2, 8, 1
 00dc  89010062  STI r1, r2
-00e0  89010062  STI r1, r2
+00e0  89000062  STI r1, r2
 00e4  99000062  STI r1, r2, 1
 00e8  09000062  STI32 r1, r2, 0
 00ec  19000062  STI32 r1, r2, 1

From 863af1c7cae7621f73bc828932eb824a83491792 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 25 Jul 2023 09:16:57 +0300
Subject: [PATCH 13/20] Housekeeping: Update SOC constants for ESP32

Updated as per ESP-IDF v5.0.2. Also added reference URL to those
constants in the ESP-IDF.
---
 esp32_ulp/soc.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/esp32_ulp/soc.py b/esp32_ulp/soc.py
index c6072e6..1a8845c 100644
--- a/esp32_ulp/soc.py
+++ b/esp32_ulp/soc.py
@@ -2,6 +2,9 @@
 Address / Register definitions for the ESP32 SoC
 """
 
+# Reference:
+# https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32/include/soc/reg_base.h
+
 DR_REG_DPORT_BASE                       = 0x3ff00000
 DR_REG_AES_BASE                         = 0x3ff01000
 DR_REG_RSA_BASE                         = 0x3ff02000
@@ -38,7 +41,7 @@
 DR_REG_SPI_ENCRYPT_BASE                 = 0x3ff5B000
 DR_REG_NRX_BASE                         = 0x3ff5CC00
 DR_REG_BB_BASE                          = 0x3ff5D000
-DR_REG_PWM_BASE                         = 0x3ff5E000
+DR_REG_PWM0_BASE                        = 0x3ff5E000
 DR_REG_TIMERGROUP0_BASE                 = 0x3ff5F000
 DR_REG_TIMERGROUP1_BASE                 = 0x3ff60000
 DR_REG_RTCMEM0_BASE                     = 0x3ff61000
@@ -47,13 +50,12 @@
 DR_REG_SPI2_BASE                        = 0x3ff64000
 DR_REG_SPI3_BASE                        = 0x3ff65000
 DR_REG_SYSCON_BASE                      = 0x3ff66000
-DR_REG_APB_CTRL_BASE                    = 0x3ff66000
+DR_REG_APB_CTRL_BASE                    = 0x3ff66000  # Old name for SYSCON, to be removed
 DR_REG_I2C1_EXT_BASE                    = 0x3ff67000
 DR_REG_SDMMC_BASE                       = 0x3ff68000
 DR_REG_EMAC_BASE                        = 0x3ff69000
+DR_REG_CAN_BASE                         = 0x3ff6B000
 DR_REG_PWM1_BASE                        = 0x3ff6C000
 DR_REG_I2S1_BASE                        = 0x3ff6D000
 DR_REG_UART2_BASE                       = 0x3ff6E000
-DR_REG_PWM2_BASE                        = 0x3ff6F000
-DR_REG_PWM3_BASE                        = 0x3ff70000
-
+PERIPHS_SPI_ENCRYPT_BASEADDR            = DR_REG_SPI_ENCRYPT_BASE

From d2cd7923cfebd1a8c7a0e3f04df530f08f7f186f Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Mon, 31 Jul 2023 19:11:29 +0300
Subject: [PATCH 14/20] Support peripheral register addresses of ESP32-S2/S3

Peripheral registers of the ESP32-S2 and S3 are at a different
location than those of the original ESP32. The location within
the address space is mostly the same, however we need to use
the correct base address when calculating periph_sel (type of
register) used in REG_RD and REG_WR instructions.

Note 1: To avoid creating an entirely new CPU (esp32s3) just for
handling the different peripheral register addresses of the S3,
while their binary format (instruction set) is identical, our
esp32s2 CPU support will now accept both ESP32-S2 and ESP32-S3
addresses. This should make a binary for the one seamlessly
work on the other (without reassembly), given that the offsets
of different peripheral registers between the S2 and S3 are
mostly (but not entirely) identical.

Note 2: Our esp32s2 cpu support will also accept peripheral
register addresses of the original ESP32. This was originally
done because Espressif's binutils-gdb/esp32-ulp-as incorrectly
validates addresses for the esp32s2 cpu, and to make our compat
tests pass, this was needed. However this also has a nice side-
effect of allowing some assembly written for the original ESP32
to work unmodified when assembled for an S2/S3, because some of
the peripheral registers live at the same offset from the base
for all three variants.
---
 esp32_ulp/opcodes_s2.py | 38 ++++++++++++++++++++----
 esp32_ulp/soc_s2.py     | 64 +++++++++++++++++++++++++++++++++++++++
 esp32_ulp/soc_s3.py     | 66 +++++++++++++++++++++++++++++++++++++++++
 tests/opcodes_s2.py     | 44 +++++++++++++++++++++------
 4 files changed, 197 insertions(+), 15 deletions(-)
 create mode 100644 esp32_ulp/soc_s2.py
 create mode 100644 esp32_ulp/soc_s3.py

diff --git a/esp32_ulp/opcodes_s2.py b/esp32_ulp/opcodes_s2.py
index f2c1bd8..88ce75d 100644
--- a/esp32_ulp/opcodes_s2.py
+++ b/esp32_ulp/opcodes_s2.py
@@ -5,7 +5,6 @@
 from ucollections import namedtuple
 from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN
 
-from .soc import *
 from .util import split_tokens, validate_expression
 
 # XXX dirty hack: use a global for the symbol table
@@ -374,16 +373,43 @@ def get_cond(arg):
 
 
 def _soc_reg_to_ulp_periph_sel(reg):
+    # Accept peripheral register addresses of either the S2 or S3
+    # Since the address in the reg_rd or reg_wr instruction is an
+    # offset and not the actual address, and since the range of
+    # peripheral register addresses is the same for both the S2
+    # and S3, we will accept addresses in either address range.
+    # This allows us to avoid introducing an additional cpu type
+    # for the S3, which is otherwise identical (binary format) to
+    # the S2.
+    if 0x3f408000 <= reg <= 0x3f40afff:  # ESP32-S2 address range
+        socmod = 'soc_s2'
+    elif 0x60008000 <= reg <= 0x6000afff:  # ESP32-S3 address range
+        socmod = 'soc_s3'
+    # Accept original ESP32 range too
+    # because binutils-gdb, when using cpu esp32s2, is broken
+    # and does not accept the address ranges of the esp32s2.
+    # As a nice side-effect some assembly written for an ESP32
+    # would work as-is when re-assembled for an ESP32-S2,
+    # because many (not all!) peripheral registers live at the
+    # same offset on all 3 ESP32s.
+    elif 0x3ff48000 <= reg <= 0x3ff4afff:  # original ESP32 address range
+        socmod = 'soc'
+    else:
+        raise ValueError("invalid register base")
+
+    relative_import = 1 if '/' in __file__ else 0
+    soc = __import__(socmod, None, None, [], relative_import)
+
     # Map SoC peripheral register to periph_sel field of RD_REG and WR_REG instructions.
-    if reg < DR_REG_RTCCNTL_BASE:
+    if reg < soc.DR_REG_RTCCNTL_BASE:
         raise ValueError("invalid register base")
-    elif reg < DR_REG_RTCIO_BASE:
+    elif reg < soc.DR_REG_RTCIO_BASE:
         ret = RD_REG_PERIPH_RTC_CNTL
-    elif reg < DR_REG_SENS_BASE:
+    elif reg < soc.DR_REG_SENS_BASE:
         ret = RD_REG_PERIPH_RTC_IO
-    elif reg < DR_REG_RTC_I2C_BASE:
+    elif reg < soc.DR_REG_RTC_I2C_BASE:
         ret = RD_REG_PERIPH_SENS
-    elif reg < DR_REG_IO_MUX_BASE:
+    elif reg < soc.DR_REG_IO_MUX_BASE:
         ret = RD_REG_PERIPH_RTC_I2C
     else:
         raise ValueError("invalid register base")
diff --git a/esp32_ulp/soc_s2.py b/esp32_ulp/soc_s2.py
new file mode 100644
index 0000000..1e35295
--- /dev/null
+++ b/esp32_ulp/soc_s2.py
@@ -0,0 +1,64 @@
+"""
+Address / Register definitions for the ESP32-S2 SoC
+"""
+
+# Reference:
+# https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32s2/include/soc/reg_base.h
+
+DR_REG_SYSTEM_BASE                      = 0x3f4c0000
+DR_REG_SENSITIVE_BASE                   = 0x3f4c1000
+DR_REG_INTERRUPT_BASE                   = 0x3f4c2000
+DR_REG_DMA_COPY_BASE                    = 0x3f4c3000
+DR_REG_EXTMEM_BASE                      = 0x61800000
+DR_REG_MMU_TABLE                        = 0x61801000
+DR_REG_ITAG_TABLE                       = 0x61802000
+DR_REG_DTAG_TABLE                       = 0x61803000
+DR_REG_AES_BASE                         = 0x6003a000
+DR_REG_SHA_BASE                         = 0x6003b000
+DR_REG_RSA_BASE                         = 0x6003c000
+DR_REG_HMAC_BASE                        = 0x6003e000
+DR_REG_DIGITAL_SIGNATURE_BASE           = 0x6003d000
+DR_REG_CRYPTO_DMA_BASE                  = 0x6003f000
+DR_REG_ASSIST_DEBUG_BASE                = 0x3f4ce000
+DR_REG_DEDICATED_GPIO_BASE              = 0x3f4cf000
+DR_REG_INTRUSION_BASE                   = 0x3f4d0000
+DR_REG_UART_BASE                        = 0x3f400000
+DR_REG_SPI1_BASE                        = 0x3f402000
+DR_REG_SPI0_BASE                        = 0x3f403000
+DR_REG_GPIO_BASE                        = 0x3f404000
+DR_REG_GPIO_SD_BASE                     = 0x3f404f00
+DR_REG_FE2_BASE                         = 0x3f405000
+DR_REG_FE_BASE                          = 0x3f406000
+DR_REG_FRC_TIMER_BASE                   = 0x3f407000
+DR_REG_RTCCNTL_BASE                     = 0x3f408000
+DR_REG_RTCIO_BASE                       = 0x3f408400
+DR_REG_SENS_BASE                        = 0x3f408800
+DR_REG_RTC_I2C_BASE                     = 0x3f408C00
+DR_REG_IO_MUX_BASE                      = 0x3f409000
+DR_REG_HINF_BASE                        = 0x3f40B000
+DR_REG_I2S_BASE                         = 0x3f40F000
+DR_REG_UART1_BASE                       = 0x3f410000
+DR_REG_I2C_EXT_BASE                     = 0x3f413000
+DR_REG_UHCI0_BASE                       = 0x3f414000
+DR_REG_SLCHOST_BASE                     = 0x3f415000
+DR_REG_RMT_BASE                         = 0x3f416000
+DR_REG_PCNT_BASE                        = 0x3f417000
+DR_REG_SLC_BASE                         = 0x3f418000
+DR_REG_LEDC_BASE                        = 0x3f419000
+DR_REG_CP_BASE                          = 0x3f4c3000
+DR_REG_EFUSE_BASE                       = 0x3f41A000
+DR_REG_NRX_BASE                         = 0x3f41CC00
+DR_REG_BB_BASE                          = 0x3f41D000
+DR_REG_TIMERGROUP0_BASE                 = 0x3f41F000
+DR_REG_TIMERGROUP1_BASE                 = 0x3f420000
+DR_REG_RTC_SLOWMEM_BASE                 = 0x3f421000
+DR_REG_SYSTIMER_BASE                    = 0x3f423000
+DR_REG_SPI2_BASE                        = 0x3f424000
+DR_REG_SPI3_BASE                        = 0x3f425000
+DR_REG_SYSCON_BASE                      = 0x3f426000
+DR_REG_APB_CTRL_BASE                    = 0x3f426000  # Old name for SYSCON, to be removed
+DR_REG_I2C1_EXT_BASE                    = 0x3f427000
+DR_REG_SPI4_BASE                        = 0x3f437000
+DR_REG_USB_WRAP_BASE                    = 0x3f439000
+DR_REG_APB_SARADC_BASE                  = 0x3f440000
+DR_REG_USB_BASE                         = 0x60080000
diff --git a/esp32_ulp/soc_s3.py b/esp32_ulp/soc_s3.py
new file mode 100644
index 0000000..821b02c
--- /dev/null
+++ b/esp32_ulp/soc_s3.py
@@ -0,0 +1,66 @@
+"""
+Address / Register definitions for the ESP32-S3 SoC
+"""
+
+# Reference:
+# https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32s3/include/soc/reg_base.h
+
+DR_REG_UART_BASE                        = 0x60000000
+DR_REG_SPI1_BASE                        = 0x60002000
+DR_REG_SPI0_BASE                        = 0x60003000
+DR_REG_GPIO_BASE                        = 0x60004000
+DR_REG_GPIO_SD_BASE                     = 0x60004f00
+DR_REG_FE2_BASE                         = 0x60005000
+DR_REG_FE_BASE                          = 0x60006000
+DR_REG_EFUSE_BASE                       = 0x60007000
+DR_REG_RTCCNTL_BASE                     = 0x60008000
+DR_REG_RTCIO_BASE                       = 0x60008400
+DR_REG_SENS_BASE                        = 0x60008800
+DR_REG_RTC_I2C_BASE                     = 0x60008C00
+DR_REG_IO_MUX_BASE                      = 0x60009000
+DR_REG_HINF_BASE                        = 0x6000B000
+DR_REG_UHCI1_BASE                       = 0x6000C000
+DR_REG_I2S_BASE                         = 0x6000F000
+DR_REG_UART1_BASE                       = 0x60010000
+DR_REG_BT_BASE                          = 0x60011000
+DR_REG_I2C_EXT_BASE                     = 0x60013000
+DR_REG_UHCI0_BASE                       = 0x60014000
+DR_REG_SLCHOST_BASE                     = 0x60015000
+DR_REG_RMT_BASE                         = 0x60016000
+DR_REG_PCNT_BASE                        = 0x60017000
+DR_REG_SLC_BASE                         = 0x60018000
+DR_REG_LEDC_BASE                        = 0x60019000
+DR_REG_NRX_BASE                         = 0x6001CC00
+DR_REG_BB_BASE                          = 0x6001D000
+DR_REG_PWM0_BASE                        = 0x6001E000
+DR_REG_TIMERGROUP0_BASE                 = 0x6001F000
+DR_REG_TIMERGROUP1_BASE                 = 0x60020000
+DR_REG_RTC_SLOWMEM_BASE                 = 0x60021000
+DR_REG_SYSTIMER_BASE                    = 0x60023000
+DR_REG_SPI2_BASE                        = 0x60024000
+DR_REG_SPI3_BASE                        = 0x60025000
+DR_REG_SYSCON_BASE                      = 0x60026000
+DR_REG_APB_CTRL_BASE                    = 0x60026000  # Old name for SYSCON, to be removed
+DR_REG_I2C1_EXT_BASE                    = 0x60027000
+DR_REG_SDMMC_BASE                       = 0x60028000
+DR_REG_PERI_BACKUP_BASE                 = 0x6002A000
+DR_REG_TWAI_BASE                        = 0x6002B000
+DR_REG_PWM1_BASE                        = 0x6002C000
+DR_REG_I2S1_BASE                        = 0x6002D000
+DR_REG_UART2_BASE                       = 0x6002E000
+DR_REG_USB_SERIAL_JTAG_BASE             = 0x60038000
+DR_REG_USB_WRAP_BASE                    = 0x60039000
+DR_REG_AES_BASE                         = 0x6003A000
+DR_REG_SHA_BASE                         = 0x6003B000
+DR_REG_RSA_BASE                         = 0x6003C000
+DR_REG_HMAC_BASE                        = 0x6003E000
+DR_REG_DIGITAL_SIGNATURE_BASE           = 0x6003D000
+DR_REG_GDMA_BASE                        = 0x6003F000
+DR_REG_APB_SARADC_BASE                  = 0x60040000
+DR_REG_LCD_CAM_BASE                     = 0x60041000
+DR_REG_SYSTEM_BASE                      = 0x600C0000
+DR_REG_SENSITIVE_BASE                   = 0x600C1000
+DR_REG_INTERRUPT_BASE                   = 0x600C2000
+DR_REG_EXTMEM_BASE                      = 0x600C4000
+DR_REG_ASSIST_DEBUG_BASE                = 0x600CE000
+DR_REG_WCL_BASE                         = 0x600D0000
diff --git a/tests/opcodes_s2.py b/tests/opcodes_s2.py
index 6e64e50..de6249d 100644
--- a/tests/opcodes_s2.py
+++ b/tests/opcodes_s2.py
@@ -149,9 +149,9 @@ def test_reg_direct_ulp_addressing():
     assert_raises(ValueError, opcodes.i_reg_rd, "0x400", "0", "0")
 
 
-def test_reg_address_translations():
+def test_reg_address_translations_s2():
     """
-    Test addressing of peripheral registers using full DPORT bus addresses
+    Test addressing of ESP32-S2 peripheral registers using full DPORT bus addresses
     """
 
     ins = make_ins("""
@@ -164,12 +164,37 @@ def test_reg_address_translations():
     """)
 
     # direct ULP address is derived from full address as follows:
-    # full:0x3ff484a8 == ulp:(0x3ff484a8-DR_REG_RTCCNTL_BASE) / 4
-    # full:0x3ff484a8 == ulp:(0x3ff484a8-0x3ff48000) / 4
-    # full:0x3ff484a8 == ulp:0x4a8 / 4
-    # full:0x3ff484a8 == ulp:0x12a
-    # see: https://github.com/espressif/binutils-esp32ulp/blob/249ec34/gas/config/tc-esp32ulp_esp32.c#L149
-    ins.all = opcodes.i_reg_rd("0x3ff484a8", "0", "0")
+    # full:0x3f4084a8 == ulp:(0x3f4084a8-DR_REG_RTCCNTL_BASE) / 4
+    # full:0x3f4084a8 == ulp:(0x3f4084a8-0x3f408000) / 4
+    # full:0x3f4084a8 == ulp:0x4a8 / 4
+    # full:0x3f4084a8 == ulp:0x12a
+    # see: https://github.com/espressif/binutils-esp32ulp/blob/249ec34/gas/config/tc-esp32ulp_esp32s2.c#L78
+    ins.all = opcodes.i_reg_rd("0x3f4084a8", "0", "0")
+    assert ins.periph_sel == 1  # high 2 bits of 0x12a
+    assert ins.addr == 0x2a  # low 8 bits of 0x12a
+
+
+def test_reg_address_translations_s3():
+    """
+    Test addressing of ESP32-S3 peripheral registers using full DPORT bus addresses
+    """
+
+    ins = make_ins("""
+    addr : 8        # Address within either RTC_CNTL, RTC_IO, or SARADC
+    periph_sel : 2  # Select peripheral: RTC_CNTL (0), RTC_IO(1), SARADC(2)
+    unused : 8      # Unused
+    low : 5         # Low bit
+    high : 5        # High bit
+    opcode : 4      # Opcode (OPCODE_RD_REG)
+    """)
+
+    # direct ULP address is derived from full address as follows:
+    # full:0x600084a8 == ulp:(0x600084a8-DR_REG_RTCCNTL_BASE) / 4
+    # full:0x600084a8 == ulp:(0x600084a8-0x60008000) / 4
+    # full:0x600084a8 == ulp:0x4a8 / 4
+    # full:0x600084a8 == ulp:0x12a
+    # see: https://github.com/espressif/binutils-esp32ulp/blob/249ec34/gas/config/tc-esp32ulp_esp32s2.c#L78
+    ins.all = opcodes.i_reg_rd("0x600084a8", "0", "0")
     assert ins.periph_sel == 1  # high 2 bits of 0x12a
     assert ins.addr == 0x2a  # low 8 bits of 0x12a
 
@@ -182,4 +207,5 @@ def test_reg_address_translations():
 test_get_cond()
 test_eval_arg()
 test_reg_direct_ulp_addressing()
-test_reg_address_translations()
+test_reg_address_translations_s2()
+test_reg_address_translations_s3()

From f50ac6c473a25701ab500742688794ae8600796a Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Mon, 31 Jul 2023 19:12:07 +0300
Subject: [PATCH 15/20] Update integration tests to use Espressif's esp32s2
 tests

We also now use the correct include files from the ESP-IDF
when building the defines DB, correct for the cpu type we're
testing with. (That also means the defines DB is built once
per cpu type).

That, together with some ESP32-S2 specific test cases from
Espressif's esp32s2 assembler test-suite, make those test
cases more interesting to run, compared to only assembling
ESP32 examples with the esp32s2 cpu selected.

Note: This change no longer runs the ulp_tool examples for the
esp32s2 case, because those examples use constants (from the
ESP-IDF include files), which no longer exist for the ESP32-S2,
such as `RTC_IO_TOUCH_PAD*_HOLD_S`. Since the ulp_tool examples
primarily test the preprocessor's ability to resolve constants
from include files (via the defines DB), testing those examples
only once with the ESP32 cpu should be enough.
---
 tests/02_compat_rtc_tests.sh | 57 ++++++++++++++++++++++++++++++------
 1 file changed, 48 insertions(+), 9 deletions(-)

diff --git a/tests/02_compat_rtc_tests.sh b/tests/02_compat_rtc_tests.sh
index 9ad24b1..467ccae 100755
--- a/tests/02_compat_rtc_tests.sh
+++ b/tests/02_compat_rtc_tests.sh
@@ -36,20 +36,29 @@ fetch_binutils_esp32ulp_examples() {
         https://github.com/espressif/binutils-gdb.git 1>$log_file 2>&1
 }
 
+REUSE_DEFINES_DB=0
+
 build_defines_db() {
+    local cpu=$1
     local defines_db=defines.db
+    local defines_db_cpu=defines.$cpu.db
 
-    if [ "$1" = "-r" ] && [ -s "${defines_db}" ]; then
+    if [ "$REUSE_DEFINES_DB" = 1 ] && [ -s "${defines_db_cpu}" ]; then
         # reuse existing defines.db
+        echo "Reusing existing defines DB for cpu $cpu"
+        cp ${defines_db_cpu} ${defines_db}
         return
     fi
 
-    echo "Building defines DB from include files"
-    log_file=log/build_defines_db.log
+    echo "Building defines DB from $cpu include files"
+    log_file=log/build_defines_db.$cpu.log
     rm -f "${defines_db}"
     micropython -m esp32_ulp.parse_to_db \
-        esp-idf/components/soc/esp32/include/soc/*.h \
+        esp-idf/components/soc/$cpu/include/soc/*.h \
         esp-idf/components/esp_common/include/*.h 1>$log_file
+
+    # cache defines.db
+    cp ${defines_db} ${defines_db_cpu}
 }
 
 calc_file_hash() {
@@ -62,9 +71,9 @@ patch_test() {
     local test_name=$1
     local out_file="${test_name}.tmp"
 
-    if [ "${test_name}" = esp32ulp_jumpr ]; then
+    if [[ "${test_name}" =~ ^(esp32ulp_jumpr|esp32s2ulp_jumpr|esp32s2ulp_jump)$ ]]; then
         (
-            cd binutils-gdb/gas/testsuite/gas/esp32ulp/esp32
+            cd binutils-gdb/gas/testsuite/gas/esp32ulp/$cpu
             cp ${test_name}.s ${out_file}
             echo -e "\tPatching test to work around binutils-esp32ulp .global bug"
             cat >> ${out_file} <<EOF
@@ -89,6 +98,14 @@ EOF
 EOF
         )
         return 0
+    elif [ "${test_name}" = esp32s2ulp_ld ]; then
+        (
+            cd binutils-gdb/gas/testsuite/gas/esp32ulp/esp32s2
+            echo -e "\tPatching test to work around binutils-esp32ulp .global bug"
+            cp ${test_name}.s ${out_file}
+            echo ".global offs_min" >> ${out_file}
+        )
+        return 0
     fi
 
     return 1  # nothing was patched
@@ -98,13 +115,24 @@ make_log_dir
 fetch_esp_idf
 fetch_ulptool_examples
 fetch_binutils_esp32ulp_examples
-build_defines_db $1
 
 run_tests_for_cpu() {
     local cpu=$1
     echo "Testing for CPU: $cpu"
+    build_defines_db $cpu
+
+    LIST=$(echo binutils-gdb/gas/testsuite/gas/esp32ulp/$cpu/*.s)
+    if [ $cpu = esp32 ]; then
+        # append extra tests to test preprocessor
+        # examples have constants specific to ESP32 (original)
+        # so we only run these tests with cpu = esp32
+        # these tests primarily test our preprocessor, which is
+        # cpu independent, so we do not need to run them
+        # for each cpu.
+        LIST=$(echo ulptool/src/ulp_examples/*/*.s $LIST)
+    fi
 
-    for src_file in ulptool/src/ulp_examples/*/*.s binutils-gdb/gas/testsuite/gas/esp32ulp/esp32/*.s; do
+    for src_file in $LIST; do
 
         src_name="${src_file%.s}"
         src_dir="${src_name%/*}"
@@ -121,6 +149,13 @@ run_tests_for_cpu() {
             fi
         done
 
+        if [ "$cpu" = esp32s2 ]; then
+            if [ "${test_name}" = "hall_sensor" ]; then
+                echo -e "\tSkipping... not supported on $cpu"
+                continue 1
+            fi
+        fi
+
         # BEGIN: work around known issues with binutils-gdb (esp32ulp)
         ulp_file="${src_name}.ulp"
 
@@ -142,7 +177,7 @@ run_tests_for_cpu() {
         bin_file="${src_name}.bin"
 
         echo -e "\tBuilding using binutils ($cpu)"
-        gcc -I esp-idf/components/soc/esp32/include -I esp-idf/components/esp_common/include \
+        gcc -I esp-idf/components/soc/$cpu/include -I esp-idf/components/esp_common/include \
             -x assembler-with-cpp \
             -E -o ${pre_file} $src_file
         esp32ulp-elf-as --mcpu=$cpu -o $obj_file ${pre_file}
@@ -167,5 +202,9 @@ run_tests_for_cpu() {
     echo ""
 }
 
+if [ "$1" = -r ]; then
+    REUSE_DEFINES_DB=1
+fi
+
 run_tests_for_cpu esp32
 run_tests_for_cpu esp32s2

From 80332517fe7416f0089a219c5ed3043574605959 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 1 Aug 2023 09:49:27 +0300
Subject: [PATCH 16/20] Add mention of ESP32-S2/S3 differences to docs

---
 README.rst          | 13 ++++++++++++-
 docs/preprocess.rst | 16 ++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index c9b8dc3..20ddee1 100644
--- a/README.rst
+++ b/README.rst
@@ -31,7 +31,7 @@ The following features are supported:
 
 * the entire `ESP32 ULP instruction set <https://docs.espressif.com/projects/esp-idf/en/latest/esp32/api-reference/system/ulp_instruction_set.html>`_
 * the entire `ESP32-S2 ULP instruction set <https://docs.espressif.com/projects/esp-idf/en/latest/esp32s2/api-reference/system/ulp_instruction_set.html>`_
-  (this also covers the ESP32-S3) [#f1]_
+  (this also covers the ESP32-S3) [#f1]_ [#f2]_
 * constants defined with ``.set``
 * constants defined with ``#define``
 * expressions in assembly code and constant definitions
@@ -44,6 +44,17 @@ The following features are supported:
          select the ``esp32s2`` cpu (`see docs </docs/index.rst>`_) when assembling code for
          use on an ESP32-S2/S3.
 
+.. [#f2] Note: The ESP32-S2 and ESP32-S3 have the same ULP binary format, but the peripheral
+         register addresses (those accessed with REG_RD and REG_WR) are different. For best
+         results, use the correct peripheral register addresses for the specific variant you
+         are working with. The assembler (when used with ``cpu=esp32s2``) will accept
+         addresses for any of the 3 variants, because they are translated into relative
+         offsets anyway and many registers live at the same relative offset on all 3 variants.
+         This conveniently means that the same assembly code can be assembled unmodified for each
+         variant and produce a correctly working binary - as long as only peripheral registers
+         are used, which have the same relative offset across the variants. Use with care!
+
+
 Quick start
 -----------
 
diff --git a/docs/preprocess.rst b/docs/preprocess.rst
index 4aa3c7b..45569a5 100644
--- a/docs/preprocess.rst
+++ b/docs/preprocess.rst
@@ -95,6 +95,21 @@ are not needed on the device either.)
       micropython -m esp32_ulp.parse_to_db \
         esp-idf/components/soc/esp32/include/soc/{soc,soc_ulp,rtc_cntl_reg,rtc_io_reg,sens_reg}.h
 
+
+   .. warning::
+
+      `:warning:` Ensure that you include the header files for the correct
+      variant you are working with. In the example code above, simply switch
+      ``esp32`` to ``esp32s2`` or ``esp32s3`` in the path to the include files.
+
+      There are subtle differences across the ESP32 variants such as which
+      constants are available or the value of certain constants. For example,
+      peripheral register addresses differ between the 3 variants even though
+      many constants for peripheral registers are available on all 3 variants.
+      Other constants such as those relating to the HOLD functionality of touch
+      pads are only available on the original ESP32.
+
+
 2. Using the defines database during preprocessing
 
    The preprocessor will automatically use a defines database, when using the
@@ -108,6 +123,7 @@ are not needed on the device either.)
    or instantiate the ``Preprocessor`` class directly, without passing it a
    DefinesDB instance via ``use_db``.
 
+
 Design choices
 --------------
 

From 9bc6fc355e8cb34c000bd72b665d8b33e01ec921 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Sun, 23 Jul 2023 09:05:42 +0300
Subject: [PATCH 17/20] Housekeeping: gitignore temp files from testing

---
 .gitignore | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index ac78360..0b7309c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,16 @@
-tests/compat/*.bin
-tests/compat/*.elf
-tests/compat/*.o
-tests/compat/*.ulp
-tests/compat/*.log
+tests/binutils-gdb
+tests/esp-idf
+tests/ulptool
+tests/**/*.bin
+tests/**/*.elf
+tests/**/*.o
+tests/**/*.ulp
+tests/**/*.log
+tests/**/*.pre
+tests/log
+tests/*.lst
+tests/*.log
+tests/defines*.db
 demo.ulp
 *.pyc
 *.pyo

From a157ebebb30118995bada5d4f9cfb3368a66c0bf Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Sat, 22 Jul 2023 19:04:43 +0300
Subject: [PATCH 18/20] Correct decoding of esp32s2 negative LD/ST offsets

The ESP32-S2/S3 support a negative offset in ST/LD instructions.
Those offsets are two's-complement encoded into a field that is
11-bits wide.

This change corrects the decoding of negative offsets given the
field width of just 11-bits, rather than relying on the 32 or
64 bits of a MicroPython `int`.

Note 1: Negative offsets used in JUMP instructions are encoded
differently (sign bit + positive value), and their decoding is
already done correctly.

Note 2: The LD/ST instructions of the ESP32 do not support
negative offsets (according to Espressif tests), so their
decoding remains as is.
---
 tests/fixtures/all_opcodes-v.esp32s2.lst | 46 +++++++++++++++++++++---
 tests/fixtures/all_opcodes.esp32s2.S     |  6 ++++
 tests/fixtures/all_opcodes.esp32s2.lst   |  8 +++--
 tools/decode_s2.py                       | 23 +++++++++---
 4 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/tests/fixtures/all_opcodes-v.esp32s2.lst b/tests/fixtures/all_opcodes-v.esp32s2.lst
index 43126de..a2ebb15 100644
--- a/tests/fixtures/all_opcodes-v.esp32s2.lst
+++ b/tests/fixtures/all_opcodes-v.esp32s2.lst
@@ -1,8 +1,8 @@
 header
 ULP magic    : b'ulp\x00' (0x00706c75)
 .text offset : 12 (0x0c)
-.text size   : 244 (0xf4)
-.data offset : 256 (0x100)
+.text size   : 260 (0x104)
+.data offset : 272 (0x110)
 .data size   : 8 (0x08)
 .bss size    : 0 (0x00)
 ----------------------------------------
@@ -527,7 +527,45 @@ ULP magic    : b'ulp\x00' (0x00706c75)
                  label      =   0
                  upper      =   0
                  wr_way     =   0
+00f4  09e01fd0  LD r1, r2, -8
+                 dreg       =   1
+                 offset     =  -8 (0x7f8)
+                 opcode     =  13 (0x0d)
+                 sreg       =   2
+                 unused1    =   0
+                 unused2    =   0
+                 rd_upper   =   0
+00f8  09e01fd8  LDH r1, r2, -8
+                 dreg       =   1
+                 offset     =  -8 (0x7f8)
+                 opcode     =  13 (0x0d)
+                 sreg       =   2
+                 unused1    =   0
+                 unused2    =   0
+                 rd_upper   =   1
+00fc  89e11f68  ST r1, r2, -8
+                 dreg       =   2
+                 offset     =  -8 (0x7f8)
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   0
+                 upper      =   0
+                 wr_way     =   3
+0100  c9e11f68  STH r1, r2, -8
+                 dreg       =   2
+                 offset     =  -8 (0x7f8)
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+                 label      =   0
+                 upper      =   1
+                 wr_way     =   3
 ----------------------------------------
 .data
-00f4  00000000  <empty>
-00f8  fecadec0  <non-empty>
+0104  00000000  <empty>
+0108  fecadec0  <non-empty>
diff --git a/tests/fixtures/all_opcodes.esp32s2.S b/tests/fixtures/all_opcodes.esp32s2.S
index 5e5dea0..7483840 100644
--- a/tests/fixtures/all_opcodes.esp32s2.S
+++ b/tests/fixtures/all_opcodes.esp32s2.S
@@ -89,3 +89,9 @@ STI32 R1, R2, 0
 STI32 R1, R2, 1
 
 STO   0x20
+
+LDL R1, R2, -0x20
+LDH R1, R2, -0x20
+
+STL R1, R2, -0x20
+STH R1, R2, -0x20
diff --git a/tests/fixtures/all_opcodes.esp32s2.lst b/tests/fixtures/all_opcodes.esp32s2.lst
index d3d6d10..5de1704 100644
--- a/tests/fixtures/all_opcodes.esp32s2.lst
+++ b/tests/fixtures/all_opcodes.esp32s2.lst
@@ -60,6 +60,10 @@
 00e8  09000062  STI32 r1, r2, 0
 00ec  19000062  STI32 r1, r2, 1
 00f0  00200064  STO 8
+00f4  09e01fd0  LD r1, r2, -8
+00f8  09e01fd8  LDH r1, r2, -8
+00fc  89e11f68  ST r1, r2, -8
+0100  c9e11f68  STH r1, r2, -8
 .data
-00f4  00000000  <empty>
-00f8  fecadec0  <non-empty>
+0104  00000000  <empty>
+0108  fecadec0  <non-empty>
diff --git a/tools/decode_s2.py b/tools/decode_s2.py
index 2b6d440..de3b3c7 100644
--- a/tools/decode_s2.py
+++ b/tools/decode_s2.py
@@ -66,7 +66,7 @@
     opcodes.OPCODE_LD: (
         'LD/LDH',
         opcodes._ld,
-        lambda op: '%s r%s, r%s, %s' % ('LDH' if op.rd_upper else 'LD', op.dreg, op.sreg, op.offset)
+        lambda op: '%s r%s, r%s, %s' % ('LDH' if op.rd_upper else 'LD', op.dreg, op.sreg, twos_comp(op.offset, 11))
     ),
     opcodes.OPCODE_ST: ('ST', opcodes._st, {
         opcodes.SUB_OPCODE_ST_AUTO: (
@@ -79,14 +79,14 @@
         opcodes.SUB_OPCODE_ST_OFFSET: (
             'STO',
             opcodes._st,
-            lambda op: 'STO %s' % op.offset
+            lambda op: 'STO %s' % twos_comp(op.offset, 11)
         ),
         opcodes.SUB_OPCODE_ST: (
             'ST/STH/ST32',
             opcodes._st,
-            lambda op: '%s r%s, r%s, %s, %s' % ('STH' if op.upper else 'STL', op.sreg, op.dreg, op.offset, op.label) if op.wr_way and op.label
-                else '%s r%s, r%s, %s' % ('STH' if op.upper else 'ST', op.sreg, op.dreg, op.offset) if op.wr_way
-                else 'ST32 r%s, r%s, %s, %s' % (op.sreg, op.dreg, op.offset, op.label)
+            lambda op: '%s r%s, r%s, %s, %s' % ('STH' if op.upper else 'STL', op.sreg, op.dreg, twos_comp(op.offset, 11), op.label) if op.wr_way and op.label
+                else '%s r%s, r%s, %s' % ('STH' if op.upper else 'ST', op.sreg, op.dreg, twos_comp(op.offset, 11)) if op.wr_way
+                else 'ST32 r%s, r%s, %s, %s' % (op.sreg, op.dreg, twos_comp(op.offset, 11), op.label)
         )
     }),
     opcodes.OPCODE_RD_REG: (
@@ -103,6 +103,16 @@
 }
 
 
+def twos_comp(val, bits):
+    """
+    compute the correct value of a 2's complement
+    based on the number of bits in the source
+    """
+    if (val & (1 << (bits - 1))) != 0:  # if sign bit is set e.g., 8bit: 128-255
+        val = val - (1 << bits)         # compute negative value
+    return val
+
+
 def decode_instruction(i):
     if i == 0:
         raise Exception('<empty>')
@@ -167,6 +177,9 @@ def get_instruction_fields(ins):
                 extra = ' (%s)' % bs_cmp_ops[val]
             else:
                 extra = ' (%s)' % cmp_ops[val]
+        elif field == 'offset':
+            if ins.opcode in (opcodes.OPCODE_ST, opcodes.OPCODE_LD):
+                val = twos_comp(val, 11)
 
         field_details.append((field, val, extra))
 

From ed28d27d2c5c853c514657701ce44ccf7ec8b6d6 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Wed, 30 Aug 2023 21:39:20 +0300
Subject: [PATCH 19/20] Fix translation of peripheral register addresses

This was already incorrect in the original ESP32 implementation
but was discovered while testing the new S2/S3 implementation.

This was also wrong within the ESP-IDF, on which we based the translation
logic. Espressif fixed the issue in this pull request:
https://github.com/espressif/esp-idf/pull/11652

We now also have unit tests and compat (integration) tests, that
compare our binary output against that of binutils-gdb/esp32-ulp-as,
which already did this translation correctly, but we didn't have a
test for the specific cases we handled incorrectly, so we didn't
notice this bug.

This fix has also been tested on a real device, because S2/S3 devices
need the IOMUX clock enabled in order to be able to read GPIO input
from the ULP, and enabling that clock required writing to a register
in the SENS address range, which didn't work correctly before this fix.
---
 esp32_ulp/opcodes.py       |  4 +--
 esp32_ulp/opcodes_s2.py    |  4 +--
 tests/01_compat_tests.sh   | 19 +++++++++++++-
 tests/compat/reg.esp32.S   | 15 +++++++++++
 tests/compat/reg.esp32s2.S | 15 +++++++++++
 tests/opcodes.py           | 26 +++++++++++++++++++
 tests/opcodes_s2.py        | 52 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 130 insertions(+), 5 deletions(-)
 create mode 100644 tests/compat/reg.esp32.S
 create mode 100644 tests/compat/reg.esp32s2.S

diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py
index 6910081..4efce0c 100644
--- a/esp32_ulp/opcodes.py
+++ b/esp32_ulp/opcodes.py
@@ -379,7 +379,7 @@ def i_reg_wr(reg, high_bit, low_bit, val):
         _wr_reg.addr = reg & 0xff
         _wr_reg.periph_sel = (reg & 0x300) >> 8
     else:
-        _wr_reg.addr = (reg & 0xff) >> 2
+        _wr_reg.addr = (reg >> 2) & 0xff
         _wr_reg.periph_sel = _soc_reg_to_ulp_periph_sel(reg)
     _wr_reg.data = get_imm(val)
     _wr_reg.low = get_imm(low_bit)
@@ -394,7 +394,7 @@ def i_reg_rd(reg, high_bit, low_bit):
         _rd_reg.addr = reg & 0xff
         _rd_reg.periph_sel = (reg & 0x300) >> 8
     else:
-        _rd_reg.addr = (reg & 0xff) >> 2
+        _rd_reg.addr = (reg >> 2) & 0xff
         _rd_reg.periph_sel = _soc_reg_to_ulp_periph_sel(reg)
     _rd_reg.unused = 0
     _rd_reg.low = get_imm(low_bit)
diff --git a/esp32_ulp/opcodes_s2.py b/esp32_ulp/opcodes_s2.py
index 88ce75d..9c64642 100644
--- a/esp32_ulp/opcodes_s2.py
+++ b/esp32_ulp/opcodes_s2.py
@@ -422,7 +422,7 @@ def i_reg_wr(reg, high_bit, low_bit, val):
         _wr_reg.addr = reg & 0xff
         _wr_reg.periph_sel = (reg & 0x300) >> 8
     else:
-        _wr_reg.addr = (reg & 0xff) >> 2
+        _wr_reg.addr = (reg >> 2) & 0xff
         _wr_reg.periph_sel = _soc_reg_to_ulp_periph_sel(reg)
     _wr_reg.data = get_imm(val)
     _wr_reg.low = get_imm(low_bit)
@@ -437,7 +437,7 @@ def i_reg_rd(reg, high_bit, low_bit):
         _rd_reg.addr = reg & 0xff
         _rd_reg.periph_sel = (reg & 0x300) >> 8
     else:
-        _rd_reg.addr = (reg & 0xff) >> 2
+        _rd_reg.addr = (reg >> 2) & 0xff
         _rd_reg.periph_sel = _soc_reg_to_ulp_periph_sel(reg)
     _rd_reg.unused = 0
     _rd_reg.low = get_imm(low_bit)
diff --git a/tests/01_compat_tests.sh b/tests/01_compat_tests.sh
index ebbd5f6..3729292 100755
--- a/tests/01_compat_tests.sh
+++ b/tests/01_compat_tests.sh
@@ -10,6 +10,19 @@ calc_file_hash() {
     shasum < $1 | cut -d' ' -f1
 }
 
+make_log_dir() {
+   mkdir -p log
+}
+
+fetch_esp_idf() {
+    [ -d esp-idf ] && return
+
+    echo "Fetching esp-idf"
+    log_file=log/fetch-esp-idf.log
+    git clone --depth 1 \
+        https://github.com/espressif/esp-idf.git 1>$log_file 2>&1
+}
+
 run_tests_for_cpu() {
     local cpu=$1
     echo "Testing for CPU: $cpu"
@@ -33,7 +46,9 @@ run_tests_for_cpu() {
         bin_file="${src_name}.bin"
 
         echo -e "\tBuilding using binutils ($cpu)"
-        gcc -E -o ${pre_file} $src_file
+        gcc -I esp-idf/components/soc/$cpu/include -I esp-idf/components/esp_common/include \
+            -x assembler-with-cpp \
+            -E -o ${pre_file} $src_file
         esp32ulp-elf-as --mcpu=$cpu -o $obj_file ${pre_file}
         esp32ulp-elf-ld -T esp32.ulp.ld -o $elf_file $obj_file
         esp32ulp-elf-objcopy -O binary $elf_file $bin_file
@@ -56,5 +71,7 @@ run_tests_for_cpu() {
     echo ""
 }
 
+make_log_dir
+fetch_esp_idf
 run_tests_for_cpu esp32
 run_tests_for_cpu esp32s2
diff --git a/tests/compat/reg.esp32.S b/tests/compat/reg.esp32.S
new file mode 100644
index 0000000..e9b1a14
--- /dev/null
+++ b/tests/compat/reg.esp32.S
@@ -0,0 +1,15 @@
+#include "soc/rtc_cntl_reg.h"
+#include "soc/soc_ulp.h"
+
+  reg_rd 0x012, 1, 2
+  reg_rd 0x234, 3, 4
+  reg_rd 0x345, 5, 6
+
+  reg_wr 0x012, 1, 2, 1
+  reg_wr 0x234, 3, 4, 1
+  reg_wr 0x345, 5, 6, 1
+
+  WRITE_RTC_REG(0x3ff484a8, 1, 2, 3)
+  READ_RTC_REG(0x3ff484a8, 1, 2)
+  WRITE_RTC_REG(0x3ff48904, 1, 2, 3)
+  READ_RTC_REG(0x3ff48904, 1, 2)
diff --git a/tests/compat/reg.esp32s2.S b/tests/compat/reg.esp32s2.S
new file mode 100644
index 0000000..c8c9920
--- /dev/null
+++ b/tests/compat/reg.esp32s2.S
@@ -0,0 +1,15 @@
+#include "soc/rtc_cntl_reg.h"
+#include "soc/soc_ulp.h"
+
+  reg_rd 0x012, 1, 2
+  reg_rd 0x234, 3, 4
+  reg_rd 0x345, 5, 6
+
+  reg_wr 0x012, 1, 2, 1
+  reg_wr 0x234, 3, 4, 1
+  reg_wr 0x345, 5, 6, 1
+
+  WRITE_RTC_REG(0x3f4084a8, 1, 2, 3)
+  READ_RTC_REG(0x3f4084a8, 1, 2)
+  WRITE_RTC_REG(0x3f408904, 1, 2, 3)
+  READ_RTC_REG(0x3f408904, 1, 2)
diff --git a/tests/opcodes.py b/tests/opcodes.py
index 85cd710..576c840 100644
--- a/tests/opcodes.py
+++ b/tests/opcodes.py
@@ -174,6 +174,31 @@ def test_reg_address_translations():
     assert ins.addr == 0x2a  # low 8 bits of 0x12a
 
 
+def test_reg_address_translations_sens():
+    """
+    Test addressing of peripheral registers using full DPORT bus addresses
+    """
+
+    ins = make_ins("""
+    addr : 8        # Address within either RTC_CNTL, RTC_IO, or SARADC
+    periph_sel : 2  # Select peripheral: RTC_CNTL (0), RTC_IO(1), SARADC(2)
+    unused : 8      # Unused
+    low : 5         # Low bit
+    high : 5        # High bit
+    opcode : 4      # Opcode (OPCODE_RD_REG)
+    """)
+
+    # direct ULP address is derived from full address as follows:
+    # full:0x3ff48904 == ulp:(0x3ff48904-DR_REG_RTCCNTL_BASE) / 4
+    # full:0x3ff48904 == ulp:(0x3ff48904-0x3ff48000) / 4
+    # full:0x3ff48904 == ulp:0x904 / 4
+    # full:0x3ff48904 == ulp:0x241
+    # see: https://github.com/espressif/binutils-esp32ulp/blob/249ec34/gas/config/tc-esp32ulp_esp32s2.c#L78
+    ins.all = opcodes.i_reg_rd("0x3ff48904", "0", "0")
+    assert ins.periph_sel == 2  # high 2 bits of 0x241
+    assert ins.addr == 0x41  # low 8 bits of 0x241
+
+
 test_make_ins_struct_def()
 test_make_ins()
 test_arg_qualify()
@@ -183,3 +208,4 @@ def test_reg_address_translations():
 test_eval_arg()
 test_reg_direct_ulp_addressing()
 test_reg_address_translations()
+test_reg_address_translations_sens()
diff --git a/tests/opcodes_s2.py b/tests/opcodes_s2.py
index de6249d..2c724c3 100644
--- a/tests/opcodes_s2.py
+++ b/tests/opcodes_s2.py
@@ -174,6 +174,31 @@ def test_reg_address_translations_s2():
     assert ins.addr == 0x2a  # low 8 bits of 0x12a
 
 
+def test_reg_address_translations_s2_sens():
+    """
+    Test addressing of ESP32-S2 peripheral registers using full DPORT bus addresses
+    """
+
+    ins = make_ins("""
+    addr : 8        # Address within either RTC_CNTL, RTC_IO, or SARADC
+    periph_sel : 2  # Select peripheral: RTC_CNTL (0), RTC_IO(1), SARADC(2)
+    unused : 8      # Unused
+    low : 5         # Low bit
+    high : 5        # High bit
+    opcode : 4      # Opcode (OPCODE_RD_REG)
+    """)
+
+    # direct ULP address is derived from full address as follows:
+    # full:0x3f408904 == ulp:(0x3f408904-DR_REG_RTCCNTL_BASE) / 4
+    # full:0x3f408904 == ulp:(0x3f408904-0x3f408000) / 4
+    # full:0x3f408904 == ulp:0x904 / 4
+    # full:0x3f408904 == ulp:0x241
+    # see: https://github.com/espressif/binutils-esp32ulp/blob/249ec34/gas/config/tc-esp32ulp_esp32s2.c#L78
+    ins.all = opcodes.i_reg_rd("0x3f408904", "0", "0")
+    assert ins.periph_sel == 2  # high 2 bits of 0x241
+    assert ins.addr == 0x41  # low 8 bits of 0x241
+
+
 def test_reg_address_translations_s3():
     """
     Test addressing of ESP32-S3 peripheral registers using full DPORT bus addresses
@@ -199,6 +224,31 @@ def test_reg_address_translations_s3():
     assert ins.addr == 0x2a  # low 8 bits of 0x12a
 
 
+def test_reg_address_translations_s3_sens():
+    """
+    Test addressing of ESP32-S3 peripheral registers using full DPORT bus addresses
+    """
+
+    ins = make_ins("""
+    addr : 8        # Address within either RTC_CNTL, RTC_IO, or SARADC
+    periph_sel : 2  # Select peripheral: RTC_CNTL (0), RTC_IO(1), SARADC(2)
+    unused : 8      # Unused
+    low : 5         # Low bit
+    high : 5        # High bit
+    opcode : 4      # Opcode (OPCODE_RD_REG)
+    """)
+
+    # direct ULP address is derived from full address as follows:
+    # full:0x60008904 == ulp:(0x60008904-DR_REG_RTCCNTL_BASE) / 4
+    # full:0x60008904 == ulp:(0x60008904-0x60008000) / 4
+    # full:0x60008904 == ulp:0x904 / 4
+    # full:0x60008904 == ulp:0x241
+    # see: https://github.com/espressif/binutils-esp32ulp/blob/249ec34/gas/config/tc-esp32ulp_esp32s2.c#L78
+    ins.all = opcodes.i_reg_rd("0x60008904", "0", "0")
+    assert ins.periph_sel == 2  # high 2 bits of 0x241
+    assert ins.addr == 0x41  # low 8 bits of 0x241
+
+
 test_make_ins_struct_def()
 test_make_ins()
 test_arg_qualify()
@@ -209,3 +259,5 @@ def test_reg_address_translations_s3():
 test_reg_direct_ulp_addressing()
 test_reg_address_translations_s2()
 test_reg_address_translations_s3()
+test_reg_address_translations_s2_sens()
+test_reg_address_translations_s3_sens()

From debff30ca6093cf364f89432a89951acb6174d4f Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 8 Aug 2023 09:03:31 +0300
Subject: [PATCH 20/20] Add examples for ESP32-S2 and ESP32-S3

There are now example files for the S2 and S3 (with ``_s2`` or ``_s3``
appended to their filenames).

Note: The s2 examples also work unmodified on the ESP32-S3, except the
readgpio example which needs different peripheral register addresses
on the S3.

The ``counter_s2.py`` example is unmodified compared to the original
example, except that the assembler is told to generate esp32s2 output.

The ``blink_s2.py``, ``readgpio_s2.py`` and ``readgpio_s3.py`` examples
have their rtc_io base address updated, as well as the constants
referring to the GPIO pins and channels and the peripheral register bits
used to read/write the GPIO inputs/outputs. These addresses/bits have
changed from the original ESP32. Otherwise the examples are identical to
the examples for the original ESP32.
---
 examples/blink.py       |   8 +--
 examples/blink_s2.py    | 112 ++++++++++++++++++++++++++++++++++++++++
 examples/counter.py     |   2 +
 examples/counter_s2.py  |  46 +++++++++++++++++
 examples/readgpio.py    |   6 ++-
 examples/readgpio_s2.py |  79 ++++++++++++++++++++++++++++
 examples/readgpio_s3.py |  79 ++++++++++++++++++++++++++++
 7 files changed, 327 insertions(+), 5 deletions(-)
 create mode 100644 examples/blink_s2.py
 create mode 100644 examples/counter_s2.py
 create mode 100644 examples/readgpio_s2.py
 create mode 100644 examples/readgpio_s3.py

diff --git a/examples/blink.py b/examples/blink.py
index 1350bc2..04e3b8f 100644
--- a/examples/blink.py
+++ b/examples/blink.py
@@ -1,4 +1,6 @@
 """
+Example for: ESP32
+
 Simple example showing how to control a GPIO pin from the ULP coprocessor.
 
 The GPIO port is configured to be attached to the RTC module, and then set
@@ -22,11 +24,11 @@
 
 source = """\
 # constants from:
-# https://github.com/espressif/esp-idf/blob/1cb31e5/components/soc/esp32/include/soc/soc.h 
+# https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32/include/soc/reg_base.h
 #define DR_REG_RTCIO_BASE            0x3ff48400
 
 # constants from:
-# https://github.com/espressif/esp-idf/blob/1cb31e5/components/soc/esp32/include/soc/rtc_io_reg.h 
+# https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32/include/soc/rtc_io_reg.h
 #define RTC_IO_TOUCH_PAD2_REG        (DR_REG_RTCIO_BASE + 0x9c)
 #define RTC_IO_TOUCH_PAD2_MUX_SEL_M  (BIT(19))
 #define RTC_GPIO_OUT_REG             (DR_REG_RTCIO_BASE + 0x0)
@@ -35,7 +37,7 @@
 #define RTC_GPIO_OUT_DATA_S          14
 
 # constants from:
-# https://github.com/espressif/esp-idf/blob/1cb31e5/components/soc/esp32/include/soc/rtc_io_channel.h 
+# https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32/include/soc/rtc_io_channel.h
 #define RTCIO_GPIO2_CHANNEL          12
 
 # When accessed from the RTC module (ULP) GPIOs need to be addressed by their channel number
diff --git a/examples/blink_s2.py b/examples/blink_s2.py
new file mode 100644
index 0000000..86b0c3c
--- /dev/null
+++ b/examples/blink_s2.py
@@ -0,0 +1,112 @@
+"""
+Example for: ESP32-S2 and ESP32-S3
+
+The GPIO port is configured to be attached to the RTC module, and then set
+to OUTPUT mode. To avoid re-initializing the GPIO on every wakeup, a magic
+token gets set in memory.
+
+After every change of state, the ULP is put back to sleep again until the
+next wakeup. The ULP wakes up every 500ms to change the state of the GPIO
+pin. An LED attached to the GPIO pin would toggle on and off every 500ms.
+
+The end of the python script has a loop to show the value of the magic token
+and the current state, so you can confirm the magic token gets set and watch
+the state value changing. If the loop is stopped (Ctrl-C), the LED attached
+to the GPIO pin continues to blink, because the ULP runs independently from
+the main processor.
+"""
+
+from esp32 import ULP
+from machine import mem32
+from esp32_ulp import src_to_binary
+
+source = """\
+# constants from:
+# https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32s2/include/soc/reg_base.h
+#define DR_REG_RTCIO_BASE            0x3f408400
+
+# constants from:
+# https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32s2/include/soc/rtc_io_reg.h
+#define RTC_IO_TOUCH_PAD2_REG        (DR_REG_RTCIO_BASE + 0x8c)
+#define RTC_IO_TOUCH_PAD2_MUX_SEL_M  (BIT(19))
+#define RTC_GPIO_OUT_REG             (DR_REG_RTCIO_BASE + 0x0)
+#define RTC_GPIO_ENABLE_REG          (DR_REG_RTCIO_BASE + 0xc)
+#define RTC_GPIO_ENABLE_S            10
+#define RTC_GPIO_OUT_DATA_S          10
+
+# constants from:
+# https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32s2/include/soc/rtc_io_channel.h
+#define RTCIO_GPIO2_CHANNEL          2
+
+# When accessed from the RTC module (ULP) GPIOs need to be addressed by their channel number
+.set gpio, RTCIO_GPIO2_CHANNEL
+.set token, 0xcafe  # magic token
+
+.text
+magic: .long 0
+state: .long 0
+
+.global entry
+entry:
+  # load magic flag
+  move r0, magic
+  ld r1, r0, 0
+
+  # test if we have initialised already
+  sub r1, r1, token
+  jump after_init, eq  # jump if magic == token (note: "eq" means the last instruction (sub) resulted in 0)
+
+init:
+  # connect GPIO to ULP (0: GPIO connected to digital GPIO module, 1: GPIO connected to analog RTC module)
+  WRITE_RTC_REG(RTC_IO_TOUCH_PAD2_REG, RTC_IO_TOUCH_PAD2_MUX_SEL_M, 1, 1);
+
+  # GPIO shall be output, not input (this also enables a pull-down by default)
+  WRITE_RTC_REG(RTC_GPIO_ENABLE_REG, RTC_GPIO_ENABLE_S + gpio, 1, 1)
+
+  # store that we're done with initialisation
+  move r0, magic
+  move r1, token
+  st r1, r0, 0
+
+after_init:
+  move r1, state
+  ld r0, r1, 0
+
+  move r2, 1
+  sub r0, r2, r0  # toggle state
+  st r0, r1, 0  # store updated state
+
+  jumpr on, 0, gt  # if r0 (state) > 0, jump to 'on'
+  jump off  # else jump to 'off'
+
+on:
+  # turn on led (set GPIO)
+  WRITE_RTC_REG(RTC_GPIO_OUT_REG, RTC_GPIO_OUT_DATA_S + gpio, 1, 1)
+  jump exit
+
+off:
+  # turn off led (clear GPIO)
+  WRITE_RTC_REG(RTC_GPIO_OUT_REG, RTC_GPIO_OUT_DATA_S + gpio, 1, 0)
+  jump exit
+
+exit:
+  halt  # go back to sleep until next wakeup period
+"""
+
+binary = src_to_binary(source, cpu="esp32s2")  # cpu is esp32 or esp32s2
+
+load_addr, entry_addr = 0, 8
+
+ULP_MEM_BASE = 0x50000000
+ULP_DATA_MASK = 0xffff  # ULP data is only in lower 16 bits
+
+ulp = ULP()
+ulp.set_wakeup_period(0, 500000)  # use timer0, wakeup after 500000usec (0.5s)
+ulp.load_binary(load_addr, binary)
+
+ulp.run(entry_addr)
+
+while True:
+    print(hex(mem32[ULP_MEM_BASE + load_addr] & ULP_DATA_MASK),  # magic token
+          hex(mem32[ULP_MEM_BASE + load_addr + 4] & ULP_DATA_MASK)  # current state
+          )
diff --git a/examples/counter.py b/examples/counter.py
index 057e66d..e3a72b5 100644
--- a/examples/counter.py
+++ b/examples/counter.py
@@ -1,4 +1,6 @@
 """
+Example for: ESP32
+
 Very basic example showing data exchange main CPU <--> ULP coprocessor.
 
 To show that the ULP is doing something, it just increments the value <data>.
diff --git a/examples/counter_s2.py b/examples/counter_s2.py
new file mode 100644
index 0000000..8119db7
--- /dev/null
+++ b/examples/counter_s2.py
@@ -0,0 +1,46 @@
+"""
+Example for: ESP32-S2 and ESP32-S3
+
+Very basic example showing data exchange main CPU <--> ULP coprocessor.
+
+To show that the ULP is doing something, it just increments the value <data>.
+It does that once per ulp timer wakeup (and then the ULP halts until it gets
+woken up via the timer again).
+
+The timer is set to a rather long period, so you can watch the data value
+incrementing (see loop at the end).
+"""
+
+from esp32 import ULP
+from machine import mem32
+
+from esp32_ulp import src_to_binary
+
+source = """\
+data:       .long 0
+
+entry:      move r3, data    # load address of data into r3
+            ld r2, r3, 0     # load data contents ([r3+0]) into r2
+            add r2, r2, 1    # increment r2
+            st r2, r3, 0     # store r2 contents into data ([r3+0])
+
+            halt             # halt ULP co-prozessor (until it gets waked up again)
+"""
+
+binary = src_to_binary(source, cpu="esp32s2")  # cpu is esp32 or esp32s2
+
+load_addr, entry_addr = 0, 4
+
+ULP_MEM_BASE = 0x50000000
+ULP_DATA_MASK = 0xffff  # ULP data is only in lower 16 bits
+
+ulp = ULP()
+ulp.set_wakeup_period(0, 50000)  # use timer0, wakeup after 50000usec (50ms)
+ulp.load_binary(load_addr, binary)
+
+mem32[ULP_MEM_BASE + load_addr] = 0x1000
+ulp.run(entry_addr)
+
+while True:
+    print(hex(mem32[ULP_MEM_BASE + load_addr] & ULP_DATA_MASK))
+
diff --git a/examples/readgpio.py b/examples/readgpio.py
index 8ac9436..8a8fca7 100644
--- a/examples/readgpio.py
+++ b/examples/readgpio.py
@@ -1,16 +1,18 @@
 """
+Example for: ESP32
+
 Very basic example showing how to read a GPIO pin from the ULP and access
 that data from the main CPU.
 
 In this case GPIO4 is being read. Note that the ULP needs to refer to GPIOs
 via their RTC channel number. You can see the mapping in this file:
-https://github.com/espressif/esp-idf/blob/v4.4.1/components/soc/esp32/include/soc/rtc_io_channel.h#L51
+https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32/include/soc/rtc_io_channel.h#L51
 
 If you change to a different GPIO number, make sure to modify both the channel
 number and also the RTC_IO_TOUCH_PAD0_* references appropriately. The best place
 to see the mappings might be this table here (notice the "real GPIO numbers" as
 comments to each line):
-https://github.com/espressif/esp-idf/blob/v4.4.1/components/soc/esp32/rtc_io_periph.c#L61
+https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32/rtc_io_periph.c#L53
 
 The timer is set to a rather long period, so you can watch the data value
 change as you change the GPIO input (see loop at the end).
diff --git a/examples/readgpio_s2.py b/examples/readgpio_s2.py
new file mode 100644
index 0000000..60ae43e
--- /dev/null
+++ b/examples/readgpio_s2.py
@@ -0,0 +1,79 @@
+"""
+Example for: ESP32-S2
+
+Very basic example showing how to read a GPIO pin from the ULP and access
+that data from the main CPU.
+
+In this case GPIO4 is being read. Note that the ULP needs to refer to GPIOs
+via their RTC channel number. You can see the mapping in this file:
+https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32s2/include/soc/rtc_io_channel.h#L33
+
+If you change to a different GPIO number, make sure to modify both the channel
+number and also the RTC_IO_TOUCH_PAD4_* references appropriately. The best place
+to see the mappings might be this table here (notice the "real GPIO numbers" as
+comments to each line):
+https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32s2/rtc_io_periph.c#L60
+
+The timer is set to a rather long period, so you can watch the data value
+change as you change the GPIO input (see loop at the end).
+"""
+
+from esp32 import ULP
+from machine import mem32
+
+from esp32_ulp import src_to_binary
+
+source = """\
+#define DR_REG_RTCIO_BASE            0x3f408400
+#define RTC_IO_TOUCH_PAD4_REG        (DR_REG_RTCIO_BASE + 0x94)
+#define RTC_IO_TOUCH_PAD4_MUX_SEL_M  (BIT(19))
+#define RTC_IO_TOUCH_PAD4_FUN_IE_M   (BIT(13))
+#define RTC_GPIO_IN_REG              (DR_REG_RTCIO_BASE + 0x24)
+#define RTC_GPIO_IN_NEXT_S           10
+#define DR_REG_SENS_BASE             0x3f408800
+#define SENS_SAR_IO_MUX_CONF_REG     (DR_REG_SENS_BASE + 0x0144)
+#define SENS_IOMUX_CLK_GATE_EN       (BIT(31))
+.set channel, 4
+
+state:      .long 0
+
+entry:
+            # enable IOMUX clock
+            WRITE_RTC_FIELD(SENS_SAR_IO_MUX_CONF_REG, SENS_IOMUX_CLK_GATE_EN, 1)
+
+            # connect GPIO to the RTC subsystem so the ULP can read it
+            WRITE_RTC_REG(RTC_IO_TOUCH_PAD4_REG, RTC_IO_TOUCH_PAD4_MUX_SEL_M, 1, 1)
+
+            # switch the GPIO into input mode
+            WRITE_RTC_REG(RTC_IO_TOUCH_PAD4_REG, RTC_IO_TOUCH_PAD4_FUN_IE_M, 1, 1)
+
+            # read the GPIO's current state into r0
+            READ_RTC_REG(RTC_GPIO_IN_REG, RTC_GPIO_IN_NEXT_S + channel, 1)
+
+            # set r3 to the memory address of "state"
+            move r3, state
+
+            # store what was read into r0 into the "state" variable
+            st r0, r3, 0
+
+            # halt ULP co-processor (until it gets woken up again)
+            halt
+"""
+
+binary = src_to_binary(source, cpu="esp32s2")  # cpu is esp32 or esp32s2
+
+load_addr, entry_addr = 0, 4
+
+ULP_MEM_BASE = 0x50000000
+ULP_DATA_MASK = 0xffff  # ULP data is only in lower 16 bits
+
+ulp = ULP()
+ulp.set_wakeup_period(0, 50000)  # use timer0, wakeup after 50000usec (50ms)
+ulp.load_binary(load_addr, binary)
+
+mem32[ULP_MEM_BASE + load_addr] = 0x0  # initialise state to 0
+ulp.run(entry_addr)
+
+while True:
+    print(hex(mem32[ULP_MEM_BASE + load_addr] & ULP_DATA_MASK))
+
diff --git a/examples/readgpio_s3.py b/examples/readgpio_s3.py
new file mode 100644
index 0000000..b1f9779
--- /dev/null
+++ b/examples/readgpio_s3.py
@@ -0,0 +1,79 @@
+"""
+Example for: ESP32-S3
+
+Very basic example showing how to read a GPIO pin from the ULP and access
+that data from the main CPU.
+
+In this case GPIO2 is being read. Note that the ULP needs to refer to GPIOs
+via their RTC channel number. You can see the mapping in this file:
+https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32s3/include/soc/rtc_io_channel.h#L33
+
+If you change to a different GPIO number, make sure to modify both the channel
+number and also the RTC_IO_TOUCH_PAD2_* references appropriately. The best place
+to see the mappings might be this table here (notice the "real GPIO numbers" as
+comments to each line):
+https://github.com/espressif/esp-idf/blob/v5.0.2/components/soc/esp32s3/rtc_io_periph.c#L60
+
+The timer is set to a rather long period, so you can watch the data value
+change as you change the GPIO input (see loop at the end).
+"""
+
+from esp32 import ULP
+from machine import mem32
+
+from esp32_ulp import src_to_binary
+
+source = """\
+#define DR_REG_RTCIO_BASE            0x60008400
+#define RTC_IO_TOUCH_PAD2_REG        (DR_REG_RTCIO_BASE + 0x8c)
+#define RTC_IO_TOUCH_PAD2_MUX_SEL_M  (BIT(19))
+#define RTC_IO_TOUCH_PAD2_FUN_IE_M   (BIT(13))
+#define RTC_GPIO_IN_REG              (DR_REG_RTCIO_BASE + 0x24)
+#define RTC_GPIO_IN_NEXT_S           10
+#define DR_REG_SENS_BASE             0x60008800
+#define SENS_SAR_PERI_CLK_GATE_CONF_REG  (DR_REG_SENS_BASE + 0x104)
+#define SENS_IOMUX_CLK_EN            (BIT(31))
+.set channel, 2
+
+state:      .long 0
+
+entry:
+            # enable IOMUX clock
+            WRITE_RTC_FIELD(SENS_SAR_PERI_CLK_GATE_CONF_REG, SENS_IOMUX_CLK_EN, 1)
+
+            # connect GPIO to the RTC subsystem so the ULP can read it
+            WRITE_RTC_REG(RTC_IO_TOUCH_PAD2_REG, RTC_IO_TOUCH_PAD2_MUX_SEL_M, 1, 1)
+
+            # switch the GPIO into input mode
+            WRITE_RTC_REG(RTC_IO_TOUCH_PAD2_REG, RTC_IO_TOUCH_PAD2_FUN_IE_M, 1, 1)
+
+            # read the GPIO's current state into r0
+            READ_RTC_REG(RTC_GPIO_IN_REG, RTC_GPIO_IN_NEXT_S + channel, 1)
+
+            # set r3 to the memory address of "state"
+            move r3, state
+
+            # store what was read into r0 into the "state" variable
+            st r0, r3, 0
+
+            # halt ULP co-processor (until it gets woken up again)
+            halt
+"""
+
+binary = src_to_binary(source, cpu="esp32s2")  # cpu is esp32 or esp32s2
+
+load_addr, entry_addr = 0, 4
+
+ULP_MEM_BASE = 0x50000000
+ULP_DATA_MASK = 0xffff  # ULP data is only in lower 16 bits
+
+ulp = ULP()
+ulp.set_wakeup_period(0, 50000)  # use timer0, wakeup after 50000usec (50ms)
+ulp.load_binary(load_addr, binary)
+
+mem32[ULP_MEM_BASE + load_addr] = 0x0  # initialise state to 0
+ulp.run(entry_addr)
+
+while True:
+    print(hex(mem32[ULP_MEM_BASE + load_addr] & ULP_DATA_MASK))
+