Merge pull request #282 from primitivefinance/fix/jump-encoding

refactor(jump-processing): simplifies the jump encoding! much better
primitivefinance · Mar 13, 2023 · 64efc43 · 64efc43
2 parents b7777da + 72c8e89
commit 64efc43
Show file tree

Hide file tree

Showing 3 changed files with 146 additions and 69 deletions.
diff --git a/contracts/libraries/FVMLib.sol b/contracts/libraries/FVMLib.sol
@@ -32,9 +32,6 @@ pragma solidity 0.8.13;
 
 import "./AssemblyLib.sol";
 
-uint8 constant INSTRUCTION_CODE_SIZE_BYTES = 1;
-uint8 constant INSTRUCTION_POINTER_SIZE_BYTES = 2;
-uint8 constant INSTRUCTIONS_ARRAY_SIZE_BYTES = 1;
 uint8 constant JUMP_PROCESS_START_POINTER = 2;
 bytes1 constant UNKNOWN = 0x00;
 bytes1 constant ALLOCATE = 0x01;
@@ -55,114 +52,132 @@ error InvalidJump(uint256 pointer); // 0x80f63bd1
 error InvalidBytesLength(uint256 expected, uint256 length); // 0xe19dc95e
 
 /**
- * @dev Expects a serialized encoding of instructions that is delimited by pointers to the next instruction.
+ * @dev Expects a serialized encoding of instructions.
+ *      Serialized byte array -> [Jump Instruction Opcode,Total Amount of Instructions, Length of instruction[0], Data of instruction[0], Length of instruction[1],...]
  *
  * Motivation
  *      This serialization is intentional because it enables the use of a dynamic array for instructions.
  *      A fixed instruction array would pad unfilled array data with zeroes, wasting potentially a lot of bytes.
+ *      On optimistic rollups, these bytes are the most expensive (in gas) bytes!
  *
  * Simple Guide
  *      First, information is added about the set of instructions that will be processed.
  *          - The jump instruction code, to signal we want to process multiple instructions.
  *          - The amount of instructions we want to process.
- *          - The starting index of the next instruction in the string of bytes `data`, i.e. a "pointer".
+ *          - The length of the next instruction.
+ *          - The instruction data.
+ *          - The length of the next instruction.
+ *          - Etc...
  *      Since we want to process multiple instructions that are in one big string,
  *      the encoding has to put information at the beginning of the instruction to say
- *      "this instruction is 22 bytes long, so the next instruction data starts after 22 bytes".
- *      All instructions have pointer information appended to the front of the instruction for
- *      jump processing. These pointers are two bytes long, which means a 22 byte instruction will have 24 total bytes.
- *      This repeats in a loop until all the instructions have been processed.
+ *      "this instruction is 22 bytes long".
+ *      The decoder then relies on that length: "so the next instruction starts after 22 bytes".
  *
  * Glossary
  * | Term | Description | Size |
  * ---------------------------------
- * | Pointer | Index value with data for the next pointer's start location in the calldata. | 2 bytes |
+ * | Pointer | Index of the jump calldata that holds the length of an instruction. | 1 byte |
  * | Instruction Code | FVM "op code" to signal which operation to execute | 1 byte |
- * | Instructions length | Amount of instructions to be executed | 1 byte |
+ * | Total Instructions | Amount of instructions to be executed | 1 byte |
  *
  * Conclusion
- *      To summarize, the calldata can be sliced to get a pointer, e.g. `data[3:5]`.
- *      Then using that pointer as the start index to the `data`,
- *      we can get the next pointer, e.g. `data[data[3:5]:data[3:5] + `INSTRUCTION_POINTER_SIZE_BYTES`]`.
- *      Pointers are two bytes which means the end index (slicing calldata EXCLUDES the byte at the `:end` pointer)
- *      is computed by summing the start index and `INSTRUCTION_POINTER_SIZE_BYTES` in bytes.
+ *      To summarize, the calldata can be sliced to get the length of the first instruction, e.g. `data[2:3]`.
+ *      The `pointer` starts at index `JUMP_PROCESS_START_POINTER` and acts as an accumulator that moves across the bytes string.
+ *      After each instruction it advances by that instruction's length plus one, landing on the byte right after the instruction.
  *
  * Example
  * | Byte Index                 | Data               |
  * ----------------------------------------------------------
  * | bytes[0]                   | 0xAA Instruction code     |
  * | bytes[1]                   | Amount of Instructions    |
- * | bytes[2:2+ptr length]        | ptr[0] := Pointer to instruction at index `1` of the instructions array to be executed.
- * | bytes[2+ptr length:ptr[0]]   | Instruction data at index `0` of the instructions array.
- * | bytes[ptr[0]:ptr[0] + ptr length]   | ptr[1] := Pointer to instruction at index `2`.
+ * | bytes[2]                   | ptr[0] := Length of instruction[0]
+ * | bytes[3:3 + ptr[0]]        | Data of instruction[0]. Calldata slice does not include end index.   |
+ * | bytes[3 + ptr[0]]          | ptr[1] := Length of instruction[1] |
  * | ...                        | Repeats in a loop for each instruction. |
  */
 function _jumpProcess(bytes calldata data, function(bytes calldata) _process) {
-    uint8 length = uint8(data[INSTRUCTIONS_ARRAY_SIZE_BYTES]);
-    uint16 pointer = JUMP_PROCESS_START_POINTER; // First pointer data is at index `JUMP_PROCESS_START_POINTER`.
-    uint256 start;
+    // Encoded `data`:| 0x | opcode | amount instructions | instruction length | instruction |
+    uint8 totalInstructions = uint8(data[1]);
+    // The "pointer" is pointing to the first byte of an instruction,
+    // which holds the data for the instruction's length in bytes.
+    uint256 idxPtr = JUMP_PROCESS_START_POINTER;
+    // As the instructions are processed, the pointer moves from
+    // the end of one instruction to the start of the next.
+    uint256 idxInstructionStart;
+    uint256 idxInstructionEnd;
     // For each instruction set...
-    for (uint256 i; i != length; ++i) {
-        // Start at the index of the first byte of the next instruction.
-        start = pointer;
-        // Set the new pointer to the next instruction, located at data at the index equal to the pointer.
-        pointer = uint16(
-            bytes2(data[pointer:pointer + INSTRUCTION_POINTER_SIZE_BYTES])
-        );
-        // The `start:` includes the pointer bytes, while the `:end` `pointer` is excluded.
-        if (pointer > data.length) revert InvalidJump(pointer);
-        bytes calldata instruction = data[start:pointer];
-        // Process the instruction.
-        _process(instruction[INSTRUCTION_POINTER_SIZE_BYTES:]); // note: Removes the pointer to the next instruction.
+    for (uint256 i; i != totalInstructions; ++i) {
+        // Start the instruction where the pointer is.
+        idxInstructionStart = idxPtr;
+        // Compute the index of the next pointer by summing
+        // the current pointer value, the length of the instruction,
+        // and the amount of bytes the instruction length takes (which is 1 byte).
+        idxInstructionEnd =
+            idxInstructionStart + uint8(bytes1(data[idxInstructionStart])) + 1;
+        // Make sure the pointer is not out of bounds.
+        if (idxInstructionEnd > data.length) {
+            revert InvalidJump(idxInstructionEnd);
+        }
+        // Calldata slicing EXCLUDES the `idxInstructionEnd` byte.
+        bytes calldata instruction = data[idxInstructionStart:idxInstructionEnd];
+        // Move the pointer to the EXCLUDED `idxInstructionEnd` byte.
+        // If another instruction follows, this byte holds that instruction's length.
+        idxPtr = idxInstructionEnd;
+        // Process the instruction after removing the instruction length,
+        // so only instruction data is passed to `_process`.
+        _process(instruction[1:]);
     }
 }
 
 /**
- * @dev Serializes an array of instructions into a `pointer` delimited string of instructions, led by JUMP_PROCESS FVM code.
- *
- * For this table, ptrs[0] is not a thing, just a way to describe the order of pointers.
- * E.g. ptrs[0] = First pointer; pointer to instruction at index 1 of the instructions array.
- * The actual code overwrites the `nextPointer` variable for each pointer and concats it to the bytes string.
- *
+ * @dev For debugging jump instructions, pass the jump instruction calldata to this function to get the individual instructions.
+ */
+function decodeJumpInstructions(bytes calldata data)
+    pure
+    returns (bytes[] memory)
+{
+    uint8 totalInstructions = uint8(data[1]);
+    uint256 idxPtr = JUMP_PROCESS_START_POINTER;
+    uint256 idxInstructionStart;
+    uint256 idxInstructionEnd;
+    bytes[] memory instructions = new bytes[](totalInstructions);
+    for (uint256 i; i != totalInstructions; ++i) {
+        idxInstructionStart = idxPtr;
+        idxInstructionEnd =
+            idxInstructionStart + uint8(bytes1(data[idxInstructionStart])) + 1;
+        if (idxInstructionEnd > data.length) {
+            revert InvalidJump(idxInstructionEnd);
+        }
+        bytes calldata instruction = data[idxInstructionStart:idxInstructionEnd];
+        idxPtr = idxInstructionEnd;
+
+        instructions[i] = instruction[1:];
+    }
+
+    return instructions;
+}
+
+/**
+ * @dev Serializes an array of instructions by prepending a one-byte length to each instruction packet.
+ * Adds the INSTRUCTION_JUMP opcode and total instructions quantity to the front of the `bytes` array.
  * @param instructions Dynamically sized array of FVM encoded instructions.
- *
- * Byte index   : Description of what fills the space.
- * ---------------------------------------------------
- * 0            : JUMP Instruction Code (0xAA)
- * 1            : Amount of instructions = Length of instructions array.
- * 2            : ptrs[0] := Pointer to instructions[1] = Instruction code (1 byte) + Amt Instructions (1 byte) + ptrs[0] (2 bytes) + instructions[0].length.
- * 3...(ptrs[0] - 1)  : instructions[0]
- * ptrs[0]      : ptrs[1] := Pointer to instructions[2] = ptrs[0] (2 bytes) + instructions[1].length + ptrs[1] (2 bytes).
- * (ptrs[0] + 1)...(ptrs[1] - 1): instructions[1]
- * ptrs[1]      : ptrs[2] := Pointer to instructions[3]
- * etc..
  */
 function encodeJumpInstruction(bytes[] memory instructions)
     pure
     returns (bytes memory)
 {
-    uint16 nextPointer;
-    uint8 len = uint8(instructions.length);
-    bytes memory payload = bytes.concat(INSTRUCTION_JUMP, bytes1(len));
+    uint8 totalInstructions = uint8(instructions.length);
+    bytes memory payload =
+        bytes.concat(INSTRUCTION_JUMP, bytes1(totalInstructions));
 
     // for each instruction set...
-    for (uint256 i; i != len; ++i) {
+    for (uint256 i; i != totalInstructions; ++i) {
         bytes memory instruction = instructions[i];
         // Amount of bytes of data for this instruction.
-        uint8 size = uint8(instruction.length);
-
-        // Using instruction and index of instruction in list, we create a new array with a pointer to the next instruction in front of the instruction payload.
-        // i == 0 only happens once so we short circuit via opposite case.
-        if (i != 0) {
-            nextPointer = nextPointer + size + INSTRUCTION_POINTER_SIZE_BYTES; // [currentPointer, instruction, nextPointer]
-        } else {
-            nextPointer = INSTRUCTION_CODE_SIZE_BYTES
-                + INSTRUCTIONS_ARRAY_SIZE_BYTES + INSTRUCTION_POINTER_SIZE_BYTES
-                + size;
-        }
-
+        uint8 instructionLength = uint8(instruction.length);
         // Appends pointer to next instruction to the beginning of this instruction.
-        bytes memory edited = bytes.concat(bytes2(nextPointer), instruction);
+        bytes memory edited =
+            bytes.concat(bytes1(instructionLength), instruction);
         // Concats the serialized bytes data with this edited instruction.
         payload = bytes.concat(payload, edited);
     }

diff --git a/foundry.toml b/foundry.toml
@@ -13,7 +13,8 @@ remappings = [
 
 [profile.optimized] # uses via_ir and optimizer
 via_ir = true
-optimizer_runs = 0
+optimizer_runs = 1
+optimizer = true
 out = 'optimized-out'
 
 [profile.test] # does not use optimizer

diff --git a/test/TestFVMJump.t.sol b/test/TestFVMJump.t.sol
@@ -3,6 +3,44 @@ pragma solidity ^0.8.4;
 
 import "./Setup.sol";
 
+contract DecodeJump {
+    function decodeJump(bytes calldata data)
+        public
+        view
+        returns (bytes[] memory)
+    {
+        return FVM.decodeJumpInstructions(data);
+    }
+
+    function decodeCreatePair(bytes calldata data)
+        public
+        view
+        returns (address, address)
+    {
+        return FVM.decodeCreatePair(data);
+    }
+
+    function decodeAllocate(bytes calldata data)
+        public
+        view
+        returns (uint8, uint64, uint128)
+    {
+        return FVM.decodeAllocate(data);
+    }
+
+    function sliceCalldata(
+        bytes calldata data,
+        uint256 start,
+        uint256 end
+    ) public view returns (bytes memory) {
+        if (end == 0) {
+            return data[start:];
+        } else {
+            return data[start:end];
+        }
+    }
+}
+
 contract TestFVMJump is Setup {
     modifier pauseGas() {
         vm.pauseGasMetering();
@@ -11,6 +49,29 @@ contract TestFVMJump is Setup {
 
     bytes[] instructions;
 
+    function test_encodeJumpInstruction() public {
+        address a0 = address(55);
+        address a1 = address(66);
+        uint64 poolId = uint64(5);
+        uint128 amount = uint128(7);
+        instructions.push(FVM.encodeCreatePair(a0, a1));
+        instructions.push(FVM.encodeAllocate(uint8(0), poolId, amount));
+        bytes memory payload = FVM.encodeJumpInstruction(instructions);
+
+        DecodeJump _contract = new DecodeJump();
+        bytes[] memory decoded = _contract.decodeJump(payload);
+        (address decoded_a0, address decoded_a1) =
+            _contract.decodeCreatePair(decoded[0]);
+        (, uint64 decoded_poolId, uint128 decoded_amount) =
+            _contract.decodeAllocate(decoded[1]);
+        assertEq(decoded_a0, a0, "invalid-a0");
+        assertEq(decoded_a1, a1, "invalid-a1");
+        assertEq(decoded_poolId, poolId, "invalid-poolId");
+        assertEq(decoded_amount, amount, "invalid-amount");
+
+        delete instructions;
+    }
+
     // Maximum 2^8 = 256 instructions.
     // The amount of instructions to process is limited to 2^8 since it uses 1 byte.
     function testFuzz_allocate_deallocate_many(uint8 totalCalls)