Skip to content

Commit

Permalink
Merge pull request #282 from primitivefinance/fix/jump-encoding
Browse files Browse the repository at this point in the history
refactor(jump-processing): simplifies the jump encoding! much better
  • Loading branch information
Alexangelj authored Mar 13, 2023
2 parents b7777da + 72c8e89 commit 64efc43
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 69 deletions.
151 changes: 83 additions & 68 deletions contracts/libraries/FVMLib.sol
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@ pragma solidity 0.8.13;

import "./AssemblyLib.sol";

uint8 constant INSTRUCTION_CODE_SIZE_BYTES = 1;
uint8 constant INSTRUCTION_POINTER_SIZE_BYTES = 2;
uint8 constant INSTRUCTIONS_ARRAY_SIZE_BYTES = 1;
uint8 constant JUMP_PROCESS_START_POINTER = 2;
bytes1 constant UNKNOWN = 0x00;
bytes1 constant ALLOCATE = 0x01;
Expand All @@ -55,114 +52,132 @@ error InvalidJump(uint256 pointer); // 0x80f63bd1
error InvalidBytesLength(uint256 expected, uint256 length); // 0xe19dc95e

/**
* @dev Expects a serialized encoding of instructions that is delimited by pointers to the next instruction.
* @dev Expects a serialized encoding of instructions.
* Serialized byte array -> [Jump Instruction Opcode,Total Amount of Instructions, Length of instruction[0], Data of instruction[0], Length of instruction[1],...]
*
* Motivation
* This serialization is intentional because it enables the use of a dynamic array for instructions.
* A fixed instruction array would pad unfilled array data with zeroes, wasting potentially a lot of bytes.
* On optimistic rollups, these bytes are the most expensive (in gas) bytes!
*
* Simple Guide
* First, information is added about the set of instructions that will be processed.
* - The jump instruction code, to signal we want to process multiple instructions.
* - The amount of instructions we want to process.
* - The starting index of the next instruction in the string of bytes `data`, i.e. a "pointer".
* - The length of the next instruction.
* - The instruction data.
* - The length of the next instruction.
* - Etc...
* Since we want to process multiple instructions that are in one big string,
* the encoding has to put information at the beginning of the instruction to say
* "this instruction is 22 bytes long, so the next instruction data starts after 22 bytes".
* All instructions have pointer information appended to the front of the instruction for
* jump processing. These pointers are two bytes long, which means a 22 byte instruction will have 24 total bytes.
* This repeats in a loop until all the instructions have been processed.
* "this instruction is 22 bytes long".
* The decoder then relies on the assumption "so the next instruction starts after 22 bytes".
*
* Glossary
* | Term | Description | Size |
* ---------------------------------
* | Pointer | Index value with data for the next pointer's start location in the calldata. | 2 bytes |
* | Pointer | Index of the jump calldata that holds the length of an instruction. | 1 byte |
* | Instruction Code | FVM "op code" to signal which operation to execute | 1 byte |
* | Instructions length | Amount of instructions to be executed | 1 byte |
* | Total Instructions | Amount of instructions to be executed | 1 byte |
*
* Conclusion
* To summarize, the calldata can be sliced to get a pointer, e.g. `data[3:5]`.
* Then using that pointer as the start index to the `data`,
* we can get the next pointer, e.g. `data[data[3:5]:data[3:5] + `INSTRUCTION_POINTER_SIZE_BYTES`]`.
* Pointers are two bytes which means the end index (slicing calldata EXCLUDES the byte at the `:end` pointer)
* is computed by summing the start index and `INSTRUCTION_POINTER_SIZE_BYTES` in bytes.
* To summarize, the calldata can be sliced to get the length of the first instruction, e.g. `data[2:3]`.
* The `pointer` is initialized to the index of that length byte (`JUMP_PROCESS_START_POINTER`). The pointer acts as an accumulator that moves across the bytes string.
* After each instruction, the accumulated value is the byte index one past the last byte of that instruction, i.e. the index of the next instruction's length byte.
*
* Example
* | Byte Index | Data |
* ----------------------------------------------------------
* | bytes[0] | 0xAA Instruction code |
* | bytes[1] | Amount of Instructions |
* | bytes[2:2+ptr length] | ptr[0] := Pointer to instruction at index `1` of the instructions array to be executed.
* | bytes[2+ptr length:ptr[0]] | Instruction data at index `0` of the instructions array.
* | bytes[ptr[0]:ptr[0] + ptr length] | ptr[1] := Pointer to instruction at index `2`.
* | bytes[2] | ptr[0] := Length of instruction[0] |
* | bytes[3:3 + ptr[0]] | Data of instruction[0]. Calldata slice does not include end index. |
* | bytes[3 + ptr[0]] | ptr[1] := Length of instruction[1] |
* | ... | Repeats in a loop for each instruction. |
*/
function _jumpProcess(bytes calldata data, function(bytes calldata) _process) {
// Walks the serialized instructions in `data` and hands each instruction's data to `_process`.
// NOTE(review): this span shows two sets of locals and two loop headers (a 2-byte pointer walk and
// a 1-byte length walk); it looks like the pre- and post-change versions rendered together - confirm
// against the repository before editing the code.
uint8 length = uint8(data[INSTRUCTIONS_ARRAY_SIZE_BYTES]);
uint16 pointer = JUMP_PROCESS_START_POINTER; // First pointer data is at index `JUMP_PROCESS_START_POINTER`.
uint256 start;
// Encoded `data`:| 0x | opcode | amount instructions | instruction length | instruction |
// The instruction count is read from the second byte of `data`.
uint8 totalInstructions = uint8(data[1]);
// The "pointer" is pointing to the first byte of an instruction,
// which holds the data for the instruction's length in bytes.
uint256 idxPtr = JUMP_PROCESS_START_POINTER;
// As the instructions are processed, the pointer advances from the start of
// the current instruction to its (excluded) end index, which is where the next instruction begins.
uint256 idxInstructionStart;
uint256 idxInstructionEnd;
// For each instruction set...
for (uint256 i; i != length; ++i) {
// Start at the index of the first byte of the next instruction.
start = pointer;
// Set the new pointer to the next instruction, located at data at the index equal to the pointer.
pointer = uint16(
bytes2(data[pointer:pointer + INSTRUCTION_POINTER_SIZE_BYTES])
);
// The `start:` includes the pointer bytes, while the `:end` `pointer` is excluded.
if (pointer > data.length) revert InvalidJump(pointer);
bytes calldata instruction = data[start:pointer];
// Process the instruction.
_process(instruction[INSTRUCTION_POINTER_SIZE_BYTES:]); // note: Removes the pointer to the next instruction.
for (uint256 i; i != totalInstructions; ++i) {
// Start the instruction where the pointer is.
idxInstructionStart = idxPtr;
// Compute the index of the next pointer by summing
// the current pointer value, the length of the instruction,
// and the amount of bytes the instruction length takes (which is 1 byte).
idxInstructionEnd =
idxInstructionStart + uint8(bytes1(data[idxInstructionStart])) + 1;
// Make sure the pointer is not out of bounds.
// Only the end index is checked here; the length byte above is read before any check.
if (idxInstructionEnd > data.length) {
revert InvalidJump(idxInstructionEnd);
}
// Calldata slicing EXCLUDES the `idxInstructionEnd` byte.
bytes calldata instruction = data[idxInstructionStart:idxInstructionEnd];
// Move the pointer to the EXCLUDED `idxInstructionEnd` byte.
// That byte holds the length of the next instruction (when there is one).
idxPtr = idxInstructionEnd;
// Process the instruction after removing the instruction length,
// so only instruction data is passed to `_process`.
_process(instruction[1:]);
}
}

/**
* @dev Serializes an array of instructions into a `pointer` delimited string of instructions, led by JUMP_PROCESS FVM code.
*
* For this table, ptrs[0] is not a thing, just a way to describe the order of pointers.
* E.g. ptrs[0] = First pointer; pointer to instruction at index 1 of the instructions array.
* The actual code overwrites the `nextPointer` variable for each pointer and concats it to the bytes string.
*
* @dev For debugging jump instructions, pass the jump instruction calldata to this function to get the individual instructions.
*/
function decodeJumpInstructions(bytes calldata data)
    pure
    returns (bytes[] memory)
{
    // Splits a jump payload back into its individual instructions.
    //
    // Expected layout of `data`:
    // | opcode (1 byte) | instruction count (1 byte) | length[0] (1 byte) | instruction[0] | length[1] | ...
    //
    // Returns one `bytes` entry per instruction, with the leading length byte removed.
    // Reverts with `InvalidBytesLength` when `data` cannot hold the two header bytes, and with
    // `InvalidJump` when a length byte or an instruction's end lies past the end of `data`.

    // The header must exist before `data[1]` is read; without this guard a short payload
    // fails with a generic out-of-bounds panic instead of a descriptive error.
    if (data.length < JUMP_PROCESS_START_POINTER) {
        revert InvalidBytesLength(JUMP_PROCESS_START_POINTER, data.length);
    }

    uint8 totalInstructions = uint8(data[1]);
    // Index of the length byte of the instruction currently being decoded.
    uint256 idxPtr = JUMP_PROCESS_START_POINTER;
    // Index one past the last byte of the instruction currently being decoded.
    uint256 idxInstructionEnd;
    bytes[] memory instructions = new bytes[](totalInstructions);

    for (uint256 i; i != totalInstructions; ++i) {
        // The length byte itself must be inside `data`. This triggers when the header claims
        // more instructions than the payload actually contains.
        if (idxPtr >= data.length) {
            revert InvalidJump(idxPtr);
        }

        // End index = start + instruction length + 1 byte for the length prefix.
        idxInstructionEnd = idxPtr + uint8(data[idxPtr]) + 1;
        if (idxInstructionEnd > data.length) {
            revert InvalidJump(idxInstructionEnd);
        }

        // Skip the length byte; calldata slicing excludes the end index.
        instructions[i] = data[idxPtr + 1:idxInstructionEnd];

        // The excluded end index is where the next instruction's length byte lives.
        idxPtr = idxInstructionEnd;
    }

    return instructions;
}

/**
* @dev Serializes an array of instructions by appending the length of the instruction to each instruction packet.
* Adds the INSTRUCTION_JUMP opcode and total instructions quantity to the front of the `bytes` array.
* @param instructions Dynamically sized array of FVM encoded instructions.
*
* Byte index : Description of what fills the space.
* ---------------------------------------------------
* 0 : JUMP Instruction Code (0xAA)
* 1 : Amount of instructions = Length of instructions array.
* 2 : ptrs[0] := Pointer to instructions[1] = Instruction code (1 byte) + Amt Instructions (1 byte) + ptrs[0] (2 bytes) + instructions[0].length.
* 3...(ptrs[0] - 1) : instructions[0]
* ptrs[0] : ptrs[1] := Pointer to instructions[2] = ptrs[0] (2 bytes) + instructions[1].length + ptrs[1] (2 bytes).
* (ptrs[0] + 1)...(ptrs[1] - 1): instructions[1]
* ptrs[1] : ptrs[2] := Pointer to instructions[3]
* etc..
*/
function encodeJumpInstruction(bytes[] memory instructions)
pure
returns (bytes memory)
{
uint16 nextPointer;
uint8 len = uint8(instructions.length);
bytes memory payload = bytes.concat(INSTRUCTION_JUMP, bytes1(len));
uint8 totalInstructions = uint8(instructions.length);
bytes memory payload =
bytes.concat(INSTRUCTION_JUMP, bytes1(totalInstructions));

// for each instruction set...
for (uint256 i; i != len; ++i) {
for (uint256 i; i != totalInstructions; ++i) {
bytes memory instruction = instructions[i];
// Amount of bytes of data for this instruction.
uint8 size = uint8(instruction.length);

// Using instruction and index of instruction in list, we create a new array with a pointer to the next instruction in front of the instruction payload.
// i == 0 only happens once so we short circuit via opposite case.
if (i != 0) {
nextPointer = nextPointer + size + INSTRUCTION_POINTER_SIZE_BYTES; // [currentPointer, instruction, nextPointer]
} else {
nextPointer = INSTRUCTION_CODE_SIZE_BYTES
+ INSTRUCTIONS_ARRAY_SIZE_BYTES + INSTRUCTION_POINTER_SIZE_BYTES
+ size;
}

uint8 instructionLength = uint8(instruction.length);
// Appends pointer to next instruction to the beginning of this instruction.
bytes memory edited = bytes.concat(bytes2(nextPointer), instruction);
bytes memory edited =
bytes.concat(bytes1(instructionLength), instruction);
// Concats the serialized bytes data with this edited instruction.
payload = bytes.concat(payload, edited);
}
Expand Down
3 changes: 2 additions & 1 deletion foundry.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ remappings = [

[profile.optimized] # uses via_ir and optimizer
via_ir = true
optimizer_runs = 0
optimizer_runs = 1
optimizer = true
out = 'optimized-out'

[profile.test] # does not use optimizer
Expand Down
61 changes: 61 additions & 0 deletions test/TestFVMJump.t.sol
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,44 @@ pragma solidity ^0.8.4;

import "./Setup.sol";

/// @dev Test helper that exposes the FVM decoders through external calls, so that
/// `bytes memory` built in a test reaches them as `bytes calldata`.
contract DecodeJump {
    /// @dev Forwards `data` to `FVM.decodeJumpInstructions` and returns its result.
    function decodeJump(bytes calldata data)
        public
        view
        returns (bytes[] memory)
    {
        bytes[] memory decoded = FVM.decodeJumpInstructions(data);
        return decoded;
    }

    /// @dev Forwards `data` to `FVM.decodeCreatePair` and returns the two decoded addresses.
    function decodeCreatePair(bytes calldata data)
        public
        view
        returns (address, address)
    {
        (address first, address second) = FVM.decodeCreatePair(data);
        return (first, second);
    }

    /// @dev Forwards `data` to `FVM.decodeAllocate` and returns its three decoded values unchanged.
    function decodeAllocate(bytes calldata data)
        public
        view
        returns (uint8, uint64, uint128)
    {
        (uint8 leading, uint64 middle, uint128 trailing) =
            FVM.decodeAllocate(data);
        return (leading, middle, trailing);
    }

    /// @dev Returns `data[start:end]`, or `data[start:]` when `end` is zero
    /// (zero acts as the "slice to the end" sentinel).
    function sliceCalldata(
        bytes calldata data,
        uint256 start,
        uint256 end
    ) public view returns (bytes memory) {
        // Explicit end index given: bounded slice.
        if (end != 0) {
            return data[start:end];
        }
        // No end index: take everything from `start` onwards.
        return data[start:];
    }
}

contract TestFVMJump is Setup {
modifier pauseGas() {
vm.pauseGasMetering();
Expand All @@ -11,6 +49,29 @@ contract TestFVMJump is Setup {

bytes[] instructions;

/// @dev Round-trip check: encodes a create-pair and an allocate instruction into one jump payload,
/// decodes the payload through `DecodeJump`, and compares the decoded fields with the inputs.
function test_encodeJumpInstruction() public {
    // Inputs for the two instructions.
    address expectedFirst = address(55);
    address expectedSecond = address(66);
    uint64 expectedPoolId = uint64(5);
    uint128 expectedAmount = uint128(7);

    // Queue the instructions in storage, then serialize them into a single jump payload.
    instructions.push(FVM.encodeCreatePair(expectedFirst, expectedSecond));
    instructions.push(
        FVM.encodeAllocate(uint8(0), expectedPoolId, expectedAmount)
    );
    bytes memory jumpData = FVM.encodeJumpInstruction(instructions);

    // Decode via an external contract so the payload is passed as calldata.
    DecodeJump decoder = new DecodeJump();
    bytes[] memory unpacked = decoder.decodeJump(jumpData);

    // Instruction 0 is the create-pair, instruction 1 is the allocate.
    (address actualFirst, address actualSecond) =
        decoder.decodeCreatePair(unpacked[0]);
    (, uint64 actualPoolId, uint128 actualAmount) =
        decoder.decodeAllocate(unpacked[1]);

    assertEq(actualFirst, expectedFirst, "invalid-a0");
    assertEq(actualSecond, expectedSecond, "invalid-a1");
    assertEq(actualPoolId, expectedPoolId, "invalid-poolId");
    assertEq(actualAmount, expectedAmount, "invalid-amount");

    // Reset the shared storage array so other tests start from an empty list.
    delete instructions;
}

// Maximum 2^8 - 1 = 255 instructions.
// The amount of instructions to process is limited to 255 since the count is stored in 1 byte.
function testFuzz_allocate_deallocate_many(uint8 totalCalls)
Expand Down

0 comments on commit 64efc43

Please sign in to comment.