Skip to content

Commit

Permalink
feat(avm)!: byte indexed PC (#9582)
Browse files Browse the repository at this point in the history
This PR moves the AVM to use byte-indexed PCs
* Modifies the transpiler to remap brillig PCs
* Modifies the simulator to use byte indexed PCs
* Modifies witgen and circuit to use byte indexed PCs

Why are we doing this?
* Needed for bytecode decomposition in the circuit.
* Allow storing other stuff besides code in a contract, and then be able to use it in memory with an opcode "CODECOPY" or similar.

---

A note on how PCs are mapped in the transpiler: we do 2 passes. In the first pass we translate all instructions and leave Brillig location operands as unresolved `BRILLIG_LOCATION` placeholders. In the second pass, now that we know the structure of the program and the Brillig-to-AVM PC mapping, we replace each placeholder with the corresponding byte-indexed AVM PC.

There are a few big caveats
1. ~Since the JUMP(I) and INTERNALCALL operands are U16, we cannot jump or call a location bigger than 2^16. This effectively constrains the contract size to 65kB.~ We use 32 bit jumps now.
2. We can do the transformation in (only) 2 passes because we only have 1 variant of JUMP etc. Suppose we had an 8 bit variant, or a 32 bit variant, then we wouldn't know which one to use until the original PC has been mapped, but that itself can change the size of the instructions and trigger a remapping!

Solutions?
* For (1) I might propose having relative jumps JUMP(I)R with 8 and 16 bit variants, and an absolute JUMP with 32 bits.
* For (2) we might just need to remap until there is no change.

Part of #9059.
  • Loading branch information
fcarreiro authored Nov 4, 2024
1 parent d5642e8 commit 29724f3
Show file tree
Hide file tree
Showing 36 changed files with 838 additions and 623 deletions.
11 changes: 11 additions & 0 deletions avm-transpiler/src/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ impl AvmInstruction {
}
bytes
}

/// Size of this instruction in bytes, measured as the length of its `to_bytes` encoding.
pub fn size(&self) -> usize {
    let encoded = self.to_bytes();
    encoded.len()
}
}

impl Debug for AvmInstruction {
Expand Down Expand Up @@ -101,13 +105,16 @@ pub enum AvmTypeTag {
/// Operands are usually 32 bits (offsets or jump destinations)
/// Constants (as used by the SET instruction) can have size
/// different from 32 bits
// The `BRILLIG_LOCATION` variant is not camel case, which is what this lint override permits.
#[allow(non_camel_case_types)]
pub enum AvmOperand {
U8 { value: u8 },
U16 { value: u16 },
U32 { value: u32 },
U64 { value: u64 },
U128 { value: u128 },
FF { value: FieldElement },
// Unresolved Brillig pc that still needs translation to a 32 bit byte-indexed AVM PC
// (the transpiler's resolution pass replaces it with a `U32` operand).
BRILLIG_LOCATION { brillig_pc: u32 },
}

impl Display for AvmOperand {
Expand All @@ -119,6 +126,9 @@ impl Display for AvmOperand {
AvmOperand::U64 { value } => write!(f, " U64:{}", value),
AvmOperand::U128 { value } => write!(f, " U128:{}", value),
AvmOperand::FF { value } => write!(f, " FF:{}", value),
AvmOperand::BRILLIG_LOCATION { brillig_pc } => {
write!(f, " BRILLIG_LOCATION:{}", brillig_pc)
}
}
}
}
Expand All @@ -132,6 +142,7 @@ impl AvmOperand {
AvmOperand::U64 { value } => value.to_be_bytes().to_vec(),
AvmOperand::U128 { value } => value.to_be_bytes().to_vec(),
AvmOperand::FF { value } => value.to_be_bytes(),
AvmOperand::BRILLIG_LOCATION { brillig_pc } => brillig_pc.to_be_bytes().to_vec(),
}
}
}
Expand Down
98 changes: 44 additions & 54 deletions avm-transpiler/src/transpile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,24 @@ use acvm::brillig_vm::brillig::{
use acvm::FieldElement;
use noirc_errors::debug_info::DebugInfo;

use crate::bit_traits::bits_needed_for;
use crate::bit_traits::{bits_needed_for, BitsQueryable};
use crate::instructions::{AddressingModeBuilder, AvmInstruction, AvmOperand, AvmTypeTag};
use crate::opcodes::AvmOpcode;
use crate::utils::{dbg_print_avm_program, dbg_print_brillig_program, make_operand};

/// Transpile a Brillig program to AVM bytecode
pub fn brillig_to_avm(
brillig_bytecode: &[BrilligOpcode<FieldElement>],
brillig_pcs_to_avm_pcs: &[usize],
) -> Vec<u8> {
/// Returns the bytecode and a mapping from Brillig program counter to AVM program counter.
pub fn brillig_to_avm(brillig_bytecode: &[BrilligOpcode<FieldElement>]) -> (Vec<u8>, Vec<usize>) {
dbg_print_brillig_program(brillig_bytecode);

let mut avm_instrs: Vec<AvmInstruction> = Vec::new();
let mut brillig_pcs_to_avm_pcs: Vec<usize> = [0_usize].to_vec();
let mut current_avm_pc: usize = 0;

// Transpile a Brillig instruction to one or more AVM instructions
for brillig_instr in brillig_bytecode {
let current_avm_instr_index = avm_instrs.len();

match brillig_instr {
BrilligOpcode::BinaryFieldOp { destination, op, lhs, rhs } => {
let bits_needed =
Expand Down Expand Up @@ -231,22 +233,22 @@ pub fn brillig_to_avm(
});
}
BrilligOpcode::Jump { location } => {
let avm_loc = brillig_pcs_to_avm_pcs[*location];
assert!(location.num_bits() <= 32);
avm_instrs.push(AvmInstruction {
opcode: AvmOpcode::JUMP_32,
operands: vec![make_operand(32, &avm_loc)],
operands: vec![AvmOperand::BRILLIG_LOCATION { brillig_pc: *location as u32 }],
..Default::default()
});
}
BrilligOpcode::JumpIf { condition, location } => {
let avm_loc = brillig_pcs_to_avm_pcs[*location];
assert!(location.num_bits() <= 32);
avm_instrs.push(AvmInstruction {
opcode: AvmOpcode::JUMPI_32,
indirect: Some(
AddressingModeBuilder::default().direct_operand(condition).build(),
),
operands: vec![
make_operand(32, &avm_loc),
AvmOperand::BRILLIG_LOCATION { brillig_pc: *location as u32 },
make_operand(16, &condition.to_usize()),
],
..Default::default()
Expand Down Expand Up @@ -295,10 +297,10 @@ pub fn brillig_to_avm(
));
}
BrilligOpcode::Call { location } => {
let avm_loc = brillig_pcs_to_avm_pcs[*location];
assert!(location.num_bits() <= 32);
avm_instrs.push(AvmInstruction {
opcode: AvmOpcode::INTERNALCALL,
operands: vec![AvmOperand::U32 { value: avm_loc as u32 }],
operands: vec![AvmOperand::BRILLIG_LOCATION { brillig_pc: *location as u32 }],
..Default::default()
});
}
Expand Down Expand Up @@ -342,8 +344,37 @@ pub fn brillig_to_avm(
brillig_instr
),
}

// Increment the AVM program counter.
current_avm_pc +=
avm_instrs.iter().skip(current_avm_instr_index).map(|i| i.size()).sum::<usize>();
brillig_pcs_to_avm_pcs.push(current_avm_pc);
}

// Now that we have the general structure of the AVM program, we need to resolve the
// Brillig jump locations.
let mut avm_instrs = avm_instrs
.into_iter()
.map(|i| match i.opcode {
AvmOpcode::JUMP_32 | AvmOpcode::JUMPI_32 | AvmOpcode::INTERNALCALL => {
let new_operands = i
.operands
.into_iter()
.map(|o| match o {
AvmOperand::BRILLIG_LOCATION { brillig_pc } => {
let avm_pc = brillig_pcs_to_avm_pcs[brillig_pc as usize];
assert!(avm_pc.num_bits() <= 32, "Oops! AVM PC is too large!");
AvmOperand::U32 { value: avm_pc as u32 }
}
_ => o,
})
.collect::<Vec<AvmOperand>>();
AvmInstruction { operands: new_operands, ..i }
}
_ => i,
})
.collect::<Vec<AvmInstruction>>();

// TEMPORARY: Add a "magic number" instruction to the end of the program.
// This makes it possible to know that the bytecode corresponds to the AVM.
// We are adding a MOV instruction that moves a value to itself.
Expand All @@ -362,7 +393,8 @@ pub fn brillig_to_avm(
for instruction in avm_instrs {
bytecode.extend_from_slice(&instruction.to_bytes());
}
bytecode

(bytecode, brillig_pcs_to_avm_pcs)
}

/// Handle brillig foreign calls
Expand Down Expand Up @@ -1496,48 +1528,6 @@ pub fn patch_debug_info_pcs(
debug_infos
}

/// Re-key the assert messages from Brillig pcs to AVM pcs.
/// Transpilation injects extra opcodes into the bytecode, so every Brillig pc
/// key is looked up in `brillig_pcs_to_avm_pcs` and replaced by the AVM pc
/// stored at that index. Panics if a key is not a valid index into the mapping.
pub fn patch_assert_message_pcs(
    assert_messages: HashMap<usize, String>,
    brillig_pcs_to_avm_pcs: &[usize],
) -> HashMap<usize, String> {
    let mut patched = HashMap::with_capacity(assert_messages.len());
    for (brillig_pc, message) in assert_messages {
        let avm_pc = brillig_pcs_to_avm_pcs[brillig_pc];
        patched.insert(avm_pc, message);
    }
    patched
}

/// Compute an array that maps each Brillig pc to an AVM pc.
/// This must be done before transpiling to properly transpile jump destinations,
/// because some Brillig instructions map to multiple AVM instructions.
/// No instructions are inserted at the start of the program, so Brillig pc 0
/// always maps to AVM pc 0.
/// args:
///     brillig_bytecode: the Brillig program
/// returns: an array where each index is a Brillig pc,
///     and each value is the corresponding AVM pc.
///     An empty program yields an empty array.
pub fn map_brillig_pcs_to_avm_pcs(brillig_bytecode: &[BrilligOpcode<FieldElement>]) -> Vec<usize> {
    let mut pc_map = Vec::with_capacity(brillig_bytecode.len());

    // AVM pc at which the next Brillig instruction will start.
    let mut next_avm_pc = 0;
    for brillig_instr in brillig_bytecode {
        pc_map.push(next_avm_pc);
        // The following Brillig pc maps to an AVM pc offset by the number of
        // AVM instructions generated for this Brillig instruction.
        next_avm_pc += match brillig_instr {
            BrilligOpcode::ForeignCall { function, .. }
                if function == "avmOpcodeReturndataCopy" =>
            {
                2
            }
            _ => 1,
        };
    }
    pc_map
}

fn tag_from_bit_size(bit_size: BitSize) -> AvmTypeTag {
match bit_size {
BitSize::Integer(IntegerBitSize::U1) => AvmTypeTag::UINT1,
Expand Down
8 changes: 3 additions & 5 deletions avm-transpiler/src/transpile_contract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
use acvm::acir::circuit::Program;
use noirc_errors::debug_info::ProgramDebugInfo;

use crate::transpile::{brillig_to_avm, map_brillig_pcs_to_avm_pcs, patch_debug_info_pcs};
use crate::transpile::{brillig_to_avm, patch_debug_info_pcs};
use crate::utils::extract_brillig_from_acir_program;

/// Representation of a contract with some transpiled functions
Expand Down Expand Up @@ -88,17 +88,15 @@ impl From<CompiledAcirContractArtifact> for TranspiledContractArtifact {

for function in contract.functions {
if function.custom_attributes.contains(&"public".to_string()) {
// if function.name == "public_dispatch" {
info!("Transpiling AVM function {} on contract {}", function.name, contract.name);
// Extract Brillig Opcodes from acir
let acir_program = function.bytecode;
let brillig_bytecode = extract_brillig_from_acir_program(&acir_program);
info!("Extracted Brillig program has {} instructions", brillig_bytecode.len());

// Map Brillig pcs to AVM pcs (index is Brillig PC, value is AVM PC)
let brillig_pcs_to_avm_pcs = map_brillig_pcs_to_avm_pcs(brillig_bytecode);

// Transpile to AVM
let avm_bytecode = brillig_to_avm(brillig_bytecode, &brillig_pcs_to_avm_pcs);
let (avm_bytecode, brillig_pcs_to_avm_pcs) = brillig_to_avm(brillig_bytecode);

log::info!(
"{}::{}: bytecode is {} bytes",
Expand Down
4 changes: 3 additions & 1 deletion avm-transpiler/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,11 @@ pub fn dbg_print_avm_program(avm_program: &[AvmInstruction]) {
info!("Transpiled AVM program has {} instructions", avm_program.len());
trace!("Printing AVM program...");
let mut counts = std::collections::HashMap::<AvmOpcode, usize>::new();
let mut avm_pc = 0;
for (i, instruction) in avm_program.iter().enumerate() {
trace!("\tPC:{0}: {1}", i, &instruction.to_string());
trace!("\tIDX:{0} AVMPC:{1} - {2}", i, avm_pc, &instruction.to_string());
*counts.entry(instruction.opcode).or_insert(0) += 1;
avm_pc += instruction.size();
}
debug!("AVM opcode counts:");
let mut sorted_counts: Vec<_> = counts.into_iter().collect();
Expand Down
12 changes: 7 additions & 5 deletions barretenberg/cpp/pil/avm/main.pil
Original file line number Diff line number Diff line change
Expand Up @@ -379,19 +379,20 @@ namespace main(256);
sel_op_jump * (pc' - ia) = 0;

#[PC_JUMPI]
sel_op_jumpi * ((1 - id_zero) * (pc' - ia) + id_zero * (pc' - pc - 1)) = 0;
sel_op_jumpi * ((1 - id_zero) * (pc' - ia) + id_zero * (pc' - pc - 8)) = 0; // 8 = size of JUMPI_32 instruction

// TODO: Consolidation with #[PC_JUMP] and sel_op_internal_call * (pc' - ia) = 0; sel_op_internal_return * (pc' - ia) = 0;

//===== INTERNAL_CALL ======================================================
// - The program counter in the next row should be equal to the value loaded from the ia register
// - We then write the return location (pc + 1) into the call stack (in memory)
// - We then write the return location (pc + 5) into the call stack (in memory), whereby the constant 5
// corresponds to the size of the internal_call instruction in bytes.

#[RETURN_POINTER_INCREMENT]
sel_op_internal_call * (internal_return_ptr' - (internal_return_ptr + 1)) = 0;
sel_op_internal_call * (internal_return_ptr - mem_addr_b) = 0;
sel_op_internal_call * (pc' - ia) = 0;
sel_op_internal_call * ((pc + 1) - ib) = 0;
sel_op_internal_call * ((pc + 5) - ib) = 0; // 5 = size in bytes of internal call instruction

// TODO(md): Below relations may be removed through sub-op table lookup
sel_op_internal_call * (rwb - 1) = 0;
Expand Down Expand Up @@ -434,8 +435,9 @@ namespace main(256);

// When considering two adjacent main trace rows,
// the program counter must increment if not jumping or returning.
#[PC_INCREMENT]
CUR_AND_NEXT_ARE_MAIN * (1 - SEL_ALL_CTRL_FLOW) * (pc' - (pc + 1)) = 0;
// TODO: Adapt PC increment to byte-based PC indexing
// #[PC_INCREMENT]
// CUR_AND_NEXT_ARE_MAIN * (1 - SEL_ALL_CTRL_FLOW) * (pc' - (pc + 1)) = 0;

// When considering two adjacent main trace rows,
// the internal return ptr must stay the same if not jumping or returning.
Expand Down
Loading

0 comments on commit 29724f3

Please sign in to comment.