Skip to content

Commit

Permalink
Represent small values as single bytes (#4929)
Browse files Browse the repository at this point in the history
## Description

This change leverages the SB/LB instructions to change the memory
representation of all small enough values to make them fit in a single
byte instead of a full word.

The type size and passing calculations have been changed to align
elements of structs and enums to full words.

Structs and data section entries are filled with right-padding to align
their elements to words. Enums are still left-padded. The Data section
generation has been refactored to allow for these two padding modes.

Arrays and slices contain no inner padding; byte sequences are now properly
consecutive and packed. As a whole, however, they may be right-padded in
certain circumstances to maintain word alignment.

Direct usages of LW/SW have been changed to LB/SB where appropriate.

The LWDataId virtual instruction has been changed to LoadDataId to
better represent the fact that it can load both word and byte sized
values.

## Checklist

- [x] I have linked to any relevant issues.
- [x] I have commented my code, particularly in hard-to-understand
areas.
- [x] I have updated the documentation where relevant (API docs, the
reference, and the Sway book).
- [x] I have added tests that prove my fix is effective or that my
feature works.
- [x] I have added (or requested a maintainer to add) the necessary
`Breaking*` or `New Feature` labels where relevant.
- [x] I have done my best to ensure that my PR adheres to [the Fuel Labs
Code Review
Standards](https://github.com/FuelLabs/rfcs/blob/master/text/code-standards/external-contributors.md).
- [x] I have requested a review from the relevant team or maintainers.

---------

Co-authored-by: xunilrj <xunilrj@hotmail.com>
Co-authored-by: Igor Rončević <ironcev@hotmail.com>
  • Loading branch information
3 people authored Nov 8, 2023
1 parent 37b0a70 commit 95481fc
Show file tree
Hide file tree
Showing 89 changed files with 2,445 additions and 858 deletions.
2 changes: 1 addition & 1 deletion sway-core/src/asm_generation/finalized_asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ fn to_bytecode_mut(
// Some LoadDataId ops are expanded into two ops to allow for data larger than one word, so we
// calculate exactly how many ops will be generated to calculate the offset.
let offset_to_data_section_in_bytes = ops.iter().fold(0, |acc, item| match &item.opcode {
AllocatedOpcode::LWDataId(_reg, data_label)
AllocatedOpcode::LoadDataId(_reg, data_label)
if !data_section
.has_copy_type(data_label)
.expect("data label references non existent data -- internal error") =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ impl AllocatedAbstractInstructionSet {
let data_id =
data_section.insert_data_value(Entry::new_word(offset, None, None));
realized_ops.push(RealizedOp {
opcode: AllocatedOpcode::LWDataId(r1, data_id),
opcode: AllocatedOpcode::LoadDataId(r1, data_id),
owning_span,
comment,
});
Expand Down Expand Up @@ -414,7 +414,7 @@ impl AllocatedAbstractInstructionSet {
Either::Right(Label(_)) => 0,

// A special case for LoadDataId which may be 1 or 2 ops, depending on the source size.
Either::Left(AllocatedOpcode::LWDataId(_, ref data_id)) => {
Either::Left(AllocatedOpcode::LoadDataId(_, ref data_id)) => {
let has_copy_type = data_section.has_copy_type(data_id).expect(
"Internal miscalculation in data section -- \
data id did not match up to any actual data",
Expand Down
151 changes: 117 additions & 34 deletions sway-core/src/asm_generation/fuel/data_section.rs
Original file line number Diff line number Diff line change
@@ -1,61 +1,90 @@
use crate::asm_generation::from_ir::ir_type_size_in_bytes;
use crate::{
asm_generation::from_ir::ir_type_size_in_bytes, size_bytes_round_up_to_word_alignment,
};

use sway_ir::{Constant, ConstantValue, Context};

use std::{
collections::BTreeMap,
fmt::{self, Write},
iter::repeat,
};

// An entry in the data section. It's important for the size to be correct, especially for unions
// where the size could be larger than the represented value.
#[derive(Clone, Debug)]
pub struct Entry {
value: Datum,
size: usize,
padding: Padding,
// It is assumed, for now, that only configuration-time constants have a name. Otherwise, this
// is `None`.
name: Option<String>,
}

/// The raw value stored by a data section [Entry], before any padding is applied.
#[derive(Clone, Debug)]
pub enum Datum {
/// A single byte; serialized as exactly one byte and displayed as `.byte`.
Byte(u8),
/// A 64-bit word; serialized as its eight big-endian bytes and displayed as `.word`.
Word(u64),
/// A raw sequence of bytes.
ByteArray(Vec<u8>),
/// A sequence of nested entries; serialized as the concatenation of each element's bytes.
Collection(Vec<Entry>),
}

/// Describes on which side an entry's bytes are zero-filled and the size, in bytes, that the
/// entry should occupy once padded.
#[derive(Clone, Debug)]
pub(crate) enum Padding {
    /// Zero bytes are placed before the value.
    Left { target_size: usize },
    /// Zero bytes are placed after the value.
    Right { target_size: usize },
}

impl Padding {
    /// Returns the requested size in bytes, regardless of the padding direction.
    pub fn target_size(&self) -> usize {
        match *self {
            Self::Left { target_size } => target_size,
            Self::Right { target_size } => target_size,
        }
    }
}

impl Entry {
pub(crate) fn new_word(value: u64, size: Option<usize>, name: Option<String>) -> Entry {
/// Builds an entry that holds a single byte.
///
/// When `padding` is `None`, the entry is right-padded with a target size of one byte.
pub(crate) fn new_byte(value: u8, name: Option<String>, padding: Option<Padding>) -> Entry {
    let padding = match padding {
        Some(explicit) => explicit,
        None => Padding::Right { target_size: 1 },
    };

    Entry {
        value: Datum::Byte(value),
        padding,
        name,
    }
}

pub(crate) fn new_word(value: u64, name: Option<String>, padding: Option<Padding>) -> Entry {
Entry {
value: Datum::Word(value),
size: size.unwrap_or(8),
padding: padding.unwrap_or(Padding::Right { target_size: 8 }),
name,
}
}

pub(crate) fn new_byte_array(
bytes: Vec<u8>,
size: Option<usize>,
name: Option<String>,
padding: Option<Padding>,
) -> Entry {
let size = size.unwrap_or(bytes.len());
Entry {
padding: padding.unwrap_or(Padding::Right {
target_size: bytes.len(),
}),
value: Datum::ByteArray(bytes),
size,
name,
}
}

pub(crate) fn new_collection(
elements: Vec<Entry>,
size: Option<usize>,
name: Option<String>,
padding: Option<Padding>,
) -> Entry {
let size = size.unwrap_or_else(|| elements.iter().map(|el| el.size).sum());
Entry {
padding: padding.unwrap_or(Padding::Right {
target_size: elements.iter().map(|el| el.padding.target_size()).sum(),
}),
value: Datum::Collection(elements),
size,
name,
}
}
Expand All @@ -64,11 +93,11 @@ impl Entry {
context: &Context,
constant: &Constant,
name: Option<String>,
padding: Option<Padding>,
) -> Entry {
// We have to do some painful special handling here for enums, which are tagged unions.
// This really should be handled by the IR more explicitly and is something that will
// hopefully be addressed by https://github.com/FuelLabs/sway/issues/2819#issuecomment-1256930392
let size = Some(ir_type_size_in_bytes(context, &constant.ty) as usize);

// Is this constant a tagged union?
if constant.ty.is_struct(context) {
Expand All @@ -81,43 +110,80 @@ impl Entry {
// we use unions (otherwise we should be generalising this a bit more).
if let ConstantValue::Struct(els) = &constant.value {
if els.len() == 2 {
let tag_entry = Entry::from_constant(context, &els[0], None);
let tag_entry = Entry::from_constant(context, &els[0], None, None);

// Here's the special case. We need to get the size of the union and
// attach it to this constant entry which will be one of the variants.
let mut val_entry = Entry::from_constant(context, &els[1], None);
val_entry.size = ir_type_size_in_bytes(context, &field_tys[1]) as usize;
let val_entry = {
let target_size = size_bytes_round_up_to_word_alignment!(
ir_type_size_in_bytes(context, &field_tys[1]) as usize
);
Entry::from_constant(
context,
&els[1],
None,
Some(Padding::Left { target_size }),
)
};

// Return here from our special case.
return Entry::new_collection(vec![tag_entry, val_entry], size, name);
return Entry::new_collection(vec![tag_entry, val_entry], name, padding);
}
}
}
};

// Not a tagged union, no trickiness required.
match &constant.value {
ConstantValue::Undef | ConstantValue::Unit => Entry::new_word(0, size, name),
ConstantValue::Bool(b) => Entry::new_word(u64::from(*b), size, name),
ConstantValue::Uint(u) => Entry::new_word(*u, size, name),
ConstantValue::U256(u) => Entry::new_byte_array(u.to_be_bytes().to_vec(), size, name),
ConstantValue::B256(bs) => Entry::new_byte_array(bs.to_be_bytes().to_vec(), size, name),
ConstantValue::String(bs) => Entry::new_byte_array(bs.clone(), size, name),

ConstantValue::Array(els) | ConstantValue::Struct(els) => Entry::new_collection(
ConstantValue::Undef | ConstantValue::Unit => Entry::new_byte(0, name, padding),
ConstantValue::Bool(b) => Entry::new_byte(u8::from(*b), name, padding),
ConstantValue::Uint(u) => {
if constant.ty.is_uint8(context) {
Entry::new_byte(*u as u8, name, padding)
} else {
Entry::new_word(*u, name, padding)
}
}
ConstantValue::U256(u) => {
Entry::new_byte_array(u.to_be_bytes().to_vec(), name, padding)
}
ConstantValue::B256(bs) => {
Entry::new_byte_array(bs.to_be_bytes().to_vec(), name, padding)
}
ConstantValue::String(bs) => Entry::new_byte_array(bs.clone(), name, padding),

ConstantValue::Array(els) => Entry::new_collection(
els.iter()
.map(|el| Entry::from_constant(context, el, None, None))
.collect(),
name,
padding,
),
ConstantValue::Struct(els) => Entry::new_collection(
els.iter()
.map(|el| Entry::from_constant(context, el, None))
.map(|el| {
let target_size = size_bytes_round_up_to_word_alignment!(
ir_type_size_in_bytes(context, &el.ty) as usize
);
Entry::from_constant(
context,
el,
None,
Some(Padding::Right { target_size }),
)
})
.collect(),
size,
name,
padding,
),
}
}

/// Converts a literal to a big-endian representation. This is padded to words.
pub(crate) fn to_bytes(&self) -> Vec<u8> {
// Get the big-endian byte representation of the basic value.
let mut bytes = match &self.value {
let bytes = match &self.value {
Datum::Byte(b) => vec![*b],
Datum::Word(w) => w.to_be_bytes().to_vec(),
Datum::ByteArray(bs) if bs.len() % 8 == 0 => bs.clone(),
Datum::ByteArray(bs) => bs
Expand All @@ -129,23 +195,32 @@ impl Entry {
Datum::Collection(els) => els.iter().flat_map(|el| el.to_bytes()).collect(),
};

// Pad the size out to match the specified size.
if self.size > bytes.len() {
let mut pad = vec![0; self.size - bytes.len()];
pad.append(&mut bytes);
bytes = pad;
match self.padding {
Padding::Left { target_size } => {
let target_size = size_bytes_round_up_to_word_alignment!(target_size);
let left_pad = target_size.saturating_sub(bytes.len());
[repeat(0u8).take(left_pad).collect(), bytes].concat()
}
Padding::Right { target_size } => {
let target_size = size_bytes_round_up_to_word_alignment!(target_size);
let right_pad = target_size.saturating_sub(bytes.len());
[bytes, repeat(0u8).take(right_pad).collect()].concat()
}
}

bytes
}

pub(crate) fn has_copy_type(&self) -> bool {
matches!(self.value, Datum::Word(_))
matches!(self.value, Datum::Word(_) | Datum::Byte(_))
}

/// Returns `true` when this entry stores a single-byte value.
pub(crate) fn is_byte(&self) -> bool {
    // Spelled out per variant so that adding a new `Datum` variant forces a decision here.
    match &self.value {
        Datum::Byte(_) => true,
        Datum::Word(_) | Datum::ByteArray(_) | Datum::Collection(_) => false,
    }
}

pub(crate) fn equiv(&self, entry: &Entry) -> bool {
fn equiv_data(lhs: &Datum, rhs: &Datum) -> bool {
match (lhs, rhs) {
(Datum::Byte(l), Datum::Byte(r)) => l == r,
(Datum::Word(l), Datum::Word(r)) => l == r,
(Datum::ByteArray(l), Datum::ByteArray(r)) => l == r,

Expand Down Expand Up @@ -218,6 +293,13 @@ impl DataSection {
.map(|entry| entry.has_copy_type())
}

/// Reports whether the entry referenced by the given [DataId] is a byte entry.
///
/// Yields `None` when `id` does not index an existing entry in `value_pairs`.
pub(crate) fn is_byte(&self, id: &DataId) -> Option<bool> {
    let index = id.0 as usize;
    let entry = self.value_pairs.get(index)?;
    Some(entry.is_byte())
}

/// When generating code, sometimes a hard-coded data pointer is needed to reference
/// static values that have a length longer than one word.
/// This method appends pointers to the end of the data section (thus, not altering the data
Expand Down Expand Up @@ -253,6 +335,7 @@ impl fmt::Display for DataSection {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn display_entry(datum: &Datum) -> String {
match datum {
Datum::Byte(w) => format!(".byte {w}"),
Datum::Word(w) => format!(".word {w}"),
Datum::ByteArray(bs) => {
let mut hex_str = String::new();
Expand Down
Loading

0 comments on commit 95481fc

Please sign in to comment.