Represent small values as single bytes (#4929)

## Description

This change leverages the SB/LB instructions to change the memory representation of all sufficiently small values so that they fit in a single byte instead of a full word.

The type size and passing calculations have been changed to align elements of structs and enums to full words.

Structs and data section entries are filled with right-padding to align their elements to words. Enums are still left-padded. The data section generation has been refactored to allow for these two padding modes.

Arrays and slices contain no inner padding, so byte sequences are now properly consecutive and packed. As a whole, however, they may be right-padded in certain circumstances to maintain word alignment.

Direct usages of LW/SW have been changed to LB/SB where appropriate.

The LWDataId virtual instruction has been renamed to LoadDataId to better represent the fact that it can load both word-sized and byte-sized values.

## Checklist

- [x] I have linked to any relevant issues.
- [x] I have commented my code, particularly in hard-to-understand areas.
- [x] I have updated the documentation where relevant (API docs, the reference, and the Sway book).
- [x] I have added tests that prove my fix is effective or that my feature works.
- [x] I have added (or requested a maintainer to add) the necessary `Breaking*` or `New Feature` labels where relevant.
- [x] I have done my best to ensure that my PR adheres to [the Fuel Labs Code Review Standards](https://github.com/FuelLabs/rfcs/blob/master/text/code-standards/external-contributors.md).
- [x] I have requested a review from the relevant team or maintainers.

---------

Co-authored-by: xunilrj <xunilrj@hotmail.com>
Co-authored-by: Igor Rončević <ironcev@hotmail.com>
FuelLabs · Nov 8, 2023 · 95481fc · 95481fc
1 parent 37b0a70
commit 95481fc
Show file tree

Hide file tree

Showing 89 changed files with 2,445 additions and 858 deletions.
diff --git a/sway-core/src/asm_generation/finalized_asm.rs b/sway-core/src/asm_generation/finalized_asm.rs
@@ -118,7 +118,7 @@ fn to_bytecode_mut(
     // Some LWs are expanded into two ops to allow for data larger than one word, so we calculate
     // exactly how many ops will be generated to calculate the offset.
     let offset_to_data_section_in_bytes = ops.iter().fold(0, |acc, item| match &item.opcode {
-        AllocatedOpcode::LWDataId(_reg, data_label)
+        AllocatedOpcode::LoadDataId(_reg, data_label)
             if !data_section
                 .has_copy_type(data_label)
                 .expect("data label references non existent data -- internal error") =>

diff --git a/sway-core/src/asm_generation/fuel/allocated_abstract_instruction_set.rs b/sway-core/src/asm_generation/fuel/allocated_abstract_instruction_set.rs
@@ -348,7 +348,7 @@ impl AllocatedAbstractInstructionSet {
                         let data_id =
                             data_section.insert_data_value(Entry::new_word(offset, None, None));
                         realized_ops.push(RealizedOp {
-                            opcode: AllocatedOpcode::LWDataId(r1, data_id),
+                            opcode: AllocatedOpcode::LoadDataId(r1, data_id),
                             owning_span,
                             comment,
                         });
@@ -414,7 +414,7 @@ impl AllocatedAbstractInstructionSet {
             Either::Right(Label(_)) => 0,
 
-            // A special case for LWDataId which may be 1 or 2 ops, depending on the source size.
+            // A special case for LoadDataId which may be 1 or 2 ops, depending on the source size.
-            Either::Left(AllocatedOpcode::LWDataId(_, ref data_id)) => {
+            Either::Left(AllocatedOpcode::LoadDataId(_, ref data_id)) => {
                 let has_copy_type = data_section.has_copy_type(data_id).expect(
                     "Internal miscalculation in data section -- \
                         data id did not match up to any actual data",

diff --git a/sway-core/src/asm_generation/fuel/data_section.rs b/sway-core/src/asm_generation/fuel/data_section.rs
@@ -1,61 +1,90 @@
-use crate::asm_generation::from_ir::ir_type_size_in_bytes;
+use crate::{
+    asm_generation::from_ir::ir_type_size_in_bytes, size_bytes_round_up_to_word_alignment,
+};
 
 use sway_ir::{Constant, ConstantValue, Context};
 
 use std::{
     collections::BTreeMap,
     fmt::{self, Write},
+    iter::repeat,
 };
 
 // An entry in the data section.  It's important for the size to be correct, especially for unions
 // where the size could be larger than the represented value.
 #[derive(Clone, Debug)]
 pub struct Entry {
     value: Datum,
-    size: usize,
+    padding: Padding,
     // It is assumed, for now, that only configuration-time constants have a name. Otherwise, this
     // is `None`.
     name: Option<String>,
 }
 
 #[derive(Clone, Debug)]
 pub enum Datum {
+    Byte(u8),
     Word(u64),
     ByteArray(Vec<u8>),
     Collection(Vec<Entry>),
 }
 
+#[derive(Clone, Debug)]
+pub(crate) enum Padding {
+    Left { target_size: usize },
+    Right { target_size: usize },
+}
+
+impl Padding {
+    pub fn target_size(&self) -> usize {
+        use Padding::*;
+        match self {
+            Left { target_size } | Right { target_size } => *target_size,
+        }
+    }
+}
+
 impl Entry {
-    pub(crate) fn new_word(value: u64, size: Option<usize>, name: Option<String>) -> Entry {
+    pub(crate) fn new_byte(value: u8, name: Option<String>, padding: Option<Padding>) -> Entry {
+        Entry {
+            value: Datum::Byte(value),
+            padding: padding.unwrap_or(Padding::Right { target_size: 1 }),
+            name,
+        }
+    }
+
+    pub(crate) fn new_word(value: u64, name: Option<String>, padding: Option<Padding>) -> Entry {
         Entry {
             value: Datum::Word(value),
-            size: size.unwrap_or(8),
+            padding: padding.unwrap_or(Padding::Right { target_size: 8 }),
             name,
         }
     }
 
     pub(crate) fn new_byte_array(
         bytes: Vec<u8>,
-        size: Option<usize>,
         name: Option<String>,
+        padding: Option<Padding>,
     ) -> Entry {
-        let size = size.unwrap_or(bytes.len());
         Entry {
+            padding: padding.unwrap_or(Padding::Right {
+                target_size: bytes.len(),
+            }),
             value: Datum::ByteArray(bytes),
-            size,
             name,
         }
     }
 
     pub(crate) fn new_collection(
         elements: Vec<Entry>,
-        size: Option<usize>,
         name: Option<String>,
+        padding: Option<Padding>,
     ) -> Entry {
-        let size = size.unwrap_or_else(|| elements.iter().map(|el| el.size).sum());
         Entry {
+            padding: padding.unwrap_or(Padding::Right {
+                target_size: elements.iter().map(|el| el.padding.target_size()).sum(),
+            }),
             value: Datum::Collection(elements),
-            size,
             name,
         }
     }
@@ -64,11 +93,11 @@ impl Entry {
         context: &Context,
         constant: &Constant,
         name: Option<String>,
+        padding: Option<Padding>,
     ) -> Entry {
         // We have to do some painful special handling here for enums, which are tagged unions.
         // This really should be handled by the IR more explicitly and is something that will
         // hopefully be addressed by https://github.com/FuelLabs/sway/issues/2819#issuecomment-1256930392
-        let size = Some(ir_type_size_in_bytes(context, &constant.ty) as usize);
 
         // Is this constant a tagged union?
         if constant.ty.is_struct(context) {
@@ -81,43 +110,80 @@ impl Entry {
                 // we use unions (otherwise we should be generalising this a bit more).
                 if let ConstantValue::Struct(els) = &constant.value {
                     if els.len() == 2 {
-                        let tag_entry = Entry::from_constant(context, &els[0], None);
+                        let tag_entry = Entry::from_constant(context, &els[0], None, None);
 
                         // Here's the special case.  We need to get the size of the union and
                         // attach it to this constant entry which will be one of the variants.
-                        let mut val_entry = Entry::from_constant(context, &els[1], None);
-                        val_entry.size = ir_type_size_in_bytes(context, &field_tys[1]) as usize;
+                        let val_entry = {
+                            let target_size = size_bytes_round_up_to_word_alignment!(
+                                ir_type_size_in_bytes(context, &field_tys[1]) as usize
+                            );
+                            Entry::from_constant(
+                                context,
+                                &els[1],
+                                None,
+                                Some(Padding::Left { target_size }),
+                            )
+                        };
 
                         // Return here from our special case.
-                        return Entry::new_collection(vec![tag_entry, val_entry], size, name);
+                        return Entry::new_collection(vec![tag_entry, val_entry], name, padding);
                     }
                 }
             }
         };
 
         // Not a tagged union, no trickiness required.
         match &constant.value {
-            ConstantValue::Undef | ConstantValue::Unit => Entry::new_word(0, size, name),
-            ConstantValue::Bool(b) => Entry::new_word(u64::from(*b), size, name),
-            ConstantValue::Uint(u) => Entry::new_word(*u, size, name),
-            ConstantValue::U256(u) => Entry::new_byte_array(u.to_be_bytes().to_vec(), size, name),
-            ConstantValue::B256(bs) => Entry::new_byte_array(bs.to_be_bytes().to_vec(), size, name),
-            ConstantValue::String(bs) => Entry::new_byte_array(bs.clone(), size, name),
-
-            ConstantValue::Array(els) | ConstantValue::Struct(els) => Entry::new_collection(
+            ConstantValue::Undef | ConstantValue::Unit => Entry::new_byte(0, name, padding),
+            ConstantValue::Bool(b) => Entry::new_byte(u8::from(*b), name, padding),
+            ConstantValue::Uint(u) => {
+                if constant.ty.is_uint8(context) {
+                    Entry::new_byte(*u as u8, name, padding)
+                } else {
+                    Entry::new_word(*u, name, padding)
+                }
+            }
+            ConstantValue::U256(u) => {
+                Entry::new_byte_array(u.to_be_bytes().to_vec(), name, padding)
+            }
+            ConstantValue::B256(bs) => {
+                Entry::new_byte_array(bs.to_be_bytes().to_vec(), name, padding)
+            }
+            ConstantValue::String(bs) => Entry::new_byte_array(bs.clone(), name, padding),
+
+            ConstantValue::Array(els) => Entry::new_collection(
+                els.iter()
+                    .map(|el| Entry::from_constant(context, el, None, None))
+                    .collect(),
+                name,
+                padding,
+            ),
+            ConstantValue::Struct(els) => Entry::new_collection(
                 els.iter()
-                    .map(|el| Entry::from_constant(context, el, None))
+                    .map(|el| {
+                        let target_size = size_bytes_round_up_to_word_alignment!(
+                            ir_type_size_in_bytes(context, &el.ty) as usize
+                        );
+                        Entry::from_constant(
+                            context,
+                            el,
+                            None,
+                            Some(Padding::Right { target_size }),
+                        )
+                    })
                     .collect(),
-                size,
                 name,
+                padding,
             ),
         }
     }
 
     /// Converts a literal to a big-endian representation. This is padded to words.
     pub(crate) fn to_bytes(&self) -> Vec<u8> {
         // Get the big-endian byte representation of the basic value.
-        let mut bytes = match &self.value {
+        let bytes = match &self.value {
+            Datum::Byte(b) => vec![*b],
             Datum::Word(w) => w.to_be_bytes().to_vec(),
             Datum::ByteArray(bs) if bs.len() % 8 == 0 => bs.clone(),
             Datum::ByteArray(bs) => bs
@@ -129,23 +195,32 @@ impl Entry {
             Datum::Collection(els) => els.iter().flat_map(|el| el.to_bytes()).collect(),
         };
 
-        // Pad the size out to match the specified size.
-        if self.size > bytes.len() {
-            let mut pad = vec![0; self.size - bytes.len()];
-            pad.append(&mut bytes);
-            bytes = pad;
+        match self.padding {
+            Padding::Left { target_size } => {
+                let target_size = size_bytes_round_up_to_word_alignment!(target_size);
+                let left_pad = target_size.saturating_sub(bytes.len());
+                [repeat(0u8).take(left_pad).collect(), bytes].concat()
+            }
+            Padding::Right { target_size } => {
+                let target_size = size_bytes_round_up_to_word_alignment!(target_size);
+                let right_pad = target_size.saturating_sub(bytes.len());
+                [bytes, repeat(0u8).take(right_pad).collect()].concat()
+            }
         }
-
-        bytes
     }
 
     pub(crate) fn has_copy_type(&self) -> bool {
-        matches!(self.value, Datum::Word(_))
+        matches!(self.value, Datum::Word(_) | Datum::Byte(_))
+    }
+
+    pub(crate) fn is_byte(&self) -> bool {
+        matches!(self.value, Datum::Byte(_))
     }
 
     pub(crate) fn equiv(&self, entry: &Entry) -> bool {
         fn equiv_data(lhs: &Datum, rhs: &Datum) -> bool {
             match (lhs, rhs) {
+                (Datum::Byte(l), Datum::Byte(r)) => l == r,
                 (Datum::Word(l), Datum::Word(r)) => l == r,
                 (Datum::ByteArray(l), Datum::ByteArray(r)) => l == r,
 
@@ -218,6 +293,13 @@ impl DataSection {
             .map(|entry| entry.has_copy_type())
     }
 
+    /// Returns whether a specific [DataId] value is a byte entry.
+    pub(crate) fn is_byte(&self, id: &DataId) -> Option<bool> {
+        self.value_pairs
+            .get(id.0 as usize)
+            .map(|entry| entry.is_byte())
+    }
+
     /// When generating code, sometimes a hard-coded data pointer is needed to reference
     /// static values that have a length longer than one word.
     /// This method appends pointers to the end of the data section (thus, not altering the data
@@ -253,6 +335,7 @@ impl fmt::Display for DataSection {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         fn display_entry(datum: &Datum) -> String {
             match datum {
+                Datum::Byte(w) => format!(".byte {w}"),
                 Datum::Word(w) => format!(".word {w}"),
                 Datum::ByteArray(bs) => {
                     let mut hex_str = String::new();