AVM: Handle Teal programs with manual constant blocks better (#4442)

This PR fixes #4034, by forcing the use of "push*" when mixing manual constant blocks with the int/byte/addr/method pseudo-ops. (And a related disassembly bug). However, to avoid this "deoptimization" when manual blocks are used to add program metadata, deadcode constant blocks are ignored for this check.
algorand · Sep 1, 2022 · 7f6dbc7 · 7f6dbc7
1 parent e83acec
commit 7f6dbc7
Show file tree

Hide file tree

Showing 3 changed files with 253 additions and 43 deletions.
diff --git a/data/transactions/logic/assembler.go b/data/transactions/logic/assembler.go
@@ -224,11 +224,13 @@ type OpStream struct {
 
 	intc         []uint64       // observed ints in code. We'll put them into a intcblock
 	intcRefs     []intReference // references to int pseudo-op constants, used for optimization
-	hasIntcBlock bool           // prevent prepending intcblock because asm has one
+	cntIntcBlock int            // prevent prepending intcblock because asm has one
+	hasPseudoInt bool           // were any `int` pseudo ops used?
 
 	bytec         [][]byte        // observed bytes in code. We'll put them into a bytecblock
 	bytecRefs     []byteReference // references to byte/addr pseudo-op constants, used for optimization
-	hasBytecBlock bool            // prevent prepending bytecblock because asm has one
+	cntBytecBlock int             // prevent prepending bytecblock because asm has one
+	hasPseudoByte bool            // were any `byte` (or equivalent) pseudo ops used?
 
 	// tracks information we know to be true at the point being assembled
 	known        ProgramKnowledge
@@ -373,18 +375,18 @@ func (ops *OpStream) returns(spec *OpSpec, replacement StackType) {
 func (ops *OpStream) Intc(constIndex uint) {
 	switch constIndex {
 	case 0:
-		ops.pending.WriteByte(0x22) // intc_0
+		ops.pending.WriteByte(OpsByName[ops.Version]["intc_0"].Opcode)
 	case 1:
-		ops.pending.WriteByte(0x23) // intc_1
+		ops.pending.WriteByte(OpsByName[ops.Version]["intc_1"].Opcode)
 	case 2:
-		ops.pending.WriteByte(0x24) // intc_2
+		ops.pending.WriteByte(OpsByName[ops.Version]["intc_2"].Opcode)
 	case 3:
-		ops.pending.WriteByte(0x25) // intc_3
+		ops.pending.WriteByte(OpsByName[ops.Version]["intc_3"].Opcode)
 	default:
 		if constIndex > 0xff {
 			ops.error("cannot have more than 256 int constants")
 		}
-		ops.pending.WriteByte(0x21) // intc
+		ops.pending.WriteByte(OpsByName[ops.Version]["intc"].Opcode)
 		ops.pending.WriteByte(uint8(constIndex))
 	}
 	if constIndex >= uint(len(ops.intc)) {
@@ -394,8 +396,10 @@ func (ops *OpStream) Intc(constIndex uint) {
 	}
 }
 
-// Uint writes opcodes for loading a uint literal
-func (ops *OpStream) Uint(val uint64) {
+// IntLiteral writes opcodes for loading a uint literal
+func (ops *OpStream) IntLiteral(val uint64) {
+	ops.hasPseudoInt = true
+
 	found := false
 	var constIndex uint
 	for i, cv := range ops.intc {
@@ -405,7 +409,11 @@ func (ops *OpStream) Uint(val uint64) {
 			break
 		}
 	}
+
 	if !found {
+		if ops.cntIntcBlock > 0 {
+			ops.errorf("int %d used without %d in intcblock", val, val)
+		}
 		constIndex = uint(len(ops.intc))
 		ops.intc = append(ops.intc, val)
 	}
@@ -420,18 +428,18 @@ func (ops *OpStream) Uint(val uint64) {
 func (ops *OpStream) Bytec(constIndex uint) {
 	switch constIndex {
 	case 0:
-		ops.pending.WriteByte(0x28) // bytec_0
+		ops.pending.WriteByte(OpsByName[ops.Version]["bytec_0"].Opcode)
 	case 1:
-		ops.pending.WriteByte(0x29) // bytec_1
+		ops.pending.WriteByte(OpsByName[ops.Version]["bytec_1"].Opcode)
 	case 2:
-		ops.pending.WriteByte(0x2a) // bytec_2
+		ops.pending.WriteByte(OpsByName[ops.Version]["bytec_2"].Opcode)
 	case 3:
-		ops.pending.WriteByte(0x2b) // bytec_3
+		ops.pending.WriteByte(OpsByName[ops.Version]["bytec_3"].Opcode)
 	default:
 		if constIndex > 0xff {
 			ops.error("cannot have more than 256 byte constants")
 		}
-		ops.pending.WriteByte(0x27) // bytec
+		ops.pending.WriteByte(OpsByName[ops.Version]["bytec"].Opcode)
 		ops.pending.WriteByte(uint8(constIndex))
 	}
 	if constIndex >= uint(len(ops.bytec)) {
@@ -444,6 +452,8 @@ func (ops *OpStream) Bytec(constIndex uint) {
 // ByteLiteral writes opcodes and data for loading a []byte literal
 // Values are accumulated so that they can be put into a bytecblock
 func (ops *OpStream) ByteLiteral(val []byte) {
+	ops.hasPseudoByte = true
+
 	found := false
 	var constIndex uint
 	for i, cv := range ops.bytec {
@@ -454,6 +464,9 @@ func (ops *OpStream) ByteLiteral(val []byte) {
 		}
 	}
 	if !found {
+		if ops.cntBytecBlock > 0 {
+			ops.errorf("byte/addr/method used without value in bytecblock")
+		}
 		constIndex = uint(len(ops.bytec))
 		ops.bytec = append(ops.bytec, val)
 	}
@@ -468,23 +481,46 @@ func asmInt(ops *OpStream, spec *OpSpec, args []string) error {
 	if len(args) != 1 {
 		return ops.error("int needs one argument")
 	}
+
+	// After backBranchEnabledVersion, control flow is confusing, so if there's
+	// a manual cblock, use push instead of trying to use what's given.
+	if ops.cntIntcBlock > 0 && ops.Version >= backBranchEnabledVersion {
+		// We don't understand control-flow, so use pushint
+		ops.warnf("int %s used with explicit intcblock. must pushint", args[0])
+		pushint := OpsByName[ops.Version]["pushint"]
+		return asmPushInt(ops, &pushint, args)
+	}
+
+	// There are no backjumps, but there are multiple cblocks. Maybe one is
+	// conditional skipped. Too confusing.
+	if ops.cntIntcBlock > 1 {
+		pushint, ok := OpsByName[ops.Version]["pushint"]
+		if ok {
+			return asmPushInt(ops, &pushint, args)
+		}
+		return ops.errorf("int %s used with manual intcblocks. Use intc.", args[0])
+	}
+
+	// In both of the above clauses, we _could_ track whether a particular
+	// intcblock dominates the current instruction. If so, we could use it.
+
 	// check txn type constants
 	i, ok := txnTypeMap[args[0]]
 	if ok {
-		ops.Uint(i)
+		ops.IntLiteral(i)
 		return nil
 	}
 	// check OnCompetion constants
 	oc, isOCStr := onCompletionMap[args[0]]
 	if isOCStr {
-		ops.Uint(oc)
+		ops.IntLiteral(oc)
 		return nil
 	}
 	val, err := strconv.ParseUint(args[0], 0, 64)
 	if err != nil {
 		return ops.error(err)
 	}
-	ops.Uint(val)
+	ops.IntLiteral(val)
 	return nil
 }
 
@@ -696,6 +732,29 @@ func asmByte(ops *OpStream, spec *OpSpec, args []string) error {
 	if len(args) == 0 {
 		return ops.errorf("%s operation needs byte literal argument", spec.Name)
 	}
+
+	// After backBranchEnabledVersion, control flow is confusing, so if there's
+	// a manual cblock, use push instead of trying to use what's given.
+	if ops.cntBytecBlock > 0 && ops.Version >= backBranchEnabledVersion {
+		// We don't understand control-flow, so use pushbytes
+		ops.warnf("byte %s used with explicit bytecblock. must pushbytes", args[0])
+		pushbytes := OpsByName[ops.Version]["pushbytes"]
+		return asmPushBytes(ops, &pushbytes, args)
+	}
+
+	// There are no backjumps, but there are multiple cblocks. Maybe one is
+	// conditional skipped. Too confusing.
+	if ops.cntBytecBlock > 1 {
+		pushbytes, ok := OpsByName[ops.Version]["pushbytes"]
+		if ok {
+			return asmPushBytes(ops, &pushbytes, args)
+		}
+		return ops.errorf("byte %s used with manual bytecblocks. Use bytec.", args[0])
+	}
+
+	// In both of the above clauses, we _could_ track whether a particular
+	// bytecblock dominates the current instruction. If so, we could use it.
+
 	val, consumed, err := parseBinaryArgs(args)
 	if err != nil {
 		return ops.error(err)
@@ -734,21 +793,32 @@ func asmMethod(ops *OpStream, spec *OpSpec, args []string) error {
 
 func asmIntCBlock(ops *OpStream, spec *OpSpec, args []string) error {
 	ops.pending.WriteByte(spec.Opcode)
+	ivals := make([]uint64, len(args))
 	var scratch [binary.MaxVarintLen64]byte
 	l := binary.PutUvarint(scratch[:], uint64(len(args)))
 	ops.pending.Write(scratch[:l])
-	ops.intcRefs = nil
-	ops.intc = make([]uint64, len(args))
 	for i, xs := range args {
 		cu, err := strconv.ParseUint(xs, 0, 64)
 		if err != nil {
 			ops.error(err)
 		}
 		l = binary.PutUvarint(scratch[:], cu)
 		ops.pending.Write(scratch[:l])
-		ops.intc[i] = cu
+		if !ops.known.deadcode {
+			ivals[i] = cu
+		}
 	}
-	ops.hasIntcBlock = true
+	if !ops.known.deadcode {
+		// If we previously processed an `int`, we thought we could insert our
+		// own intcblock, but now we see a manual one.
+		if ops.hasPseudoInt {
+			ops.error("intcblock following int")
+		}
+		ops.intcRefs = nil
+		ops.intc = ivals
+		ops.cntIntcBlock++
+	}
+
 	return nil
 }
 
@@ -763,8 +833,7 @@ func asmByteCBlock(ops *OpStream, spec *OpSpec, args []string) error {
 			// intcblock, but parseBinaryArgs would have
 			// to return a useful consumed value even in
 			// the face of errors.  Hard.
-			ops.error(err)
-			return nil
+			return ops.error(err)
 		}
 		bvals = append(bvals, val)
 		rest = rest[consumed:]
@@ -777,9 +846,16 @@ func asmByteCBlock(ops *OpStream, spec *OpSpec, args []string) error {
 		ops.pending.Write(scratch[:l])
 		ops.pending.Write(bv)
 	}
-	ops.bytecRefs = nil
-	ops.bytec = bvals
-	ops.hasBytecBlock = true
+	if !ops.known.deadcode {
+		// If we previously processed a pseudo `byte`, we thought we could
+		// insert our own bytecblock, but now we see a manual one.
+		if ops.hasPseudoByte {
+			ops.error("bytecblock following byte/addr/method")
+		}
+		ops.bytecRefs = nil
+		ops.bytec = bvals
+		ops.cntBytecBlock++
+	}
 	return nil
 }
 
@@ -1640,7 +1716,7 @@ func (ops *OpStream) assemble(text string) error {
 		ops.error(err)
 	}
 
-	// backward compatibility: do not allow jumps behind last instruction in v1
+	// backward compatibility: do not allow jumps past last instruction in v1
 	if ops.Version <= 1 {
 		for label, dest := range ops.labels {
 			if dest == ops.pending.Len() {
@@ -1800,7 +1876,7 @@ func replaceBytes(s []byte, index, originalLen int, newBytes []byte) []byte {
 // This function only optimizes constants introduces by the int pseudo-op, not
 // preexisting intcblocks in the code.
 func (ops *OpStream) optimizeIntcBlock() error {
-	if ops.hasIntcBlock {
+	if ops.cntIntcBlock > 0 {
 		// don't optimize an existing intcblock, only int pseudo-ops
 		return nil
 	}
@@ -1843,7 +1919,7 @@ func (ops *OpStream) optimizeIntcBlock() error {
 // This function only optimizes constants introduces by the byte or addr
 // pseudo-ops, not preexisting bytecblocks in the code.
 func (ops *OpStream) optimizeBytecBlock() error {
-	if ops.hasBytecBlock {
+	if ops.cntBytecBlock > 0 {
 		// don't optimize an existing bytecblock, only byte/addr pseudo-ops
 		return nil
 	}
@@ -2018,17 +2094,17 @@ func (ops *OpStream) prependCBlocks() []byte {
 	prebytes := bytes.Buffer{}
 	vlen := binary.PutUvarint(scratch[:], ops.Version)
 	prebytes.Write(scratch[:vlen])
-	if len(ops.intc) > 0 && !ops.hasIntcBlock {
-		prebytes.WriteByte(0x20) // intcblock
+	if len(ops.intc) > 0 && ops.cntIntcBlock == 0 {
+		prebytes.WriteByte(OpsByName[ops.Version]["intcblock"].Opcode)
 		vlen := binary.PutUvarint(scratch[:], uint64(len(ops.intc)))
 		prebytes.Write(scratch[:vlen])
 		for _, iv := range ops.intc {
 			vlen = binary.PutUvarint(scratch[:], iv)
 			prebytes.Write(scratch[:vlen])
 		}
 	}
-	if len(ops.bytec) > 0 && !ops.hasBytecBlock {
-		prebytes.WriteByte(0x26) // bytecblock
+	if len(ops.bytec) > 0 && ops.cntBytecBlock == 0 {
+		prebytes.WriteByte(OpsByName[ops.Version]["bytecblock"].Opcode)
 		vlen := binary.PutUvarint(scratch[:], uint64(len(ops.bytec)))
 		prebytes.Write(scratch[:vlen])
 		for _, bv := range ops.bytec {
@@ -2266,7 +2342,7 @@ func disassemble(dis *disassembleState, spec *OpSpec) (string, error) {
 				return "", err
 			}
 
-			dis.intc = append(dis.intc, intc...)
+			dis.intc = intc
 			for i, iv := range intc {
 				if i != 0 {
 					out += " "
@@ -2279,7 +2355,7 @@ func disassemble(dis *disassembleState, spec *OpSpec) (string, error) {
 			if err != nil {
 				return "", err
 			}
-			dis.bytec = append(dis.bytec, bytec...)
+			dis.bytec = bytec
 			for i, bv := range bytec {
 				if i != 0 {
 					out += " "