Skip to content

Commit

Permalink
AVM: Handle Teal programs with manual constant blocks better (#4442)
Browse files Browse the repository at this point in the history
This PR fixes #4034, by forcing the use of "push*" when mixing manual
constant blocks with the int/byte/addr/method pseudo-ops. (And a
related disassembly bug).

However, to avoid this "deoptimization" when manual blocks are used to
add program metadata, deadcode constant blocks are ignored for this
check.
  • Loading branch information
jannotti authored Sep 1, 2022
1 parent e83acec commit 7f6dbc7
Show file tree
Hide file tree
Showing 3 changed files with 253 additions and 43 deletions.
146 changes: 111 additions & 35 deletions data/transactions/logic/assembler.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,13 @@ type OpStream struct {

intc []uint64 // observed ints in code. We'll put them into a intcblock
intcRefs []intReference // references to int pseudo-op constants, used for optimization
hasIntcBlock bool // prevent prepending intcblock because asm has one
cntIntcBlock int // prevent prepending intcblock because asm has one
hasPseudoInt bool // were any `int` pseudo ops used?

bytec [][]byte // observed bytes in code. We'll put them into a bytecblock
bytecRefs []byteReference // references to byte/addr pseudo-op constants, used for optimization
hasBytecBlock bool // prevent prepending bytecblock because asm has one
cntBytecBlock int // prevent prepending bytecblock because asm has one
hasPseudoByte bool // were any `byte` (or equivalent) pseudo ops used?

// tracks information we know to be true at the point being assembled
known ProgramKnowledge
Expand Down Expand Up @@ -373,18 +375,18 @@ func (ops *OpStream) returns(spec *OpSpec, replacement StackType) {
func (ops *OpStream) Intc(constIndex uint) {
switch constIndex {
case 0:
ops.pending.WriteByte(0x22) // intc_0
ops.pending.WriteByte(OpsByName[ops.Version]["intc_0"].Opcode)
case 1:
ops.pending.WriteByte(0x23) // intc_1
ops.pending.WriteByte(OpsByName[ops.Version]["intc_1"].Opcode)
case 2:
ops.pending.WriteByte(0x24) // intc_2
ops.pending.WriteByte(OpsByName[ops.Version]["intc_2"].Opcode)
case 3:
ops.pending.WriteByte(0x25) // intc_3
ops.pending.WriteByte(OpsByName[ops.Version]["intc_3"].Opcode)
default:
if constIndex > 0xff {
ops.error("cannot have more than 256 int constants")
}
ops.pending.WriteByte(0x21) // intc
ops.pending.WriteByte(OpsByName[ops.Version]["intc"].Opcode)
ops.pending.WriteByte(uint8(constIndex))
}
if constIndex >= uint(len(ops.intc)) {
Expand All @@ -394,8 +396,10 @@ func (ops *OpStream) Intc(constIndex uint) {
}
}

// Uint writes opcodes for loading a uint literal
func (ops *OpStream) Uint(val uint64) {
// IntLiteral writes opcodes for loading a uint literal
func (ops *OpStream) IntLiteral(val uint64) {
ops.hasPseudoInt = true

found := false
var constIndex uint
for i, cv := range ops.intc {
Expand All @@ -405,7 +409,11 @@ func (ops *OpStream) Uint(val uint64) {
break
}
}

if !found {
if ops.cntIntcBlock > 0 {
ops.errorf("int %d used without %d in intcblock", val, val)
}
constIndex = uint(len(ops.intc))
ops.intc = append(ops.intc, val)
}
Expand All @@ -420,18 +428,18 @@ func (ops *OpStream) Uint(val uint64) {
func (ops *OpStream) Bytec(constIndex uint) {
switch constIndex {
case 0:
ops.pending.WriteByte(0x28) // bytec_0
ops.pending.WriteByte(OpsByName[ops.Version]["bytec_0"].Opcode)
case 1:
ops.pending.WriteByte(0x29) // bytec_1
ops.pending.WriteByte(OpsByName[ops.Version]["bytec_1"].Opcode)
case 2:
ops.pending.WriteByte(0x2a) // bytec_2
ops.pending.WriteByte(OpsByName[ops.Version]["bytec_2"].Opcode)
case 3:
ops.pending.WriteByte(0x2b) // bytec_3
ops.pending.WriteByte(OpsByName[ops.Version]["bytec_3"].Opcode)
default:
if constIndex > 0xff {
ops.error("cannot have more than 256 byte constants")
}
ops.pending.WriteByte(0x27) // bytec
ops.pending.WriteByte(OpsByName[ops.Version]["bytec"].Opcode)
ops.pending.WriteByte(uint8(constIndex))
}
if constIndex >= uint(len(ops.bytec)) {
Expand All @@ -444,6 +452,8 @@ func (ops *OpStream) Bytec(constIndex uint) {
// ByteLiteral writes opcodes and data for loading a []byte literal
// Values are accumulated so that they can be put into a bytecblock
func (ops *OpStream) ByteLiteral(val []byte) {
ops.hasPseudoByte = true

found := false
var constIndex uint
for i, cv := range ops.bytec {
Expand All @@ -454,6 +464,9 @@ func (ops *OpStream) ByteLiteral(val []byte) {
}
}
if !found {
if ops.cntBytecBlock > 0 {
ops.errorf("byte/addr/method used without value in bytecblock")
}
constIndex = uint(len(ops.bytec))
ops.bytec = append(ops.bytec, val)
}
Expand All @@ -468,23 +481,46 @@ func asmInt(ops *OpStream, spec *OpSpec, args []string) error {
if len(args) != 1 {
return ops.error("int needs one argument")
}

// After backBranchEnabledVersion, control flow is confusing, so if there's
// a manual cblock, use push instead of trying to use what's given.
if ops.cntIntcBlock > 0 && ops.Version >= backBranchEnabledVersion {
// We don't understand control-flow, so use pushint
ops.warnf("int %s used with explicit intcblock. must pushint", args[0])
pushint := OpsByName[ops.Version]["pushint"]
return asmPushInt(ops, &pushint, args)
}

// There are no backjumps, but there are multiple cblocks. Maybe one is
// conditional skipped. Too confusing.
if ops.cntIntcBlock > 1 {
pushint, ok := OpsByName[ops.Version]["pushint"]
if ok {
return asmPushInt(ops, &pushint, args)
}
return ops.errorf("int %s used with manual intcblocks. Use intc.", args[0])
}

// In both of the above clauses, we _could_ track whether a particular
// intcblock dominates the current instruction. If so, we could use it.

// check txn type constants
i, ok := txnTypeMap[args[0]]
if ok {
ops.Uint(i)
ops.IntLiteral(i)
return nil
}
// check OnCompetion constants
oc, isOCStr := onCompletionMap[args[0]]
if isOCStr {
ops.Uint(oc)
ops.IntLiteral(oc)
return nil
}
val, err := strconv.ParseUint(args[0], 0, 64)
if err != nil {
return ops.error(err)
}
ops.Uint(val)
ops.IntLiteral(val)
return nil
}

Expand Down Expand Up @@ -696,6 +732,29 @@ func asmByte(ops *OpStream, spec *OpSpec, args []string) error {
if len(args) == 0 {
return ops.errorf("%s operation needs byte literal argument", spec.Name)
}

// After backBranchEnabledVersion, control flow is confusing, so if there's
// a manual cblock, use push instead of trying to use what's given.
if ops.cntBytecBlock > 0 && ops.Version >= backBranchEnabledVersion {
// We don't understand control-flow, so use pushbytes
ops.warnf("byte %s used with explicit bytecblock. must pushbytes", args[0])
pushbytes := OpsByName[ops.Version]["pushbytes"]
return asmPushBytes(ops, &pushbytes, args)
}

// There are no backjumps, but there are multiple cblocks. Maybe one is
// conditional skipped. Too confusing.
if ops.cntBytecBlock > 1 {
pushbytes, ok := OpsByName[ops.Version]["pushbytes"]
if ok {
return asmPushBytes(ops, &pushbytes, args)
}
return ops.errorf("byte %s used with manual bytecblocks. Use bytec.", args[0])
}

// In both of the above clauses, we _could_ track whether a particular
// bytecblock dominates the current instruction. If so, we could use it.

val, consumed, err := parseBinaryArgs(args)
if err != nil {
return ops.error(err)
Expand Down Expand Up @@ -734,21 +793,32 @@ func asmMethod(ops *OpStream, spec *OpSpec, args []string) error {

func asmIntCBlock(ops *OpStream, spec *OpSpec, args []string) error {
ops.pending.WriteByte(spec.Opcode)
ivals := make([]uint64, len(args))
var scratch [binary.MaxVarintLen64]byte
l := binary.PutUvarint(scratch[:], uint64(len(args)))
ops.pending.Write(scratch[:l])
ops.intcRefs = nil
ops.intc = make([]uint64, len(args))
for i, xs := range args {
cu, err := strconv.ParseUint(xs, 0, 64)
if err != nil {
ops.error(err)
}
l = binary.PutUvarint(scratch[:], cu)
ops.pending.Write(scratch[:l])
ops.intc[i] = cu
if !ops.known.deadcode {
ivals[i] = cu
}
}
ops.hasIntcBlock = true
if !ops.known.deadcode {
// If we previously processed an `int`, we thought we could insert our
// own intcblock, but now we see a manual one.
if ops.hasPseudoInt {
ops.error("intcblock following int")
}
ops.intcRefs = nil
ops.intc = ivals
ops.cntIntcBlock++
}

return nil
}

Expand All @@ -763,8 +833,7 @@ func asmByteCBlock(ops *OpStream, spec *OpSpec, args []string) error {
// intcblock, but parseBinaryArgs would have
// to return a useful consumed value even in
// the face of errors. Hard.
ops.error(err)
return nil
return ops.error(err)
}
bvals = append(bvals, val)
rest = rest[consumed:]
Expand All @@ -777,9 +846,16 @@ func asmByteCBlock(ops *OpStream, spec *OpSpec, args []string) error {
ops.pending.Write(scratch[:l])
ops.pending.Write(bv)
}
ops.bytecRefs = nil
ops.bytec = bvals
ops.hasBytecBlock = true
if !ops.known.deadcode {
// If we previously processed a pseudo `byte`, we thought we could
// insert our own bytecblock, but now we see a manual one.
if ops.hasPseudoByte {
ops.error("bytecblock following byte/addr/method")
}
ops.bytecRefs = nil
ops.bytec = bvals
ops.cntBytecBlock++
}
return nil
}

Expand Down Expand Up @@ -1640,7 +1716,7 @@ func (ops *OpStream) assemble(text string) error {
ops.error(err)
}

// backward compatibility: do not allow jumps behind last instruction in v1
// backward compatibility: do not allow jumps past last instruction in v1
if ops.Version <= 1 {
for label, dest := range ops.labels {
if dest == ops.pending.Len() {
Expand Down Expand Up @@ -1800,7 +1876,7 @@ func replaceBytes(s []byte, index, originalLen int, newBytes []byte) []byte {
// This function only optimizes constants introduces by the int pseudo-op, not
// preexisting intcblocks in the code.
func (ops *OpStream) optimizeIntcBlock() error {
if ops.hasIntcBlock {
if ops.cntIntcBlock > 0 {
// don't optimize an existing intcblock, only int pseudo-ops
return nil
}
Expand Down Expand Up @@ -1843,7 +1919,7 @@ func (ops *OpStream) optimizeIntcBlock() error {
// This function only optimizes constants introduces by the byte or addr
// pseudo-ops, not preexisting bytecblocks in the code.
func (ops *OpStream) optimizeBytecBlock() error {
if ops.hasBytecBlock {
if ops.cntBytecBlock > 0 {
// don't optimize an existing bytecblock, only byte/addr pseudo-ops
return nil
}
Expand Down Expand Up @@ -2018,17 +2094,17 @@ func (ops *OpStream) prependCBlocks() []byte {
prebytes := bytes.Buffer{}
vlen := binary.PutUvarint(scratch[:], ops.Version)
prebytes.Write(scratch[:vlen])
if len(ops.intc) > 0 && !ops.hasIntcBlock {
prebytes.WriteByte(0x20) // intcblock
if len(ops.intc) > 0 && ops.cntIntcBlock == 0 {
prebytes.WriteByte(OpsByName[ops.Version]["intcblock"].Opcode)
vlen := binary.PutUvarint(scratch[:], uint64(len(ops.intc)))
prebytes.Write(scratch[:vlen])
for _, iv := range ops.intc {
vlen = binary.PutUvarint(scratch[:], iv)
prebytes.Write(scratch[:vlen])
}
}
if len(ops.bytec) > 0 && !ops.hasBytecBlock {
prebytes.WriteByte(0x26) // bytecblock
if len(ops.bytec) > 0 && ops.cntBytecBlock == 0 {
prebytes.WriteByte(OpsByName[ops.Version]["bytecblock"].Opcode)
vlen := binary.PutUvarint(scratch[:], uint64(len(ops.bytec)))
prebytes.Write(scratch[:vlen])
for _, bv := range ops.bytec {
Expand Down Expand Up @@ -2266,7 +2342,7 @@ func disassemble(dis *disassembleState, spec *OpSpec) (string, error) {
return "", err
}

dis.intc = append(dis.intc, intc...)
dis.intc = intc
for i, iv := range intc {
if i != 0 {
out += " "
Expand All @@ -2279,7 +2355,7 @@ func disassemble(dis *disassembleState, spec *OpSpec) (string, error) {
if err != nil {
return "", err
}
dis.bytec = append(dis.bytec, bytec...)
dis.bytec = bytec
for i, bv := range bytec {
if i != 0 {
out += " "
Expand Down
Loading

0 comments on commit 7f6dbc7

Please sign in to comment.