From df37c4b987640849ad2b4033b4f94d7398eceba6 Mon Sep 17 00:00:00 2001 From: Didier Spezia Date: Wed, 26 Aug 2015 14:27:19 +0000 Subject: [PATCH] cmd/asm: fix several panics with erroneous input The parser tries to read as much information as possible, issuing some errors when needed. Errors generally do not stop the parsing. With some pathological input, it may result in various panics when the error message itself is built, or when the next operand is parsed. It happens while parsing pseudo-instructions. For instance, the following lines all generate a panic: TEXT TEXT% TEXT 1,1 TEXT $"toto", 0, $1 FUNCDATA DATA 0 DATA(0),1 FUNCDATA(SB GLOBL 0, 1 PCDATA 1 Added corresponding tests. Introduced a writer in the parser to capture error messages for testing purpose. It defaults to os.Stderr. Added an explicit check when symbol names cannot be displayed. Interrupted parsing early when the number of operands is wrong for pseudo-instructions. Note that the last point is a change of behavior, because some operands will not get parsed anymore in case of early error. IMO, it is acceptable, because only the first error of the line is considered anyway. If it is not acceptable, it can probably be improved at the price of a more verbose CL. Fixes #11765 Fixes #11760 Fixes #11759 Change-Id: I9602a848132e358a1bccad794d7555e0823970dd Reviewed-on: https://go-review.googlesource.com/13925 Reviewed-by: Rob Pike --- src/cmd/asm/internal/asm/asm.go | 22 ++++++-- src/cmd/asm/internal/asm/parse.go | 23 ++++---- src/cmd/asm/internal/asm/pseudo_test.go | 71 +++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 13 deletions(-) create mode 100644 src/cmd/asm/internal/asm/pseudo_test.go diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 7ac8bf49de8295..0d2c12f00b1ecf 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -66,10 +66,10 @@ func (p *Parser) append(prog *obj.Prog, cond string, doLabel bool) { // validateSymbol checks that addr represents a valid name for a pseudo-op. func (p *Parser) validateSymbol(pseudo string, addr *obj.Addr, offsetOk bool) { if addr.Name != obj.NAME_EXTERN && addr.Name != obj.NAME_STATIC || addr.Scale != 0 || addr.Reg != 0 { - p.errorf("%s symbol %q must be a symbol(SB)", pseudo, addr.Sym.Name) + p.errorf("%s symbol %q must be a symbol(SB)", pseudo, symbolName(addr)) } if !offsetOk && addr.Offset != 0 { - p.errorf("%s symbol %q must not be offset from SB", pseudo, addr.Sym.Name) + p.errorf("%s symbol %q must not be offset from SB", pseudo, symbolName(addr)) } } @@ -91,6 +91,7 @@ func (p *Parser) validateImmediate(pseudo string, addr *obj.Addr) { func (p *Parser) asmText(word string, operands [][]lex.Token) { if len(operands) != 2 && len(operands) != 3 { p.errorf("expect two or three operands for TEXT") + return } // Labels are function scoped. Patch existing labels and @@ -102,7 +103,7 @@ func (p *Parser) asmText(word string, operands [][]lex.Token) { // That means symbol plus indirect on SB and no offset. nameAddr := p.address(operands[0]) p.validateSymbol("TEXT", &nameAddr, false) - name := nameAddr.Sym.Name + name := symbolName(&nameAddr) next := 1 // Next operand is the optional text flag, a literal integer. @@ -171,6 +172,7 @@ func (p *Parser) asmText(word string, operands [][]lex.Token) { func (p *Parser) asmData(word string, operands [][]lex.Token) { if len(operands) != 2 { p.errorf("expect two operands for DATA") + return } // Operand 0 has the general form foo<>+0x04(SB)/4. @@ -178,12 +180,13 @@ func (p *Parser) asmData(word string, operands [][]lex.Token) { n := len(op) if n < 3 || op[n-2].ScanToken != '/' || op[n-1].ScanToken != scanner.Int { p.errorf("expect /size for DATA argument") + return } scale := p.parseScale(op[n-1].String()) op = op[:n-2] nameAddr := p.address(op) p.validateSymbol("DATA", &nameAddr, true) - name := nameAddr.Sym.Name + name := symbolName(&nameAddr) // Operand 1 is an immediate constant or address. valueAddr := p.address(operands[1]) @@ -220,6 +223,7 @@ func (p *Parser) asmData(word string, operands [][]lex.Token) { func (p *Parser) asmGlobl(word string, operands [][]lex.Token) { if len(operands) != 2 && len(operands) != 3 { p.errorf("expect two or three operands for GLOBL") + return } // Operand 0 has the general form foo<>+0x04(SB). @@ -257,6 +261,7 @@ func (p *Parser) asmGlobl(word string, operands [][]lex.Token) { func (p *Parser) asmPCData(word string, operands [][]lex.Token) { if len(operands) != 2 { p.errorf("expect two operands for PCDATA") + return } // Operand 0 must be an immediate constant. @@ -283,6 +288,7 @@ func (p *Parser) asmPCData(word string, operands [][]lex.Token) { func (p *Parser) asmFuncData(word string, operands [][]lex.Token) { if len(operands) != 2 { p.errorf("expect two operands for FUNCDATA") + return } // Operand 0 must be an immediate constant. @@ -622,6 +628,14 @@ func newAddr(x obj.Addr) *obj.Addr { return p } +// symbolName returns the symbol name, or an error string if none if available. +func symbolName(addr *obj.Addr) string { + if addr.Sym != nil { + return addr.Sym.Name + } + return "" +} + var emptyProg obj.Prog // getConstantPseudo checks that addr represents a plain constant and returns its value. diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go index 6cf50df5bb2f42..a04f68f8c1b47a 100644 --- a/src/cmd/asm/internal/asm/parse.go +++ b/src/cmd/asm/internal/asm/parse.go @@ -8,6 +8,7 @@ package asm import ( "fmt" + "io" "log" "os" "strconv" @@ -37,6 +38,7 @@ type Parser struct { firstProg *obj.Prog lastProg *obj.Prog dataAddr map[string]int64 // Most recent address for DATA for this symbol. + errorWriter io.Writer } type Patch struct { @@ -46,11 +48,12 @@ type Patch struct { func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser { return &Parser{ - ctxt: ctxt, - arch: ar, - lex: lexer, - labels: make(map[string]*obj.Prog), - dataAddr: make(map[string]int64), + ctxt: ctxt, + arch: ar, + lex: lexer, + labels: make(map[string]*obj.Prog), + dataAddr: make(map[string]int64), + errorWriter: os.Stderr, } } @@ -67,10 +70,12 @@ func (p *Parser) errorf(format string, args ...interface{}) { return } p.errorLine = p.histLineNum - // Put file and line information on head of message. - format = "%s:%d: " + format + "\n" - args = append([]interface{}{p.lex.File(), p.lineNum}, args...) - fmt.Fprintf(os.Stderr, format, args...) + if p.lex != nil { + // Put file and line information on head of message. + format = "%s:%d: " + format + "\n" + args = append([]interface{}{p.lex.File(), p.lineNum}, args...) + } + fmt.Fprintf(p.errorWriter, format, args...) p.errorCount++ if p.errorCount > 10 { log.Fatal("too many errors") diff --git a/src/cmd/asm/internal/asm/pseudo_test.go b/src/cmd/asm/internal/asm/pseudo_test.go new file mode 100644 index 00000000000000..df1adc525a20b3 --- /dev/null +++ b/src/cmd/asm/internal/asm/pseudo_test.go @@ -0,0 +1,71 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asm + +import ( + "bytes" + "strings" + "testing" + + "cmd/asm/internal/arch" + "cmd/asm/internal/lex" +) + +func tokenize(s string) [][]lex.Token { + res := [][]lex.Token{} + if len(s) == 0 { + return res + } + for _, o := range strings.Split(s, ",") { + res = append(res, lex.Tokenize(o)) + } + return res +} + +func TestErroneous(t *testing.T) { + + tests := []struct { + pseudo string + operands string + expected string + }{ + {"TEXT", "", "expect two or three operands for TEXT"}, + {"TEXT", "%", "expect two or three operands for TEXT"}, + {"TEXT", "1, 1", "TEXT symbol \"\" must be a symbol(SB)"}, + {"TEXT", "$\"foo\", 0, $1", "TEXT symbol \"\" must be a symbol(SB)"}, + {"FUNCDATA", "", "expect two operands for FUNCDATA"}, + {"FUNCDATA", "(SB ", "expect two operands for FUNCDATA"}, + {"DATA", "", "expect two operands for DATA"}, + {"DATA", "0", "expect two operands for DATA"}, + {"DATA", "(0), 1", "expect /size for DATA argument"}, + {"GLOBL", "", "expect two or three operands for GLOBL"}, + {"GLOBL", "0,1", "GLOBL symbol \"\" must be a symbol(SB)"}, + {"PCDATA", "", "expect two operands for PCDATA"}, + {"PCDATA", "1", "expect two operands for PCDATA"}, + } + + // Note these errors should be independent of the architecture. + // Just run the test with amd64. + parser := newParser("amd64") + var buf bytes.Buffer + parser.errorWriter = &buf + + for _, test := range tests { + parser.errorCount = 0 + parser.lineNum++ + parser.histLineNum++ + op, ok := arch.Pseudos[test.pseudo] + if !ok { + t.Fatalf("Wrong pseudo-instruction: %s", test.pseudo) + } + parser.pseudo(op, test.pseudo, tokenize(test.operands)) + errorLine := buf.String() + if test.expected != errorLine { + t.Errorf("Unexpected error %q; expected %q", errorLine, test.expected) + } + buf.Reset() + } + +}