Add asciisanitizer package and sanitization to http clients (#125)
Showing 5 changed files with 386 additions and 0 deletions.
@@ -0,0 +1,252 @@
// Package asciisanitizer implements an ASCII control character sanitizer for UTF-8 strings.
// It transforms ASCII control codes into equivalent inert characters that are safe for display in the terminal.
// Without sanitization, these ASCII control characters will be interpreted by the terminal.
// This behaviour can be used maliciously as an attack vector, especially the ASCII control characters \x1B and \x9B.
package asciisanitizer

import (
    "bytes"
    "errors"
    "strings"
    "unicode"
    "unicode/utf8"

    "golang.org/x/text/transform"
)

// Sanitizer implements the transform.Transformer interface.
type Sanitizer struct {
    // JSON tells the Sanitizer to replace strings that will be transformed
    // into control characters when the string is unmarshaled from JSON. Set to
    // true if the string being sanitized represents JSON formatted data.
    JSON      bool
    addEscape bool
}

// Transform uses a sliding window algorithm to detect C0 and C1 control characters as they are read and replaces
// them with equivalent inert characters. Bytes that are not part of a control character are not modified.
func (t *Sanitizer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    transfer := func(write, read []byte) error {
        readLength := len(read)
        writeLength := len(write)
        if writeLength > len(dst) {
            return transform.ErrShortDst
        }
        copy(dst, write)
        nDst += writeLength
        dst = dst[writeLength:]
        nSrc += readLength
        src = src[readLength:]
        return nil
    }

    for len(src) > 0 {
        // When sanitizing JSON strings, make sure that we have 6 bytes if available.
        if t.JSON && len(src) < 6 && !atEOF {
            err = transform.ErrShortSrc
            return
        }
        r, size := utf8.DecodeRune(src)
        if r == utf8.RuneError {
            if !atEOF {
                err = transform.ErrShortSrc
                return
            } else {
                err = errors.New("invalid UTF-8 string")
                return
            }
        }
        // Replace C0 and C1 control characters.
        if unicode.IsControl(r) {
            if repl, found := mapControlToCaret(r); found {
                err = transfer(repl, src[:size])
                if err != nil {
                    return
                }
                continue
            }
        }
        // Replace JSON C0 and C1 control characters.
        if t.JSON && len(src) >= 6 {
            if repl, found := mapJSONControlToCaret(src[:6]); found {
                if t.addEscape {
                    // Add an escape character when necessary to prevent creating
                    // invalid JSON with our replacements.
                    repl = append([]byte{'\\'}, repl...)
                }
                err = transfer(repl, src[:6])
                if err != nil {
                    return
                }
                continue
            }
        }
        err = transfer(src[:size], src[:size])
        if err != nil {
            return
        }
        if t.JSON {
            if r == '\\' {
                t.addEscape = !t.addEscape
            } else {
                t.addEscape = false
            }
        }
    }
    return
}

// Reset resets the state and allows the Sanitizer to be reused.
func (t *Sanitizer) Reset() {
    t.addEscape = false
}

// mapControlToCaret maps C0 and C1 control characters to their caret notation.
func mapControlToCaret(r rune) ([]byte, bool) {
    // \t (09), \n (10), \v (11), \r (13) are safe C0 characters and are not sanitized.
    m := map[rune]string{
        0:   `^@`,
        1:   `^A`,
        2:   `^B`,
        3:   `^C`,
        4:   `^D`,
        5:   `^E`,
        6:   `^F`,
        7:   `^G`,
        8:   `^H`,
        12:  `^L`,
        14:  `^N`,
        15:  `^O`,
        16:  `^P`,
        17:  `^Q`,
        18:  `^R`,
        19:  `^S`,
        20:  `^T`,
        21:  `^U`,
        22:  `^V`,
        23:  `^W`,
        24:  `^X`,
        25:  `^Y`,
        26:  `^Z`,
        27:  `^[`,
        28:  `^\\`,
        29:  `^]`,
        30:  `^^`,
        31:  `^_`,
        128: `^@`,
        129: `^A`,
        130: `^B`,
        131: `^C`,
        132: `^D`,
        133: `^E`,
        134: `^F`,
        135: `^G`,
        136: `^H`,
        137: `^I`,
        138: `^J`,
        139: `^K`,
        140: `^L`,
        141: `^M`,
        142: `^N`,
        143: `^O`,
        144: `^P`,
        145: `^Q`,
        146: `^R`,
        147: `^S`,
        148: `^T`,
        149: `^U`,
        150: `^V`,
        151: `^W`,
        152: `^X`,
        153: `^Y`,
        154: `^Z`,
        155: `^[`,
        156: `^\\`,
        157: `^]`,
        158: `^^`,
        159: `^_`,
    }
    if c, ok := m[r]; ok {
        return []byte(c), true
    }
    return nil, false
}

// mapJSONControlToCaret maps JSON C0 and C1 control characters to their caret notation.
// JSON control characters are six-byte strings, representing a Unicode code point,
// ranging from \u0000 to \u001F and \u0080 to \u009F.
func mapJSONControlToCaret(b []byte) ([]byte, bool) {
    if len(b) != 6 {
        return nil, false
    }
    if !bytes.HasPrefix(b, []byte(`\u00`)) {
        return nil, false
    }
    // \t (\u0009), \n (\u000a), \v (\u000b), \r (\u000d) are safe C0 characters and are not sanitized.
    m := map[string]string{
        `\u0000`: `^@`,
        `\u0001`: `^A`,
        `\u0002`: `^B`,
        `\u0003`: `^C`,
        `\u0004`: `^D`,
        `\u0005`: `^E`,
        `\u0006`: `^F`,
        `\u0007`: `^G`,
        `\u0008`: `^H`,
        `\u000c`: `^L`,
        `\u000e`: `^N`,
        `\u000f`: `^O`,
        `\u0010`: `^P`,
        `\u0011`: `^Q`,
        `\u0012`: `^R`,
        `\u0013`: `^S`,
        `\u0014`: `^T`,
        `\u0015`: `^U`,
        `\u0016`: `^V`,
        `\u0017`: `^W`,
        `\u0018`: `^X`,
        `\u0019`: `^Y`,
        `\u001a`: `^Z`,
        `\u001b`: `^[`,
        `\u001c`: `^\\`,
        `\u001d`: `^]`,
        `\u001e`: `^^`,
        `\u001f`: `^_`,
        `\u0080`: `^@`,
        `\u0081`: `^A`,
        `\u0082`: `^B`,
        `\u0083`: `^C`,
        `\u0084`: `^D`,
        `\u0085`: `^E`,
        `\u0086`: `^F`,
        `\u0087`: `^G`,
        `\u0088`: `^H`,
        `\u0089`: `^I`,
        `\u008a`: `^J`,
        `\u008b`: `^K`,
        `\u008c`: `^L`,
        `\u008d`: `^M`,
        `\u008e`: `^N`,
        `\u008f`: `^O`,
        `\u0090`: `^P`,
        `\u0091`: `^Q`,
        `\u0092`: `^R`,
        `\u0093`: `^S`,
        `\u0094`: `^T`,
        `\u0095`: `^U`,
        `\u0096`: `^V`,
        `\u0097`: `^W`,
        `\u0098`: `^X`,
        `\u0099`: `^Y`,
        `\u009a`: `^Z`,
        `\u009b`: `^[`,
        `\u009c`: `^\\`,
        `\u009d`: `^]`,
        `\u009e`: `^^`,
        `\u009f`: `^_`,
    }
    if c, ok := m[strings.ToLower(string(b))]; ok {
        return []byte(c), true
    }
    return nil, false
}
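
For reference, the sketch below shows how a caller might apply this transformer using the standard golang.org/x/text/transform helpers (transform.String and transform.NewReader). It is not part of this commit: the import path, sample input, and client wiring are illustrative assumptions only.

// Illustrative usage sketch, not part of this commit. The import path below is
// hypothetical; substitute the module path where asciisanitizer actually lives.
package main

import (
    "fmt"

    "golang.org/x/text/transform"

    "example.com/your/module/asciisanitizer" // hypothetical import path
)

func main() {
    // Sanitize a plain string: the ESC byte (\x1B) is replaced with the inert "^[".
    s := &asciisanitizer.Sanitizer{}
    out, _, err := transform.String(s, "danger\x1b[2Jzone")
    if err != nil {
        panic(err)
    }
    fmt.Println(out) // danger^[[2Jzone

    // For JSON response bodies, set JSON to true so escape sequences such as
    // \u001b are neutralized before the body is decoded. An io.Reader (for
    // example an *http.Response body) can be wrapped like this:
    //   sanitized := transform.NewReader(resp.Body, &asciisanitizer.Sanitizer{JSON: true})
}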