From dadb47ad1c11adc5824393f9cf37147a563547e0 Mon Sep 17 00:00:00 2001
From: Sam Coe
Date: Mon, 24 Jul 2023 10:26:21 -0700
Subject: [PATCH] Add asciisanitizer package and sanitization to http clients (#125)

---
 go.mod                               |   1 +
 go.sum                               |   2 +
 pkg/api/http_client.go               |  29 +++
 pkg/asciisanitizer/sanitizer.go      | 252 +++++++++++++++++++++++++++
 pkg/asciisanitizer/sanitizer_test.go | 102 +++++++++++
 5 files changed, 386 insertions(+)
 create mode 100644 pkg/asciisanitizer/sanitizer.go
 create mode 100644 pkg/asciisanitizer/sanitizer_test.go

diff --git a/go.mod b/go.mod
index 5a454d4..7d616c6 100644
--- a/go.mod
+++ b/go.mod
@@ -18,6 +18,7 @@ require (
 	github.com/thlib/go-timezone-local v0.0.0-20210907160436-ef149e42d28e
 	golang.org/x/sys v0.8.0
 	golang.org/x/term v0.5.0
+	golang.org/x/text v0.7.0
 	gopkg.in/h2non/gock.v1 v1.1.2
 	gopkg.in/yaml.v3 v3.0.1
 )
diff --git a/go.sum b/go.sum
index d975321..a3044a7 100644
--- a/go.sum
+++ b/go.sum
@@ -99,6 +99,8 @@ golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
diff --git a/pkg/api/http_client.go b/pkg/api/http_client.go
index dc26e2f..0090e69 100644
--- a/pkg/api/http_client.go
+++ b/pkg/api/http_client.go
@@ -2,6 +2,7 @@ package api
 
 import (
 	"fmt"
+	"io"
 	"net"
 	"net/http"
 	"os"
@@ -11,9 +12,11 @@ import (
 	"strings"
 	"time"
 
+	"github.com/cli/go-gh/v2/pkg/asciisanitizer"
 	"github.com/cli/go-gh/v2/pkg/term"
 	"github.com/henvic/httpretty"
 	"github.com/thlib/go-timezone-local/tzlocal"
+	"golang.org/x/text/transform"
 )
 
 const (
@@ -62,6 +65,8 @@ func NewHTTPClient(opts ClientOptions) (*http.Client, error) {
 		transport = opts.Transport
 	}
 
+	transport = newSanitizerRoundTripper(transport)
+
 	if opts.CacheDir == "" {
 		opts.CacheDir = filepath.Join(os.TempDir(), "gh-cli-cache")
 	}
@@ -221,6 +226,30 @@ func newUnixDomainSocketRoundTripper(socketPath string) http.RoundTripper {
 	}
 }
 
+type sanitizerRoundTripper struct {
+	rt http.RoundTripper
+}
+
+func newSanitizerRoundTripper(rt http.RoundTripper) http.RoundTripper {
+	return sanitizerRoundTripper{rt: rt}
+}
+
+func (srt sanitizerRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
+	resp, err := srt.rt.RoundTrip(req)
+	if err != nil || !jsonTypeRE.MatchString(resp.Header.Get(contentType)) {
+		return resp, err
+	}
+	sanitizedReadCloser := struct {
+		io.Reader
+		io.Closer
+	}{
+		Reader: transform.NewReader(resp.Body, &asciisanitizer.Sanitizer{JSON: true}),
+		Closer: resp.Body,
+	}
+	resp.Body = sanitizedReadCloser
+	return resp, err
+}
+
 func currentTimeZone() string {
 	tz, err := tzlocal.RuntimeTZ()
 	if err != nil {
diff --git a/pkg/asciisanitizer/sanitizer.go b/pkg/asciisanitizer/sanitizer.go
new file mode 100644
index 0000000..ee3b07b
--- /dev/null
+++ b/pkg/asciisanitizer/sanitizer.go
@@ -0,0 +1,252 @@
+// Package asciisanitizer implements an ASCII control character sanitizer for UTF-8 strings.
+// It will transform ASCII control codes into equivalent inert characters that are safe for display in the terminal.
+// Without sanitization these ASCII control characters will be interpreted by the terminal.
+// This behaviour can be used maliciously as an attack vector, especially the ASCII control characters \x1B and \x9B.
+package asciisanitizer
+
+import (
+	"bytes"
+	"errors"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+
+	"golang.org/x/text/transform"
+)
+
+// Sanitizer implements transform.Transformer interface.
+type Sanitizer struct {
+	// JSON tells the Sanitizer to replace strings that will be transformed
+	// into control characters when the string is marshaled to JSON. Set to
+	// true if the string being sanitized represents JSON formatted data.
+	JSON      bool
+	addEscape bool
+}
+
+// Transform uses a sliding window algorithm to detect C0 and C1 control characters as they are read and replaces
+// them with equivalent inert characters. Bytes that are not part of a control character are not modified.
+func (t *Sanitizer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	transfer := func(write, read []byte) error {
+		readLength := len(read)
+		writeLength := len(write)
+		if writeLength > len(dst) {
+			return transform.ErrShortDst
+		}
+		copy(dst, write)
+		nDst += writeLength
+		dst = dst[writeLength:]
+		nSrc += readLength
+		src = src[readLength:]
+		return nil
+	}
+
+	for len(src) > 0 {
+		// When sanitizing JSON strings make sure that we have 6 bytes if available.
+		if t.JSON && len(src) < 6 && !atEOF {
+			err = transform.ErrShortSrc
+			return
+		}
+		r, size := utf8.DecodeRune(src)
+		if r == utf8.RuneError {
+			if !atEOF {
+				err = transform.ErrShortSrc
+				return
+			} else {
+				err = errors.New("invalid UTF-8 string")
+				return
+			}
+		}
+		// Replace C0 and C1 control characters.
+		if unicode.IsControl(r) {
+			if repl, found := mapControlToCaret(r); found {
+				err = transfer(repl, src[:size])
+				if err != nil {
+					return
+				}
+				continue
+			}
+		}
+		// Replace JSON C0 and C1 control characters.
+		if t.JSON && len(src) >= 6 {
+			if repl, found := mapJSONControlToCaret(src[:6]); found {
+				if t.addEscape {
+					// Add an escape character when necessary to prevent creating
+					// invalid JSON with our replacements.
+					repl = append([]byte{'\\'}, repl...)
+				}
+				err = transfer(repl, src[:6])
+				if err != nil {
+					return
+				}
+				continue
+			}
+		}
+		err = transfer(src[:size], src[:size])
+		if err != nil {
+			return
+		}
+		if t.JSON {
+			if r == '\\' {
+				t.addEscape = !t.addEscape
+			} else {
+				t.addEscape = false
+			}
+		}
+	}
+	return
+}
+
+// Reset resets the state and allows the Sanitizer to be reused.
+func (t *Sanitizer) Reset() {
+	t.addEscape = false
+}
+
+// mapControlToCaret maps C0 and C1 control characters to their caret notation.
+func mapControlToCaret(r rune) ([]byte, bool) {
+	// \t (09), \n (10), \v (11), \r (13) are safe C0 characters and are not sanitized.
+	m := map[rune]string{
+		0:   `^@`,
+		1:   `^A`,
+		2:   `^B`,
+		3:   `^C`,
+		4:   `^D`,
+		5:   `^E`,
+		6:   `^F`,
+		7:   `^G`,
+		8:   `^H`,
+		12:  `^L`,
+		14:  `^N`,
+		15:  `^O`,
+		16:  `^P`,
+		17:  `^Q`,
+		18:  `^R`,
+		19:  `^S`,
+		20:  `^T`,
+		21:  `^U`,
+		22:  `^V`,
+		23:  `^W`,
+		24:  `^X`,
+		25:  `^Y`,
+		26:  `^Z`,
+		27:  `^[`,
+		28:  `^\\`,
+		29:  `^]`,
+		30:  `^^`,
+		31:  `^_`,
+		128: `^@`,
+		129: `^A`,
+		130: `^B`,
+		131: `^C`,
+		132: `^D`,
+		133: `^E`,
+		134: `^F`,
+		135: `^G`,
+		136: `^H`,
+		137: `^I`,
+		138: `^J`,
+		139: `^K`,
+		140: `^L`,
+		141: `^M`,
+		142: `^N`,
+		143: `^O`,
+		144: `^P`,
+		145: `^Q`,
+		146: `^R`,
+		147: `^S`,
+		148: `^T`,
+		149: `^U`,
+		150: `^V`,
+		151: `^W`,
+		152: `^X`,
+		153: `^Y`,
+		154: `^Z`,
+		155: `^[`,
+		156: `^\\`,
+		157: `^]`,
+		158: `^^`,
+		159: `^_`,
+	}
+	if c, ok := m[r]; ok {
+		return []byte(c), true
+	}
+	return nil, false
+}
+
+// mapJSONControlToCaret maps JSON C0 and C1 control characters to their caret notation.
+// JSON control characters are six byte strings, representing a unicode code point,
+// ranging from \u0000 to \u001F and \u0080 to \u009F.
+func mapJSONControlToCaret(b []byte) ([]byte, bool) {
+	if len(b) != 6 {
+		return nil, false
+	}
+	if !bytes.HasPrefix(b, []byte(`\u00`)) {
+		return nil, false
+	}
+	// \t (\u0009), \n (\u000a), \v (\u000b), \r (\u000d) are safe C0 characters and are not sanitized.
+	m := map[string]string{
+		`\u0000`: `^@`,
+		`\u0001`: `^A`,
+		`\u0002`: `^B`,
+		`\u0003`: `^C`,
+		`\u0004`: `^D`,
+		`\u0005`: `^E`,
+		`\u0006`: `^F`,
+		`\u0007`: `^G`,
+		`\u0008`: `^H`,
+		`\u000c`: `^L`,
+		`\u000e`: `^N`,
+		`\u000f`: `^O`,
+		`\u0010`: `^P`,
+		`\u0011`: `^Q`,
+		`\u0012`: `^R`,
+		`\u0013`: `^S`,
+		`\u0014`: `^T`,
+		`\u0015`: `^U`,
+		`\u0016`: `^V`,
+		`\u0017`: `^W`,
+		`\u0018`: `^X`,
+		`\u0019`: `^Y`,
+		`\u001a`: `^Z`,
+		`\u001b`: `^[`,
+		`\u001c`: `^\\`,
+		`\u001d`: `^]`,
+		`\u001e`: `^^`,
+		`\u001f`: `^_`,
+		`\u0080`: `^@`,
+		`\u0081`: `^A`,
+		`\u0082`: `^B`,
+		`\u0083`: `^C`,
+		`\u0084`: `^D`,
+		`\u0085`: `^E`,
+		`\u0086`: `^F`,
+		`\u0087`: `^G`,
+		`\u0088`: `^H`,
+		`\u0089`: `^I`,
+		`\u008a`: `^J`,
+		`\u008b`: `^K`,
+		`\u008c`: `^L`,
+		`\u008d`: `^M`,
+		`\u008e`: `^N`,
+		`\u008f`: `^O`,
+		`\u0090`: `^P`,
+		`\u0091`: `^Q`,
+		`\u0092`: `^R`,
+		`\u0093`: `^S`,
+		`\u0094`: `^T`,
+		`\u0095`: `^U`,
+		`\u0096`: `^V`,
+		`\u0097`: `^W`,
+		`\u0098`: `^X`,
+		`\u0099`: `^Y`,
+		`\u009a`: `^Z`,
+		`\u009b`: `^[`,
+		`\u009c`: `^\\`,
+		`\u009d`: `^]`,
+		`\u009e`: `^^`,
+		`\u009f`: `^_`,
+	}
+	if c, ok := m[strings.ToLower(string(b))]; ok {
+		return []byte(c), true
+	}
+	return nil, false
+}
diff --git a/pkg/asciisanitizer/sanitizer_test.go b/pkg/asciisanitizer/sanitizer_test.go
new file mode 100644
index 0000000..7240288
--- /dev/null
+++ b/pkg/asciisanitizer/sanitizer_test.go
@@ -0,0 +1,102 @@
+package asciisanitizer
+
+import (
+	"bytes"
+	"testing"
+	"testing/iotest"
+
+	"github.com/stretchr/testify/require"
+	"golang.org/x/text/transform"
+)
+
+func TestSanitizerTransform(t *testing.T) {
+	tests := []struct {
+		name  string
+		json  bool
+		input string
+		want  string
+	}{
+		{
+			name:  "No control characters",
+			input: "The quick brown fox jumped over the lazy dog",
+			want:  "The quick brown fox jumped over the lazy dog",
+		},
+		{
+			name:  "JSON sanitization maintains valid JSON",
+			json:  true,
+			input: `\u001B \\u001B \\\u001B \\\\u001B`,
+			want:  `^[ \\^[ \\^[ \\\\^[`,
+		},
+		{
+			name:  "JSON C0 control character",
+			json:  true,
+			input: `0\u0000`,
+			want:  "0^@",
+		},
+		{
+			name: "JSON C0 control characters",
+			json: true,
+			input: `0\u0000 1\u0001 2\u0002 3\u0003 4\u0004 5\u0005 6\u0006 7\u0007 8\u0008 9\u0009 ` +
+				`A\u000a B\u000b C\u000c D\u000d E\u000e F\u000f ` +
+				`10\u0010 11\u0011 12\u0012 13\u0013 14\u0014 15\u0015 16\u0016 17\u0017 18\u0018 19\u0019 ` +
+				`1A\u001a 1B\u001b 1C\u001c 1D\u001d 1E\u001e 1F\u001f`,
+			want: `0^@ 1^A 2^B 3^C 4^D 5^E 6^F 7^G 8^H 9\u0009 ` +
+				`A\u000a B\u000b C^L D\u000d E^N F^O ` +
+				`10^P 11^Q 12^R 13^S 14^T 15^U 16^V 17^W 18^X 19^Y ` +
+				`1A^Z 1B^[ 1C^\\ 1D^] 1E^^ 1F^_`,
+		},
+		{
+			name: "JSON C1 control characters",
+			json: true,
+			input: `80\u0080 81\u0081 82\u0082 83\u0083 84\u0084 85\u0085 86\u0086 87\u0087 88\u0088 89\u0089 ` +
+				`8A\u008a 8B\u008b 8C\u008c 8D\u008d 8E\u008e 8F\u008f ` +
+				`90\u0090 91\u0091 92\u0092 93\u0093 94\u0094 95\u0095 96\u0096 97\u0097 98\u0098 99\u0099 ` +
+				`9A\u009a 9B\u009b 9C\u009c 9D\u009d 9E\u009e 9F\u009f`,
+			want: `80^@ 81^A 82^B 83^C 84^D 85^E 86^F 87^G 88^H 89^I ` +
+				`8A^J 8B^K 8C^L 8D^M 8E^N 8F^O ` +
+				`90^P 91^Q 92^R 93^S 94^T 95^U 96^V 97^W 98^X 99^Y ` +
+				`9A^Z 9B^[ 9C^\\ 9D^] 9E^^ 9F^_`,
+		},
+		{
+			name:  "C0 control character",
+			input: "0\x00",
+			want:  "0^@",
+		},
+		{
+			name: "C0 control characters",
+			input: "0\x00 1\x01 2\x02 3\x03 4\x04 5\x05 6\x06 7\x07 8\x08 9\x09 " +
+				"A\x0A B\x0B C\x0C D\x0D E\x0E F\x0F " +
+				"10\x10 11\x11 12\x12 13\x13 14\x14 15\x15 16\x16 17\x17 18\x18 19\x19 " +
+				"1A\x1A 1B\x1B 1C\x1C 1D\x1D 1E\x1E 1F\x1F",
+			want: "0^@ 1^A 2^B 3^C 4^D 5^E 6^F 7^G 8^H 9\t " +
+				"A\n B\v C^L D\r E^N F^O " +
+				"10^P 11^Q 12^R 13^S 14^T 15^U 16^V 17^W 18^X 19^Y " +
+				"1A^Z 1B^[ 1C^\\\\ 1D^] 1E^^ 1F^_",
+		},
+		{
+			name:  "C1 control character",
+			input: "80\xC2\x80",
+			want:  "80^@",
+		},
+		{
+			name: "C1 control characters",
+			input: "80\xC2\x80 81\xC2\x81 82\xC2\x82 83\xC2\x83 84\xC2\x84 85\xC2\x85 86\xC2\x86 87\xC2\x87 88\xC2\x88 89\xC2\x89 " +
+				"8A\xC2\x8A 8B\xC2\x8B 8C\xC2\x8C 8D\xC2\x8D 8E\xC2\x8E 8F\xC2\x8F " +
+				"90\xC2\x90 91\xC2\x91 92\xC2\x92 93\xC2\x93 94\xC2\x94 95\xC2\x95 96\xC2\x96 97\xC2\x97 98\xC2\x98 99\xC2\x99 " +
+				"9A\xC2\x9A 9B\xC2\x9B 9C\xC2\x9C 9D\xC2\x9D 9E\xC2\x9E 9F\xC2\x9F",
+			want: "80^@ 81^A 82^B 83^C 84^D 85^E 86^F 87^G 88^H 89^I " +
+				"8A^J 8B^K 8C^L 8D^M 8E^N 8F^O " +
+				"90^P 91^Q 92^R 93^S 94^T 95^U 96^V 97^W 98^X 99^Y " +
+				"9A^Z 9B^[ 9C^\\\\ 9D^] 9E^^ 9F^_",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			sanitizer := &Sanitizer{JSON: tt.json}
+			reader := bytes.NewReader([]byte(tt.input))
+			transformReader := transform.NewReader(reader, sanitizer)
+			err := iotest.TestReader(transformReader, []byte(tt.want))
+			require.NoError(t, err)
+		})
+	}
+}
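
Usage note (illustrative, not part of the diff): the sketch below drives the new asciisanitizer.Sanitizer through golang.org/x/text/transform directly, the same way the tests and the sanitizing round tripper wire it up. Only the Sanitizer type and its JSON field come from the patch; the sample inputs are assumptions chosen to show both modes.

package main

import (
	"fmt"

	"github.com/cli/go-gh/v2/pkg/asciisanitizer"
	"golang.org/x/text/transform"
)

func main() {
	// Raw terminal output: the ESC byte (0x1B) would normally start an ANSI
	// escape sequence; the sanitizer rewrites it to the inert caret form "^[".
	plain, _, err := transform.String(&asciisanitizer.Sanitizer{}, "hello \x1b[31mred\x1b[0m")
	if err != nil {
		fmt.Println("sanitize:", err)
		return
	}
	fmt.Println(plain) // hello ^[[31mred^[[0m

	// JSON mode: six-byte \u00XX escapes that would decode to control
	// characters are rewritten as well, which is what the HTTP clients now do
	// to JSON response bodies via the new round tripper.
	body, _, err := transform.String(&asciisanitizer.Sanitizer{JSON: true}, `{"body":"\u001b[31mred"}`)
	if err != nil {
		fmt.Println("sanitize:", err)
		return
	}
	fmt.Println(body) // {"body":"^[[31mred"}
}

Because Sanitizer is a transform.Transformer, it also composes with transform.NewReader, which is how the round tripper added above wraps response bodies without buffering them in memory.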