From f0489f3cce8a7a3e4f087774dc6afaffdc7a30b5 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 11 Mar 2024 14:19:49 +0100 Subject: [PATCH 01/23] Add telemetry for SDK usage from DBR --- client/client_test.go | 58 ++++++++++++++++++++++++++++++++++++ config/api_client.go | 12 ++++++++ useragent/patterns.go | 8 +++++ useragent/patterns_test.go | 29 ++++++++++++++++++ useragent/user_agent.go | 22 +++++++++++--- useragent/user_agent_test.go | 12 ++++++++ 6 files changed, 137 insertions(+), 4 deletions(-) diff --git a/client/client_test.go b/client/client_test.go index de04655ff..ba79f24a9 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -320,6 +320,64 @@ GET /a } } +func TestUserAgentForDBR(t *testing.T) { + goVersion := strings.TrimPrefix(runtime.Version(), "go") + cicdHeader := "" + if useragent.CiCdProvider() != "" { + cicdHeader = fmt.Sprintf(" cicd/%s", useragent.CiCdProvider()) + } + t.Setenv("DATABRICKS_RUNTIME_VERSION", "15.1") + expectedUserAgent := `unknown/0.0.0 databricks-sdk-go/` + version.Version + ` go/` + goVersion + ` os/` + runtime.GOOS + ` auth/pat` + cicdHeader + ` runtime/15.1` + + var userAgent string + c, err := New(&config.Config{ + Host: "some", + Token: "token", + ConfigFile: "/dev/null", + HTTPTransport: hc(func(r *http.Request) (*http.Response, error) { + userAgent = r.UserAgent() + return &http.Response{ + StatusCode: 200, + Body: io.NopCloser(strings.NewReader(`{}`)), + Request: r, + }, nil + }), + }) + require.NoError(t, err) + err = c.Do(context.Background(), "GET", "/a", nil, nil, nil) + assert.Equal(t, userAgent, expectedUserAgent) + require.NoError(t, err) +} + +func TestUserAgentForServerless(t *testing.T) { + goVersion := strings.TrimPrefix(runtime.Version(), "go") + cicdHeader := "" + if useragent.CiCdProvider() != "" { + cicdHeader = fmt.Sprintf(" cicd/%s", useragent.CiCdProvider()) + } + t.Setenv("DATABRICKS_RUNTIME_VERSION", "client.1") + expectedUserAgent := `unknown/0.0.0 databricks-sdk-go/` + version.Version + ` go/` + goVersion + ` os/` + runtime.GOOS + ` auth/pat` + cicdHeader + ` runtime/client.1` + + var userAgent string + c, err := New(&config.Config{ + Host: "some", + Token: "token", + ConfigFile: "/dev/null", + HTTPTransport: hc(func(r *http.Request) (*http.Response, error) { + userAgent = r.UserAgent() + return &http.Response{ + StatusCode: 200, + Body: io.NopCloser(strings.NewReader(`{}`)), + Request: r, + }, nil + }), + }) + require.NoError(t, err) + err = c.Do(context.Background(), "GET", "/a", nil, nil, nil) + assert.Equal(t, userAgent, expectedUserAgent) + require.NoError(t, err) +} + func testNonJSONResponseIncludedInError(t *testing.T, statusCode int, status, errorMessage string) { c, err := New(&config.Config{ Host: "some", diff --git a/config/api_client.go b/config/api_client.go index 8f748caf4..601ede730 100644 --- a/config/api_client.go +++ b/config/api_client.go @@ -6,6 +6,7 @@ import ( "fmt" "net/http" "net/url" + "os" "time" "github.com/databricks/databricks-sdk-go/apierr" @@ -64,6 +65,17 @@ func (c *Config) NewApiClient() (*httpclient.ApiClient, error) { *r = *r.WithContext(ctx) // replace request return nil }, + func(r *http.Request) error { + // Detect if the SDK is being run in a Databricks Runtime. + v, ok := os.LookupEnv("DATABRICKS_RUNTIME_VERSION") + if !ok { + return nil + } + // Add the detected Databricks Runtime version to the user agent + ctx := useragent.InContext(r.Context(), "runtime", v) + *r = *r.WithContext(ctx) // replace request + return nil + }, }, TransientErrors: []string{ "REQUEST_LIMIT_EXCEEDED", // This is temporary workaround for SCIM API returning 500. Remove when it's fixed diff --git a/useragent/patterns.go b/useragent/patterns.go index 52d7ebcd5..948b54fac 100644 --- a/useragent/patterns.go +++ b/useragent/patterns.go @@ -36,3 +36,11 @@ func matchAlphanumOrSemVer(s string) error { } return fmt.Errorf("invalid alphanumeric or semver string: %s", s) } + +func isAlphanum(s string) bool { + return regexpAlphanum.MatchString(s) +} + +func isSemVer(s string) bool { + return regexpSemVer.MatchString(s) +} diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index ed9fe62aa..3febb21bd 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -8,25 +8,54 @@ import ( func TestMatchSemVer(t *testing.T) { assert.NoError(t, matchSemVer("1.2.3")) + assert.True(t, isSemVer("1.2.3")) + assert.NoError(t, matchSemVer("0.0.0-dev+2e014739024a")) + assert.True(t, isSemVer("0.0.0-dev+2e014739024a")) + assert.Error(t, matchSemVer("1.2.3.4")) + assert.False(t, isSemVer("1.2.3.4")) + assert.Error(t, matchSemVer("1.2")) + assert.False(t, isSemVer("1.2")) } func TestMatchAlphanum(t *testing.T) { assert.NoError(t, matchAlphanum("foo")) + assert.True(t, isAlphanum("foo")) + assert.NoError(t, matchAlphanum("FOO")) + assert.True(t, isAlphanum("FOO")) + assert.NoError(t, matchAlphanum("FOO123")) + assert.True(t, isAlphanum("FOO123")) + assert.NoError(t, matchAlphanum("foo_bar")) + assert.True(t, isAlphanum("foo_bar")) + assert.NoError(t, matchAlphanum("foo-bar")) + assert.True(t, isAlphanum("foo-bar")) + assert.Error(t, matchAlphanum("foo bar")) + assert.False(t, isAlphanum("foo bar")) + assert.Error(t, matchAlphanum("foo/bar")) + assert.False(t, isAlphanum("foo/bar")) } func TestMatchAlphanumOrSemVer(t *testing.T) { assert.NoError(t, matchAlphanumOrSemVer("foo")) + assert.True(t, isAlphanum("foo") || isSemVer("foo")) + assert.NoError(t, matchAlphanumOrSemVer("1.2.3")) + assert.True(t, isAlphanum("1.2.3") || isSemVer("1.2.3")) + assert.NoError(t, matchAlphanumOrSemVer("0.0.0-dev+2e014739024a")) + assert.True(t, isAlphanum("0.0.0-dev+2e014739024a") || isSemVer("0.0.0-dev+2e014739024a")) + assert.Error(t, matchAlphanumOrSemVer("foo/bar")) + assert.False(t, isAlphanum("foo/bar") || isSemVer("foo/bar")) + assert.Error(t, matchAlphanumOrSemVer("1/2/3")) + assert.False(t, isAlphanum("1/2/3") || isSemVer("1/2/3")) } diff --git a/useragent/user_agent.go b/useragent/user_agent.go index 300a3797f..5ce29f419 100644 --- a/useragent/user_agent.go +++ b/useragent/user_agent.go @@ -85,13 +85,27 @@ func (u info) String() string { type data []info +// Validate the key value pair being set in the user agent. Error if invalid. +func validate(key, value string) error { + // DBR versions as set in the `DATABRICKS_RUNTIME_VERSION` environment variable + // are not valid semver strings. Eg: 15.1 or client.0. Thus we allow arbitrary + // values for the runtime key. + if key == "runtime" { + return nil + } + if !isAlphanum(key) { + return fmt.Errorf("expected user agent key to be alphanumeric: %s", key) + } + if !isAlphanum(value) && !isSemVer(value) { + return fmt.Errorf("expected user agent value for key %q to be alphanumeric or semver: %s", key, value) + } + return nil +} + // With always uses the latest value for a given alphanumeric key. // Panics if key or value don't satisfy alphanumeric or semver format. func (d data) With(key, value string) data { - if err := matchAlphanum(key); err != nil { - panic(err) - } - if err := matchAlphanumOrSemVer(value); err != nil { + if err := validate(key, value); err != nil { panic(err) } var c data diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 815d32f5b..0d8f1f66e 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -60,3 +60,15 @@ func TestFromContext_Custom(t *testing.T) { func TestDefaultsAreValid(t *testing.T) { WithProduct(productName, productVersion) } + +func TestUserAgentValidate(t *testing.T) { + assert.EqualError(t, validate("non-alphanumic!", "abc"), "expected user agent key to be alphanumeric: non-alphanumic!") + assert.EqualError(t, validate("abc", "non-alphanumeric!"), "expected user agent value for key \"abc\" to be alphanumeric or semver: non-alphanumeric!") + assert.EqualError(t, validate("abc", "1.1.invalid"), "expected user agent value for key \"abc\" to be alphanumeric or semver: 1.1.invalid") + + assert.NoError(t, validate("runtime", "7.3")) + assert.NoError(t, validate("runtime", "client.7")) + assert.NoError(t, validate("runtime", "whatever#!@")) + assert.NoError(t, validate("abc", "123")) + assert.NoError(t, validate("abc", "1.1.1")) +} From 02308c1fa29fb44582d41e9e47ceabdac8970401 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 11 Mar 2024 14:24:30 +0100 Subject: [PATCH 02/23] - --- config/api_client.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/config/api_client.go b/config/api_client.go index 601ede730..e961eb693 100644 --- a/config/api_client.go +++ b/config/api_client.go @@ -7,6 +7,7 @@ import ( "net/http" "net/url" "os" + "strings" "time" "github.com/databricks/databricks-sdk-go/apierr" @@ -71,6 +72,9 @@ func (c *Config) NewApiClient() (*httpclient.ApiClient, error) { if !ok { return nil } + // Replace any slashes and spaces with dashes, just in case. + v = strings.ReplaceAll(v, "/", "-") + v = strings.ReplaceAll(v, " ", "-") // Add the detected Databricks Runtime version to the user agent ctx := useragent.InContext(r.Context(), "runtime", v) *r = *r.WithContext(ctx) // replace request From ed0499e4c0d50ef1cabaeba60c490b0afe340c90 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 11 Mar 2024 15:16:31 +0100 Subject: [PATCH 03/23] consolidate is / match functions --- useragent/patterns.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/useragent/patterns.go b/useragent/patterns.go index 948b54fac..80df2e5d9 100644 --- a/useragent/patterns.go +++ b/useragent/patterns.go @@ -17,21 +17,21 @@ var regexpSemVer = regexp.MustCompile(`^` + semVerCore + semVerPrerelease + semV var regexpAlphanum = regexp.MustCompile(`^[0-9A-Za-z_-]+$`) func matchSemVer(s string) error { - if regexpSemVer.MatchString(s) { + if isSemVer(s) { return nil } return fmt.Errorf("invalid semver string: %s", s) } func matchAlphanum(s string) error { - if regexpAlphanum.MatchString(s) { + if isAlphanum(s) { return nil } return fmt.Errorf("invalid alphanumeric string: %s", s) } func matchAlphanumOrSemVer(s string) error { - if regexpAlphanum.MatchString(s) || regexpSemVer.MatchString(s) { + if isAlphanum(s) || isSemVer(s) { return nil } return fmt.Errorf("invalid alphanumeric or semver string: %s", s) From b820b56e011432cf9e4e12fc7f8861f9df476074 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 11 Mar 2024 15:23:26 +0100 Subject: [PATCH 04/23] make the UA keys constants --- config/api_client.go | 6 +++--- useragent/user_agent.go | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/config/api_client.go b/config/api_client.go index e961eb693..9981d001e 100644 --- a/config/api_client.go +++ b/config/api_client.go @@ -51,7 +51,7 @@ func (c *Config) NewApiClient() (*httpclient.ApiClient, error) { return nil }, func(r *http.Request) error { - ctx := useragent.InContext(r.Context(), "auth", c.AuthType) + ctx := useragent.InContext(r.Context(), useragent.AuthKey, c.AuthType) *r = *r.WithContext(ctx) // replace request return nil }, @@ -62,7 +62,7 @@ func (c *Config) NewApiClient() (*httpclient.ApiClient, error) { return nil } // Add the detected CI/CD provider to the user agent - ctx := useragent.InContext(r.Context(), "cicd", provider) + ctx := useragent.InContext(r.Context(), useragent.CicdKey, provider) *r = *r.WithContext(ctx) // replace request return nil }, @@ -76,7 +76,7 @@ func (c *Config) NewApiClient() (*httpclient.ApiClient, error) { v = strings.ReplaceAll(v, "/", "-") v = strings.ReplaceAll(v, " ", "-") // Add the detected Databricks Runtime version to the user agent - ctx := useragent.InContext(r.Context(), "runtime", v) + ctx := useragent.InContext(r.Context(), useragent.RuntimeKey, v) *r = *r.WithContext(ctx) // replace request return nil }, diff --git a/useragent/user_agent.go b/useragent/user_agent.go index 5ce29f419..b86b64dc7 100644 --- a/useragent/user_agent.go +++ b/useragent/user_agent.go @@ -10,6 +10,12 @@ import ( "golang.org/x/mod/semver" ) +const ( + RuntimeKey = "runtime" + CicdKey = "cicd" + AuthKey = "auth" +) + // WithProduct sets the product name and product version globally. // It should be called by developers to differentiate their application from others. func WithProduct(name, version string) { @@ -90,7 +96,7 @@ func validate(key, value string) error { // DBR versions as set in the `DATABRICKS_RUNTIME_VERSION` environment variable // are not valid semver strings. Eg: 15.1 or client.0. Thus we allow arbitrary // values for the runtime key. - if key == "runtime" { + if key == RuntimeKey { return nil } if !isAlphanum(key) { From f0b17822ac9cf503b646230a27d5a24401f2fa79 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 11 Mar 2024 18:26:02 +0100 Subject: [PATCH 05/23] add sanitize function --- config/api_client.go | 5 +---- useragent/user_agent.go | 16 ++++++++++++++++ useragent/user_agent_test.go | 7 +++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/config/api_client.go b/config/api_client.go index 9981d001e..5dcc432d2 100644 --- a/config/api_client.go +++ b/config/api_client.go @@ -7,7 +7,6 @@ import ( "net/http" "net/url" "os" - "strings" "time" "github.com/databricks/databricks-sdk-go/apierr" @@ -72,9 +71,7 @@ func (c *Config) NewApiClient() (*httpclient.ApiClient, error) { if !ok { return nil } - // Replace any slashes and spaces with dashes, just in case. - v = strings.ReplaceAll(v, "/", "-") - v = strings.ReplaceAll(v, " ", "-") + v = useragent.Sanitize(v) // Add the detected Databricks Runtime version to the user agent ctx := useragent.InContext(r.Context(), useragent.RuntimeKey, v) *r = *r.WithContext(ctx) // replace request diff --git a/useragent/user_agent.go b/useragent/user_agent.go index b86b64dc7..29f5f3cfa 100644 --- a/useragent/user_agent.go +++ b/useragent/user_agent.go @@ -5,6 +5,7 @@ import ( "fmt" "runtime" "strings" + "unicode" "github.com/databricks/databricks-sdk-go/version" "golang.org/x/mod/semver" @@ -85,6 +86,21 @@ type info struct { Value string } +// Sanitize the user agent value. This is useful when the set of possible values is not +// known at compile time to be all valid. Having this sanitization then ensures +// downstream applications can correctly parse the full user agent header, by making sure +// characters like '/' and ' ' are not present in the value. +func Sanitize(s string) string { + allowList := func(r rune) rune { + if r == '.' || unicode.IsLetter(r) || unicode.IsDigit(r) { + return r + } + return '-' + } + + return strings.Map(allowList, s) +} + func (u info) String() string { return fmt.Sprintf("%s/%s", u.Key, u.Value) } diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 0d8f1f66e..bca7aee6e 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -72,3 +72,10 @@ func TestUserAgentValidate(t *testing.T) { assert.NoError(t, validate("abc", "123")) assert.NoError(t, validate("abc", "1.1.1")) } + +func TestUserAgentNormalizeString(t *testing.T) { + assert.Equal(t, "abc123", Sanitize("abc123")) + assert.Equal(t, "1-2-3-4-5-6-7-8-", Sanitize("1@2#3?4,5/6!7 8 ")) + assert.Equal(t, "1.2.3", Sanitize("1.2.3")) + assert.Equal(t, "client.0", Sanitize("client.0")) +} From 6bfc8ca095fadbdc83fd34d6dca87e73b9e0184a Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 11 Mar 2024 18:28:22 +0100 Subject: [PATCH 06/23] - --- useragent/user_agent.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/useragent/user_agent.go b/useragent/user_agent.go index 29f5f3cfa..784306bd5 100644 --- a/useragent/user_agent.go +++ b/useragent/user_agent.go @@ -86,9 +86,9 @@ type info struct { Value string } -// Sanitize the user agent value. This is useful when the set of possible values is not -// known at compile time to be all valid. Having this sanitization then ensures -// downstream applications can correctly parse the full user agent header, by making sure +// Sanitize the user agent value. This is useful when the value is not ensured to be +// to be valid at compile time. Having this sanitization then ensures downstream +// applications can correctly parse the full user agent header, by making sure // characters like '/' and ' ' are not present in the value. func Sanitize(s string) string { allowList := func(r rune) rune { From d3fba9007b2557fa91012cd63e82e4025a021097 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 11 Mar 2024 18:29:18 +0100 Subject: [PATCH 07/23] lint --- useragent/user_agent.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/useragent/user_agent.go b/useragent/user_agent.go index 784306bd5..1a561a359 100644 --- a/useragent/user_agent.go +++ b/useragent/user_agent.go @@ -86,7 +86,7 @@ type info struct { Value string } -// Sanitize the user agent value. This is useful when the value is not ensured to be +// Sanitize the user agent value. This is useful when the value is not ensured to be // to be valid at compile time. Having this sanitization then ensures downstream // applications can correctly parse the full user agent header, by making sure // characters like '/' and ' ' are not present in the value. From e01860b9f5c03b3319f314d6dda8269267be085d Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 13 Mar 2024 18:59:22 +0100 Subject: [PATCH 08/23] address comments --- client/client_test.go | 49 ++++++++++++++++++------------------ useragent/patterns.go | 9 +++---- useragent/patterns_test.go | 29 ++++++++++----------- useragent/user_agent.go | 37 +++++++++------------------ useragent/user_agent_test.go | 17 ++++++------- 5 files changed, 61 insertions(+), 80 deletions(-) diff --git a/client/client_test.go b/client/client_test.go index ba79f24a9..f9a75f2e2 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -321,32 +321,31 @@ GET /a } func TestUserAgentForDBR(t *testing.T) { - goVersion := strings.TrimPrefix(runtime.Version(), "go") - cicdHeader := "" - if useragent.CiCdProvider() != "" { - cicdHeader = fmt.Sprintf(" cicd/%s", useragent.CiCdProvider()) - } - t.Setenv("DATABRICKS_RUNTIME_VERSION", "15.1") - expectedUserAgent := `unknown/0.0.0 databricks-sdk-go/` + version.Version + ` go/` + goVersion + ` os/` + runtime.GOOS + ` auth/pat` + cicdHeader + ` runtime/15.1` + for _, dbrVersion := range []string{"client.0", "client.1", "15.0", "13.3", "14.4"} { + t.Run(dbrVersion, func(t *testing.T) { + t.Setenv("DATABRICKS_RUNTIME_VERSION", dbrVersion) - var userAgent string - c, err := New(&config.Config{ - Host: "some", - Token: "token", - ConfigFile: "/dev/null", - HTTPTransport: hc(func(r *http.Request) (*http.Response, error) { - userAgent = r.UserAgent() - return &http.Response{ - StatusCode: 200, - Body: io.NopCloser(strings.NewReader(`{}`)), - Request: r, - }, nil - }), - }) - require.NoError(t, err) - err = c.Do(context.Background(), "GET", "/a", nil, nil, nil) - assert.Equal(t, userAgent, expectedUserAgent) - require.NoError(t, err) + var userAgent string + c, err := New(&config.Config{ + Host: "some", + Token: "token", + ConfigFile: "/dev/null", + HTTPTransport: hc(func(r *http.Request) (*http.Response, error) { + userAgent = r.UserAgent() + return &http.Response{ + StatusCode: 200, + Body: io.NopCloser(strings.NewReader(`{}`)), + Request: r, + }, nil + }), + }) + require.NoError(t, err) + + err = c.Do(context.Background(), "GET", "/a", nil, nil, nil) + assert.Contains(t, userAgent, "runtime/"+dbrVersion) + require.NoError(t, err) + }) + } } func TestUserAgentForServerless(t *testing.T) { diff --git a/useragent/patterns.go b/useragent/patterns.go index 80df2e5d9..6e0166de2 100644 --- a/useragent/patterns.go +++ b/useragent/patterns.go @@ -30,11 +30,10 @@ func matchAlphanum(s string) error { return fmt.Errorf("invalid alphanumeric string: %s", s) } -func matchAlphanumOrSemVer(s string) error { - if isAlphanum(s) || isSemVer(s) { - return nil - } - return fmt.Errorf("invalid alphanumeric or semver string: %s", s) +var allowedValueChars = `0-9A-Za-z_\-\.` + +func isValidValue(s string) bool { + return regexp.MustCompile(`^[` + allowedValueChars + `]+$`).MatchString(s) } func isAlphanum(s string) bool { diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index 3febb21bd..3e46c62a7 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -43,19 +43,16 @@ func TestMatchAlphanum(t *testing.T) { assert.False(t, isAlphanum("foo/bar")) } -func TestMatchAlphanumOrSemVer(t *testing.T) { - assert.NoError(t, matchAlphanumOrSemVer("foo")) - assert.True(t, isAlphanum("foo") || isSemVer("foo")) - - assert.NoError(t, matchAlphanumOrSemVer("1.2.3")) - assert.True(t, isAlphanum("1.2.3") || isSemVer("1.2.3")) - - assert.NoError(t, matchAlphanumOrSemVer("0.0.0-dev+2e014739024a")) - assert.True(t, isAlphanum("0.0.0-dev+2e014739024a") || isSemVer("0.0.0-dev+2e014739024a")) - - assert.Error(t, matchAlphanumOrSemVer("foo/bar")) - assert.False(t, isAlphanum("foo/bar") || isSemVer("foo/bar")) - - assert.Error(t, matchAlphanumOrSemVer("1/2/3")) - assert.False(t, isAlphanum("1/2/3") || isSemVer("1/2/3")) -} +func TestIsValidValue(t *testing.T) { + assert.True(t, isValidValue("foo")) + assert.True(t, isValidValue("FOO")) + assert.True(t, isValidValue("FOO123")) + assert.True(t, isValidValue("foo_bar")) + assert.True(t, isValidValue("foo-bar")) + assert.True(t, isValidValue("foo.bar")) + + assert.False(t, isValidValue("foo bar")) + assert.False(t, isValidValue("foo/bar")) + assert.False(t, isValidValue("foo:)bar")) + assert.False(t, isValidValue("foo😊bar")) +} \ No newline at end of file diff --git a/useragent/user_agent.go b/useragent/user_agent.go index ac079663f..e23726e4b 100644 --- a/useragent/user_agent.go +++ b/useragent/user_agent.go @@ -4,9 +4,9 @@ import ( "context" "fmt" "os" + "regexp" "runtime" "strings" - "unicode" "github.com/databricks/databricks-sdk-go/version" "golang.org/x/mod/semver" @@ -100,21 +100,6 @@ type info struct { Value string } -// Sanitize the user agent value. This is useful when the value is not ensured to be -// to be valid at compile time. Having this sanitization then ensures downstream -// applications can correctly parse the full user agent header, by making sure -// characters like '/' and ' ' are not present in the value. -func Sanitize(s string) string { - allowList := func(r rune) rune { - if r == '.' || unicode.IsLetter(r) || unicode.IsDigit(r) { - return r - } - return '-' - } - - return strings.Map(allowList, s) -} - func (u info) String() string { return fmt.Sprintf("%s/%s", u.Key, u.Value) } @@ -123,21 +108,23 @@ type data []info // Validate the key value pair being set in the user agent. Error if invalid. func validate(key, value string) error { - // DBR versions as set in the `DATABRICKS_RUNTIME_VERSION` environment variable - // are not valid semver strings. Eg: 15.1 or client.0. Thus we allow arbitrary - // values for the runtime key. - if key == RuntimeKey { - return nil - } if !isAlphanum(key) { - return fmt.Errorf("expected user agent key to be alphanumeric: %s", key) + return fmt.Errorf("expected user agent key to be alphanumeric: %q", key) } - if !isAlphanum(value) && !isSemVer(value) { - return fmt.Errorf("expected user agent value for key %q to be alphanumeric or semver: %s", key, value) + if !isValidValue(value) { + return fmt.Errorf("expected user agent value for key %q to be alphanumeric or semver: %q", key, value) } return nil } +// Sanitize the user agent value. This is useful when the value is not ensured to be +// to be valid at compile time. Having this sanitization then ensures downstream +// applications can correctly parse the full user agent header, by making sure +// characters like '/' and ' ' are not present in the value. +func Sanitize(s string) string { + return regexp.MustCompile(`[^`+allowedValueChars+`]`).ReplaceAllString(s, "-") +} + // With always uses the latest value for a given alphanumeric key. // Panics if key or value don't satisfy alphanumeric or semver format. func (d data) With(key, value string) data { diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 7e8d276b9..6451720e7 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -76,15 +76,14 @@ func TestDefaultsAreValid(t *testing.T) { } func TestUserAgentValidate(t *testing.T) { - assert.EqualError(t, validate("non-alphanumic!", "abc"), "expected user agent key to be alphanumeric: non-alphanumic!") - assert.EqualError(t, validate("abc", "non-alphanumeric!"), "expected user agent value for key \"abc\" to be alphanumeric or semver: non-alphanumeric!") - assert.EqualError(t, validate("abc", "1.1.invalid"), "expected user agent value for key \"abc\" to be alphanumeric or semver: 1.1.invalid") - - assert.NoError(t, validate("runtime", "7.3")) - assert.NoError(t, validate("runtime", "client.7")) - assert.NoError(t, validate("runtime", "whatever#!@")) - assert.NoError(t, validate("abc", "123")) - assert.NoError(t, validate("abc", "1.1.1")) + assert.EqualError(t, validate("foobar!", "abc"), "expected user agent key to be alphanumeric: \"foobar!\"") + assert.EqualError(t, validate("foo", "invalid!"), "expected user agent value for key \"foo\" to be alphanumeric or semver: \"invalid!\"") + assert.EqualError(t, validate("foo", "whatever#!@"), "expected user agent value for key \"foo\" to be alphanumeric or semver: \"whatever#!@\"") + + assert.NoError(t, validate("foo", "7.3")) + assert.NoError(t, validate("foo", "client.7")) + assert.NoError(t, validate("foo", "123")) + assert.NoError(t, validate("foo", "1.1.1")) } func TestUserAgentNormalizeString(t *testing.T) { From 9131f58e6751a53fa93cd5926c156fbb0db23fb2 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 13 Mar 2024 19:09:15 +0100 Subject: [PATCH 09/23] add unicode characters to test --- useragent/patterns_test.go | 2 +- useragent/user_agent_test.go | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index 3e46c62a7..cc786ceec 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -55,4 +55,4 @@ func TestIsValidValue(t *testing.T) { assert.False(t, isValidValue("foo/bar")) assert.False(t, isValidValue("foo:)bar")) assert.False(t, isValidValue("foo😊bar")) -} \ No newline at end of file +} diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 6451720e7..4d355b1ce 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -91,4 +91,5 @@ func TestUserAgentNormalizeString(t *testing.T) { assert.Equal(t, "1-2-3-4-5-6-7-8-", Sanitize("1@2#3?4,5/6!7 8 ")) assert.Equal(t, "1.2.3", Sanitize("1.2.3")) assert.Equal(t, "client.0", Sanitize("client.0")) + assert.Equal(t, "unicode----", Sanitize("unicode ◬☋☜")) } From 362e46abcb3c4973d4159f0c6a956d597630e5de Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 13 Mar 2024 19:14:20 +0100 Subject: [PATCH 10/23] consolidate tests --- useragent/patterns_test.go | 14 ------------ useragent/user_agent_test.go | 43 ++++++++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index cc786ceec..b461ee168 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -42,17 +42,3 @@ func TestMatchAlphanum(t *testing.T) { assert.Error(t, matchAlphanum("foo/bar")) assert.False(t, isAlphanum("foo/bar")) } - -func TestIsValidValue(t *testing.T) { - assert.True(t, isValidValue("foo")) - assert.True(t, isValidValue("FOO")) - assert.True(t, isValidValue("FOO123")) - assert.True(t, isValidValue("foo_bar")) - assert.True(t, isValidValue("foo-bar")) - assert.True(t, isValidValue("foo.bar")) - - assert.False(t, isValidValue("foo bar")) - assert.False(t, isValidValue("foo/bar")) - assert.False(t, isValidValue("foo:)bar")) - assert.False(t, isValidValue("foo😊bar")) -} diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 4d355b1ce..5813dc07b 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -86,10 +86,45 @@ func TestUserAgentValidate(t *testing.T) { assert.NoError(t, validate("foo", "1.1.1")) } -func TestUserAgentNormalizeString(t *testing.T) { - assert.Equal(t, "abc123", Sanitize("abc123")) - assert.Equal(t, "1-2-3-4-5-6-7-8-", Sanitize("1@2#3?4,5/6!7 8 ")) +func TestSanitize(t *testing.T) { + // Valid values + assert.True(t, isValidValue("foo")) + assert.Equal(t, "foo", Sanitize("foo")) + + assert.True(t, isValidValue("FOO")) + assert.Equal(t, "FOO", Sanitize("FOO")) + + assert.True(t, isValidValue("FOO123")) + assert.Equal(t, "FOO123", Sanitize("FOO123")) + + assert.True(t, isValidValue("foo_bar")) + assert.Equal(t, "foo_bar", Sanitize("foo_bar")) + + assert.True(t, isValidValue("foo-bar")) + assert.Equal(t, "foo-bar", Sanitize("foo-bar")) + + assert.True(t, isValidValue("foo.bar")) + assert.Equal(t, "foo.bar", Sanitize("foo.bar")) + + assert.True(t, isValidValue("1.2.3")) assert.Equal(t, "1.2.3", Sanitize("1.2.3")) + + assert.True(t, isValidValue("client.0")) assert.Equal(t, "client.0", Sanitize("client.0")) - assert.Equal(t, "unicode----", Sanitize("unicode ◬☋☜")) + + // Invalid Values, being sanitized correctly. + assert.False(t, isValidValue("1@2#3?4,5/6!7 8 ")) + assert.Equal(t, "1-2-3-4-5-6-7-8-", Sanitize("1@2#3?4,5/6!7 8 ")) + + assert.False(t, isValidValue("foo bar")) + assert.Equal(t, "foo-bar", Sanitize("foo bar")) + + assert.False(t, isValidValue("foo/bar")) + assert.Equal(t, "foo-bar", Sanitize("foo/bar")) + + assert.False(t, isValidValue("foo:)bar")) + assert.Equal(t, "foo--bar", Sanitize("foo:)bar")) + + assert.False(t, isValidValue("foo😊bar")) + assert.Equal(t, "foo-bar", Sanitize("foo😊bar")) } From d6b68ed78738286e43324e623a8d071496ce119d Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 13 Mar 2024 19:22:16 +0100 Subject: [PATCH 11/23] consolidate isAlphanum and validation function --- useragent/patterns.go | 30 +++++++++++++++--------------- useragent/patterns_test.go | 3 +++ useragent/user_agent.go | 4 ++-- useragent/user_agent_test.go | 26 +++++++++++++------------- 4 files changed, 33 insertions(+), 30 deletions(-) diff --git a/useragent/patterns.go b/useragent/patterns.go index 6e0166de2..dc95035a2 100644 --- a/useragent/patterns.go +++ b/useragent/patterns.go @@ -14,7 +14,9 @@ const ( var regexpSemVer = regexp.MustCompile(`^` + semVerCore + semVerPrerelease + semVerBuildmetadata + `$`) -var regexpAlphanum = regexp.MustCompile(`^[0-9A-Za-z_-]+$`) +func isSemVer(s string) bool { + return regexpSemVer.MatchString(s) +} func matchSemVer(s string) error { if isSemVer(s) { @@ -23,23 +25,21 @@ func matchSemVer(s string) error { return fmt.Errorf("invalid semver string: %s", s) } +// Alphanumeric characters, hyphen, underscore, and period. This is the subset of +// characters that we allow in user agent keys and values. This is to ensure that +// downstream applications can correctly parse the full user agent header. +// +// NOTE: HTTP headers in general only work well with ASCII characters. see: +// https://stackoverflow.com/questions/4400678/what-character-encoding-should-i-use-for-a-http-header +var alphanum = `0-9A-Za-z_\-\.` + +func isAlphanum(s string) bool { + return regexp.MustCompile(`^[` + alphanum + `]+$`).MatchString(s) +} + func matchAlphanum(s string) error { if isAlphanum(s) { return nil } return fmt.Errorf("invalid alphanumeric string: %s", s) } - -var allowedValueChars = `0-9A-Za-z_\-\.` - -func isValidValue(s string) bool { - return regexp.MustCompile(`^[` + allowedValueChars + `]+$`).MatchString(s) -} - -func isAlphanum(s string) bool { - return regexpAlphanum.MatchString(s) -} - -func isSemVer(s string) bool { - return regexpSemVer.MatchString(s) -} diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index b461ee168..fc4c6c7b7 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -36,6 +36,9 @@ func TestMatchAlphanum(t *testing.T) { assert.NoError(t, matchAlphanum("foo-bar")) assert.True(t, isAlphanum("foo-bar")) + assert.NoError(t, matchAlphanum("foo.bar")) + assert.True(t, isAlphanum("foo.bar")) + assert.Error(t, matchAlphanum("foo bar")) assert.False(t, isAlphanum("foo bar")) diff --git a/useragent/user_agent.go b/useragent/user_agent.go index e23726e4b..a821e80f1 100644 --- a/useragent/user_agent.go +++ b/useragent/user_agent.go @@ -111,7 +111,7 @@ func validate(key, value string) error { if !isAlphanum(key) { return fmt.Errorf("expected user agent key to be alphanumeric: %q", key) } - if !isValidValue(value) { + if !isAlphanum(value) { return fmt.Errorf("expected user agent value for key %q to be alphanumeric or semver: %q", key, value) } return nil @@ -122,7 +122,7 @@ func validate(key, value string) error { // applications can correctly parse the full user agent header, by making sure // characters like '/' and ' ' are not present in the value. func Sanitize(s string) string { - return regexp.MustCompile(`[^`+allowedValueChars+`]`).ReplaceAllString(s, "-") + return regexp.MustCompile(`[^`+alphanum+`]`).ReplaceAllString(s, "-") } // With always uses the latest value for a given alphanumeric key. diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 5813dc07b..690595807 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -88,43 +88,43 @@ func TestUserAgentValidate(t *testing.T) { func TestSanitize(t *testing.T) { // Valid values - assert.True(t, isValidValue("foo")) + assert.True(t, isAlphanum("foo")) assert.Equal(t, "foo", Sanitize("foo")) - assert.True(t, isValidValue("FOO")) + assert.True(t, isAlphanum("FOO")) assert.Equal(t, "FOO", Sanitize("FOO")) - assert.True(t, isValidValue("FOO123")) + assert.True(t, isAlphanum("FOO123")) assert.Equal(t, "FOO123", Sanitize("FOO123")) - assert.True(t, isValidValue("foo_bar")) + assert.True(t, isAlphanum("foo_bar")) assert.Equal(t, "foo_bar", Sanitize("foo_bar")) - assert.True(t, isValidValue("foo-bar")) + assert.True(t, isAlphanum("foo-bar")) assert.Equal(t, "foo-bar", Sanitize("foo-bar")) - assert.True(t, isValidValue("foo.bar")) + assert.True(t, isAlphanum("foo.bar")) assert.Equal(t, "foo.bar", Sanitize("foo.bar")) - assert.True(t, isValidValue("1.2.3")) + assert.True(t, isAlphanum("1.2.3")) assert.Equal(t, "1.2.3", Sanitize("1.2.3")) - assert.True(t, isValidValue("client.0")) + assert.True(t, isAlphanum("client.0")) assert.Equal(t, "client.0", Sanitize("client.0")) // Invalid Values, being sanitized correctly. - assert.False(t, isValidValue("1@2#3?4,5/6!7 8 ")) + assert.False(t, isAlphanum("1@2#3?4,5/6!7 8 ")) assert.Equal(t, "1-2-3-4-5-6-7-8-", Sanitize("1@2#3?4,5/6!7 8 ")) - assert.False(t, isValidValue("foo bar")) + assert.False(t, isAlphanum("foo bar")) assert.Equal(t, "foo-bar", Sanitize("foo bar")) - assert.False(t, isValidValue("foo/bar")) + assert.False(t, isAlphanum("foo/bar")) assert.Equal(t, "foo-bar", Sanitize("foo/bar")) - assert.False(t, isValidValue("foo:)bar")) + assert.False(t, isAlphanum("foo:)bar")) assert.Equal(t, "foo--bar", Sanitize("foo:)bar")) - assert.False(t, isValidValue("foo😊bar")) + assert.False(t, isAlphanum("foo😊bar")) assert.Equal(t, "foo-bar", Sanitize("foo😊bar")) } From 0a039688ba2e31b7c5b6ffdc6df6cc681df83ff4 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 13 Mar 2024 19:26:11 +0100 Subject: [PATCH 12/23] remove serverless test + cleanup --- client/client_test.go | 29 ----------------------------- useragent/patterns.go | 10 +++++----- useragent/patterns_test.go | 32 ++++++++++++++++---------------- useragent/user_agent.go | 8 ++++---- useragent/user_agent_test.go | 26 +++++++++++++------------- 5 files changed, 38 insertions(+), 67 deletions(-) diff --git a/client/client_test.go b/client/client_test.go index f9a75f2e2..8bf5aa920 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -348,35 +348,6 @@ func TestUserAgentForDBR(t *testing.T) { } } -func TestUserAgentForServerless(t *testing.T) { - goVersion := strings.TrimPrefix(runtime.Version(), "go") - cicdHeader := "" - if useragent.CiCdProvider() != "" { - cicdHeader = fmt.Sprintf(" cicd/%s", useragent.CiCdProvider()) - } - t.Setenv("DATABRICKS_RUNTIME_VERSION", "client.1") - expectedUserAgent := `unknown/0.0.0 databricks-sdk-go/` + version.Version + ` go/` + goVersion + ` os/` + runtime.GOOS + ` auth/pat` + cicdHeader + ` runtime/client.1` - - var userAgent string - c, err := New(&config.Config{ - Host: "some", - Token: "token", - ConfigFile: "/dev/null", - HTTPTransport: hc(func(r *http.Request) (*http.Response, error) { - userAgent = r.UserAgent() - return &http.Response{ - StatusCode: 200, - Body: io.NopCloser(strings.NewReader(`{}`)), - Request: r, - }, nil - }), - }) - require.NoError(t, err) - err = c.Do(context.Background(), "GET", "/a", nil, nil, nil) - assert.Equal(t, userAgent, expectedUserAgent) - require.NoError(t, err) -} - func testNonJSONResponseIncludedInError(t *testing.T, statusCode int, status, errorMessage string) { c, err := New(&config.Config{ Host: "some", diff --git a/useragent/patterns.go b/useragent/patterns.go index dc95035a2..4c2026193 100644 --- a/useragent/patterns.go +++ b/useragent/patterns.go @@ -31,14 +31,14 @@ func matchSemVer(s string) error { // // NOTE: HTTP headers in general only work well with ASCII characters. see: // https://stackoverflow.com/questions/4400678/what-character-encoding-should-i-use-for-a-http-header -var alphanum = `0-9A-Za-z_\-\.` +var validChars = `0-9A-Za-z_\-\.` -func isAlphanum(s string) bool { - return regexp.MustCompile(`^[` + alphanum + `]+$`).MatchString(s) +func isValid(s string) bool { + return regexp.MustCompile(`^[` + validChars + `]+$`).MatchString(s) } -func matchAlphanum(s string) error { - if isAlphanum(s) { +func matchValidChars(s string) error { + if isValid(s) { return nil } return fmt.Errorf("invalid alphanumeric string: %s", s) diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index fc4c6c7b7..d4eb47846 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -21,27 +21,27 @@ func TestMatchSemVer(t *testing.T) { } func TestMatchAlphanum(t *testing.T) { - assert.NoError(t, matchAlphanum("foo")) - assert.True(t, isAlphanum("foo")) + assert.NoError(t, matchValidChars("foo")) + assert.True(t, isValid("foo")) - assert.NoError(t, matchAlphanum("FOO")) - assert.True(t, isAlphanum("FOO")) + assert.NoError(t, matchValidChars("FOO")) + assert.True(t, isValid("FOO")) - assert.NoError(t, matchAlphanum("FOO123")) - assert.True(t, isAlphanum("FOO123")) + assert.NoError(t, matchValidChars("FOO123")) + assert.True(t, isValid("FOO123")) - assert.NoError(t, matchAlphanum("foo_bar")) - assert.True(t, isAlphanum("foo_bar")) + assert.NoError(t, matchValidChars("foo_bar")) + assert.True(t, isValid("foo_bar")) - assert.NoError(t, matchAlphanum("foo-bar")) - assert.True(t, isAlphanum("foo-bar")) + assert.NoError(t, matchValidChars("foo-bar")) + assert.True(t, isValid("foo-bar")) - assert.NoError(t, matchAlphanum("foo.bar")) - assert.True(t, isAlphanum("foo.bar")) + assert.NoError(t, matchValidChars("foo.bar")) + assert.True(t, isValid("foo.bar")) - assert.Error(t, matchAlphanum("foo bar")) - assert.False(t, isAlphanum("foo bar")) + assert.Error(t, matchValidChars("foo bar")) + assert.False(t, isValid("foo bar")) - assert.Error(t, matchAlphanum("foo/bar")) - assert.False(t, isAlphanum("foo/bar")) + assert.Error(t, matchValidChars("foo/bar")) + assert.False(t, isValid("foo/bar")) } diff --git a/useragent/user_agent.go b/useragent/user_agent.go index a821e80f1..046b6748e 100644 --- a/useragent/user_agent.go +++ b/useragent/user_agent.go @@ -21,7 +21,7 @@ const ( // WithProduct sets the product name and product version globally. // It should be called by developers to differentiate their application from others. func WithProduct(name, version string) { - if err := matchAlphanum(name); err != nil { + if err := matchValidChars(name); err != nil { panic(err) } if err := matchSemVer(version); err != nil { @@ -108,10 +108,10 @@ type data []info // Validate the key value pair being set in the user agent. Error if invalid. func validate(key, value string) error { - if !isAlphanum(key) { + if !isValid(key) { return fmt.Errorf("expected user agent key to be alphanumeric: %q", key) } - if !isAlphanum(value) { + if !isValid(value) { return fmt.Errorf("expected user agent value for key %q to be alphanumeric or semver: %q", key, value) } return nil @@ -122,7 +122,7 @@ func validate(key, value string) error { // applications can correctly parse the full user agent header, by making sure // characters like '/' and ' ' are not present in the value. func Sanitize(s string) string { - return regexp.MustCompile(`[^`+alphanum+`]`).ReplaceAllString(s, "-") + return regexp.MustCompile(`[^`+validChars+`]`).ReplaceAllString(s, "-") } // With always uses the latest value for a given alphanumeric key. diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 690595807..0b0d5aed9 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -88,43 +88,43 @@ func TestUserAgentValidate(t *testing.T) { func TestSanitize(t *testing.T) { // Valid values - assert.True(t, isAlphanum("foo")) + assert.True(t, isValid("foo")) assert.Equal(t, "foo", Sanitize("foo")) - assert.True(t, isAlphanum("FOO")) + assert.True(t, isValid("FOO")) assert.Equal(t, "FOO", Sanitize("FOO")) - assert.True(t, isAlphanum("FOO123")) + assert.True(t, isValid("FOO123")) assert.Equal(t, "FOO123", Sanitize("FOO123")) - assert.True(t, isAlphanum("foo_bar")) + assert.True(t, isValid("foo_bar")) assert.Equal(t, "foo_bar", Sanitize("foo_bar")) - assert.True(t, isAlphanum("foo-bar")) + assert.True(t, isValid("foo-bar")) assert.Equal(t, "foo-bar", Sanitize("foo-bar")) - assert.True(t, isAlphanum("foo.bar")) + assert.True(t, isValid("foo.bar")) assert.Equal(t, "foo.bar", Sanitize("foo.bar")) - assert.True(t, isAlphanum("1.2.3")) + assert.True(t, isValid("1.2.3")) assert.Equal(t, "1.2.3", Sanitize("1.2.3")) - assert.True(t, isAlphanum("client.0")) + assert.True(t, isValid("client.0")) assert.Equal(t, "client.0", Sanitize("client.0")) // Invalid Values, being sanitized correctly. - assert.False(t, isAlphanum("1@2#3?4,5/6!7 8 ")) + assert.False(t, isValid("1@2#3?4,5/6!7 8 ")) assert.Equal(t, "1-2-3-4-5-6-7-8-", Sanitize("1@2#3?4,5/6!7 8 ")) - assert.False(t, isAlphanum("foo bar")) + assert.False(t, isValid("foo bar")) assert.Equal(t, "foo-bar", Sanitize("foo bar")) - assert.False(t, isAlphanum("foo/bar")) + assert.False(t, isValid("foo/bar")) assert.Equal(t, "foo-bar", Sanitize("foo/bar")) - assert.False(t, isAlphanum("foo:)bar")) + assert.False(t, isValid("foo:)bar")) assert.Equal(t, "foo--bar", Sanitize("foo:)bar")) - assert.False(t, isAlphanum("foo😊bar")) + assert.False(t, isValid("foo😊bar")) assert.Equal(t, "foo-bar", Sanitize("foo😊bar")) } From e34ad8c5e2afd20c1b4de2f95a787c46dcec3b03 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 13 Mar 2024 19:27:43 +0100 Subject: [PATCH 13/23] - --- client/client_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/client/client_test.go b/client/client_test.go index 8bf5aa920..c2c8147f8 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -331,7 +331,9 @@ func TestUserAgentForDBR(t *testing.T) { Token: "token", ConfigFile: "/dev/null", HTTPTransport: hc(func(r *http.Request) (*http.Response, error) { + // Capture the user agent via the round tripper. userAgent = r.UserAgent() + return &http.Response{ StatusCode: 200, Body: io.NopCloser(strings.NewReader(`{}`)), From fee2f7f3d2aeaf613cd61a3cf161a6941825637a Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 13 Mar 2024 19:33:12 +0100 Subject: [PATCH 14/23] remove isSemver + cleanup --- client/client_test.go | 2 +- useragent/patterns.go | 6 +----- useragent/patterns_test.go | 7 ------- useragent/user_agent.go | 2 +- useragent/user_agent_test.go | 7 +++---- 5 files changed, 6 insertions(+), 18 deletions(-) diff --git a/client/client_test.go b/client/client_test.go index c2c8147f8..993054d14 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -333,7 +333,7 @@ func TestUserAgentForDBR(t *testing.T) { HTTPTransport: hc(func(r *http.Request) (*http.Response, error) { // Capture the user agent via the round tripper. userAgent = r.UserAgent() - + return &http.Response{ StatusCode: 200, Body: io.NopCloser(strings.NewReader(`{}`)), diff --git a/useragent/patterns.go b/useragent/patterns.go index 4c2026193..a4dbf0f74 100644 --- a/useragent/patterns.go +++ b/useragent/patterns.go @@ -14,12 +14,8 @@ const ( var regexpSemVer = regexp.MustCompile(`^` + semVerCore + semVerPrerelease + semVerBuildmetadata + `$`) -func isSemVer(s string) bool { - return regexpSemVer.MatchString(s) -} - func matchSemVer(s string) error { - if isSemVer(s) { + if regexpSemVer.MatchString(s) { return nil } return fmt.Errorf("invalid semver string: %s", s) diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index d4eb47846..733944a1e 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -8,16 +8,9 @@ import ( func TestMatchSemVer(t *testing.T) { assert.NoError(t, matchSemVer("1.2.3")) - assert.True(t, isSemVer("1.2.3")) - assert.NoError(t, matchSemVer("0.0.0-dev+2e014739024a")) - assert.True(t, isSemVer("0.0.0-dev+2e014739024a")) - assert.Error(t, matchSemVer("1.2.3.4")) - assert.False(t, isSemVer("1.2.3.4")) - assert.Error(t, matchSemVer("1.2")) - assert.False(t, isSemVer("1.2")) } func TestMatchAlphanum(t *testing.T) { diff --git a/useragent/user_agent.go b/useragent/user_agent.go index 046b6748e..a327a2256 100644 --- a/useragent/user_agent.go +++ b/useragent/user_agent.go @@ -112,7 +112,7 @@ func validate(key, value string) error { return fmt.Errorf("expected user agent key to be alphanumeric: %q", key) } if !isValid(value) { - return fmt.Errorf("expected user agent value for key %q to be alphanumeric or semver: %q", key, value) + return fmt.Errorf("expected user agent value for key %q to be alphanumeric: %q", key, value) } return nil } diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 0b0d5aed9..94eaf88f0 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -76,13 +76,12 @@ func TestDefaultsAreValid(t *testing.T) { } func TestUserAgentValidate(t *testing.T) { - assert.EqualError(t, validate("foobar!", "abc"), "expected user agent key to be alphanumeric: \"foobar!\"") - assert.EqualError(t, validate("foo", "invalid!"), "expected user agent value for key \"foo\" to be alphanumeric or semver: \"invalid!\"") - assert.EqualError(t, validate("foo", "whatever#!@"), "expected user agent value for key \"foo\" to be alphanumeric or semver: \"whatever#!@\"") + assert.EqualError(t, validate("foobar!", ""), "expected user agent key to be alphanumeric: \"foobar!\"") + assert.EqualError(t, validate("foo", "invalid!"), "expected user agent value for key \"foo\" to be alphanumeric: \"invalid!\"") + assert.EqualError(t, validate("foo", "whatever#!@"), "expected user agent value for key \"foo\" to be alphanumeric: \"whatever#!@\"") assert.NoError(t, validate("foo", "7.3")) assert.NoError(t, validate("foo", "client.7")) - assert.NoError(t, validate("foo", "123")) assert.NoError(t, validate("foo", "1.1.1")) } From 0e697173b6987355ec0e29bec1085628ca4dbb5f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 13 Mar 2024 19:35:40 +0100 Subject: [PATCH 15/23] - --- useragent/patterns_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index 733944a1e..d181a76e6 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -13,7 +13,7 @@ func TestMatchSemVer(t *testing.T) { assert.Error(t, matchSemVer("1.2")) } -func TestMatchAlphanum(t *testing.T) { +func TestMatchValidChars(t *testing.T) { assert.NoError(t, matchValidChars("foo")) assert.True(t, isValid("foo")) From cc5ecc83d327d45632d7c7e14c68e87fe4d70132 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 14 Mar 2024 14:28:55 +0100 Subject: [PATCH 16/23] address comments --- client/client_test.go | 1 + config/api_client.go | 6 ++-- useragent/patterns.go | 23 ++++++------- useragent/patterns_test.go | 39 ++++++++++------------- useragent/runtime.go | 29 +++++++++++++++++ useragent/user_agent.go | 36 ++++++++++----------- useragent/user_agent_test.go | 62 ++++++++---------------------------- 7 files changed, 89 insertions(+), 107 deletions(-) create mode 100644 useragent/runtime.go diff --git a/client/client_test.go b/client/client_test.go index 993054d14..168d469d1 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -321,6 +321,7 @@ GET /a } func TestUserAgentForDBR(t *testing.T) { + useragent.DisableRuntimeCaching = true for _, dbrVersion := range []string{"client.0", "client.1", "15.0", "13.3", "14.4"} { t.Run(dbrVersion, func(t *testing.T) { t.Setenv("DATABRICKS_RUNTIME_VERSION", dbrVersion) diff --git a/config/api_client.go b/config/api_client.go index 5dcc432d2..c4702e5ec 100644 --- a/config/api_client.go +++ b/config/api_client.go @@ -6,7 +6,6 @@ import ( "fmt" "net/http" "net/url" - "os" "time" "github.com/databricks/databricks-sdk-go/apierr" @@ -67,11 +66,10 @@ func (c *Config) NewApiClient() (*httpclient.ApiClient, error) { }, func(r *http.Request) error { // Detect if the SDK is being run in a Databricks Runtime. - v, ok := os.LookupEnv("DATABRICKS_RUNTIME_VERSION") - if !ok { + v := useragent.Runtime() + if v == "" { return nil } - v = useragent.Sanitize(v) // Add the detected Databricks Runtime version to the user agent ctx := useragent.InContext(r.Context(), useragent.RuntimeKey, v) *r = *r.WithContext(ctx) // replace request diff --git a/useragent/patterns.go b/useragent/patterns.go index a4dbf0f74..2e225a662 100644 --- a/useragent/patterns.go +++ b/useragent/patterns.go @@ -14,6 +14,8 @@ const ( var regexpSemVer = regexp.MustCompile(`^` + semVerCore + semVerPrerelease + semVerBuildmetadata + `$`) +var regexpAlphanum = regexp.MustCompile(`^[0-9A-Za-z_\.-]+$`) + func matchSemVer(s string) error { if regexpSemVer.MatchString(s) { return nil @@ -21,21 +23,16 @@ func matchSemVer(s string) error { return fmt.Errorf("invalid semver string: %s", s) } -// Alphanumeric characters, hyphen, underscore, and period. This is the subset of -// characters that we allow in user agent keys and values. This is to ensure that -// downstream applications can correctly parse the full user agent header. -// -// NOTE: HTTP headers in general only work well with ASCII characters. see: -// https://stackoverflow.com/questions/4400678/what-character-encoding-should-i-use-for-a-http-header -var validChars = `0-9A-Za-z_\-\.` - -func isValid(s string) bool { - return regexp.MustCompile(`^[` + validChars + `]+$`).MatchString(s) +func matchAlphanum(s string) error { + if regexpAlphanum.MatchString(s) { + return nil + } + return fmt.Errorf("invalid alphanumeric string: %s", s) } -func matchValidChars(s string) error { - if isValid(s) { +func matchAlphanumOrSemVer(s string) error { + if regexpAlphanum.MatchString(s) || regexpSemVer.MatchString(s) { return nil } - return fmt.Errorf("invalid alphanumeric string: %s", s) + return fmt.Errorf("invalid alphanumeric or semver string: %s", s) } diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index d181a76e6..b91dcfe53 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -13,28 +13,21 @@ func TestMatchSemVer(t *testing.T) { assert.Error(t, matchSemVer("1.2")) } -func TestMatchValidChars(t *testing.T) { - assert.NoError(t, matchValidChars("foo")) - assert.True(t, isValid("foo")) - - assert.NoError(t, matchValidChars("FOO")) - assert.True(t, isValid("FOO")) - - assert.NoError(t, matchValidChars("FOO123")) - assert.True(t, isValid("FOO123")) - - assert.NoError(t, matchValidChars("foo_bar")) - assert.True(t, isValid("foo_bar")) - - assert.NoError(t, matchValidChars("foo-bar")) - assert.True(t, isValid("foo-bar")) - - assert.NoError(t, matchValidChars("foo.bar")) - assert.True(t, isValid("foo.bar")) - - assert.Error(t, matchValidChars("foo bar")) - assert.False(t, isValid("foo bar")) +func TestMatchAlphanum(t *testing.T) { + for _, v := range []string{"foo", "FOO", "FOO123", "foo_bar", "foo-bar", "foo.bar"} { + assert.NoError(t, matchAlphanum(v)) + } + + for _, v := range []string{"foo bar", "foo/bar"} { + assert.Error(t, matchAlphanum(v)) + } +} - assert.Error(t, matchValidChars("foo/bar")) - assert.False(t, isValid("foo/bar")) +func TestMatchAlphanumOrSemVer(t *testing.T) { + for _, v := range []string{"foo", "1.2.3", "0.0.0-dev+2e014739024a", "client.0"} { + assert.NoError(t, matchAlphanumOrSemVer(v)) + } + for _, v := range []string{"foo/bar", "1/2/3"} { + assert.Error(t, matchAlphanumOrSemVer(v)) + } } diff --git a/useragent/runtime.go b/useragent/runtime.go new file mode 100644 index 000000000..9124af2c7 --- /dev/null +++ b/useragent/runtime.go @@ -0,0 +1,29 @@ +package useragent + +import ( + "os" + "sync" +) + +var runtimeOnce sync.Once +var runtimeVersion string + +// DisableRuntimeCaching is a flag to disable caching of runtime version. +// This is useful for testing. +var DisableRuntimeCaching bool + +func getRuntimeVersion() string { + v := os.Getenv("DATABRICKS_RUNTIME_VERSION") + return Sanitize(v) +} + +func Runtime() string { + if DisableRuntimeCaching { + return getRuntimeVersion() + } + + runtimeOnce.Do(func() { + runtimeVersion = getRuntimeVersion() + }) + return runtimeVersion +} diff --git a/useragent/user_agent.go b/useragent/user_agent.go index a327a2256..a34d6e025 100644 --- a/useragent/user_agent.go +++ b/useragent/user_agent.go @@ -21,7 +21,7 @@ const ( // WithProduct sets the product name and product version globally. // It should be called by developers to differentiate their application from others. func WithProduct(name, version string) { - if err := matchValidChars(name); err != nil { + if err := matchAlphanum(name); err != nil { panic(err) } if err := matchSemVer(version); err != nil { @@ -106,29 +106,29 @@ func (u info) String() string { type data []info -// Validate the key value pair being set in the user agent. Error if invalid. -func validate(key, value string) error { - if !isValid(key) { - return fmt.Errorf("expected user agent key to be alphanumeric: %q", key) - } - if !isValid(value) { - return fmt.Errorf("expected user agent value for key %q to be alphanumeric: %q", key, value) - } - return nil -} - -// Sanitize the user agent value. This is useful when the value is not ensured to be -// to be valid at compile time. Having this sanitization then ensures downstream -// applications can correctly parse the full user agent header, by making sure -// characters like '/' and ' ' are not present in the value. +// Sanitize replaces all non-alphanumeric characters with a hyphen. Use this to +// ensure that the user agent value is valid. This is useful when the value is not +// ensured to be valid at compile time. +// +// Example: You want to avoid having '/' and ' ' in the value because it will +// make downstream applications fail. +// +// Note: Semver strings are comprised of alphanumeric characters, hyphens, periods +// and plus signs. This function will not remove these characters. +// see: +// 1. https://semver.org/#spec-item-9 +// 2. https://semver.org/#spec-item-10 func Sanitize(s string) string { - return regexp.MustCompile(`[^`+validChars+`]`).ReplaceAllString(s, "-") + return regexp.MustCompile(`[^0-9A-Za-z_\.\+-]`).ReplaceAllString(s, "-") } // With always uses the latest value for a given alphanumeric key. // Panics if key or value don't satisfy alphanumeric or semver format. func (d data) With(key, value string) data { - if err := validate(key, value); err != nil { + if err := matchAlphanum(key); err != nil { + panic(err) + } + if err := matchAlphanumOrSemVer(value); err != nil { panic(err) } var c data diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 94eaf88f0..71a734a9a 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -75,55 +75,19 @@ func TestDefaultsAreValid(t *testing.T) { WithProduct(productName, productVersion) } -func TestUserAgentValidate(t *testing.T) { - assert.EqualError(t, validate("foobar!", ""), "expected user agent key to be alphanumeric: \"foobar!\"") - assert.EqualError(t, validate("foo", "invalid!"), "expected user agent value for key \"foo\" to be alphanumeric: \"invalid!\"") - assert.EqualError(t, validate("foo", "whatever#!@"), "expected user agent value for key \"foo\" to be alphanumeric: \"whatever#!@\"") - - assert.NoError(t, validate("foo", "7.3")) - assert.NoError(t, validate("foo", "client.7")) - assert.NoError(t, validate("foo", "1.1.1")) -} - func TestSanitize(t *testing.T) { - // Valid values - assert.True(t, isValid("foo")) - assert.Equal(t, "foo", Sanitize("foo")) - - assert.True(t, isValid("FOO")) - assert.Equal(t, "FOO", Sanitize("FOO")) - - assert.True(t, isValid("FOO123")) - assert.Equal(t, "FOO123", Sanitize("FOO123")) - - assert.True(t, isValid("foo_bar")) - assert.Equal(t, "foo_bar", Sanitize("foo_bar")) - - assert.True(t, isValid("foo-bar")) - assert.Equal(t, "foo-bar", Sanitize("foo-bar")) - - assert.True(t, isValid("foo.bar")) - assert.Equal(t, "foo.bar", Sanitize("foo.bar")) - - assert.True(t, isValid("1.2.3")) - assert.Equal(t, "1.2.3", Sanitize("1.2.3")) - - assert.True(t, isValid("client.0")) - assert.Equal(t, "client.0", Sanitize("client.0")) - - // Invalid Values, being sanitized correctly. - assert.False(t, isValid("1@2#3?4,5/6!7 8 ")) - assert.Equal(t, "1-2-3-4-5-6-7-8-", Sanitize("1@2#3?4,5/6!7 8 ")) - - assert.False(t, isValid("foo bar")) - assert.Equal(t, "foo-bar", Sanitize("foo bar")) - - assert.False(t, isValid("foo/bar")) - assert.Equal(t, "foo-bar", Sanitize("foo/bar")) - - assert.False(t, isValid("foo:)bar")) - assert.Equal(t, "foo--bar", Sanitize("foo:)bar")) + for _, v := range []string{"foo", "FOO", "FOO123", "foo_bar", "foo-bar", "foo+bar", "foo.bar", "1.2.3", "client.0"} { + assert.Equal(t, v, Sanitize(v)) + } - assert.False(t, isValid("foo😊bar")) - assert.Equal(t, "foo-bar", Sanitize("foo😊bar")) + sanitizeMap := map[string]string{ + "1@2#3?4,5/6!7 8 ": "1-2-3-4-5-6-7-8-", + "foo bar": "foo-bar", + "foo/bar": "foo-bar", + "foo:)bar": "foo--bar", + "foo😊bar": "foo-bar", + } + for k, v := range sanitizeMap { + assert.Equal(t, v, Sanitize(k)) + } } From 45406a5556c98ba3947467ea6c9a5972a48183b5 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 14 Mar 2024 15:04:52 +0100 Subject: [PATCH 17/23] address comments --- client/client_test.go | 49 ++++++++++++++++++------------------ useragent/patterns.go | 18 ++++++++++++- useragent/patterns_test.go | 20 ++++++++++++--- useragent/runtime.go | 8 ------ useragent/runtime_test.go | 26 +++++++++++++++++++ useragent/user_agent.go | 17 ------------- useragent/user_agent_test.go | 10 +++++++- 7 files changed, 92 insertions(+), 56 deletions(-) create mode 100644 useragent/runtime_test.go diff --git a/client/client_test.go b/client/client_test.go index 168d469d1..4489a5462 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -321,34 +321,33 @@ GET /a } func TestUserAgentForDBR(t *testing.T) { - useragent.DisableRuntimeCaching = true - for _, dbrVersion := range []string{"client.0", "client.1", "15.0", "13.3", "14.4"} { - t.Run(dbrVersion, func(t *testing.T) { - t.Setenv("DATABRICKS_RUNTIME_VERSION", dbrVersion) + t.Setenv("DATABRICKS_RUNTIME_VERSION", "client.0") - var userAgent string - c, err := New(&config.Config{ - Host: "some", - Token: "token", - ConfigFile: "/dev/null", - HTTPTransport: hc(func(r *http.Request) (*http.Response, error) { - // Capture the user agent via the round tripper. - userAgent = r.UserAgent() + var userAgent string + c, err := New(&config.Config{ + Host: "some", + Token: "token", + ConfigFile: "/dev/null", + HTTPTransport: hc(func(r *http.Request) (*http.Response, error) { + // Capture the user agent via the round tripper. + userAgent = r.UserAgent() - return &http.Response{ - StatusCode: 200, - Body: io.NopCloser(strings.NewReader(`{}`)), - Request: r, - }, nil - }), - }) - require.NoError(t, err) + return &http.Response{ + StatusCode: 200, + Body: io.NopCloser(strings.NewReader(`{}`)), + Request: r, + }, nil + }), + }) + require.NoError(t, err) - err = c.Do(context.Background(), "GET", "/a", nil, nil, nil) - assert.Contains(t, userAgent, "runtime/"+dbrVersion) - require.NoError(t, err) - }) - } + err = c.Do(context.Background(), "GET", "/a", nil, nil, nil) + + // We don't assert on the value here, because if this test is run inside + // DBR then the value will not match the expected value. This is because the + // value is cached during runtime. + assert.Contains(t, userAgent, "runtime/") + require.NoError(t, err) } func testNonJSONResponseIncludedInError(t *testing.T, statusCode int, status, errorMessage string) { diff --git a/useragent/patterns.go b/useragent/patterns.go index 2e225a662..c1cb21cc6 100644 --- a/useragent/patterns.go +++ b/useragent/patterns.go @@ -14,7 +14,23 @@ const ( var regexpSemVer = regexp.MustCompile(`^` + semVerCore + semVerPrerelease + semVerBuildmetadata + `$`) -var regexpAlphanum = regexp.MustCompile(`^[0-9A-Za-z_\.-]+$`) +// Sanitize replaces all non-alphanumeric characters with a hyphen. Use this to +// ensure that the user agent value is valid. This is useful when the value is not +// ensured to be valid at compile time. +// +// Example: You want to avoid having '/' and ' ' in the value because it will +// make downstream applications fail. +// +// Note: Semver strings are comprised of alphanumeric characters, hyphens, periods +// and plus signs. This function will not remove these characters. +// see: +// 1. https://semver.org/#spec-item-9 +// 2. https://semver.org/#spec-item-10 +var regexpAlphanum = regexp.MustCompile(`^[0-9A-Za-z_\.\+-]+$`) +var regexpAlphanumInverse = regexp.MustCompile(`[^0-9A-Za-z_\.\+-]`) +func Sanitize(s string) string { + return regexpAlphanumInverse.ReplaceAllString(s, "-") +} func matchSemVer(s string) error { if regexpSemVer.MatchString(s) { diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index b91dcfe53..18f0dca70 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -14,20 +14,32 @@ func TestMatchSemVer(t *testing.T) { } func TestMatchAlphanum(t *testing.T) { - for _, v := range []string{"foo", "FOO", "FOO123", "foo_bar", "foo-bar", "foo.bar"} { + for _, v := range []string{"foo", + "FOO", + "FOO123", + "foo_bar", + "foo-bar", + "foo.bar"} { assert.NoError(t, matchAlphanum(v)) } - for _, v := range []string{"foo bar", "foo/bar"} { + for _, v := range []string{ + "foo bar", + "foo/bar"} { assert.Error(t, matchAlphanum(v)) } } func TestMatchAlphanumOrSemVer(t *testing.T) { - for _, v := range []string{"foo", "1.2.3", "0.0.0-dev+2e014739024a", "client.0"} { + for _, v := range []string{"foo", + "1.2.3", + "0.0.0-dev+2e014739024a", + "client.0"} { assert.NoError(t, matchAlphanumOrSemVer(v)) } - for _, v := range []string{"foo/bar", "1/2/3"} { + for _, v := range []string{ + "foo/bar", + "1/2/3"} { assert.Error(t, matchAlphanumOrSemVer(v)) } } diff --git a/useragent/runtime.go b/useragent/runtime.go index 9124af2c7..f94437359 100644 --- a/useragent/runtime.go +++ b/useragent/runtime.go @@ -8,20 +8,12 @@ import ( var runtimeOnce sync.Once var runtimeVersion string -// DisableRuntimeCaching is a flag to disable caching of runtime version. -// This is useful for testing. -var DisableRuntimeCaching bool - func getRuntimeVersion() string { v := os.Getenv("DATABRICKS_RUNTIME_VERSION") return Sanitize(v) } func Runtime() string { - if DisableRuntimeCaching { - return getRuntimeVersion() - } - runtimeOnce.Do(func() { runtimeVersion = getRuntimeVersion() }) diff --git a/useragent/runtime_test.go b/useragent/runtime_test.go new file mode 100644 index 000000000..eff82ae83 --- /dev/null +++ b/useragent/runtime_test.go @@ -0,0 +1,26 @@ +package useragent + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestUserAgentRuntime(t *testing.T) { + for _, tc := range []string{ + "1.2.3", + "0.0.0-dev+2e014739024a", + "client.0", + "foo", + "15.0", + "13.3", + } { + t.Run(tc, func(t *testing.T) { + t.Setenv("DATABRICKS_RUNTIME_VERSION", tc) + + v := getRuntimeVersion() + assert.Equal(t, tc, v) + assert.NoError(t, matchAlphanumOrSemVer(v)) + }) + } +} diff --git a/useragent/user_agent.go b/useragent/user_agent.go index a34d6e025..bd9dc7af5 100644 --- a/useragent/user_agent.go +++ b/useragent/user_agent.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "os" - "regexp" "runtime" "strings" @@ -106,22 +105,6 @@ func (u info) String() string { type data []info -// Sanitize replaces all non-alphanumeric characters with a hyphen. Use this to -// ensure that the user agent value is valid. This is useful when the value is not -// ensured to be valid at compile time. -// -// Example: You want to avoid having '/' and ' ' in the value because it will -// make downstream applications fail. -// -// Note: Semver strings are comprised of alphanumeric characters, hyphens, periods -// and plus signs. This function will not remove these characters. -// see: -// 1. https://semver.org/#spec-item-9 -// 2. https://semver.org/#spec-item-10 -func Sanitize(s string) string { - return regexp.MustCompile(`[^0-9A-Za-z_\.\+-]`).ReplaceAllString(s, "-") -} - // With always uses the latest value for a given alphanumeric key. // Panics if key or value don't satisfy alphanumeric or semver format. func (d data) With(key, value string) data { diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 71a734a9a..52c605159 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -76,7 +76,15 @@ func TestDefaultsAreValid(t *testing.T) { } func TestSanitize(t *testing.T) { - for _, v := range []string{"foo", "FOO", "FOO123", "foo_bar", "foo-bar", "foo+bar", "foo.bar", "1.2.3", "client.0"} { + for _, v := range []string{"foo", + "FOO", + "FOO123", + "foo_bar", + "foo-bar", + "foo+bar", + "foo.bar", + "1.2.3", + "client.0"} { assert.Equal(t, v, Sanitize(v)) } From 171f11e122a3f81668d759baf32c6297ed92e785 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 14 Mar 2024 15:19:39 +0100 Subject: [PATCH 18/23] - --- useragent/patterns_test.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index 18f0dca70..4ce2920ca 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -19,13 +19,15 @@ func TestMatchAlphanum(t *testing.T) { "FOO123", "foo_bar", "foo-bar", - "foo.bar"} { + "foo.bar", + } { assert.NoError(t, matchAlphanum(v)) } for _, v := range []string{ "foo bar", - "foo/bar"} { + "foo/bar", + } { assert.Error(t, matchAlphanum(v)) } } @@ -34,12 +36,14 @@ func TestMatchAlphanumOrSemVer(t *testing.T) { for _, v := range []string{"foo", "1.2.3", "0.0.0-dev+2e014739024a", - "client.0"} { + "client.0", + } { assert.NoError(t, matchAlphanumOrSemVer(v)) } for _, v := range []string{ "foo/bar", - "1/2/3"} { + "1/2/3", + } { assert.Error(t, matchAlphanumOrSemVer(v)) } } From d312b8ab3b542f63dbdcd81837a6326ac74aee2d Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 14 Mar 2024 15:22:44 +0100 Subject: [PATCH 19/23] lint --- useragent/patterns.go | 1 + 1 file changed, 1 insertion(+) diff --git a/useragent/patterns.go b/useragent/patterns.go index c1cb21cc6..307e17b46 100644 --- a/useragent/patterns.go +++ b/useragent/patterns.go @@ -28,6 +28,7 @@ var regexpSemVer = regexp.MustCompile(`^` + semVerCore + semVerPrerelease + semV // 2. https://semver.org/#spec-item-10 var regexpAlphanum = regexp.MustCompile(`^[0-9A-Za-z_\.\+-]+$`) var regexpAlphanumInverse = regexp.MustCompile(`[^0-9A-Za-z_\.\+-]`) + func Sanitize(s string) string { return regexpAlphanumInverse.ReplaceAllString(s, "-") } From b19daed1171840319ee4276215933a592addd149 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 14 Mar 2024 15:25:41 +0100 Subject: [PATCH 20/23] - --- useragent/patterns_test.go | 6 ++++-- useragent/user_agent_test.go | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index 4ce2920ca..9bb815dc7 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -14,7 +14,8 @@ func TestMatchSemVer(t *testing.T) { } func TestMatchAlphanum(t *testing.T) { - for _, v := range []string{"foo", + for _, v := range []string{ + "foo", "FOO", "FOO123", "foo_bar", @@ -33,7 +34,8 @@ func TestMatchAlphanum(t *testing.T) { } func TestMatchAlphanumOrSemVer(t *testing.T) { - for _, v := range []string{"foo", + for _, v := range []string{ + "foo", "1.2.3", "0.0.0-dev+2e014739024a", "client.0", diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 52c605159..549951bd8 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -76,7 +76,8 @@ func TestDefaultsAreValid(t *testing.T) { } func TestSanitize(t *testing.T) { - for _, v := range []string{"foo", + for _, v := range []string{ + "foo", "FOO", "FOO123", "foo_bar", @@ -84,7 +85,8 @@ func TestSanitize(t *testing.T) { "foo+bar", "foo.bar", "1.2.3", - "client.0"} { + "client.0", + } { assert.Equal(t, v, Sanitize(v)) } From 616b1cc945d0a66c8a8e8111e67003fc5286b668 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 14 Mar 2024 16:47:13 +0100 Subject: [PATCH 21/23] Address comments --- client/client_test.go | 80 ++++++++++++++++++++++++++++++++---- useragent/patterns_test.go | 27 ++++++++++++ useragent/runtime.go | 9 ++++ useragent/user_agent_test.go | 27 ------------ 4 files changed, 108 insertions(+), 35 deletions(-) diff --git a/client/client_test.go b/client/client_test.go index 4489a5462..e4f6dc944 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/config" + "github.com/databricks/databricks-sdk-go/internal/env" "github.com/databricks/databricks-sdk-go/useragent" "github.com/databricks/databricks-sdk-go/version" "github.com/stretchr/testify/assert" @@ -320,9 +321,7 @@ GET /a } } -func TestUserAgentForDBR(t *testing.T) { - t.Setenv("DATABRICKS_RUNTIME_VERSION", "client.0") - +func captureUserAgent(t *testing.T) string { var userAgent string c, err := New(&config.Config{ Host: "some", @@ -342,12 +341,77 @@ func TestUserAgentForDBR(t *testing.T) { require.NoError(t, err) err = c.Do(context.Background(), "GET", "/a", nil, nil, nil) - - // We don't assert on the value here, because if this test is run inside - // DBR then the value will not match the expected value. This is because the - // value is cached during runtime. - assert.Contains(t, userAgent, "runtime/") require.NoError(t, err) + + return userAgent +} + +func TestUserAgentForDBR(t *testing.T) { + for _, dbrVersion := range []string{"client.0", "client.1", "15.5", "15.5.0", "13.3"} { + env.CleanupEnvironment(t) + useragent.ClearCache() + + t.Setenv("DATABRICKS_RUNTIME_VERSION", dbrVersion) + userAgent := captureUserAgent(t) + + // The user agent should contain the runtime version. + assert.Contains(t, userAgent, "runtime/"+dbrVersion) + } +} + +func TestUserAgentForCiCd(t *testing.T) { + ciToEnv := map[string]map[string]string{ + "github": { + "GITHUB_ACTIONS": "true", + }, + "gitlab": { + "GITLAB_CI": "true", + }, + "jenkins": { + "JENKINS_URL": "https://jenkins.example.com", + }, + "azure-devops": { + "TF_BUILD": "True", + }, + "circle": { + "CIRCLECI": "true", + }, + "travis": { + "TRAVIS": "true", + }, + "bitbucket": { + "BITBUCKET_BUILD_NUMBER": "123", + }, + "google-cloud-build": { + "PROJECT_ID": "", + "BUILD_ID": "", + "PROJECT_NUMBER": "", + "LOCATION": "", + }, + "aws-code-build": { + "CODEBUILD_BUILD_ARN": "", + }, + "tf-cloud": { + "TFC_RUN_ID": "", + }, + } + + for ci, envVars := range ciToEnv { + t.Run(ci, func(t *testing.T) { + env.CleanupEnvironment(t) + useragent.ClearCache() + + for k, v := range envVars { + t.Setenv(k, v) + } + + userAgent := captureUserAgent(t) + + // The user agent should contain the CI/CD provider. + assert.Contains(t, userAgent, "cicd/"+ci) + }) + } + } func testNonJSONResponseIncludedInError(t *testing.T, statusCode int, status, errorMessage string) { diff --git a/useragent/patterns_test.go b/useragent/patterns_test.go index 9bb815dc7..313281feb 100644 --- a/useragent/patterns_test.go +++ b/useragent/patterns_test.go @@ -49,3 +49,30 @@ func TestMatchAlphanumOrSemVer(t *testing.T) { assert.Error(t, matchAlphanumOrSemVer(v)) } } + +func TestSanitize(t *testing.T) { + for _, v := range []string{ + "foo", + "FOO", + "FOO123", + "foo_bar", + "foo-bar", + "foo+bar", + "foo.bar", + "1.2.3", + "client.0", + } { + assert.Equal(t, v, Sanitize(v)) + } + + sanitizeMap := map[string]string{ + "1@2#3?4,5/6!7 8 ": "1-2-3-4-5-6-7-8-", + "foo bar": "foo-bar", + "foo/bar": "foo-bar", + "foo:)bar": "foo--bar", + "foo😊bar": "foo-bar", + } + for k, v := range sanitizeMap { + assert.Equal(t, v, Sanitize(k)) + } +} diff --git a/useragent/runtime.go b/useragent/runtime.go index f94437359..8116755c3 100644 --- a/useragent/runtime.go +++ b/useragent/runtime.go @@ -5,6 +5,15 @@ import ( "sync" ) +// Clear cached user agent values like the DBR version or the CI/CD provider +// being used. This is useful for testing. +func ClearCache() { + // Reset the sync.Once to their default values. This will recompute the + // values on the next call to Runtime() or CiCdProvider(). + runtimeOnce = sync.Once{} + providerOnce = sync.Once{} +} + var runtimeOnce sync.Once var runtimeVersion string diff --git a/useragent/user_agent_test.go b/useragent/user_agent_test.go index 549951bd8..fda9a02cc 100644 --- a/useragent/user_agent_test.go +++ b/useragent/user_agent_test.go @@ -74,30 +74,3 @@ func TestFromContext_Custom(t *testing.T) { func TestDefaultsAreValid(t *testing.T) { WithProduct(productName, productVersion) } - -func TestSanitize(t *testing.T) { - for _, v := range []string{ - "foo", - "FOO", - "FOO123", - "foo_bar", - "foo-bar", - "foo+bar", - "foo.bar", - "1.2.3", - "client.0", - } { - assert.Equal(t, v, Sanitize(v)) - } - - sanitizeMap := map[string]string{ - "1@2#3?4,5/6!7 8 ": "1-2-3-4-5-6-7-8-", - "foo bar": "foo-bar", - "foo/bar": "foo-bar", - "foo:)bar": "foo--bar", - "foo😊bar": "foo-bar", - } - for k, v := range sanitizeMap { - assert.Equal(t, v, Sanitize(k)) - } -} From 2eebe877b6c8e3e9280cf3cfb5f73a22afabac16 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 15 Mar 2024 10:55:58 +0100 Subject: [PATCH 22/23] added t.Run to the test function --- client/client_test.go | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/client/client_test.go b/client/client_test.go index e4f6dc944..927af6666 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -347,15 +347,23 @@ func captureUserAgent(t *testing.T) string { } func TestUserAgentForDBR(t *testing.T) { - for _, dbrVersion := range []string{"client.0", "client.1", "15.5", "15.5.0", "13.3"} { - env.CleanupEnvironment(t) - useragent.ClearCache() + for _, dbrVersion := range []string{ + "client.0", + "client.1", + "15.5", + "15.5.0", + "13.3", + } { + t.Run(dbrVersion, func(t *testing.T) { + env.CleanupEnvironment(t) + useragent.ClearCache() - t.Setenv("DATABRICKS_RUNTIME_VERSION", dbrVersion) - userAgent := captureUserAgent(t) + t.Setenv("DATABRICKS_RUNTIME_VERSION", dbrVersion) + userAgent := captureUserAgent(t) - // The user agent should contain the runtime version. - assert.Contains(t, userAgent, "runtime/"+dbrVersion) + // The user agent should contain the runtime version. + assert.Contains(t, userAgent, "runtime/"+dbrVersion) + }) } } From bb483837d2ca497c2e27b3886a004bce56141680 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 15 Mar 2024 11:03:16 +0100 Subject: [PATCH 23/23] add cases that need sanitization --- client/client_test.go | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/client/client_test.go b/client/client_test.go index 927af6666..987d8ab9b 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -347,22 +347,29 @@ func captureUserAgent(t *testing.T) string { } func TestUserAgentForDBR(t *testing.T) { - for _, dbrVersion := range []string{ - "client.0", - "client.1", - "15.5", - "15.5.0", - "13.3", + for v, sv := range map[string]string{ + // DBR versions that don't need to be sanitized. + "client.0": "client.0", + "client.1": "client.1", + "15.5": "15.5", + "15.5.0": "15.5.0", + "13.3": "13.3", + + // DBR versions that need to be sanitized. + "foo🧟bar": "foo-bar", + "foo/bar": "foo-bar", + "foo bar": "foo-bar", } { - t.Run(dbrVersion, func(t *testing.T) { + t.Run(v, func(t *testing.T) { env.CleanupEnvironment(t) useragent.ClearCache() - t.Setenv("DATABRICKS_RUNTIME_VERSION", dbrVersion) + t.Setenv("DATABRICKS_RUNTIME_VERSION", v) userAgent := captureUserAgent(t) - // The user agent should contain the runtime version. - assert.Contains(t, userAgent, "runtime/"+dbrVersion) + // The user agent should contain the runtime version, with the value + // sanitized if necessary. + assert.Contains(t, userAgent, "runtime/"+sv) }) } }