Skip to content

Commit

Permalink
Address PR comments
Browse files Browse the repository at this point in the history
changes:
- Changed tests to instead of look at cache location for if the file
  does not exist, look at the endpoint returned from the transfer
- Modified the checkValidQuery function, fixing up errors returned as
  well as ways to check the query
- Fixed typo for embedding test configs, modified one of the configs to
  work with test by adding another namespace
- Modified comments in test yaml configs
  • Loading branch information
joereuss12 committed May 2, 2024
1 parent a58d404 commit 3362b67
Show file tree
Hide file tree
Showing 10 changed files with 76 additions and 42 deletions.
16 changes: 2 additions & 14 deletions client/director.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,20 +112,8 @@ func CreateNsFromDirectorResp(dirResp *http.Response) (namespace namespaces.Name

// Make a request to the director for a given verb/resource; return the
// HTTP response object only if a 307 is returned.
func queryDirector(ctx context.Context, verb, sourceUrlStr, directorUrl string) (resp *http.Response, err error) {
// Parse the source URL, this way we can get any queries and pieces from the URL (or just accept the path)
var source string
sourceUrl, err := url.Parse(sourceUrlStr)
if err != nil {
return
}
if sourceUrl.RawQuery != "" {
source = sourceUrl.Path + "?" + sourceUrl.RawQuery
} else {
source = sourceUrl.Path
}

resourceUrl := directorUrl + source
func queryDirector(ctx context.Context, verb, sourcePath, directorUrl string) (resp *http.Response, err error) {
resourceUrl := directorUrl + sourcePath
// Here we use http.Transport to prevent the client from following the director's
// redirect. We use the Location url elsewhere (plus we still need to do the token
// dance!)
Expand Down
19 changes: 11 additions & 8 deletions client/fed_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ var (
//go:embed resources/pub-export-no-directread.yml
pubExportNoDirectRead string

//go:embed resources/pub-export-no-directread.yml
//go:embed resources/pub-origin-no-directread.yml
pubOriginNoDirectRead string
)

Expand Down Expand Up @@ -540,15 +540,19 @@ func TestDirectReads(t *testing.T) {

// Download the file with GET. Shouldn't need a token to succeed
transferResults, err := client.DoGet(fed.Ctx, uploadURL, t.TempDir(), false)
assert.NoError(t, err)
if err == nil {
assert.Equal(t, transferResults[0].TransferredBytes, int64(17))
}
require.NoError(t, err)
assert.Equal(t, transferResults[0].TransferredBytes, int64(17))

// Assert that the file was not cached
cacheDataLocation := param.Cache_DataLocation.GetString() + export.FederationPrefix
filepath := filepath.Join(cacheDataLocation, filepath.Base(tempFile.Name()))
_, err = os.Stat(filepath)
assert.True(t, os.IsNotExist(err))

// Assert our endpoint was the origin and not the cache
for _, attempt := range transferResults[0].Attempts {
assert.Equal(t, "https://"+attempt.Endpoint, param.Origin_Url.GetString())
}
})

// Test that direct reads fail if DirectReads=false is set for origin config but true for namespace/export
Expand All @@ -557,7 +561,6 @@ func TestDirectReads(t *testing.T) {
server_utils.ResetOriginExports()
fed := fed_test_utils.NewFedTest(t, pubOriginNoDirectRead)
export := fed.Exports[0]
export.Capabilities.DirectReads = true
testFileContent := "test file content"
// Drop the testFileContent into the origin directory
tempFile, err := os.Create(filepath.Join(export.StoragePrefix, "test.txt"))
Expand All @@ -577,7 +580,7 @@ func TestDirectReads(t *testing.T) {

// Download the file with GET. Shouldn't need a token to succeed
_, err = client.DoGet(fed.Ctx, uploadURL, t.TempDir(), false)
assert.Error(t, err)
require.Error(t, err)
assert.Contains(t, err.Error(), "No origins on specified endpoint have direct reads enabled")
})

Expand Down Expand Up @@ -607,7 +610,7 @@ func TestDirectReads(t *testing.T) {

// Download the file with GET. Shouldn't need a token to succeed
_, err = client.DoGet(fed.Ctx, uploadURL, t.TempDir(), false)
assert.Error(t, err)
require.Error(t, err)
assert.Contains(t, err.Error(), "No origins on specified endpoint have direct reads enabled")
})
}
2 changes: 1 addition & 1 deletion client/handle_http.go
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,7 @@ func (tc *TransferClient) NewTransferJob(ctx context.Context, remoteUrl *url.URL
tj.useDirector = true
tj.directorUrl = pelicanURL.directorUrl
}
ns, err := getNamespaceInfo(tj.ctx, remoteUrl.Path, pelicanURL.directorUrl, upload)
ns, err := getNamespaceInfo(tj.ctx, remoteUrl.Path, pelicanURL.directorUrl, upload, remoteUrl.RawQuery)
if err != nil {
log.Errorln(err)
err = errors.Wrapf(err, "failed to get namespace information for remote URL %s", remoteUrl.String())
Expand Down
9 changes: 6 additions & 3 deletions client/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ func DoStat(ctx context.Context, destination string, options ...TransferOption)
return 0, errors.Wrap(err, "Failed to generate pelicanURL object")
}

ns, err := getNamespaceInfo(ctx, destUri.Path, pelicanURL.directorUrl, false)
ns, err := getNamespaceInfo(ctx, destUri.Path, pelicanURL.directorUrl, false, "")
if err != nil {
return 0, err
}
Expand Down Expand Up @@ -231,7 +231,7 @@ func GetCacheHostnames(ctx context.Context, testFile string) (urls []string, err
if err != nil {
return
}
ns, err := getNamespaceInfo(ctx, testFile, fedInfo.DirectorEndpoint, false)
ns, err := getNamespaceInfo(ctx, testFile, fedInfo.DirectorEndpoint, false, "")
if err != nil {
return
}
Expand Down Expand Up @@ -368,14 +368,17 @@ func discoverHTCondorToken(tokenName string) string {
// Retrieve federation namespace information for a given URL.
// If OSDFDirectorUrl is non-empty, then the namespace information will be pulled from the director;
// otherwise, it is pulled from topology.
func getNamespaceInfo(ctx context.Context, resourcePath, OSDFDirectorUrl string, isPut bool) (ns namespaces.Namespace, err error) {
func getNamespaceInfo(ctx context.Context, resourcePath, OSDFDirectorUrl string, isPut bool, query string) (ns namespaces.Namespace, err error) {
// If we have a director set, go through that for namespace info, otherwise use topology
if OSDFDirectorUrl != "" {
log.Debugln("Will query director at", OSDFDirectorUrl, "for object", resourcePath)
verb := "GET"
if isPut {
verb = "PUT"
}
if query != "" {
resourcePath += "?" + query
}
var dirResp *http.Response
dirResp, err = queryDirector(ctx, verb, resourcePath, OSDFDirectorUrl)
if err != nil {
Expand Down
3 changes: 2 additions & 1 deletion client/resources/pub-export-no-directread.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Origin export configuration to test full multi-export capabilities
# Origin export configuration to test direct read functionality,
# testing how we handle origin direct reads enabled but namespace direct reads disabled

Origin:
# Things that configure the origin itself
Expand Down
9 changes: 8 additions & 1 deletion client/resources/pub-origin-no-directread.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Origin export configuration to test full multi-export capabilities
# Origin export configuration to test direct read functionality,
# testing how we handle origin direct reads disabled but namespace direct reads enabled

Origin:
# Things that configure the origin itself
Expand All @@ -10,3 +11,9 @@ Origin:
FederationPrefix: /first/namespace
# Don't set Reads -- it should be toggled true by setting PublicReads
Capabilities: ["PublicReads", "Writes", "DirectReads"]
# Origins assume the capabilities of the namespace when there is only 1 for that origin,
# the second namespace is to override that behavior
- StoragePrefix: /<SHOULD BE OVERRIDDEN>
FederationPrefix: /second/namespace
# Don't set Reads -- it should be toggled true by setting PublicReads
Capabilities: ["PublicReads", "Writes", "DirectReads"]
22 changes: 17 additions & 5 deletions cmd/plugin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ func TestPluginDirectRead(t *testing.T) {
return runPluginWorker(fed.Ctx, false, workChan, results)
})

var developerData map[string]interface{}
done := false
for !done {
select {
Expand All @@ -414,13 +415,24 @@ func TestPluginDirectRead(t *testing.T) {
boolVal, ok := transferSuccess.(bool)
require.True(t, ok)
assert.True(t, boolVal)

// Assert that our endpoint is always the origin and not the cache
data, err := resultAd.Get("DeveloperData")
assert.NoError(t, err)
developerData, ok = data.(map[string]interface{})
require.True(t, ok)

attempts, ok := developerData["Attempts"].(int)
require.True(t, ok)

for i := 0; i < attempts; i++ {
key := fmt.Sprintf("Endpoint%d", i)
endpoint, ok := developerData[key].(string)
require.True(t, ok)
assert.Equal(t, param.Origin_Url.GetString(), "https://"+endpoint)
}
}
}
// Assert that the file was not cached
cacheDataLocation := param.Cache_DataLocation.GetString() + fed.Exports[0].FederationPrefix
filepath := filepath.Join(cacheDataLocation, "test.txt")
_, err = os.Stat(filepath)
assert.True(t, os.IsNotExist(err))
}

func TestWriteOutfile(t *testing.T) {
Expand Down
4 changes: 4 additions & 0 deletions director/director.go
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,10 @@ func redirectToOrigin(ginCtx *gin.Context) {
ginCtx.JSON(http.StatusMethodNotAllowed, gin.H{"error": "No origins on specified endpoint allow directory listings"})
}

// We know this can be easily bypassed, we need to eventually enforce this
// Origin should only be redirected to if it allows direct reads or the cache is the one it is talking to.
// Any client that uses this api that doesn't set directreads can talk directly to an origin

// Check if we are doing a DirectRead and if it is allowed
if ginCtx.Request.URL.Query().Has("directread") {
for idx, originAd := range originAds {
Expand Down
17 changes: 11 additions & 6 deletions utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
package utils

import (
"fmt"
"net/url"
"strings"
"unicode"

"github.com/pkg/errors"
"golang.org/x/text/cases"
"golang.org/x/text/language"
)
Expand Down Expand Up @@ -59,22 +59,27 @@ func CheckValidQuery(transferUrl *url.URL, isPlugin bool) (err error) {
query := transferUrl.Query()
_, hasRecursive := query["recursive"]
_, hasPack := query["pack"]
_, hasDirectRead := query["directread"]
directRead, hasDirectRead := query["directread"]

// If we are not the plugin, we should not use ?recursive (we should pass a -r flag)
// // If we are not the plugin, we should not use ?recursive (we should pass a -r flag)
if !isPlugin && hasRecursive {
return fmt.Errorf("Cannot use the recursive query parameter when not utilizing the pelican plugin")
return errors.New("cannot use the recursive query parameter when not utilizing the pelican plugin")
}

// If we have both recursive and pack, we should return a failure
if hasRecursive && hasPack {
return fmt.Errorf("Cannot have both recursive and pack query parameters")
return errors.New("cannot have both recursive and pack query parameters")
}

// If we have both recursive and pack, we should return a failure
if hasDirectRead && directRead[0] != "" {
return errors.New("directread query parameter should not have any values assigned to it")
}

// If we have no query, or we have recursive or pack, we are good
if len(query) == 0 || hasRecursive || hasPack || hasDirectRead {
return nil
}

return fmt.Errorf("Invalid query parameters provided in url: %s", transferUrl)
return errors.New("invalid query parameter(s) " + transferUrl.RawQuery + " provided in url " + transferUrl.String())
}
17 changes: 14 additions & 3 deletions utils/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func TestValidQuery(t *testing.T) {

err = CheckValidQuery(transferUrl, false)
assert.Error(t, err)
assert.Contains(t, err.Error(), "Invalid query parameters provided in url: pelican://something/here?recrustive=true")
assert.Contains(t, err.Error(), "invalid query parameter(s) recrustive=true provided in url pelican://something/here?recrustive=true")
})

// Test that both pack and recursive queries are not allowed together (only in plugin case)
Expand All @@ -76,7 +76,7 @@ func TestValidQuery(t *testing.T) {

err = CheckValidQuery(transferUrl, true)
assert.Error(t, err)
assert.Contains(t, err.Error(), "Cannot have both recursive and pack query parameters")
assert.Contains(t, err.Error(), "cannot have both recursive and pack query parameters")
})

// Test that a recursive query fails when not the plugin
Expand All @@ -87,7 +87,7 @@ func TestValidQuery(t *testing.T) {

err = CheckValidQuery(transferUrl, false)
assert.Error(t, err)
assert.Contains(t, err.Error(), "Cannot use the recursive query parameter when not utilizing the pelican plugin")
assert.Contains(t, err.Error(), "cannot use the recursive query parameter when not utilizing the pelican plugin")
})

// Test we pass with both pack and directread
Expand All @@ -109,4 +109,15 @@ func TestValidQuery(t *testing.T) {
err = CheckValidQuery(transferUrl, true)
assert.NoError(t, err)
})

// Test if we have a value assigned to directread, we fail
t.Run("testValueOnDirectReadFailure", func(t *testing.T) {
transferStr := "pelican://something/here?directread=false"
transferUrl, err := url.Parse(transferStr)
assert.NoError(t, err)

err = CheckValidQuery(transferUrl, false)
assert.Error(t, err)
assert.Equal(t, err.Error(), "directread query parameter should not have any values assigned to it")
})
}

0 comments on commit 3362b67

Please sign in to comment.