-
Notifications
You must be signed in to change notification settings - Fork 367
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* added ingest command to lakectl to do client-side mapping of objects
- Loading branch information
Showing
17 changed files
with
620 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
31 changes: 30 additions & 1 deletion
31
clients/java/src/main/java/io/treeverse/lakefs/clients/api/model/ObjectStageCreation.java
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
package cmd | ||
|
||
import ( | ||
"strings" | ||
|
||
"github.com/spf13/cobra" | ||
"github.com/treeverse/lakefs/cmd/lakectl/cmd/store" | ||
"github.com/treeverse/lakefs/pkg/api" | ||
) | ||
|
||
// ingestSummaryTemplate is rendered once the object store walk has finished.
// It is fed the summary struct built by ingestCmd and reports the number of
// staged objects (.Objects) and their accumulated size (.Bytes).
const ingestSummaryTemplate = `
Staged {{ .Objects | yellow }} external objects (total of {{ .Bytes | human_bytes | yellow }})
`
|
||
var ingestCmd = &cobra.Command{ | ||
Use: "ingest --from <object store URI> --to <lakeFS path URI> [--dry-run]", | ||
Short: "Ingest objects from an external source into a lakeFS branch (without actually copying them)", | ||
Run: func(cmd *cobra.Command, args []string) { | ||
ctx := cmd.Context() | ||
verbose := MustBool(cmd.Flags().GetBool("verbose")) | ||
dryRun := MustBool(cmd.Flags().GetBool("dry-run")) | ||
from := MustString(cmd.Flags().GetString("from")) | ||
to := MustString(cmd.Flags().GetString("to")) | ||
lakefsURI := MustParsePathURI("to", to) | ||
|
||
summary := struct { | ||
Objects int64 | ||
Bytes int64 | ||
}{} | ||
client := getClient() | ||
err := store.Walk(ctx, from, func(e store.ObjectStoreEntry) error { | ||
if dryRun { | ||
Fmt("%s\n", e) | ||
return nil | ||
} | ||
key := e.RelativeKey | ||
if lakefsURI.Path != nil && *lakefsURI.Path != "" { | ||
path := *lakefsURI.Path | ||
if strings.HasSuffix(*lakefsURI.Path, "/") { | ||
key = path + key | ||
} else { | ||
key = path + "/" + key | ||
} | ||
} | ||
mtime := e.Mtime.Unix() | ||
resp, err := client.StageObjectWithResponse(ctx, | ||
lakefsURI.Repository, | ||
lakefsURI.Ref, | ||
&api.StageObjectParams{ | ||
Path: key, | ||
}, | ||
api.StageObjectJSONRequestBody{ | ||
Checksum: e.ETag, | ||
Mtime: &mtime, | ||
PhysicalAddress: e.Address, | ||
SizeBytes: e.Size, | ||
}, | ||
) | ||
DieOnResponseError(resp, err) | ||
if verbose { | ||
Write("Staged "+fsStatTemplate+"\n", resp.JSON201) | ||
} | ||
summary.Objects += 1 | ||
summary.Bytes += api.Int64Value(resp.JSON201.SizeBytes) | ||
return nil | ||
}) | ||
if err != nil { | ||
DieFmt("error walking object store: %v", err) | ||
} | ||
|
||
// print summary | ||
Write(ingestSummaryTemplate, summary) | ||
}, | ||
} | ||
|
||
//nolint:gochecknoinits | ||
func init() { | ||
ingestCmd.Flags().String("from", "", "prefix to read from (e.g. \"s3://bucket/sub/path/\")") | ||
_ = ingestCmd.MarkFlagRequired("from") | ||
ingestCmd.Flags().String("to", "", "lakeFS path to load objects into (e.g. \"lakefs://repo/branch/sub/path/\")") | ||
_ = ingestCmd.MarkFlagRequired("to") | ||
ingestCmd.Flags().Bool("dry-run", false, "only print the paths to be ingested") | ||
ingestCmd.Flags().BoolP("verbose", "v", false, "print stats for each individual object staged") | ||
rootCmd.AddCommand(ingestCmd) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
package store | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"fmt" | ||
"net/url" | ||
"os" | ||
"strings" | ||
|
||
"github.com/Azure/azure-pipeline-go/pipeline" | ||
"github.com/Azure/azure-storage-blob-go/azblob" | ||
) | ||
|
||
var (
	// ErrAzureInvalidURL is wrapped by extractAzurePrefix when the storage
	// URL carries no container in its path.
	ErrAzureInvalidURL  = errors.New("invalid Azure storage URL")
	// ErrAzureCredentials is wrapped by GetAzureClient when the storage
	// account name or access key environment variable is empty.
	ErrAzureCredentials = errors.New("azure credentials error")
)
|
||
func GetAzureClient() (pipeline.Pipeline, error) { | ||
// From the Azure portal, get your storage account name and key and set environment variables. | ||
accountName, accountKey := os.Getenv("AZURE_STORAGE_ACCOUNT"), os.Getenv("AZURE_STORAGE_ACCESS_KEY") | ||
if len(accountName) == 0 || len(accountKey) == 0 { | ||
return nil, fmt.Errorf("%w: either the AZURE_STORAGE_ACCOUNT or AZURE_STORAGE_ACCESS_KEY environment variable is not set", ErrAzureCredentials) | ||
} | ||
|
||
// Create a default request pipeline using your storage account name and account key. | ||
credential, err := azblob.NewSharedKeyCredential(accountName, accountKey) | ||
if err != nil { | ||
return nil, fmt.Errorf("invalid credentials with error: %w", err) | ||
} | ||
return azblob.NewPipeline(credential, azblob.PipelineOptions{}), nil | ||
} | ||
|
||
// AzureBlobWalker enumerates the blobs of an Azure Blob Storage container and
// hands each one to a callback (see Walk).
type AzureBlobWalker struct {
	// client is the request pipeline passed to azblob.NewContainerURL in Walk.
	client pipeline.Pipeline
}
|
||
// extractAzurePrefix takes a URL that looks like this: https://storageaccount.blob.core.windows.net/container/prefix | ||
// and return the URL for the container and a prefix, if one exists | ||
func extractAzurePrefix(storageURI *url.URL) (*url.URL, string, error) { | ||
path := strings.TrimLeft(storageURI.Path, "/") | ||
if len(path) == 0 { | ||
return nil, "", fmt.Errorf("%w: could not parse container URL: %s", ErrAzureInvalidURL, storageURI) | ||
} | ||
parts := strings.SplitN(path, "/", 2) | ||
if len(parts) == 1 { | ||
// we only have a container | ||
return storageURI, "", nil | ||
} | ||
// we have both prefix and storage container, rebuild URL | ||
relativePath := url.URL{Path: "/" + parts[0]} | ||
return storageURI.ResolveReference(&relativePath), parts[1], nil | ||
} | ||
|
||
// getAzureBlobURL returns the URL of the blob named blobName inside the
// container addressed by containerURL. The blob path is the container path
// followed by "/" and the blob name, resolved as a reference against
// containerURL; containerURL itself is not modified.
func getAzureBlobURL(containerURL *url.URL, blobName string) *url.URL {
	blobPath := containerURL.Path + "/" + blobName
	return containerURL.ResolveReference(&url.URL{Path: blobPath})
}
|
||
func (a *AzureBlobWalker) Walk(ctx context.Context, storageURI *url.URL, walkFn func(e ObjectStoreEntry) error) error { | ||
// we use bucket as container and prefix as path | ||
containerURL, prefix, err := extractAzurePrefix(storageURI) | ||
if err != nil { | ||
return err | ||
} | ||
container := azblob.NewContainerURL(*containerURL, a.client) | ||
for marker := (azblob.Marker{}); marker.NotDone(); { | ||
listBlob, err := container.ListBlobsFlatSegment(ctx, marker, azblob.ListBlobsSegmentOptions{Prefix: prefix}) | ||
if err != nil { | ||
return err | ||
} | ||
marker = listBlob.NextMarker | ||
for _, blobInfo := range listBlob.Segment.BlobItems { | ||
if err := walkFn(ObjectStoreEntry{ | ||
FullKey: blobInfo.Name, | ||
RelativeKey: strings.TrimPrefix(blobInfo.Name, prefix), | ||
Address: getAzureBlobURL(containerURL, blobInfo.Name).String(), | ||
ETag: string(blobInfo.Properties.Etag), | ||
Mtime: blobInfo.Properties.LastModified, | ||
Size: *blobInfo.Properties.ContentLength, | ||
}); err != nil { | ||
return err | ||
} | ||
} | ||
} | ||
return nil | ||
} |
Oops, something went wrong.