From 3cda4d83ee4e5664995016e1be6773b70c878fe6 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Wed, 9 Sep 2020 18:40:33 +0300 Subject: [PATCH 01/39] docs: bucket creation - improvement (#592) --- docs/deploying/bucket.md | 56 +++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/docs/deploying/bucket.md b/docs/deploying/bucket.md index 1fa97253ae5..af73460f02a 100644 --- a/docs/deploying/bucket.md +++ b/docs/deploying/bucket.md @@ -26,44 +26,34 @@ The path under the existing bucket should be empty. { "Sid": "Stmt1590051522178", "Action": [ - "s3:GetObject", - "s3:GetObjectVersion", - "s3:PutObject", - "s3:DeleteObject", - "s3:DeleteObjectVersion", - "s3:AbortMultipartUpload", - "s3:ListMultipartUploadParts" + "s3:GetObject", + "s3:GetObjectVersion", + "s3:PutObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:GetBucketVersioning", + "s3:ListBucket", + "s3:GetBucketLocation", + "s3:ListBucketMultipartUploads", + "s3:ListBucketVersions" ], "Effect": "Allow", - "Resource": [ - "arn:aws:s3:::/*" - ], + "Resource": ["arn:aws:s3:::", "arn:aws:s3:::/*"], "Principal": { - "AWS": [ - "arn:aws:iam:::role/" - ] - } - }, { - "Sid": "Stmt1590051522178", - "Action": [ - "s3:GetBucketVersioning", - "s3:ListBucket", - "s3:GetBucketLocation", - "s3:ListBucketMultipartUploads", - "s3:ListBucketVersions" - ], - "Effect": "Allow", - "Resource": [ - "arn:aws:s3:::" - ], - "Principal": { - "AWS": [ - "arn:aws:iam:::role/" - ] + "AWS": ["arn:aws:iam:::role/"] } } ] - } + } ``` - Replace ``, `` and `` with values relevant to your environment. + Replace ``, `` and `` with values relevant to your environment. + `IAM_ROLE` should be the role assumed by your lakeFS installation. + + Alternatively, if you use an AWS user's key-pair to authenticate lakeFS to AWS, change the policy's Principal to be the user: + + ```json + "Principal": { + "AWS": ["arn:aws:iam:::user/"] + } + ``` From de3fa26c772bc8b572c93c3287f7d077db1df7e0 Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Thu, 10 Sep 2020 10:32:48 +0100 Subject: [PATCH 02/39] Update faq.md (#582) * Update faq.md Added a question on how do we compare to Delta / Hudi / Iceberg * Update docs/faq.md Co-authored-by: johnnyaug Co-authored-by: johnnyaug --- docs/faq.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/faq.md b/docs/faq.md index 393d3a28dca..3646413cd03 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -34,7 +34,10 @@ The data we gather is limited to the following: 1. metadata regarding the database used (version, installed extensions and parameters such as DB Timezone and work memory) 1. periodic aggregated action counters (i.e. how many "get_object" operations occurred). -### 7. What inspired your logo? +### 7. What is the difference between lakeFS / Delta Lake / Hudi / Iceberg? +We support different use-cases. Delta Lake, Hudi and Iceberg all define dedicated, structured data formats that allow deletes and upserts. lakeFS is format-agnostic and enables consistent cross-collection versioning of your data using git-like operations. Read our [blog](https://lakefs.io/2020/08/10/data-versioning/) for a more detailed comparison. + +### 8. What inspired your logo? The [Axolotl](https://en.wikipedia.org/wiki/Axolotl){: target="_blank" } – a species of salamander, also known as the Mexican Lake Monster or the Peter Pan of the animal kingdom. It's a magical creature, living in a lake, just like us :-). 
![Axolotl](https://upload.wikimedia.org/wikipedia/commons/f/f6/AxolotlBE.jpg) From d1a2423f5ae8c3b35ca15b786946d3ff378264e8 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Thu, 10 Sep 2020 12:52:35 +0300 Subject: [PATCH 03/39] 561 docs snippets copy button (#590) * WIP * copy button - docs * revert swagger.yml * newline eof --- docs/_includes/head_custom.html | 6 ++++++ docs/_layouts/default.html | 1 - docs/_sass/custom/custom.scss | 24 ++++++++++++++++++++++++ docs/assets/js/copy-code.js | 19 +++++++++++++++++++ 4 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 docs/assets/js/copy-code.js diff --git a/docs/_includes/head_custom.html b/docs/_includes/head_custom.html index 46326cd659b..65efbc32bca 100644 --- a/docs/_includes/head_custom.html +++ b/docs/_includes/head_custom.html @@ -6,3 +6,9 @@ })(window,document,'script','dataLayer','GTM-KTZBZW9'); + + + + diff --git a/docs/_layouts/default.html b/docs/_layouts/default.html index 49888973b42..615c64ca8a2 100644 --- a/docs/_layouts/default.html +++ b/docs/_layouts/default.html @@ -6,7 +6,6 @@ {% include head.html %} - {% include gtag_frame.html %} diff --git a/docs/_sass/custom/custom.scss b/docs/_sass/custom/custom.scss index 7a3207f27b1..9a26f95d6c0 100644 --- a/docs/_sass/custom/custom.scss +++ b/docs/_sass/custom/custom.scss @@ -717,3 +717,27 @@ footer { } } } +.copy-code-container { + position: absolute; + right: 0.3em; +} +.copy-code-button:hover { + color: $teal-green; +} + +.copy-code-button:active { + color: $white; +} +.copy-code-button { + align-items: center; + justify-content: center; + border: none; + cursor: pointer; + font-size: 1rem; + color: $black; + padding: 0.4em 0.5em; +} + +.highlighter-rouge { + position: relative; +} diff --git a/docs/assets/js/copy-code.js b/docs/assets/js/copy-code.js new file mode 100644 index 00000000000..f07eb6a25d0 --- /dev/null +++ b/docs/assets/js/copy-code.js @@ -0,0 +1,19 @@ +$(() => { + let copyCodeContainer = $("
" + + "
"); + $("div.highlighter-rouge").prepend(copyCodeContainer); + $("div.highlighter-rouge .copy-code-button").click(function() { + const tempTextArea = document.createElement('textarea'); + console.log($(this)) + tempTextArea.textContent = $(this).parent().parent().find("code").text() + document.body.appendChild(tempTextArea); + const selection = document.getSelection(); + selection.removeAllRanges(); + tempTextArea.select(); + document.execCommand('copy'); + selection.removeAllRanges(); + document.body.removeChild(tempTextArea); + }); +}); From 8a42dd0915cda169b95fc6831f1162af73076ae5 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Thu, 10 Sep 2020 14:16:53 +0300 Subject: [PATCH 04/39] Improvment/docs copy button tweaks (#597) * docs fixes: - chrome css fix - remove all leading "$" from bash commands - comment out output of bash commands * comment out python outputs * fix typo --- docs/_sass/custom/custom.scss | 2 + docs/branching/recommendations.md | 178 +++++++++++++------------ docs/contributing.md | 10 +- docs/deploying/install.md | 28 ++-- docs/deploying/offboarding.md | 28 ++-- docs/quickstart/aws_cli.md | 21 +-- docs/quickstart/installing.md | 4 +- docs/quickstart/lakefs_cli.md | 47 ++++--- docs/quickstart/other_installations.md | 2 +- docs/reference/commands.md | 11 +- docs/using/aws_cli.md | 32 ++--- docs/using/boto.md | 33 ++--- docs/using/glue_hive_metastore.md | 22 +-- docs/using/presto.md | 2 +- docs/using/python.md | 101 +++++++------- docs/using/rclone.md | 43 +++--- 16 files changed, 300 insertions(+), 264 deletions(-) diff --git a/docs/_sass/custom/custom.scss b/docs/_sass/custom/custom.scss index 9a26f95d6c0..2706e7dbd33 100644 --- a/docs/_sass/custom/custom.scss +++ b/docs/_sass/custom/custom.scss @@ -736,6 +736,8 @@ footer { font-size: 1rem; color: $black; padding: 0.4em 0.5em; + background-color: transparent; + outline:none; } .highlighter-rouge { diff --git a/docs/branching/recommendations.md b/docs/branching/recommendations.md index f4163d05abf..33f53b5f4f3 100644 --- a/docs/branching/recommendations.md +++ b/docs/branching/recommendations.md @@ -34,10 +34,11 @@ Batch jobs in production require the following guarantees: 1. We'll start by creating a branch for this pipeline: ```shell - $ lakectl branch create \ - lakefs://example-repo@job-raw-data-grouping \ - --source lakefs://example-repo@main - created branch 'job-raw-data-grouping', pointing to commit ID: '~79RU9aUsQ9GLnU' + lakectl branch create \ + lakefs://example-repo@job-raw-data-grouping \ + --source lakefs://example-repo@main + # output: + # created branch 'job-raw-data-grouping', pointing to commit ID: '~79RU9aUsQ9GLnU' ``` 1. Now, let's change our code to use this branch. Assuming this code reads and writes from S3, this is simple: @@ -48,29 +49,31 @@ Batch jobs in production require the following guarantees: 1. In case of a failure, let's remove whatever intermediate state Spark might have left behind. We do this by simply reverting all uncommitted data: ```shell - $ lakectl branch revert lakefs://example-repo@job-raw-data-grouping - are you sure you want to revert all uncommitted changes?: y█ + lakectl branch revert lakefs://example-repo@job-raw-data-grouping + # are you sure you want to revert all uncommitted changes?: y█ ``` 1. 
Otherwise, if our job ended successfully, let's make our new data available to readers by committing and merging to main: For our commit, let's also add the Git commit hash for the job's source code and other metadata for reference: ```shell - $ lakectl commit lakefs://example-repo@job-raw-data-grouping \ - -m 'raw data grouping for 01/01/2020' \ - --meta job_commit_hash=501e31a67 \ - --meta airflow_run_url=http://... \ - --meta spark_version=2.4.6 - Commit for branch "job-raw-data-grouping" done. - - ID: ~79RU9aUsQ9GLnU - Timestamp: 2020-01-01 12:00:00 +0000 UTC - Parents: ~43aP3nUrR17LcX + lakectl commit lakefs://example-repo@job-raw-data-grouping \ + -m 'raw data grouping for 01/01/2020' \ + --meta job_commit_hash=501e31a67 \ + --meta airflow_run_url=http://... \ + --meta spark_version=2.4.6 + # output: + # Commit for branch "job-raw-data-grouping" done. + # + # ID: ~79RU9aUsQ9GLnU + # Timestamp: 2020-01-01 12:00:00 +0000 UTC + # Parents: ~43aP3nUrR17LcX ``` 1. Once committed, we can now atomically merge this commit to main: ```shell - $ lakectl merge lakefs://example-repo@job-raw-data-grouping lakefs://example-repo@main - new: 65 modified: 0 removed: 0 + lakectl merge lakefs://example-repo@job-raw-data-grouping lakefs://example-repo@main + # output: + # new: 65 modified: 0 removed: 0 ``` 1. That's it. All output created by our job is now merged into our main branch and available to readers @@ -97,24 +100,27 @@ In production data pipelines, we require the following guarantees: 1. Let's take the previous example and expand it a little. Instead of `job` branches that are derived from `main`, let's add an intermediate `pipeline` branch. ```shell - $ lakectl branch create \ - lakefs://example-repo@pipeline-raw-data-grouping \ - --source lakefs://example-repo@main - created branch 'pipeline-raw-data-grouping', pointing to commit ID: '~43aP3nUrR17LcX' + lakectl branch create \ + lakefs://example-repo@pipeline-raw-data-grouping \ + --source lakefs://example-repo@main + # output: + # created branch 'pipeline-raw-data-grouping', pointing to commit ID: '~43aP3nUrR17LcX' ``` 1. Now, for each job that takes part in the pipeline, we'll create a `job` branch that is **derived from the `pipline` branch**: ```shell - $ lakectl branch create \ - lakefs://example-repo@job-raw-data-grouping-by-user \ - --source lakefs://example-repo@pipeline-raw-data-grouping - created branch 'pipeline-raw-data-grouping', pointing to commit ID: '~43aP3nUrR17LcX' + lakectl branch create \ + lakefs://example-repo@job-raw-data-grouping-by-user \ + --source lakefs://example-repo@pipeline-raw-data-grouping + # output: + # created branch 'pipeline-raw-data-grouping', pointing to commit ID: '~43aP3nUrR17LcX' ``` 1. Once we have a job branch, we can run our jobs, validate and commit our output as we did in the previous section. 1. Only when all jobs have completed - and all their output has been merged to the `pipeline` branch, we can merge it into `main`: ```shell - $ lakectl merge lakefs://example-repo@pipeline-raw-data-grouping lakefs://example-repo@main - new: 542 modified: 0 removed: 0 + lakectl merge lakefs://example-repo@pipeline-raw-data-grouping lakefs://example-repo@main + # output: + # new: 542 modified: 0 removed: 0 ``` ## Use case #3 - Safe data stream ingestion @@ -142,10 +148,11 @@ When streaming data into a data lake, we require the following guarantees: 1. 
Let's create a branch for our consumer: ```shell - $ lakectl branch create \ - lakefs://example-repo@consumer-raw-data \ - --source lakefs://example-repo@main - created branch 'consumer-raw-data', pointing to commit ID: '~79RU9aUsQ9GLnU' + lakectl branch create \ + lakefs://example-repo@consumer-raw-data \ + --source lakefs://example-repo@main + # output: + # created branch 'consumer-raw-data', pointing to commit ID: '~79RU9aUsQ9GLnU' ``` 1. Let's change our consumer to write to the new branch: @@ -156,15 +163,16 @@ When streaming data into a data lake, we require the following guarantees: 1. Now that parquet files are written to our new branch, we want to commit periodically. This will allow us to rewind safely: ```shell - $ lakectl commit lakefs://example-repo@consumer-raw-data \ - -m 'raw data consumer checkpoint' \ - --meta kafka_committed_offset= \ - --meta confluent_platform_version=5.5 - Commit for branch "consumer-raw-data" done. - - ID: ~79RU9aUsQ9GLnU - Timestamp: 2020-01-01 12:00:00 +0000 UTC - Parents: ~43aP3nUrR17LcX + lakectl commit lakefs://example-repo@consumer-raw-data \ + -m 'raw data consumer checkpoint' \ + --meta kafka_committed_offset= \ + --meta confluent_platform_version=5.5 + # output: + # Commit for branch "consumer-raw-data" done. + # + # ID: ~79RU9aUsQ9GLnU + # Timestamp: 2020-01-01 12:00:00 +0000 UTC + # Parents: ~43aP3nUrR17LcX ``` Take note that `` represents the latest committed offset, which also represents the latest offset that exists in our branch. @@ -175,25 +183,26 @@ When streaming data into a data lake, we require the following guarantees: 1. Look at the commit history and pick the latest known commit that was valid ```shell - $ lakectl log lakefs://example-repo@ - commit ~43aP3nUrR17LcX - Author: rawDataConsumer - Date: 2020-07-20 12:00:00 +0000 UTC - - raw data consumer checkpoint - - kafka_committed_offset = ... - confluent_platform_version = 5.5 - - commit ~79RU9aUsQ9GLnU - Author: rawDataConsumer - Date: 2020-07-20 11:00:00 +0000 UTC + lakectl log lakefs://example-repo@ + # output: + # commit ~43aP3nUrR17LcX + # Author: rawDataConsumer + # Date: 2020-07-20 12:00:00 +0000 UTC + # + # raw data consumer checkpoint + # + # kafka_committed_offset = ... + # confluent_platform_version = 5.5 + # + # commit ~79RU9aUsQ9GLnU + # Author: rawDataConsumer + # Date: 2020-07-20 11:00:00 +0000 UTC ... ``` 1. Reset our branch to that commit: ```shell - $ lakectl branch revert lakefs://example-repo@consumer-raw-data --commit ~79RU9aUsQ9GLnU + lakectl branch revert lakefs://example-repo@consumer-raw-data --commit ~79RU9aUsQ9GLnU ``` 1. Take the `kafka_committed_offset` metadata from the commit, and reset our Kafka Consumer Group offset to that value 1. In case we're happy with the changes, we can decide how we want to expose new data to readers: @@ -243,25 +252,27 @@ Data science requires experimentation - We want to adjust a model or refine hype 1. Create `experiment` branches derived from the main branch: ```shell - $ lakectl branch create \ - lakefs://example-repo@exp-cnn-tests \ - --source lakefs://example-repo@main - created branch 'exp-cnn-tests', pointing to commit ID: '~43aP3nUrR17LcX' + lakectl branch create \ + lakefs://example-repo@exp-cnn-tests \ + --source lakefs://example-repo@main + # output: + # created branch 'exp-cnn-tests', pointing to commit ID: '~43aP3nUrR17LcX' ``` 1. 
Run the desired algorithm, committing the results along with the parameters used: ```shell - $ lakectl commit lakefs://example-repo@exp-cnn-tests \ - -m 'trying tensorflow cnn' \ - --meta tf_cnn_param_a=1 \ - --meta tf_cnn_param_b=2 \ - --meta tf_version=2.3.0 \ - --meta algo_git_hash=4d55f2e372 - Commit for branch "exp-cnn-tests" done. - - ID: ~79RU9aUsQ9GLnU - Timestamp: 2020-01-01 12:00:00 +0000 UTC - Parents: ~43aP3nUrR17LcX + lakectl commit lakefs://example-repo@exp-cnn-tests \ + -m 'trying tensorflow cnn' \ + --meta tf_cnn_param_a=1 \ + --meta tf_cnn_param_b=2 \ + --meta tf_version=2.3.0 \ + --meta algo_git_hash=4d55f2e372 + # output: + # Commit for branch "exp-cnn-tests" done. + # + # ID: ~79RU9aUsQ9GLnU + # Timestamp: 2020-01-01 12:00:00 +0000 UTC + # Parents: ~43aP3nUrR17LcX ``` 1. By being able to address different commits directly, we can compare results and experiment with the generated models easily. To read from a specific commit we can pass its ID instead of the branch name when calling S3: @@ -278,8 +289,9 @@ While snapshot isolation is a desired attribute, and ensures data doesn't change In lakeFS this is done by merging in the opposite direction - from the main branch into our experiment branch: ```shell -$ lakectl merge lakefs://example-repo@main lakefs://example-repo@exp-cnn-tests -new: 2592 modified: 12 removed: 1439 +lakectl merge lakefs://example-repo@main lakefs://example-repo@exp-cnn-tests +# output: +# new: 2592 modified: 12 removed: 1439 ``` ## Use case #5 - Ad-hoc exploration and experimentation @@ -299,23 +311,25 @@ For this, the following guarantees are required: 1. Start by creating a branch for the given user ```shell - $ lakectl branch create \ - lakefs://example-repo@user-janedoe \ - --source lakefs://example-repo@main - created branch 'user-janedoe', pointing to commit ID: '~79RU9aUsQ9GLnU' + lakectl branch create \ + lakefs://example-repo@user-janedoe \ + --source lakefs://example-repo@main + # output: + # created branch 'user-janedoe', pointing to commit ID: '~79RU9aUsQ9GLnU' ``` 1. Run whatever we want in our isolated branch by reading and writing from `s3://example-repo/user-janedoe/collections/...` 1. When we're done, we can throw away this branch ```shell - $ lakectl branch revert lakefs://example-repo@user-janedoe - Are you sure you want to revert all uncommitted changes?: y - $ lakectl branch delete lakefs://example-repo@user-janedoe - Are you sure you want to delete branch?: y + lakectl branch revert lakefs://example-repo@user-janedoe + # Are you sure you want to revert all uncommitted changes?: y + lakectl branch delete lakefs://example-repo@user-janedoe + # Are you sure you want to delete branch?: y ``` 1. Alternatively, if we do want to keep our branch around, but want to see up to date data, we can merge main into our user branch ```shell - $ lakectl merge lakefs://example-repo@main lakefs://example-repo@user-janedoe - new: 1927 modified: 3 removed: 782 + lakectl merge lakefs://example-repo@main lakefs://example-repo@user-janedoe + # output: + # new: 1927 modified: 3 removed: 782 ``` diff --git a/docs/contributing.md b/docs/contributing.md index fb1cb9ed362..9440cd949e7 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -41,7 +41,7 @@ Working on your first Pull Request? You can learn how from this free series, [Ho 1. Install statik: ```shell - $ go get github.com/rakyll/statik + go get github.com/rakyll/statik ``` Make sure `(go env GOPATH)/bin` is in your `$PATH` (or at least, that the `statik` binary is). 
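    After installing, the `statik` binary lands in `$(go env GOPATH)/bin`. If that directory is not already on your `PATH`, one way to add it for the current shell session (a minimal sketch, assuming a bash-compatible shell) is:

    ```shell
    # prepend the Go bin directory to PATH so the statik binary can be found
    export PATH="$(go env GOPATH)/bin:$PATH"
    ```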
@@ -50,13 +50,13 @@ Working on your first Pull Request? You can learn how from this free series, [Ho 1. Build the project: ```shell - $ make build + make build ``` 1. Make sure tests are passing: ```shell - $ make test + make test ``` ## Before creating a pull request @@ -72,13 +72,13 @@ Working on your first Pull Request? You can learn how from this free series, [Ho Once you've made the necessary changes to the code, make sure tests pass: ```shell - $ make test + make test ``` Check linting rules are passing: ```shell - $ make checks-validator + make checks-validator ``` lakeFS uses [go fmt](https://golang.org/cmd/gofmt/) as a style guide for Go code. diff --git a/docs/deploying/install.md b/docs/deploying/install.md index e3c325b26ac..744800a5423 100644 --- a/docs/deploying/install.md +++ b/docs/deploying/install.md @@ -87,11 +87,11 @@ gateways: Depending on your runtime environment, running lakeFS using docker would look like this: ```sh -$ docker run \ - --name lakefs \ - -p 8000:8000 \ - -v :/home/lakefs/.lakefs.yaml \ - treeverse/lakefs:latest run +docker run \ + --name lakefs \ + -p 8000:8000 \ + -v :/home/lakefs/.lakefs.yaml \ + treeverse/lakefs:latest run ``` ## Fargate and other container-based environments @@ -101,14 +101,14 @@ Some environments make it harder to use a configuration file, and are best confi Here is an example of running lakeFS using environment variables. See the [reference](../reference/configuration.md#using-environment-variables) for the full list of configurations. ```sh -$ docker run \ - --name lakefs \ - -p 8000:8000 \ - -e LAKEFS_DATABASE_CONNECTION_STRING="postgres://user:pass@..." \ - -e LAKEFS_AUTH_ENCRYPT_SECRET_KEY="" \ - -e LAKEFS_BLOCKSTORE_TYPE="s3" \ - -e LAKEFS_GATEWAYS_S3_DOMAIN_NAME="s3.lakefs.example.com" \ - treeverse/lakefs:latest run +docker run \ + --name lakefs \ + -p 8000:8000 \ + -e LAKEFS_DATABASE_CONNECTION_STRING="postgres://user:pass@..." \ + -e LAKEFS_AUTH_ENCRYPT_SECRET_KEY="" \ + -e LAKEFS_BLOCKSTORE_TYPE="s3" \ + -e LAKEFS_GATEWAYS_S3_DOMAIN_NAME="s3.lakefs.example.com" \ + treeverse/lakefs:latest run ``` ## AWS EC2 @@ -119,5 +119,5 @@ Alternatively, you can run lakeFS directly on an EC2 instance: 2. `lakefs` is a single binary, you can run it directly, but preferably run it as a service using systemd or your operating system's facilities. ```bash - $ lakefs --config run + lakefs --config run ``` \ No newline at end of file diff --git a/docs/deploying/offboarding.md b/docs/deploying/offboarding.md index be9a4cad05e..350ce750496 100644 --- a/docs/deploying/offboarding.md +++ b/docs/deploying/offboarding.md @@ -36,11 +36,11 @@ assuming the underlying S3 bucket is intact. Here's how to do it: To generate a manifest, connect to the PostgreSQL instance used by lakeFS and run the following command: ```shell - $ psql \ - --var "repository_name=repo1" \ - --var "branch_name=master" \ - --var "dst_bucket_name=bucket1" \ - postgres < create-extraction-manifest.sql > manifest.csv + psql \ + --var "repository_name=repo1" \ + --var "branch_name=master" \ + --var "dst_bucket_name=bucket1" \ + postgres < create-extraction-manifest.sql > manifest.csv ``` You can download the `create-extraction-manifest.sql` script from the [lakeFS GitHub repository](https://github.com/treeverse/lakeFS/blob/master/scripts/create-extraction-manifest.sql){: target="_blank" }. @@ -51,8 +51,8 @@ assuming the underlying S3 bucket is intact. Here's how to do it: 1. Copy the manifest to S3. 
Once copied, keep note of its etag - we'll need this to run the copy batch job: ```shell - $ cp /path/to/manifest.csv s3://my-bucket/path/to/manifest.csv - $ aws s3api head-object --bucket my-bucket --key path/to-manifest/csv | jq -r .ETag # Or look for ETag in the output + cp /path/to/manifest.csv s3://my-bucket/path/to/manifest.csv + aws s3api head-object --bucket my-bucket --key path/to-manifest/csv | jq -r .ETag # Or look for ETag in the output ``` 1. Once we have a manifest, let's define a S3 batch job that will copy all files for us. To do this, let's start by creating an IAM role called `lakeFSExportJobRole`, and grant it permissions as described in ["Granting permissions for Batch Operations"](https://docs.aws.amazon.com/AmazonS3/latest/dev/batch-ops-iam-role-policies.html#batch-ops-iam-role-policies-create){: target="_blank" } @@ -62,16 +62,16 @@ To do this, let's start by creating an IAM role called `lakeFSExportJobRole`, an 1. Take note of your account ID - this is required for running an S3 Batch Job: ```shell - $ aws sts get-caller-identity | jq -r .Account + aws sts get-caller-identity | jq -r .Account ``` 1. Dispatch a copy job using the [`run_copy.py`](https://github.com/treeverse/treeverse-distcp/blob/master/run_copy.py){: target="_blank" } script: ```shell - $ run_copy.py \ - --account-id "123456789" \ - --csv-path "s3://s3://my-bucket/path/to/manifest" \ - --csv-etag "..." \ - --report-path "s3://another-bucket/prefix/for/reports" \ - --lambda-handler-arn "arn:lambda:..." + run_copy.py \ + --account-id "123456789" \ + --csv-path "s3://s3://my-bucket/path/to/manifest" \ + --csv-etag "..." \ + --report-path "s3://another-bucket/prefix/for/reports" \ + --lambda-handler-arn "arn:lambda:..." ``` 1. You will get a job number. Now go to the [AWS S3 Batch Operations Console](https://s3.console.aws.amazon.com/s3/jobs){: target="_blank" }, switch to the region of your bucket, and confirm execution of that job. diff --git a/docs/quickstart/aws_cli.md b/docs/quickstart/aws_cli.md index f38f3448c97..5183a99bc4b 100644 --- a/docs/quickstart/aws_cli.md +++ b/docs/quickstart/aws_cli.md @@ -11,17 +11,19 @@ has_children: false 1. Configure a new connection profile using the credentials we generated earlier: ```bash - $ aws configure --profile local - AWS Access Key ID [None]: AKIAIOSFODNN7EXAMPLE - AWS Secret Access Key [None]: **************************************** - Default region name [None]: - Default output format [None]: + aws configure --profile local + # output: + # AWS Access Key ID [None]: AKIAIOSFODNN7EXAMPLE + # AWS Secret Access Key [None]: **************************************** + # Default region name [None]: + # Default output format [None]: ``` 1. Let's test to see that it works. We'll do that by calling `s3 ls` which should list our repositories for us: ```bash - $ aws --endpoint-url=http://s3.local.lakefs.io:8000 --profile local s3 ls - 2020-05-18 17:47:03 example + aws --endpoint-url=http://s3.local.lakefs.io:8000 --profile local s3 ls + # output: + # 2020-05-18 17:47:03 example ``` **Note:** We're using `s3.local.lakefs.io` - a special DNS record which always resolves to localhost, subdomains included. @@ -31,8 +33,9 @@ has_children: false 1. Great, now let's copy some files. We'll write to the master branch. 
This is done by prefixing our path with the name of the branch we'd like to read/write from: ```bash - $ aws --endpoint-url=http://s3.local.lakefs.io:8000 --profile local s3 cp ./foo.txt s3://example/master/ - upload: ./foo.txt to s3://example/master/foo.txt + aws --endpoint-url=http://s3.local.lakefs.io:8000 --profile local s3 cp ./foo.txt s3://example/master/ + # output: + # upload: ./foo.txt to s3://example/master/foo.txt ``` 1. Back in the lakeFS UI, we should be able to see our file added to the master branch! diff --git a/docs/quickstart/installing.md b/docs/quickstart/installing.md index 7c59de6ad73..e11831c4790 100644 --- a/docs/quickstart/installing.md +++ b/docs/quickstart/installing.md @@ -25,7 +25,7 @@ To run a local lakeFS instance, you can clone the repository and run [Docker Com 1. Clone the lakeFS repository: ```bash - $ git clone https://github.com/treeverse/lakeFS + git clone https://github.com/treeverse/lakeFS ``` 1. Navigate to the directory: `cd lakeFS`. @@ -33,7 +33,7 @@ To run a local lakeFS instance, you can clone the repository and run [Docker Com 1. Run the following command: ```bash - $ docker-compose up + docker-compose up ``` 1. Check your installation by opening [http://localhost:8000/setup](http://localhost:8000/setup){:target="_blank"} in your web browser. diff --git a/docs/quickstart/lakefs_cli.md b/docs/quickstart/lakefs_cli.md index e7b19fcb4f6..94072ac049d 100644 --- a/docs/quickstart/lakefs_cli.md +++ b/docs/quickstart/lakefs_cli.md @@ -23,35 +23,38 @@ has_children: false 1. configure the CLI to use the credentials you've created earlier: ```bash - $ lakectl config - Config file /home/janedoe/.lakectl.yaml will be used - Access key ID: AKIAIOSFODNN7EXAMPLE - Secret access key: **************************************** - Server endpoint URL: http://localhost:8000/api/v1 + lakectl config + # output: + # Config file /home/janedoe/.lakectl.yaml will be used + # Access key ID: AKIAIOSFODNN7EXAMPLE + # Secret access key: **************************************** + # Server endpoint URL: http://localhost:8000/api/v1 ``` 1. Now that we've configured it, let's run a few sample commands: ```bash - $ lakectl branch list lakefs://example - +----------+------------------------------------------------------------------+ - | REF NAME | COMMIT ID | - +----------+------------------------------------------------------------------+ - | master | a91f56a7e11be1348fc405053e5234e4af7d6da01ed02f3d9a8ba7b1f71499c8 | - +----------+------------------------------------------------------------------+ + lakectl branch list lakefs://example + # output: + # +----------+------------------------------------------------------------------+ + # | REF NAME | COMMIT ID | + # +----------+------------------------------------------------------------------+ + # | master | a91f56a7e11be1348fc405053e5234e4af7d6da01ed02f3d9a8ba7b1f71499c8 | + # +----------+------------------------------------------------------------------+ - $ lakectl commit lakefs://example@master -m 'added our first file!' - Commit for branch "master" done. - - ID: 901f7b21e1508e761642b142aea0ccf28451675199655381f65101ea230ebb87 - Timestamp: 2020-05-18 19:26:37 +0300 IDT - Parents: a91f56a7e11be1348fc405053e5234e4af7d6da01ed02f3d9a8ba7b1f71499c8 + lakectl commit lakefs://example@master -m 'added our first file!' + # output: + # Commit for branch "master" done. 
+ # + # ID: 901f7b21e1508e761642b142aea0ccf28451675199655381f65101ea230ebb87 + # Timestamp: 2020-05-18 19:26:37 +0300 IDT + # Parents: a91f56a7e11be1348fc405053e5234e4af7d6da01ed02f3d9a8ba7b1f71499c8 - $ lakectl log lakefs://example@master - - commit 901f7b21e1508e761642b142aea0ccf28451675199655381f65101ea230ebb87 - Author: Example User - Date: 2020-05-18 19:26:37 +0300 IDT + lakectl log lakefs://example@master + # output: + # commit 901f7b21e1508e761642b142aea0ccf28451675199655381f65101ea230ebb87 + # Author: Example User + # Date: 2020-05-18 19:26:37 +0300 IDT added our first file! diff --git a/docs/quickstart/other_installations.md b/docs/quickstart/other_installations.md index fa56a6cd681..68fafe20803 100644 --- a/docs/quickstart/other_installations.md +++ b/docs/quickstart/other_installations.md @@ -64,5 +64,5 @@ Alternatively, you may opt to run the lakefs binary directly on your computer. 1. Run the server: ```bash - $ ./lakefs --config /path/to/config.yaml run + ./lakefs --config /path/to/config.yaml run ``` diff --git a/docs/reference/commands.md b/docs/reference/commands.md index 3dfa5663cd2..fe2a7ef01f9 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -27,11 +27,12 @@ The `lakectl` is distributed as a single binary, with no external dependencies - Once you've installed the lakectl command, run: ```bash -$ lakectl config -Config file /home/janedoe/.lakectl.yaml will be used -Access key ID: AKIAIOSFODNN7EXAMPLE -Secret access key: **************************************** -Server endpoint URL: http://localhost:8000/api/v1 +lakectl config +# output: +# Config file /home/janedoe/.lakectl.yaml will be used +# Access key ID: AKIAIOSFODNN7EXAMPLE +# Secret access key: **************************************** +# Server endpoint URL: http://localhost:8000/api/v1 ``` This will setup a `$HOME/.lakectl.yaml` file with the credentials and API endpoint you've supplied. diff --git a/docs/using/aws_cli.md b/docs/using/aws_cli.md index 90af8ef6ad4..e5fbc323764 100644 --- a/docs/using/aws_cli.md +++ b/docs/using/aws_cli.md @@ -28,18 +28,18 @@ We would like to configure an AWS profile for lakeFS. 
In order to configure the lakeFS credentials run: ```shell -$ aws configure --profile lakefs +aws configure --profile lakefs ``` we will be prompted to enter ```AWS Access Key ID``` , ```AWS Secret Access Key``` It should look like this: ```shell -$ aws configure --profile lakefs - -AWS Access Key ID [None]: AKIAIOSFODNN7EXAMPLE -AWS Secret Access Key [None]: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY -Default region name [None]: -Default output format [None]: +aws configure --profile lakefs +# output: +# AWS Access Key ID [None]: AKIAIOSFODNN7EXAMPLE +# AWS Secret Access Key [None]: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY +# Default region name [None]: +# Default output format [None]: ``` @@ -51,7 +51,7 @@ When accessing objects in s3 we will need to use the lakeFS path convention After configuring the credentials, This is how a command should look: ```shell -$ aws s3 --profile lakefs \ +aws s3 --profile lakefs \ --endpoint-url https://s3.lakefs.example.com \ ls s3://example-repo/master/example-directory ``` @@ -66,7 +66,7 @@ We could use an [alias](aws_cli.md#adding-an-alias) to make it shorter and more ### List directory ```shell -$ aws --profile lakefs \ +aws --profile lakefs \ --endpoint-url https://s3.lakefs.example.com \ s3 ls s3://example-repo/master/example-directory ``` @@ -74,33 +74,33 @@ $ aws --profile lakefs \ ### Copy from lakeFS to lakeFS ```shell -$ aws --profile lakefs \ +aws --profile lakefs \ --endpoint-url https://s3.lakefs.example.com \ s3 cp s3://example-repo/master/example-file-1 s3://example-repo/master/example-file-2 ``` ### Copy from lakeFS to a local path ```shell -$ aws --profile lakefs \ +aws --profile lakefs \ --endpoint-url https://s3.lakefs.example.com \ s3 cp s3://example-repo/master/example-file-1 /path/to/local/file ``` ### Copy from a local path to lakeFS ```shell -$ aws --profile lakefs \ +aws --profile lakefs \ --endpoint-url https://s3.lakefs.example.com \ s3 cp /path/to/local/file s3://example-repo/master/example-file-1 ``` ### Delete file ```shell -$ aws --profile lakefs \ +aws --profile lakefs \ --endpoint-url https://s3.lakefs.example.com \ s3 rm s3://example-repo/master/example-directory/example-file ``` ### Delete directory ```shell -$ aws --profile lakefs \ +aws --profile lakefs \ --endpoint-url https://s3.lakefs.example.com \ s3 rm s3://example-repo/master/example-directory/ --recursive ``` @@ -110,10 +110,10 @@ $ aws --profile lakefs \ In order to make the command shorter and more convenient we can create an alias: ```shell -$ alias awslfs='aws --endpoint https://s3.lakefs.example.com --profile lakefs' +alias awslfs='aws --endpoint https://s3.lakefs.example.com --profile lakefs' ``` Now, the ls command using the alias will be: ```shell -$ awslfs s3 ls s3://example-repo/master/example-directory +awslfs s3 ls s3://example-repo/master/example-directory ``` diff --git a/docs/using/boto.md b/docs/using/boto.md index 447b690aacb..3218dd25a33 100644 --- a/docs/using/boto.md +++ b/docs/using/boto.md @@ -58,20 +58,21 @@ for obj in list_resp['Contents']: ### Head Object Get object metadata using branch and path: ```python ->>> s3.head_object(Bucket='example-repo', Key='master/example-file.parquet') -{'ResponseMetadata': {'RequestId': '72A9EBD1210E90FA', - 'HostId': '', - 'HTTPStatusCode': 200, - 'HTTPHeaders': {'accept-ranges': 'bytes', - 'content-length': '1024', - 'etag': '"2398bc5880e535c61f7624ad6f138d62"', - 'last-modified': 'Sun, 24 May 2020 10:42:24 GMT', - 'x-amz-request-id': '72A9EBD1210E90FA', - 'date': 'Sun, 24 May 2020 10:45:42 GMT'}, - 
'RetryAttempts': 0}, - 'AcceptRanges': 'bytes', - 'LastModified': datetime.datetime(2020, 5, 24, 10, 42, 24, tzinfo=tzutc()), - 'ContentLength': 1024, - 'ETag': '"2398bc5880e535c61f7624ad6f138d62"', - 'Metadata': {}} +s3.head_object(Bucket='example-repo', Key='master/example-file.parquet') +# output: +# {'ResponseMetadata': {'RequestId': '72A9EBD1210E90FA', +# 'HostId': '', +# 'HTTPStatusCode': 200, +# 'HTTPHeaders': {'accept-ranges': 'bytes', +# 'content-length': '1024', +# 'etag': '"2398bc5880e535c61f7624ad6f138d62"', +# 'last-modified': 'Sun, 24 May 2020 10:42:24 GMT', +# 'x-amz-request-id': '72A9EBD1210E90FA', +# 'date': 'Sun, 24 May 2020 10:45:42 GMT'}, +# 'RetryAttempts': 0}, +# 'AcceptRanges': 'bytes', +# 'LastModified': datetime.datetime(2020, 5, 24, 10, 42, 24, tzinfo=tzutc()), +# 'ContentLength': 1024, +# 'ETag': '"2398bc5880e535c61f7624ad6f138d62"', +# 'Metadata': {}} ``` diff --git a/docs/using/glue_hive_metastore.md b/docs/using/glue_hive_metastore.md index 99874751741..78bef785e44 100644 --- a/docs/using/glue_hive_metastore.md +++ b/docs/using/glue_hive_metastore.md @@ -107,17 +107,17 @@ we would like to create a copy of the table `example_by_dt` in schema `example_b Recommended: ``` bash -$ lakectl metastore copy --from-schema default --from-table exmpale_by_dt --to-branch example_branch +lakectl metastore copy --from-schema default --from-table exmpale_by_dt --to-branch example_branch ``` Glue: ``` bash -$ lakectl metastore copy --type glue --address 123456789012 --from-schema default --from-table exmpale_by_dt --to-schema default --to-table branch_example_by_dt --to-branch example_branch +lakectl metastore copy --type glue --address 123456789012 --from-schema default --from-table exmpale_by_dt --to-schema default --to-table branch_example_by_dt --to-branch example_branch ``` Hive: ``` bash -$ lakectl metastore copy --type hive --address thrift://hive-metastore:9083 --from-schema default --from-table example_by_dt --to-schema default --to-table branch_example_by_dt --to-branch exmample-branch +lakectl metastore copy --type hive --address thrift://hive-metastore:9083 --from-schema default --from-table example_by_dt --to-schema default --to-table branch_example_by_dt --to-branch exmample-branch ``` ### Copy partition @@ -134,23 +134,23 @@ We would like to merge back the partition: Recommended: ``` bash -$ lakectl metastore copy --from-schema example_branch --from-table example_by_dt --to-schema default --to-branch master -p 2020-08-01 +lakectl metastore copy --from-schema example_branch --from-table example_by_dt --to-schema default --to-branch master -p 2020-08-01 ``` Glue: ``` bash -$ lakectl metastore copy --type glue --address 123456789012 --from-schema example_branch --from-table example_by_dt --to-schema default --to-table example_by_dt --to-branch master -p 2020-08-01 +lakectl metastore copy --type glue --address 123456789012 --from-schema example_branch --from-table example_by_dt --to-schema default --to-table example_by_dt --to-branch master -p 2020-08-01 ``` Hive: ``` bash -$ lakectl metastore copy --type hive --address thrift://hive-metastore:9083 --from-schema example_branch --from-table example_by_dt --to-schema default --to-table example_by_dt --to-branch master -p 2020-08-01 +lakectl metastore copy --type hive --address thrift://hive-metastore:9083 --from-schema example_branch --from-table example_by_dt --to-schema default --to-table example_by_dt --to-branch master -p 2020-08-01 ``` In case our table is partitioned by more than one value, for example 
partitioned by year/month/day for year ```2020``` month ```08``` day ```01``` ``` bash -$ lakectl metastore copy --from-schema example_branch --from-table branch_example_by_dt --to-schema default --to-branch master -p 2020 -p 08 -p 01 +lakectl metastore copy --from-schema example_branch --from-table branch_example_by_dt --to-schema default --to-branch master -p 2020 -p 08 -p 01 ``` @@ -165,17 +165,17 @@ Suppose that we made some changes on the copied table `exmample_by_dt` on schema Recommended: ``` bash -$ lakectl metastore diff --from-schema default --from-table branch_example_by_dt --to-schema example_branch +lakectl metastore diff --from-schema default --from-table branch_example_by_dt --to-schema example_branch ``` Glue: ``` bash -$ lakectl metastore diff --type glue --address 123456789012 --from-schema default --from-table branch_example_by_dt --to-schema default --to-table example_by_dt +lakectl metastore diff --type glue --address 123456789012 --from-schema default --from-table branch_example_by_dt --to-schema default --to-table example_by_dt ``` Hive: ``` bash -$ lakectl metastore diff --type hive --address thrift://hive-metastore:9083 --from-schema default --from-table branch_example_by_dt --to-schema default --to-table example_by_dt +lakectl metastore diff --type hive --address thrift://hive-metastore:9083 --from-schema default --from-table branch_example_by_dt --to-schema default --to-table example_by_dt ``` The output will be something like: @@ -208,7 +208,7 @@ We want to query the table using Amazon Athena. To do this, we run the command: ``` bash -$ lakectl metastore create-symlink --address 123456789012 --branch master --from-schema default --from-table branch_example_by_dt --to-schema default --to-table sym_example_by_dt --repo example-repository --path path/to/table/in/lakeFS +lakectl metastore create-symlink --address 123456789012 --branch master --from-schema default --from-table branch_example_by_dt --to-schema default --to-table sym_example_by_dt --repo example-repository --path path/to/table/in/lakeFS ``` Now we can use Amazon Athena and query the created table `sym_example_by_dt` diff --git a/docs/using/presto.md b/docs/using/presto.md index 9d819c12068..4b05a1efbfe 100644 --- a/docs/using/presto.md +++ b/docs/using/presto.md @@ -108,7 +108,7 @@ WITH ( ### Example of copying a table with [metastore tools](glue_hive_metastore.md): Copy the created table `page_views` on schema `master` to schema `example_branch` with location `s3a://example/example_branch/page_views/` ```shell -$ lakectl metastore copy --from-schema master --from-table page_views --to-branch example_branch +lakectl metastore copy --from-schema master --from-table page_views --to-branch example_branch ``` diff --git a/docs/using/python.md b/docs/using/python.md index 252ddd945e9..9a680c7e515 100644 --- a/docs/using/python.md +++ b/docs/using/python.md @@ -26,7 +26,7 @@ A complete installation guide is available in the [bravado GitHub repository](ht For our example, we'll simply install it with pip: ```shell -$ pip install bravado==10.6.2 +pip install bravado==10.6.2 ``` At the time of writing this guide, the current stable bravado release is `10.6.2`. @@ -62,13 +62,13 @@ Now that we have a client object, we can use it to interact with the API. 
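As a quick sanity check, you can list the repositories the server already knows about. This is a minimal sketch that assumes the `client` object created above and valid credentials; `listRepositories` is the list operation exposed by the loaded API definition:

```python
# list existing repositories through the generated bravado client (sketch)
print(client.repositories.listRepositories().result())
```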
### Listing and creating repositories ```python ->>> client.repositories.createRepository(repository={ - 'id': 'example-repo', - 'storage_namespace': 's3://storage-bucket/repos/example-repo', - 'default_branch':'main' - }).result() -repository(creation_date=1599560048, default_branch='main', id='example-repo', storage_namespace='s3://storage-bucket/repos/example-repo') - +client.repositories.createRepository(repository={ + 'id': 'example-repo', + 'storage_namespace': 's3://storage-bucket/repos/example-repo', + 'default_branch':'main' +}).result() +# output: +# repository(creation_date=1599560048, default_branch='main', id='example-repo', storage_namespace='s3://storage-bucket/repos/example-repo') ``` ### Creating a branch, uploading files, committing changes @@ -76,68 +76,74 @@ repository(creation_date=1599560048, default_branch='main', id='example-repo', s List current branches: ```python ->>> client.branches.listBranches(repository='test-repo').result() -{'pagination': pagination(has_more=False, max_per_page=1000, next_offset=None, results=1), 'results': ['main']} +client.branches.listBranches(repository='test-repo').result() +# output: +# {'pagination': pagination(has_more=False, max_per_page=1000, next_offset=None, results=1), 'results': ['main']} ``` Create a new branch: ```python ->>> client.branches.createBranch(repository='test-repo', branch={'name': 'experiment-aggregations1', 'source': 'main'}).result() -'~EiRd5nyjm8kWLDHesLTsywmd1MNW5hB3ApQi4' +client.branches.createBranch(repository='test-repo', branch={'name': 'experiment-aggregations1', 'source': 'main'}).result() +# output: +# '~EiRd5nyjm8kWLDHesLTsywmd1MNW5hB3ApQi4' ``` Let's list again, to see our newly created branch: ```python ->>> client.branches.listBranches(repository='test-repo').result() -{'pagination': pagination(has_more=False, max_per_page=1000, next_offset=None, results=2), - 'results': ['experiment-aggregations1', 'main']} +client.branches.listBranches(repository='test-repo').result() +# output: +# {'pagination': pagination(has_more=False, max_per_page=1000, next_offset=None, results=2), +# 'results': ['experiment-aggregations1', 'main']} ``` Great. Now, let's upload a file into our new branch: ```python ->>> with open('file.csv', 'rb') as file_handle: -... client.objects.uploadObject( -... repository='test-repo', -... branch='experiment-aggregations1', -... path='path/to/file.csv', -... content=file_handle -... ).result() -... -object_stats(checksum='319ccf050a10a87ba20e00a64c6d738e', mtime=1599563388, path='path/to/file.csv', path_type='object', size_bytes=727) +with open('file.csv', 'rb') as file_handle: + client.objects.uploadObject( + repository='test-repo', + branch='experiment-aggregations1', + path='path/to/file.csv', + content=file_handle + ).result() +# output: +# object_stats(checksum='319ccf050a10a87ba20e00a64c6d738e', mtime=1599563388, path='path/to/file.csv', path_type='object', size_bytes=727) ``` Diffing a single branch will show all uncommitted changes on that branch: ```python ->>> client.branches.diffBranch(repository='test-repo', branch='experiment-aggregations1').result() -{'results': [diff(path='path/to/file.csv', path_type='object', type='added')]} +client.branches.diffBranch(repository='test-repo', branch='experiment-aggregations1').result() +# output: +# {'results': [diff(path='path/to/file.csv', path_type='object', type='added')]} ``` As expected, our change appears here. Let's commit it, and attach some arbitrary metadata: ```python ->>> client.commits.commit( -... 
repository='test-repo', -... branch='experiment-aggregations1', -... commit={ -... 'message': 'Added a CSV file!', -... 'metadata': { -... 'using': 'python_api' -... } -... }).result() -commit(committer='jane.doe', creation_date=1599563809, id='~EiRd5nyjm8kWLDHesLTsywmd1MNW5hB3ApQnW', - message='Added a CSV file!', metadata={'using': -'python_api'}, parents=['~EiRd5nyjm8kWLDHesLTsywmd1MNW5hB3ApQnU']) +client.commits.commit( + repository='test-repo', + branch='experiment-aggregations1', + commit={ + 'message': 'Added a CSV file!', + 'metadata': { + 'using': 'python_api' + } + }).result() +# output: +# commit(committer='jane.doe', creation_date=1599563809, id='~EiRd5nyjm8kWLDHesLTsywmd1MNW5hB3ApQnW', +# message='Added a CSV file!', metadata={'using': +# 'python_api'}, parents=['~EiRd5nyjm8kWLDHesLTsywmd1MNW5hB3ApQnU']) ``` Diffing again, this time there should be no uncommitted branches: ```python ->>> client.branches.diffBranch(repository='test-repo', branch='experiment-aggregations1').result() -{'results': []} +client.branches.diffBranch(repository='test-repo', branch='experiment-aggregations1').result() +# output: +# {'results': []} ``` ### Merging changes from a branch into master @@ -145,22 +151,25 @@ Diffing again, this time there should be no uncommitted branches: Let's diff between our branch and the main branch: ```python ->>> client.refs.diffRefs(repository='test-repo', leftRef='experiment-aggregations1', rightRef='main').result() -{'results': [diff(path='path/to/file.csv', path_type='object', type='added')]} +client.refs.diffRefs(repository='test-repo', leftRef='experiment-aggregations1', rightRef='main').result() +# output: +# {'results': [diff(path='path/to/file.csv', path_type='object', type='added')]} ``` Looks like we have a change. Let's merge it: ```python ->>> client.refs.mergeIntoBranch(repository='test-repo', sourceRef='experiment-aggregations1', destinationRef='main').result() -{'results': [merge_result(path='path/to/object', path_type='object', type='added')]} +client.refs.mergeIntoBranch(repository='test-repo', sourceRef='experiment-aggregations1', destinationRef='main').result() +# output: +# {'results': [merge_result(path='path/to/object', path_type='object', type='added')]} ``` Let's diff again - there should be no changes as all changes are on our main branch already: ```python ->>> client.refs.diffRefs(repository='test-repo', leftRef='experiment-aggregations1', rightRef='main').result() -{'results': []} +client.refs.diffRefs(repository='test-repo', leftRef='experiment-aggregations1', rightRef='main').result() +# output: +# {'results': []} ``` ## Full API reference diff --git a/docs/using/rclone.md b/docs/using/rclone.md index e004559ce8f..f658f5a2b8b 100644 --- a/docs/using/rclone.md +++ b/docs/using/rclone.md @@ -21,42 +21,45 @@ To add the remote to Rclone, choose one of the following options: * Find the path to your Rclone configuration file and copy it for the next step. 
```shell - $ rclone config file - Configuration file is stored at: - /home/myuser/.config/rclone/rclone.conf + rclone config file + # output: + # Configuration file is stored at: + # /home/myuser/.config/rclone/rclone.conf ``` * If your lakeFS access key is already set in an AWS profile or environment variables, just run the following command, replacing the endpoint property with your lakeFS endpoint: ```shell cat <> /home/myuser/.config/rclone/rclone.conf - [lakefs] - type = s3 - provider = AWS - endpoint = https://s3.lakefs.example.com - - EOT + # output: + # [lakefs] + # type = s3 + # provider = AWS + # endpoint = https://s3.lakefs.example.com + # + # EOT ``` * Otherwise, also include your lakeFS access key pair in the Rclone configuration file: ```shell cat <> /home/myuser/.config/rclone/rclone.conf - [lakefs] - type = s3 - provider = AWS - env_auth = false - access_key_id = AKIAIOSFODNN7EXAMPLE - secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY - endpoint = https://s3.lakefs.example.com - EOT + # output: + # [lakefs] + # type = s3 + # provider = AWS + # env_auth = false + # access_key_id = AKIAIOSFODNN7EXAMPLE + # secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + # endpoint = https://s3.lakefs.example.com + # EOT ``` ### Option 2: use Rclone interactive config command Run this command and follow the instructions: ```shell -$ rclone config +rclone config ``` Choose AWS S3 as your type of storage, and enter your lakeFS endpoint as your S3 endpoint. You will have to choose whether you use your environment for authentication (recommended), @@ -67,11 +70,11 @@ or to enter the lakeFS access key pair into the Rclone configuration. ### Syncing your data from S3 to lakeFS ```shell -$ rclone sync mys3remote://mybucket/path/ lakefs:example-repo/master/path +rclone sync mys3remote://mybucket/path/ lakefs:example-repo/master/path ``` ### Syncing a local directory to lakeFS ```shell -$ rclone sync /home/myuser/path/ lakefs:example-repo/master/path +rclone sync /home/myuser/path/ lakefs:example-repo/master/path ``` From 6f9944ce82d7170097370759181d3c051e271d17 Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Thu, 10 Sep 2020 12:38:18 +0100 Subject: [PATCH 05/39] Update index.md --- docs/index.md | 59 ++++++++++++--------------------------------------- 1 file changed, 13 insertions(+), 46 deletions(-) diff --git a/docs/index.md b/docs/index.md index a2872dee3ed..135279ed926 100755 --- a/docs/index.md +++ b/docs/index.md @@ -28,56 +28,23 @@ Since lakeFS is compatible with the S3 API, all popular applications will work w ![lakeFS s3 addressing](assets/img/s3_branch.png) -Here's why you need it: +## Here's why you need it: -### Fragile Writers +### Developer Environment for Data +* **Experimentation** - try tools, upgrade versions and evaluate code changes in isolation. By creating a branch of the lake you get an isolated snapshot of the lake you can experiment with while others are not exposed. You can compare branches with different experiments or compare your branch to the master lake. +* **Reproducibility** - go back to any point of time to a consistent version of your data lake. By making commit history available for a configurable duration - you can read from the lake at any given point in time, compare changes made, and safely rollback if necessary. 
-Writing to object stores is simple, scalable and cheap but could also be error prone, for example: +### Continuous Data Integration +* **Ingest new data safely by enforcing best practices** - make sure new data sources adhere to your lake’s best practices such as format and schema enforcement, naming convention, etc’. By consuming data to an isolated branch and creating pre-merge hooks you can define automated rules and tests that are required to pass before introducing new data sources. +* **Metadata validation** - prevent breaking changes from entering the production data environment. Ingesting data to an isolated branch can also prevent breaking changes from entering your production data environment -* Jobs (both streaming and batch) can fail, leaving partially written data -* It's hard to signal to readers that a collection of objects is ready to be consumed. This is sometimes worked around using SUCCESS files, Metastore registration or other home-grown solutions. - It's even harder to keep multiple such collections in sync. -* Unless we know the exact prior state, cleanly undoing operations is extremely difficult -* Eventual consistency may cause corruption or failure. For example, S3's list operation might not show recently written objects, leading to failing jobs +### Continuous Data Deployment +* **Instantly revert changes to data** - if low quality data is exposed to your consumers, you can revert instantly to a former, consistent and correct snapshot of your data lake. By making commit history available for a configurable duration - you can revert the lake to the previous version instantly with one atomic action. +* **Enforce cross collection consistency** - provide to consumers several collections of data that must be synchronized, in one atomic, revertable, action. Using branches, writers can provide consistency guarantees across different logical collections - merging to “main” is only done after several datasets have been created successfully. +* **Prevent data quality issues by enabling** - + * Testing of production data before exposing it to users / consumers + * Testing of intermediate results in your DAG to avoid cascading quality issues -lakeFS overcomes these obstacles by providing: - -* **Atomic Operations** - lakeFS allows data producers to manipulate multiple objects as a single, atomic operation. If something fails half-way, all changes can be instantly rolled back. lakeFS enables this by allowing branch creation. When creating a branch, all objects manipulated within that branch are only visible inside it. Once processing completes successfully, merging to the "main" branch is an atomic operation. If something fails mid-way, we can simply (and atomically) revert our branch to its previous committed state. -* **Consistency** - lakeFS enables object-level and cross-collection consistency: - * **object-level** consistency ensures all operations within a branch are strongly consistent (read-after-write, list-after-write, read-after-delete, etc). - * **cross-collection** consistency is achieved by providing [snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation){: target="_blank" }. Using branches, writers can provide consistency guarantees across different logical collections - merging to "main" is only done after several datasets have been created successfully. -* **History** - By using a branch/commit model, we can atomically and safely rollback any set of changes made to the lake. 
By making commit history available for a configurable amount of time - we can read from the lake at any given point in time, compare changes made - and undo them if necessary. - - -### Fragile Readers - -Reading data from the lake can also generate problems: - -- Data is constantly changing, sometimes during an experiment or while a long-running job is executing. -- it's almost impossible to build reproducible, testable queries - we have no guarantee that the input data won't change. - -lakeFS provides: - -* **Cross-Lake Isolation** - Creating a lakeFS branch provides you with a snapshot of the entire lake at a given point in time. All reads from that branch are guaranteed to always return the same results. -* **Consistency** - When data is produced in isolated branches and merged atomically into "main", writers can provide data that readers can trust as complete and validated. -* **History** - Since previous commits are retained for a configurable duration, readers can query data from the latest commit, or from any other point in time. - -### Data CI/CD - -Data is useless, unless it's trustworthy. When data is written today, it's already being exposed to readers and there's no clear validation process, like the one we have in-place for code. - -* There's no way to enforce naming conventions, schema rules, or the use of specific file formats. -* Validating the quality of the written data is usually done too late - it has already been written and is visible to readers. - - lakeFS introduces the concept of **Data CI/CD** - The ability to define automated rules and tests that are required - to pass before committing or merging changes to data. - -Data engineers can now define rules such as: - -* *No breaking schema changes allowed under the following paths: \[...\]* -* *The main branch should only contain Parquet and ORC files. CSV and TSV are not allowed.* -* *Data validation jobs must finish successfully for this set of collections: \[...\]* -* *The proportion of rows with a given value in a certain column is dramatically lower than usual (a possible bug in data collection)* ## Next steps From 65eab847ce20f450af788dd081dc9c98ec63ce58 Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Thu, 10 Sep 2020 12:56:37 +0100 Subject: [PATCH 06/39] Update docs/index.md Co-authored-by: johnnyaug --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 135279ed926..242937a285c 100755 --- a/docs/index.md +++ b/docs/index.md @@ -32,7 +32,7 @@ Since lakeFS is compatible with the S3 API, all popular applications will work w ### Developer Environment for Data * **Experimentation** - try tools, upgrade versions and evaluate code changes in isolation. By creating a branch of the lake you get an isolated snapshot of the lake you can experiment with while others are not exposed. You can compare branches with different experiments or compare your branch to the master lake. -* **Reproducibility** - go back to any point of time to a consistent version of your data lake. By making commit history available for a configurable duration - you can read from the lake at any given point in time, compare changes made, and safely rollback if necessary. +* **Reproducibility** - go back to any point in time to a consistent version of your data lake. By making commit history available for a configurable duration - you can read from the lake at any given point in time, compare changes made, and safely roll back if necessary. 
### Continuous Data Integration * **Ingest new data safely by enforcing best practices** - make sure new data sources adhere to your lake’s best practices such as format and schema enforcement, naming convention, etc’. By consuming data to an isolated branch and creating pre-merge hooks you can define automated rules and tests that are required to pass before introducing new data sources. From 39be32efaf1405d531897a9c63756f9d22638afb Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Thu, 10 Sep 2020 12:57:05 +0100 Subject: [PATCH 07/39] Update docs/index.md Co-authored-by: johnnyaug --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 242937a285c..78b87629284 100755 --- a/docs/index.md +++ b/docs/index.md @@ -35,7 +35,7 @@ Since lakeFS is compatible with the S3 API, all popular applications will work w * **Reproducibility** - go back to any point in time to a consistent version of your data lake. By making commit history available for a configurable duration - you can read from the lake at any given point in time, compare changes made, and safely roll back if necessary. ### Continuous Data Integration -* **Ingest new data safely by enforcing best practices** - make sure new data sources adhere to your lake’s best practices such as format and schema enforcement, naming convention, etc’. By consuming data to an isolated branch and creating pre-merge hooks you can define automated rules and tests that are required to pass before introducing new data sources. +* **Ingest new data safely by enforcing best practices** - make sure new data sources adhere to your lake’s best practices such as format and schema enforcement, naming convention, etc. By consuming data to an isolated branch and creating pre-merge hooks you can define automated rules and tests that are required to pass before introducing new data sources. * **Metadata validation** - prevent breaking changes from entering the production data environment. Ingesting data to an isolated branch can also prevent breaking changes from entering your production data environment ### Continuous Data Deployment From 8635befc0c694a0c51d7bae9df368d22aa515cc3 Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Thu, 10 Sep 2020 12:57:19 +0100 Subject: [PATCH 08/39] Update docs/index.md Co-authored-by: johnnyaug --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 78b87629284..f58c3aced6b 100755 --- a/docs/index.md +++ b/docs/index.md @@ -40,7 +40,7 @@ Since lakeFS is compatible with the S3 API, all popular applications will work w ### Continuous Data Deployment * **Instantly revert changes to data** - if low quality data is exposed to your consumers, you can revert instantly to a former, consistent and correct snapshot of your data lake. By making commit history available for a configurable duration - you can revert the lake to the previous version instantly with one atomic action. -* **Enforce cross collection consistency** - provide to consumers several collections of data that must be synchronized, in one atomic, revertable, action. Using branches, writers can provide consistency guarantees across different logical collections - merging to “main” is only done after several datasets have been created successfully. 
+* **Enforce cross collection consistency** - provide to consumers several collections of data that must be synchronized, in one atomic, revertable, action. Using branches, writers can provide consistency guarantees across different logical collections - merging to the main branch is only done after all relevant datasets have been created successfully. * **Prevent data quality issues by enabling** - * Testing of production data before exposing it to users / consumers * Testing of intermediate results in your DAG to avoid cascading quality issues From 5ca01258f4c0f306c770c9d996aaaf6224facc21 Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Thu, 10 Sep 2020 12:57:28 +0100 Subject: [PATCH 09/39] Update docs/index.md Co-authored-by: johnnyaug --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index f58c3aced6b..959f6d62d15 100755 --- a/docs/index.md +++ b/docs/index.md @@ -36,7 +36,7 @@ Since lakeFS is compatible with the S3 API, all popular applications will work w ### Continuous Data Integration * **Ingest new data safely by enforcing best practices** - make sure new data sources adhere to your lake’s best practices such as format and schema enforcement, naming convention, etc. By consuming data to an isolated branch and creating pre-merge hooks you can define automated rules and tests that are required to pass before introducing new data sources. -* **Metadata validation** - prevent breaking changes from entering the production data environment. Ingesting data to an isolated branch can also prevent breaking changes from entering your production data environment +* **Metadata validation** - prevent breaking changes from entering the production data environment. Ingesting data to an isolated branch can also prevent breaking changes from entering your production data environment. ### Continuous Data Deployment * **Instantly revert changes to data** - if low quality data is exposed to your consumers, you can revert instantly to a former, consistent and correct snapshot of your data lake. By making commit history available for a configurable duration - you can revert the lake to the previous version instantly with one atomic action. From 398f4a0b67e57843f6d71baf306e56fdc09f9759 Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Thu, 10 Sep 2020 12:57:40 +0100 Subject: [PATCH 10/39] Update docs/index.md Co-authored-by: johnnyaug --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 959f6d62d15..19ae679344e 100755 --- a/docs/index.md +++ b/docs/index.md @@ -31,7 +31,7 @@ Since lakeFS is compatible with the S3 API, all popular applications will work w ## Here's why you need it: ### Developer Environment for Data -* **Experimentation** - try tools, upgrade versions and evaluate code changes in isolation. By creating a branch of the lake you get an isolated snapshot of the lake you can experiment with while others are not exposed. You can compare branches with different experiments or compare your branch to the master lake. +* **Experimentation** - try tools, upgrade versions and evaluate code changes in isolation. By creating a branch of the lake you get an isolated snapshot of the lake you can experiment with while others are not exposed. You can compare branches with different experiments or compare your branch to the main branch of the lake. 
* **Reproducibility** - go back to any point in time to a consistent version of your data lake. By making commit history available for a configurable duration - you can read from the lake at any given point in time, compare changes made, and safely roll back if necessary. ### Continuous Data Integration From 1a654b4b84a0756e11c3c3f021c044629c10bf8f Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Thu, 10 Sep 2020 15:36:13 +0300 Subject: [PATCH 11/39] docs: new s3-branches image --- docs/assets/img/s3_branch.png | Bin 32930 -> 41796 bytes docs/index.md | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/assets/img/s3_branch.png b/docs/assets/img/s3_branch.png index 01aff8b4b873edabea71312c6917a2691d658282..85bcd9b8f9ae76789293e2d063eda17edc80ee7e 100644 GIT binary patch literal 41796 zcmeFY^=cnjESWFDO5;6od zdQt|b3{GC1)q5p=tum&=rALG#PsO%C#|sesr3=?3hxEnsJzO7KaZ{!Sm8uDQpQO`A z^1ixR9?ihyGRH<_d!lL?*-g<^M7Y#}gHdPrNxMKVjT&-->qEhyMSX?@Y4}Nw03K=b zeRK+i01t7aEnKcFlp37+WRZ3pW4C*Pj28c$3DWI@8|Tn!nksmTP#N4>gqO(II?h*J zH1vc(L1M%Fusvne<7Xp`ssqI^4HLq?wdvDE9@h#AKgRdg$Cz-kc#QdRT=&H-!rj4l zZ@&Lp3x2M){53`q9gE*LV0pLnE$%I8pF=~=47Ef%)!C{9rG!Ubl5?PstmrEF_;2TL z%Pn_4k3RqnzUg0tQuFsfEcjQ*kATEq@hTL55Pm2}P}A37pNQHP3@Jv=FB)=F8v{>s zqV!U8qBk&&8>I<9m2M4wao&B8s^hJlYs>~H|IW14Z|$i5n}=RjmCQEzhY&UhwGV@m zp4Kx~sz5s8(_aZXH1vht=!S@b;botiw%WW&#IsdWZISkQQrZYK$~iBhUt_raVaqKK zDU{7TT+ben<#d8FIJ8-bgJeWiVJNJ~>_5k z7w?iU`;_sUmyI%bLR<;q<%cvPuGozzilju&$73N(D=x*au^enV{A{1e_yd};#RCx@ zbic95@$NDs@dptO3$>#gmLpY{48D}?2+K!m~U43^nF0Nf0E65fN`|z*MH>xlC&S9&a~h(mVrdHqiWL_jW++N_Yq} z)sY5>yz0Pu7#fHoHnFZWIyX=;o_S|~O+2z0k!Ev7caEFzCGvx)vY&Fgy|qDPI;EQr%Iw7~yP72Q;;9Zn z?x23x7mwvh^?EtD@T8`xvSo0vzV{-+ha4_G6ERB^$tS@r#xjgYP{VV{NN#*p5b*o6 z7U?am3 z=gMCwJ(2;j4phi8!qdN*s0e;j%9`TW!j%YXP@eo|mS(=jal-xkwEEFA$O?QI*4R`)N4>{yU0^ z@q7x=z(BJWanjE{Lnm8Ijc9daE-iuicdtisDQG7H>s#U(KkFR6HizV2n(F;f>_6C) zyS#f)Xot7&W!(Z_l0M`7RA8fR{_u>-FG@&}%oe32SxjY1OH7{eqpc+VGEJK5OIWlx zpqXeU%(E}GkGe0aPoNLq808ASCRv;^HBL|FX93TLY&CI)&lR7~Xh^8_sa)gM<6MU{ zOtG90zXdW;C&w`kw+%@R5e&x+`{MeB3-o7ykb19Tu9T$QsGLTBK%bc)SPDP)n^0Ol zZ(sR9wXFDV7H4*4Hkp@~mz&p*cgK~A_r|5v<^DKhx~1qQ+FaT>*U>7KliH5n4y7X4 zE$}{0p@?r%?C{eq<&%mRGn5C)0%dGeahhWFqZz}!uIYfE+Gr& zsamG^)(vyYWkpOT_@U}urN#|4UgcvI`Q?f8y5%bs z5L;rqI6GOp`^D9o!sS{!XIssc#^w7(?WL%?bTds0_n9o?iAI8zssf5bT z4JL?2K^t$6exb=AezKOTXct6;LDfBEQ90 z$mUC5`4AE;5t10%5K0t68(M`0A~M4HO{7dPg>yvmjiZ)292<|Mf%2GeB7k;t?%HeM zDLPZ2kvR=N^J9nLlV7Ax{zVqV~Bv-%EjaCuM`#!AA(tU&eEOpn2bk}FUoZFCG7 zK;T8W=P*bcl@{kySUiq9b*BQFyiSq};}X4g>S|hEa$KrP`ZWy~ldD-NgPq#)X?@+1 z`BB{MpLZ&fvIF;#>!@2e^WP8u-i|Ykw)0LoZr_F#(Wc5Xq_}w0uA9E>LbZd+XUp}q z$sQb^3H|K3S-r*FVtz1cF?!IvdtZs~7KR|fC=ug~;H!`OkMOWi09LlRMWQ)Q>l0wmAR~+J`*(4pa{gCSEIMJ6z0;%8*LX(+G5!GMX;C zzy98sO*~2(9mADMgWW>-&D)*_jUM&roE1%md15~88@4FOg?(H|sHjFfsmZp^c zadkUMs)AG-w7=WVJd(7oYrw`1W5^)MG)ZYDD&LB=uZ@j+>V=L*=sf*NtDD7n=cUPFj696H zb?@20YNtrviQd^S33oGdzEcn#a2@ml z*U~mJ{WtQjl}S1Y+%8t3eUK*7x1{{u#U2m2&Rd_K@<#?WVr;TK_`l8KO}TCz1r=pZ zm`*&kj=r!xnDjaq9SFUCx;|*hs?QoHxF&%5GkaP+ISn#aX;ySxb+`ivI?ugTU*>Pm z#@$cdX*Y^G{a3%9y*Uz_lRAhQ%gz&8J7+z;NWR*8{+SPlOq3S?rVGFams9kns~K(r z3y0Ia6JCT9JPVKbf-}F0j?#zn!)vi5=42Hg?!ITA4a=$bk62nbFr1g@13LE zt-Nj`tSvkLS2=Vz=`lE;1TYwEcY5*q_M&(Z15W3U^?8~#MNeBd_@}i?zjz#S=Z7(Y z+X!ON`A=R1)=Sq{)+xxZwX^I$%*=q!mi8|E+~0Iz9jFe{+Rkur_|*Rtco`L{OW67gR%%)- zvK9jyFQ z!;1LtEIBS1D+t5Mh>NIsz@KKKv=gf@y<#CA$w*&B-AamzilWCV;U%miiVF*OUw2aa zqvKQV388#3D)`4!u5(b#P?0EiGlPWp=d0Lj-(= z#@8~xIp6bHSd2er7M)(!;7R?2LnMcp>B4Xb*gyWO`H8anO@L%bCBA&{fA#&>Fgf{; z91*Gip8Vef&WPBYU)+Z++5SJ4d`o!ye`EOv@QW0j$Y^ z^zS2I%zImoZ1!~l>%=*S*3jm*I}6!IM8QVEuVd#}WGnviZkF4#2aSW*7csAPpo*ig 
zoGVkX2a%&%(=&tI!p;v)z*)?ERUbdSQFi|1gf0$S4ya{*3P4ojx@U$x|Jl9jSU5OOw+&$j#O= zNes9l&02oW7vT!!&#zXe^?GixUVSs3&7k*2S@tJZ0jpTXC0<8f(fYXxJ)l3ewaqar zZD$O5e{}jtvd%VXQzg`n|9q8wQ$q2cC{P(_Sn#eCvS|d?^>9BkYMmCcCIx`43gn8T zrzDoU?+Zt4*Ka}@I6`|OE{yeu=#9+I)auH)@-j$_m2}TVz08~X&|j2>nw9-ad6ylt zkfF%PR#8#Y$-g%=G6^csk25;U<3{z=D4d(mV@BmMGplGOBP0c4nK>WY%;i{a=or)` zNoB42Tg!(#eVaVr+!hq&wxpcd$`ZbL1h@E?OWSdYC}>-qw5{ek?|P=oYCAycZ(hjG zu2U|!#Dl9<)`JeZ4vWD*>$CjJ1$BqQ(xg*brsa|sapBr#%5hqqWmmORr4rYFnQLCS zB5GwTef9NfFxTwJ=vFeA22j~tO)*t(xUCBB#Kno}WxOl9!7*rUYI-6gEoJ7n54E3v z2C+GuRe>BF9LlgQ;_1A!H+>z$_yE@)-*d&vS@qcx=m(P0DNo(+^1~0P-az~#eYV{a z2sS=lV2^W6L0M3Sz(G6LfHAOV?X47@dolgyfmXVy^ui3E@T53!ef+xwjEU(9vrV~8 zf!*>ED}14{>BSdJn>&e(Fr3p3Ncid_c=znyZJaj$nB}m--yR0DGRABW*}>Q+!k0HO zAcV3s4Ap7|GbGB-BOa~SZ#%<>r^BaTegJ}6+V|VSFEPqZs2DlL5es|cwCS{v?N?%~ zcqJ@h#^KG0D=?Lb_FXxu3W}O(snoq0Basj-nUOC~3?lPWxS{W-Q4+n~`6IhVJJi>) zG@kA7DmF=tt=${$HcL$2+!|i~8-tO{(cUql*8!m!R&+6*v8w;vod88OX@{&nzBg4D0*%bPYsZp!|2vPvV)7pI>aTqycLK4xD`5K<6<8Rq$ zGuE&D9*LXxVU^bW@{MsdWb&dVLeA`3CI)KRbQShHIIYs_a8uc#XlP@L|Z79)v zGALQTOWV3MtjtQxFHG>xcDe!id`|lAEwockGqOEy05$l}?EALeBC{0@KP);I@VBP` zCU9KjPp$t_KA3(+X?>A#>Jbi~K%8mFFQ1s8Nu%jpJqI2~!jBPb$inacu7b(ecrSfk zs{KznU8_>zr~B{d$AUsK7u%k^vs@6fAH1)-z(q8Q5ue8{+|ocx<3BIa{qtH_4y~v{J&z+q~Ejd rzWv{`2I&9uT>pQg|DS%n5ziF7_v(?fo1YM`UN%Wlxu0b}^nLy>4VWO= diff --git a/docs/index.md b/docs/index.md index 19ae679344e..b0321cc877c 100755 --- a/docs/index.md +++ b/docs/index.md @@ -26,7 +26,7 @@ This branching model makes your data lake ACID compliant by allowing changes to Since lakeFS is compatible with the S3 API, all popular applications will work without modification, by simply adding the branch name to the object path: -![lakeFS s3 addressing](assets/img/s3_branch.png) +lakeFS s3 addressing ## Here's why you need it: From 995ee860218f39c904af0d7a3a39b42f56fcfd20 Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Thu, 10 Sep 2020 14:33:24 +0100 Subject: [PATCH 12/39] Update README.md (#601) --- README.md | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 8ac26b4f5da..7701b000698 100644 --- a/README.md +++ b/README.md @@ -24,19 +24,20 @@ For more information see the [Official Documentation](https://docs.lakefs.io). ## Capabilities -**Atomic Operations** - lakeFS allows data producers to manipulate multiple objects as a single, atomic operation. If something fails half-way, all changes can be instantly rolled back. - -**Consistency** - lakeFS enables object-level and cross-collection consistency: - - object-level consistency ensures all operations within a branch are strongly consistent (read-after-write, list-after-write, read-after-delete, etc). - - cross-collection consistency is achieved by providing snapshot isolation. Using branches, writers can provide consistency guarantees across different logical collections - merging to “main” is only done after several datasets have been created successfully. - -**History** - Commits are retained for a configurable duration, so readers can query data from the latest commit, or from any other point in time. Writers can atomically and safely rollback changes to previous versions. - -**Cross-Lake Isolation** - Creating a lakeFS branch provides you with a snapshot of the entire lake at a given point in time. -All reads from that branch are guaranteed to always return the same results. 
- -**Data CI/CD** - The ability to define automated rules and tests that are required to pass before committing or merging changes to data. - +**Developer Environment for Data** +* **Experimentation** - try tools, upgrade versions and evaluate code changes in isolation. +* **Reproducibility** - go back to any point of time to a consistent version of your data lake. + +**Continuous Data Integration** +* **Ingest new data safely by enforcing best practices** - make sure new data sources adhere to your lake’s best practices such as format and schema enforcement, naming convention, etc. +* **Metadata validation** - prevent breaking changes from entering the production data environment. + +**Continuous Data Deployment** +* **Instantly revert changes to data** - if low quality data is exposed to your consumers, you can revert instantly to a former, consistent and correct snapshot of your data lake. +* **Enforce cross collection consistency** - provide to consumers several collections of data that must be synchronized, in one atomic, revertable, action +* **Prevent data quality issues by enabling** + - Testing of production data before exposing it to users / consumers + - Testing of intermediate results in your DAG to avoid cascading quality issues ## Getting Started From 9e40dce6448d9dd4fe2d9bcb6c528f837644cbcf Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Thu, 10 Sep 2020 16:57:54 +0300 Subject: [PATCH 13/39] Feature/import api orc (#548) --- api/api_controller.go | 9 +- block/gs/adapter.go | 2 +- block/inventory.go | 2 +- block/local/adapter.go | 2 +- block/mem/adapter.go | 2 +- block/s3/inventory.go | 138 +++++------- block/s3/inventory_iterator.go | 126 +++++------ block/s3/inventory_test.go | 265 +++++++++++++++--------- block/transient/adapter.go | 2 +- gateway/operations/mock_adapter_test.go | 2 +- go.mod | 5 + go.sum | 18 ++ inventory/s3/orc_reader.go | 136 ++++++++++++ inventory/s3/orc_utils.go | 137 ++++++++++++ inventory/s3/parquet_reader.go | 20 ++ inventory/s3/reader.go | 115 ++++++++++ inventory/s3/reader_test.go | 195 +++++++++++++++++ onboard/catalog_actions.go | 49 ++++- onboard/catalog_actions_test.go | 59 ++++-- onboard/import.go | 46 ++-- onboard/import_test.go | 21 +- onboard/utils_test.go | 11 +- 22 files changed, 1043 insertions(+), 319 deletions(-) create mode 100644 inventory/s3/orc_reader.go create mode 100644 inventory/s3/orc_utils.go create mode 100644 inventory/s3/parquet_reader.go create mode 100644 inventory/s3/reader.go create mode 100644 inventory/s3/reader_test.go diff --git a/api/api_controller.go b/api/api_controller.go index 84cd59820bd..cb2bc61ee71 100644 --- a/api/api_controller.go +++ b/api/api_controller.go @@ -2216,7 +2216,14 @@ func (c *Controller) ImportFromS3InventoryHandler() repositories.ImportFromS3Inv if err == nil { username = userModel.Username } - importer, err := onboard.CreateImporter(deps.ctx, deps.logger, deps.Cataloger, deps.BlockAdapter, username, params.ManifestURL, params.Repository) + importConfig := &onboard.ImporterConfig{ + CommitUsername: username, + InventoryURL: params.ManifestURL, + Repository: params.Repository, + InventoryGenerator: deps.BlockAdapter, + Cataloger: deps.Cataloger, + } + importer, err := onboard.CreateImporter(deps.ctx, deps.logger, importConfig) if err != nil { return repositories.NewImportFromS3InventoryDefault(http.StatusInternalServerError). 
WithPayload(responseErrorFrom(err)) diff --git a/block/gs/adapter.go b/block/gs/adapter.go index 6f744b4a5ba..ac6ed6072e9 100644 --- a/block/gs/adapter.go +++ b/block/gs/adapter.go @@ -405,7 +405,7 @@ func (a *Adapter) ValidateConfiguration(_ string) error { return nil } -func (a *Adapter) GenerateInventory(_ context.Context, _ logging.Logger, _ string) (block.Inventory, error) { +func (a *Adapter) GenerateInventory(_ context.Context, _ logging.Logger, _ string, _ bool) (block.Inventory, error) { return nil, fmt.Errorf("inventory %w", ErrNotImplemented) } diff --git a/block/inventory.go b/block/inventory.go index 51cf297d90f..93def4eeb4c 100644 --- a/block/inventory.go +++ b/block/inventory.go @@ -7,7 +7,7 @@ import ( ) type InventoryGenerator interface { - GenerateInventory(ctx context.Context, logger logging.Logger, inventoryURL string) (Inventory, error) + GenerateInventory(ctx context.Context, logger logging.Logger, inventoryURL string, shouldSort bool) (Inventory, error) } // Inventory represents a snapshot of the storage space diff --git a/block/local/adapter.go b/block/local/adapter.go index 6c1294add9b..139a5172f49 100644 --- a/block/local/adapter.go +++ b/block/local/adapter.go @@ -248,6 +248,6 @@ func (l *Adapter) ValidateConfiguration(_ string) error { return nil } -func (l *Adapter) GenerateInventory(_ context.Context, _ logging.Logger, _ string) (block.Inventory, error) { +func (l *Adapter) GenerateInventory(_ context.Context, _ logging.Logger, _ string, _ bool) (block.Inventory, error) { return nil, ErrInventoryNotSupported } diff --git a/block/mem/adapter.go b/block/mem/adapter.go index d31ad734a07..2fbc520b67c 100644 --- a/block/mem/adapter.go +++ b/block/mem/adapter.go @@ -217,6 +217,6 @@ func (a *Adapter) ValidateConfiguration(_ string) error { return nil } -func (a *Adapter) GenerateInventory(_ context.Context, _ logging.Logger, _ string) (block.Inventory, error) { +func (a *Adapter) GenerateInventory(_ context.Context, _ logging.Logger, _ string, _ bool) (block.Inventory, error) { return nil, ErrInventoryNotImplemented } diff --git a/block/s3/inventory.go b/block/s3/inventory.go index 5ac2b391a0f..3e1c431e24b 100644 --- a/block/s3/inventory.go +++ b/block/s3/inventory.go @@ -12,66 +12,51 @@ import ( "github.com/aws/aws-sdk-go/service/s3" "github.com/aws/aws-sdk-go/service/s3/s3iface" "github.com/treeverse/lakefs/block" + inventorys3 "github.com/treeverse/lakefs/inventory/s3" "github.com/treeverse/lakefs/logging" - s3parquet "github.com/xitongsys/parquet-go-source/s3" - "github.com/xitongsys/parquet-go/reader" ) -type manifest struct { - URL string `json:"-"` - InventoryBucketArn string `json:"destinationBucket"` - SourceBucket string `json:"sourceBucket"` - Files []manifestFile `json:"files"` - Format string `json:"fileFormat"` - inventoryBucket string -} +var ErrInventoryFilesRangesOverlap = errors.New("got s3 inventory with files covering overlapping ranges") -type manifestFile struct { - Key string `json:"key"` - firstKey string - numRows int +type Manifest struct { + URL string `json:"-"` + InventoryBucketArn string `json:"destinationBucket"` + SourceBucket string `json:"sourceBucket"` + Files []inventoryFile `json:"files"` // inventory list files, each contains a list of objects + Format string `json:"fileFormat"` + inventoryBucket string } -type ParquetReader interface { - Read(dstInterface interface{}) error - GetNumRows() int64 - SkipRows(int64) error +type inventoryFile struct { + Key string `json:"key"` // an s3 key for an inventory list file } -type 
parquetReaderGetter func(ctx context.Context, svc s3iface.S3API, inventoryBucket string, manifestFileKey string) (ParquetReader, CloseFunc, error) - -type CloseFunc func() error - -var ErrParquetOnlySupport = errors.New("currently only parquet inventories are supported") - -func (a *Adapter) GenerateInventory(ctx context.Context, logger logging.Logger, manifestURL string) (block.Inventory, error) { - return GenerateInventory(ctx, logger, manifestURL, a.s3, getParquetReader) +func (a *Adapter) GenerateInventory(ctx context.Context, logger logging.Logger, manifestURL string, shouldSort bool) (block.Inventory, error) { + return GenerateInventory(logger, manifestURL, a.s3, inventorys3.NewReader(ctx, a.s3, logger), shouldSort) } -func GenerateInventory(ctx context.Context, logger logging.Logger, manifestURL string, s3 s3iface.S3API, getParquetReader parquetReaderGetter) (block.Inventory, error) { +func GenerateInventory(logger logging.Logger, manifestURL string, s3 s3iface.S3API, inventoryReader inventorys3.IReader, shouldSort bool) (block.Inventory, error) { + if logger == nil { + logger = logging.Default() + } m, err := loadManifest(manifestURL, s3) if err != nil { return nil, err } - err = m.readFileMetadata(ctx, logger, s3, getParquetReader) + if shouldSort { + err = sortManifest(m, logger, inventoryReader) + } if err != nil { return nil, err } - if logger == nil { - logger = logging.Default() - } - sort.Slice(m.Files, func(i, j int) bool { - return m.Files[i].firstKey < m.Files[j].firstKey - }) - return &Inventory{Manifest: m, S3: s3, getParquetReader: getParquetReader, logger: logger}, nil + return &Inventory{Manifest: m, logger: logger, shouldSort: shouldSort, reader: inventoryReader}, nil } type Inventory struct { - S3 s3iface.S3API - Manifest *manifest - ctx context.Context //nolint:structcheck // known issue: https://github.com/golangci/golangci-lint/issues/826) - getParquetReader parquetReaderGetter - logger logging.Logger + Manifest *Manifest + logger logging.Logger + shouldSort bool + reader inventorys3.IReader } func (inv *Inventory) Iterator() block.InventoryIterator { @@ -86,33 +71,7 @@ func (inv *Inventory) InventoryURL() string { return inv.Manifest.URL } -func (m *manifest) readFileMetadata(ctx context.Context, logger logging.Logger, s3 s3iface.S3API, getParquetReader parquetReaderGetter) error { - for i := range m.Files { - filename := m.Files[i].Key - pr, closeReader, err := getParquetReader(ctx, s3, m.inventoryBucket, filename) - if err != nil { - return err - } - m.Files[i].numRows = int(pr.GetNumRows()) - // read first row from file to store the first key: - rows := make([]ParquetInventoryObject, 1) - err = pr.Read(&rows) - if err != nil { - return err - } - err = closeReader() - if err != nil { - logger.WithFields(logging.Fields{"bucket": m.inventoryBucket, "key": filename}). - Error("failed to close parquet reader after reading metadata") - } - if len(rows) != 0 { - m.Files[i].firstKey = rows[0].Key - } - } - return nil -} - -func loadManifest(manifestURL string, s3svc s3iface.S3API) (*manifest, error) { +func loadManifest(manifestURL string, s3svc s3iface.S3API) (*Manifest, error) { u, err := url.Parse(manifestURL) if err != nil { return nil, err @@ -121,13 +80,13 @@ func loadManifest(manifestURL string, s3svc s3iface.S3API) (*manifest, error) { if err != nil { return nil, err } - var m manifest + var m Manifest err = json.NewDecoder(output.Body).Decode(&m) if err != nil { return nil, err } - if m.Format != "Parquet" { - return nil, fmt.Errorf("%w. 
got: %s", ErrParquetOnlySupport, m.Format) + if m.Format != inventorys3.OrcFormatName && m.Format != inventorys3.ParquetFormatName { + return nil, fmt.Errorf("%w. got format: %s", inventorys3.ErrUnsupportedInventoryFormat, m.Format) } m.URL = manifestURL inventoryBucketArn, err := arn.Parse(m.InventoryBucketArn) @@ -138,19 +97,32 @@ func loadManifest(manifestURL string, s3svc s3iface.S3API) (*manifest, error) { return &m, nil } -func getParquetReader(ctx context.Context, svc s3iface.S3API, inventoryBucket string, manifestFileKey string) (ParquetReader, CloseFunc, error) { - pf, err := s3parquet.NewS3FileReaderWithClient(ctx, svc, inventoryBucket, manifestFileKey) - if err != nil { - return nil, nil, fmt.Errorf("failed to create parquet file reader: %w", err) - } - var rawObject ParquetInventoryObject - pr, err := reader.NewParquetReader(pf, &rawObject, 4) - if err != nil { - return nil, nil, fmt.Errorf("failed to create parquet reader: %w", err) +func sortManifest(m *Manifest, logger logging.Logger, reader inventorys3.IReader) error { + firstKeyByInventoryFile := make(map[string]string) + lastKeyByInventoryFile := make(map[string]string) + for _, f := range m.Files { + mr, err := reader.GetMetadataReader(m.Format, m.inventoryBucket, f.Key) + if err != nil { + return fmt.Errorf("failed to sort inventory files in manifest: %w", err) + } + firstKeyByInventoryFile[f.Key] = mr.FirstObjectKey() + lastKeyByInventoryFile[f.Key] = mr.LastObjectKey() + err = mr.Close() + if err != nil { + logger.Errorf("failed to close inventory file. file=%s, err=%w", f, err) + } } - closer := func() error { - pr.ReadStop() - return pf.Close() + sort.Slice(m.Files, func(i, j int) bool { + return firstKeyByInventoryFile[m.Files[i].Key] < firstKeyByInventoryFile[m.Files[j].Key] || + (firstKeyByInventoryFile[m.Files[i].Key] == firstKeyByInventoryFile[m.Files[j].Key] && + lastKeyByInventoryFile[m.Files[i].Key] < lastKeyByInventoryFile[m.Files[j].Key]) + }) + // validate sorting: if a file begins before the next one ends - the files cover overlapping ranges, + // which we don't know how to handle. 
+ for i := 0; i < len(m.Files)-1; i++ { + if firstKeyByInventoryFile[m.Files[i+1].Key] < lastKeyByInventoryFile[m.Files[i].Key] { + return ErrInventoryFilesRangesOverlap + } } - return pr, closer, nil + return nil } diff --git a/block/s3/inventory_iterator.go b/block/s3/inventory_iterator.go index 49a1eaabe05..f4a085762ac 100644 --- a/block/s3/inventory_iterator.go +++ b/block/s3/inventory_iterator.go @@ -1,137 +1,115 @@ package s3 import ( + "errors" + "github.com/treeverse/lakefs/block" - "github.com/treeverse/lakefs/logging" + inventorys3 "github.com/treeverse/lakefs/inventory/s3" ) -const DefaultReadBatchSize = 100000 - -type ParquetInventoryObject struct { - Bucket string `parquet:"name=bucket, type=UTF8"` - Key string `parquet:"name=key, type=UTF8"` - IsLatest *bool `parquet:"name=is_latest, type=BOOLEAN"` - IsDeleteMarker *bool `parquet:"name=is_delete_marker, type=BOOLEAN"` - Size *int64 `parquet:"name=size, type=INT_64"` - LastModified *int64 `parquet:"name=last_modified_date, type=TIMESTAMP_MILLIS"` - Checksum *string `parquet:"name=e_tag, type=UTF8"` -} - -func (o *ParquetInventoryObject) GetPhysicalAddress() string { - return "s3://" + o.Bucket + "/" + o.Key -} +var ErrInventoryNotSorted = errors.New("got unsorted s3 inventory") type InventoryIterator struct { *Inventory - ReadBatchSize int - err error - val block.InventoryObject - buffer []ParquetInventoryObject - currentManifestFileIdx int - nextRowInParquet int - valIndexInBuffer int + err error + val *block.InventoryObject + buffer []inventorys3.InventoryObject + inventoryFileIndex int + valIndexInBuffer int } func NewInventoryIterator(inv *Inventory) *InventoryIterator { return &InventoryIterator{ - Inventory: inv, - ReadBatchSize: DefaultReadBatchSize, + Inventory: inv, + inventoryFileIndex: -1, } } func (it *InventoryIterator) Next() bool { - if len(it.Manifest.Files) == 0 { - // empty manifest - return false - } for { - val, valIndex := it.nextFromBuffer() + if len(it.Manifest.Files) == 0 { + // empty manifest + return false + } + val := it.nextFromBuffer() if val != nil { - // found the next object in buffer - it.valIndexInBuffer = valIndex - it.val = *val + // validate element order + if it.shouldSort && it.val != nil && val.Key < it.val.Key { + it.err = ErrInventoryNotSorted + return false + } + it.val = val return true } // value not found in buffer, need to reload the buffer it.valIndexInBuffer = -1 - // if needed, try to move on to the next manifest file: - file := it.Manifest.Files[it.currentManifestFileIdx] - if it.nextRowInParquet >= file.numRows && !it.moveToNextManifestFile() { + if !it.moveToNextInventoryFile() { // no more files left return false } - if !it.fillBuffer() { // fill from current manifest file + if !it.fillBuffer() { return false } } } -func (it *InventoryIterator) moveToNextManifestFile() bool { - if it.currentManifestFileIdx == len(it.Manifest.Files)-1 { +func (it *InventoryIterator) moveToNextInventoryFile() bool { + if it.inventoryFileIndex == len(it.Manifest.Files)-1 { return false } - it.currentManifestFileIdx += 1 - it.nextRowInParquet = 0 + it.inventoryFileIndex += 1 + it.logger.Debugf("moving to next manifest file: %s", it.Manifest.Files[it.inventoryFileIndex].Key) it.buffer = nil return true } func (it *InventoryIterator) fillBuffer() bool { - key := it.Manifest.Files[it.currentManifestFileIdx].Key - pr, closeReader, err := it.getParquetReader(it.ctx, it.S3, it.Manifest.inventoryBucket, key) + it.logger.Debug("start reading rows from inventory to buffer") + rdr, err := 
it.reader.GetFileReader(it.Manifest.Format, it.Manifest.inventoryBucket, it.Manifest.Files[it.inventoryFileIndex].Key) if err != nil { it.err = err return false } defer func() { - err = closeReader() + err = rdr.Close() if err != nil { - it.logger.WithFields(logging.Fields{"bucket": it.Manifest.inventoryBucket, "key": key}). - Error("failed to close parquet reader after filling buffer") + it.logger.Errorf("failed to close manifest file reader. file=%s, err=%w", it.Manifest.Files[it.inventoryFileIndex].Key, err) } }() - // skip the rows that have already been read: - err = pr.SkipRows(int64(it.nextRowInParquet)) - if err != nil { - it.err = err - return false - } - it.buffer = make([]ParquetInventoryObject, it.ReadBatchSize) - // read a batch of rows according to the batch size: - err = pr.Read(&it.buffer) + it.buffer = make([]inventorys3.InventoryObject, rdr.GetNumRows()) + err = rdr.Read(&it.buffer) if err != nil { it.err = err return false } - it.nextRowInParquet += len(it.buffer) return true } -func (it *InventoryIterator) nextFromBuffer() (*block.InventoryObject, int) { - var res block.InventoryObject +func (it *InventoryIterator) nextFromBuffer() *block.InventoryObject { for i := it.valIndexInBuffer + 1; i < len(it.buffer); i++ { - parquetObj := it.buffer[i] - if (parquetObj.IsLatest != nil && !*parquetObj.IsLatest) || - (parquetObj.IsDeleteMarker != nil && *parquetObj.IsDeleteMarker) { + obj := it.buffer[i] + if (obj.IsLatest != nil && !*obj.IsLatest) || + (obj.IsDeleteMarker != nil && *obj.IsDeleteMarker) { continue } - res = block.InventoryObject{ - Bucket: parquetObj.Bucket, - Key: parquetObj.Key, - PhysicalAddress: parquetObj.GetPhysicalAddress(), + res := block.InventoryObject{ + Bucket: obj.Bucket, + Key: obj.Key, + PhysicalAddress: obj.GetPhysicalAddress(), } - if parquetObj.Size != nil { - res.Size = *parquetObj.Size + if obj.Size != nil { + res.Size = *obj.Size } - if parquetObj.LastModified != nil { - res.LastModified = *parquetObj.LastModified + if obj.LastModified != nil { + res.LastModified = *obj.LastModified } - if parquetObj.Checksum != nil { - res.Checksum = *parquetObj.Checksum + if obj.Checksum != nil { + res.Checksum = *obj.Checksum } - return &res, i + it.valIndexInBuffer = i + return &res } - return nil, -1 + return nil } func (it *InventoryIterator) Err() error { @@ -139,5 +117,5 @@ func (it *InventoryIterator) Err() error { } func (it *InventoryIterator) Get() *block.InventoryObject { - return &it.val + return it.val } diff --git a/block/s3/inventory_test.go b/block/s3/inventory_test.go index c04f75c3970..4438215f7b9 100644 --- a/block/s3/inventory_test.go +++ b/block/s3/inventory_test.go @@ -1,8 +1,8 @@ package s3_test import ( - "context" "encoding/json" + "errors" "fmt" "io/ioutil" "reflect" @@ -10,57 +10,75 @@ import ( "strings" "testing" - "github.com/go-openapi/swag" - "github.com/treeverse/lakefs/logging" - - s32 "github.com/aws/aws-sdk-go/service/s3" + s3sdk "github.com/aws/aws-sdk-go/service/s3" "github.com/aws/aws-sdk-go/service/s3/s3iface" + "github.com/go-openapi/swag" "github.com/treeverse/lakefs/block/s3" + inventorys3 "github.com/treeverse/lakefs/inventory/s3" + "github.com/treeverse/lakefs/logging" ) -func rows(keys ...string) []*s3.ParquetInventoryObject { +var ErrReadFile = errors.New("error reading file") + +func rows(keys ...string) []*inventorys3.InventoryObject { if keys == nil { return nil } - res := make([]*s3.ParquetInventoryObject, len(keys)) + res := make([]*inventorys3.InventoryObject, len(keys)) for i, key := range keys { if key != 
"" { - res[i] = new(s3.ParquetInventoryObject) + res[i] = new(inventorys3.InventoryObject) res[i].Key = key - res[i].IsLatest = swag.Bool(!strings.HasPrefix(key, "expired_")) - res[i].IsDeleteMarker = swag.Bool(strings.HasPrefix(key, "del_")) + res[i].IsLatest = swag.Bool(!strings.Contains(key, "_expired")) + res[i].IsDeleteMarker = swag.Bool(strings.Contains(key, "_del")) } } return res } var fileContents = map[string][]string{ - "f1": {"del_1", "f1row1", "f1row2", "del_2"}, - "f2": {"f2row1", "f2row2"}, - "f3": {"f3row1", "f3row2"}, - "f4": {"f4row1", "f4row2", "f4row3", "f4row4", "f4row5", "f4row6", "f4row7"}, - "f5": {"a1", "a2", "a3"}, - "f6": {"a4", "a5", "a6", "a7"}, - "f7": {"f7row1", "del_1", "del_2", "del_3", "del_4", "del_5", "del_6", "expired_1", "expired_2", "expired_3", "f7row2"}, - "err_file1": {"a4", "", "a6", "a7"}, - "err_file2": {""}, - "all_deleted": {"del_1", "del_2", "del_3", "del_4", "del_5", "del_6", "del_7", "del_8"}, - "empty_file": {}, + "f1": {"f1row1_del", "f1row2", "f1row3", "f1row4_del"}, + "f2": {"f2row1", "f2row2"}, + "f3": {"f3row1", "f3row2"}, + "f4": {"f4row1", "f4row2", "f4row3", "f4row4", "f4row5", "f4row6", "f4row7"}, + "f5": {"f5row1", "f5row2", "f5row3"}, + "f6": {"f6row1", "f6row2", "f6row3", "f6row4"}, + "f7": {"f7row1", "f7row2_del", "f7row3_del", "f7row4_del", "f7row5_del", "f7row6_del", "f7row7_del", "f7row8_expired", "f7row9_expired", "f7row10_expired", "f7row11"}, + "err_file1": {"f8row1", "", "f8row2", "f8row3"}, + "err_file2": {""}, + "unsorted_file": {"f9row1", "f9row2", "f9row3", "f9row5", "f9row4"}, + "all_deleted1": {"fd1_del1", "fd1_del2", "fd1_del3", "fd1_del4", "fd1_del5", "fd1_del6", "fd1_del7", "fd1_del8"}, + "all_deleted2": {"fd2_del1", "fd2_del2", "fd2_del3", "fd2_del4", "fd2_del5", "fd2_del6", "fd2_del7", "fd2_del8"}, + "all_deleted3": {"fd3_del1", "fd3_del2", "fd3_del3", "fd3_del4", "fd3_del5", "fd3_del6", "fd3_del7", "fd3_del8"}, + "all_deleted4": {"fd4_del1", "fd4_del2", "fd4_del3", "fd4_del4", "fd4_del5", "fd4_del6", "fd4_del7", "fd4_del8"}, + "all_deleted5": {"fd5_del1", "fd5_del2", "fd5_del3", "fd5_del4", "fd5_del5", "fd5_del6", "fd5_del7", "fd5_del8"}, + "all_deleted6": {"fd6_del1", "fd6_del2", "fd6_del3", "fd6_del4", "fd6_del5", "fd6_del6", "fd6_del7", "fd6_del8"}, + "all_deleted7": {"fd7_del1", "fd7_del2", "fd7_del3", "fd7_del4", "fd7_del5", "fd7_del6", "fd7_del7", "fd7_del8"}, + "all_deleted8": {"fd8_del1", "fd8_del2", "fd8_del3", "fd8_del4", "fd8_del5", "fd8_del6", "fd8_del7", "fd8_del8"}, + "all_deleted9": {"fd9_del1", "fd9_del2", "fd9_del3", "fd9_del4", "fd9_del5", "fd9_del6", "fd9_del7", "fd9_del8"}, + "empty_file": {}, + "f_overlap1": {"fo_row1", "fo_row3", "fo_row5"}, + "f_overlap2": {"fo_row2", "fo_row4"}, + "f_overlap3": {"fo_row2", "fo_row6"}, + "f_overlap4": {"fo_row1", "fo_row4"}, + "f_overlap5": {"fo_row2", "fo_row4"}, } func TestIterator(t *testing.T) { testdata := []struct { InventoryFiles []string ExpectedObjects []string - ErrExpected bool + ErrExpected error + ShouldSort bool }{ { InventoryFiles: []string{"f1", "f2", "f3"}, - ExpectedObjects: []string{"f1row1", "f1row2", "f2row1", "f2row2", "f3row1", "f3row2"}, + ExpectedObjects: []string{"f1row2", "f1row3", "f2row1", "f2row2", "f3row1", "f3row2"}, }, { InventoryFiles: []string{"f3", "f2", "f1"}, - ExpectedObjects: []string{"f1row1", "f1row2", "f2row1", "f2row2", "f3row1", "f3row2"}, + ShouldSort: true, + ExpectedObjects: []string{"f1row2", "f1row3", "f2row1", "f2row2", "f3row1", "f3row2"}, }, { InventoryFiles: []string{}, @@ -72,94 +90,151 @@ func 
TestIterator(t *testing.T) { }, { InventoryFiles: []string{"f1", "f4"}, - ExpectedObjects: []string{"f1row1", "f1row2", "f4row1", "f4row2", "f4row3", "f4row4", "f4row5", "f4row6", "f4row7"}, + ExpectedObjects: []string{"f1row2", "f1row3", "f4row1", "f4row2", "f4row3", "f4row4", "f4row5", "f4row6", "f4row7"}, }, { InventoryFiles: []string{"f5", "f6"}, - ExpectedObjects: []string{"a1", "a2", "a3", "a4", "a5", "a6", "a7"}, + ExpectedObjects: []string{"f5row1", "f5row2", "f5row3", "f6row1", "f6row2", "f6row3", "f6row4"}, }, { - InventoryFiles: []string{"f6", "f5"}, - ExpectedObjects: []string{"a1", "a2", "a3", "a4", "a5", "a6", "a7"}, + InventoryFiles: []string{"f1", "unsorted_file"}, + ErrExpected: s3.ErrInventoryNotSorted, + ShouldSort: true, }, { InventoryFiles: []string{"f5", "err_file1"}, - ErrExpected: true, + ErrExpected: ErrReadFile, }, { - InventoryFiles: []string{"f1,", "f2", "f3", "f4", "f5", "f6", "err_file2"}, - ErrExpected: true, + InventoryFiles: []string{"f1", "f2", "f3", "f4", "f5", "f6", "err_file2"}, + ErrExpected: ErrReadFile, }, { InventoryFiles: []string{"f7"}, - ExpectedObjects: []string{"f7row1", "f7row2"}, + ExpectedObjects: []string{"f7row1", "f7row11"}, }, { - InventoryFiles: []string{"all_deleted", "all_deleted", "all_deleted"}, + InventoryFiles: []string{"all_deleted1", "all_deleted2", "all_deleted3"}, ExpectedObjects: []string{}, }, { - InventoryFiles: []string{"all_deleted", "all_deleted", "f1", "all_deleted", "all_deleted", "all_deleted", "all_deleted", "all_deleted", "f2", "all_deleted", "all_deleted"}, - ExpectedObjects: []string{"f1row1", "f1row2", "f2row1", "f2row2"}, + InventoryFiles: []string{"all_deleted1", "all_deleted2", "f1", "all_deleted3", "all_deleted4", "all_deleted5", "all_deleted6", "all_deleted7", "f2", "all_deleted8", "all_deleted9"}, + ExpectedObjects: []string{"f1row2", "f1row3", "f2row1", "f2row2"}, }, { - InventoryFiles: []string{"all_deleted", "all_deleted", "f2", "all_deleted", "all_deleted", "all_deleted", "all_deleted", "all_deleted", "f1", "all_deleted", "all_deleted"}, - ExpectedObjects: []string{"f1row1", "f1row2", "f2row1", "f2row2"}, + InventoryFiles: []string{"all_deleted1", "all_deleted2", "f2", "all_deleted3", "all_deleted4", "all_deleted5", "all_deleted6", "all_deleted7", "f1", "all_deleted8", "all_deleted9"}, + ExpectedObjects: []string{"f1row2", "f1row3", "f2row1", "f2row2"}, + ShouldSort: true, }, { InventoryFiles: []string{"empty_file"}, ExpectedObjects: []string{}, }, + { + InventoryFiles: []string{"f_overlap1", "f_overlap2"}, + ShouldSort: true, + ErrExpected: s3.ErrInventoryFilesRangesOverlap, + }, + { + InventoryFiles: []string{"f_overlap1", "f_overlap3"}, + ShouldSort: true, + ErrExpected: s3.ErrInventoryFilesRangesOverlap, + }, + { + InventoryFiles: []string{"f_overlap1", "f_overlap4"}, + ShouldSort: true, + ErrExpected: s3.ErrInventoryFilesRangesOverlap, + }, + { + InventoryFiles: []string{"f_overlap4", "f_overlap5"}, + ShouldSort: true, + ErrExpected: s3.ErrInventoryFilesRangesOverlap, + }, } manifestURL := "s3://example-bucket/manifest1.json" for _, test := range testdata { - for _, batchSize := range []int{1, 2, 3, 4, 5, 7, 9, 11, 15, 100, 1000, 10000} { - inv, err := s3.GenerateInventory(context.Background(), logging.Default(), manifestURL, &mockS3Client{ - FilesByManifestURL: map[string][]string{manifestURL: test.InventoryFiles}, - }, mockParquetReaderGetter) - if !test.ErrExpected && err != nil { - t.Fatalf("error: %v", err) - } else if err != nil { - continue - } - it := inv.Iterator() - 
it.(*s3.InventoryIterator).ReadBatchSize = batchSize - objects := make([]string, 0, len(test.ExpectedObjects)) - for it.Next() { - objects = append(objects, it.Get().Key) - } - if !test.ErrExpected && it.Err() != nil { - t.Fatalf("got unexpected error: %v", it.Err()) - } - if test.ErrExpected { - if it.Err() == nil { - print(len(test.ExpectedObjects)) - t.Fatalf("expected error but didn't get one") - } + s3api := &mockS3Client{ + FilesByManifestURL: map[string][]string{manifestURL: test.InventoryFiles}, + } + reader := &mockInventoryReader{openFiles: make(map[string]bool)} + inv, err := s3.GenerateInventory(logging.Default(), manifestURL, s3api, reader, test.ShouldSort) + if err != nil { + if errors.Is(err, test.ErrExpected) { continue } - if len(objects) != len(test.ExpectedObjects) { - t.Fatalf("unexpected number of objects in inventory. expected=%d, got=%d", len(test.ExpectedObjects), len(objects)) - } - if !reflect.DeepEqual(objects, test.ExpectedObjects) { - t.Fatalf("objects in inventory differrent than expected. expected=%v, got=%v", test.ExpectedObjects, objects) - } + t.Fatalf("error: %v", err) + } + it := inv.Iterator() + objects := make([]string, 0, len(test.ExpectedObjects)) + for it.Next() { + objects = append(objects, it.Get().Key) + } + if len(reader.openFiles) != 0 { + t.Errorf("some files stayed open: %v", reader.openFiles) + } + if !errors.Is(it.Err(), test.ErrExpected) { + t.Fatalf("got unexpected error. expected=%v, got=%v.", test.ErrExpected, it.Err()) + } + if test.ErrExpected != nil { + continue + } + if len(objects) != len(test.ExpectedObjects) { + t.Fatalf("unexpected number of objects in inventory. expected=%d, got=%d", len(test.ExpectedObjects), len(objects)) + } + if !reflect.DeepEqual(objects, test.ExpectedObjects) { + t.Fatalf("objects in inventory differrent than expected. 
expected=%v, got=%v", test.ExpectedObjects, objects) } } } -type mockParquetReader struct { - rows []*s3.ParquetInventoryObject - nextIdx int +type mockInventoryReader struct { + openFiles map[string]bool +} + +type mockInventoryFileReader struct { + rows []*inventorys3.InventoryObject + nextIdx int + inventoryReader *mockInventoryReader + key string +} + +func (m *mockInventoryFileReader) FirstObjectKey() string { + if len(m.rows) == 0 { + return "" + } + min := m.rows[0].Key + for _, r := range m.rows { + if r.Key < min { + min = r.Key + } + } + return min } -func (m *mockParquetReader) Read(dstInterface interface{}) error { - res := make([]s3.ParquetInventoryObject, 0, len(m.rows)) - dst := dstInterface.(*[]s3.ParquetInventoryObject) +func (m *mockInventoryFileReader) LastObjectKey() string { + max := "" + for _, r := range m.rows { + if r.Key > max { + max = r.Key + } + } + return max +} + +func (m *mockInventoryFileReader) Close() error { + m.nextIdx = -1 + m.rows = nil + delete(m.inventoryReader.openFiles, m.key) + return nil +} + +func (m *mockInventoryFileReader) Read(dstInterface interface{}) error { + res := make([]inventorys3.InventoryObject, 0, len(m.rows)) + dst := dstInterface.(*[]inventorys3.InventoryObject) for i := m.nextIdx; i < len(m.rows) && i < m.nextIdx+len(*dst); i++ { if m.rows[i] == nil { - return fmt.Errorf("got empty key") // for test - simulate file with error + return ErrReadFile // for test - simulate file with error } res = append(res, *m.rows[i]) } @@ -168,48 +243,38 @@ func (m *mockParquetReader) Read(dstInterface interface{}) error { return nil } -func (m *mockParquetReader) GetNumRows() int64 { +func (m *mockInventoryFileReader) GetNumRows() int64 { return int64(len(m.rows)) } -func (m *mockParquetReader) SkipRows(skip int64) error { - m.nextIdx += int(skip) - if m.nextIdx > len(m.rows) { - return fmt.Errorf("index out of bounds after skip. 
got index=%d, length=%d", m.nextIdx, len(m.rows)) - } - return nil -} -func mockParquetReaderGetter(_ context.Context, _ s3iface.S3API, bucket string, key string) (s3.ParquetReader, s3.CloseFunc, error) { - if bucket != "example-bucket" { - return nil, nil, fmt.Errorf("wrong bucket name: %s", bucket) - } - pr := &mockParquetReader{rows: rows(fileContents[key]...)} - return pr, func() error { - pr.nextIdx = -1 - pr.rows = nil - return nil - }, nil +func (m *mockInventoryReader) GetFileReader(_ string, _ string, key string) (inventorys3.FileReader, error) { + m.openFiles[key] = true + return &mockInventoryFileReader{rows: rows(fileContents[key]...), inventoryReader: m, key: key}, nil } -func (m *mockS3Client) GetObject(input *s32.GetObjectInput) (*s32.GetObjectOutput, error) { - output := s32.GetObjectOutput{} +func (m *mockInventoryReader) GetMetadataReader(_ string, _ string, key string) (inventorys3.MetadataReader, error) { + m.openFiles[key] = true + return &mockInventoryFileReader{rows: rows(fileContents[key]...), inventoryReader: m, key: key}, nil +} +func (m *mockS3Client) GetObject(input *s3sdk.GetObjectInput) (*s3sdk.GetObjectOutput, error) { + output := s3sdk.GetObjectOutput{} manifestURL := fmt.Sprintf("s3://%s%s", *input.Bucket, *input.Key) if !manifestExists(manifestURL) { return &output, nil } - manifestFileNames := m.FilesByManifestURL[manifestURL] - if manifestFileNames == nil { - manifestFileNames = []string{"inventory/lakefs-example-data/my_inventory/data/ea8268b2-a6ba-42de-8694-91a9833b4ff1.parquet"} + inventoryFileNames := m.FilesByManifestURL[manifestURL] + if inventoryFileNames == nil { + inventoryFileNames = []string{"inventory/lakefs-example-data/my_inventory/data/ea8268b2-a6ba-42de-8694-91a9833b4ff1.parquet"} } - manifestFiles := make([]interface{}, 0, len(manifestFileNames)) - for _, filename := range manifestFileNames { - manifestFiles = append(manifestFiles, struct { + inventoryFiles := make([]interface{}, 0, len(inventoryFileNames)) + for _, filename := range inventoryFileNames { + inventoryFiles = append(inventoryFiles, struct { Key string `json:"key"` }{ Key: filename, }) } - filesJSON, err := json.Marshal(manifestFiles) + filesJSON, err := json.Marshal(inventoryFiles) if err != nil { return nil, err } diff --git a/block/transient/adapter.go b/block/transient/adapter.go index e68e44d1520..366b1efa85d 100644 --- a/block/transient/adapter.go +++ b/block/transient/adapter.go @@ -106,6 +106,6 @@ func (a *Adapter) ValidateConfiguration(_ string) error { return nil } -func (a *Adapter) GenerateInventory(_ context.Context, _ logging.Logger, _ string) (block.Inventory, error) { +func (a *Adapter) GenerateInventory(_ context.Context, _ logging.Logger, _ string, _ bool) (block.Inventory, error) { return nil, ErrInventoryNotImplemented } diff --git a/gateway/operations/mock_adapter_test.go b/gateway/operations/mock_adapter_test.go index 05d724685a8..f5d27b275fb 100644 --- a/gateway/operations/mock_adapter_test.go +++ b/gateway/operations/mock_adapter_test.go @@ -80,6 +80,6 @@ func (a *mockAdapter) ValidateConfiguration(_ string) error { return nil } -func (a *mockAdapter) GenerateInventory(_ context.Context, _ logging.Logger, _ string) (block.Inventory, error) { +func (a *mockAdapter) GenerateInventory(_ context.Context, _ logging.Logger, _ string, _ bool) (block.Inventory, error) { return nil, nil } diff --git a/go.mod b/go.mod index fb25f3532f5..4b41e7fc04d 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/cenkalti/backoff/v3 v3.2.2 // indirect 
github.com/containerd/containerd v1.3.6 // indirect github.com/containerd/continuity v0.0.0-20200710164510-efbc4488d8fe // indirect + github.com/cznic/mathutil v0.0.0-20180504122225-ca4c9f2c1369 github.com/davecgh/go-spew v1.1.1 github.com/dgryski/go-gk v0.0.0-20200319235926-a69029f61654 // indirect github.com/dlmiddlecote/sqlstats v1.0.1 @@ -26,9 +27,11 @@ require ( github.com/go-test/deep v1.0.7 github.com/gofrs/uuid v3.3.0+incompatible // indirect github.com/golang-migrate/migrate/v4 v4.12.2 + github.com/golang/protobuf v1.4.2 github.com/golangci/golangci-lint v1.30.0 github.com/google/uuid v1.1.1 github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00 // indirect + github.com/hashicorp/go-multierror v1.1.0 github.com/hnlq715/golang-lru v0.2.0 github.com/influxdata/tdigest v0.0.1 // indirect github.com/jackc/pgconn v1.6.4 @@ -39,6 +42,7 @@ require ( github.com/jedib0t/go-pretty v4.3.0+incompatible github.com/jessevdk/go-flags v1.4.0 github.com/jmoiron/sqlx v1.2.1-0.20190826204134-d7d95172beb5 + github.com/johannesboyne/gofakes3 v0.0.0-20200716060623-6b2b4cb092cc github.com/klauspost/compress v1.10.10 // indirect github.com/lib/pq v1.8.0 // indirect github.com/lunixbochs/vtclean v1.0.0 // indirect @@ -55,6 +59,7 @@ require ( github.com/rakyll/statik v0.1.7 github.com/rs/xid v1.2.1 github.com/schollz/progressbar/v3 v3.3.4 + github.com/scritchley/orc v0.0.0-20200625081059-e6fcbf41b2c2 github.com/shopspring/decimal v1.2.0 // indirect github.com/sirupsen/logrus v1.6.0 github.com/smartystreets/assertions v1.1.1 // indirect diff --git a/go.sum b/go.sum index 4d89f83a87f..63c90368014 100644 --- a/go.sum +++ b/go.sum @@ -96,6 +96,7 @@ github.com/asaskevich/govalidator v0.0.0-20200108200545-475eaeb16496/go.mod h1:o github.com/asaskevich/govalidator v0.0.0-20200428143746-21a406dcc535 h1:4daAzAu0S6Vi7/lbWECcX0j45yZReDZ56BQsrVBOEEY= github.com/asaskevich/govalidator v0.0.0-20200428143746-21a406dcc535/go.mod h1:oGkLhpf+kjZl6xBf758TQhh5XrAeiJv/7FRz/2spLIg= github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQwij/eHl5CU= +github.com/aws/aws-sdk-go v1.17.4/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.17.7/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.29.15/go.mod h1:1KvfttTE3SPKMpo8g2c6jL3ZKfXtFvKscTgahTma5Xg= @@ -119,6 +120,7 @@ github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4Yn github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/bmizerany/perks v0.0.0-20141205001514-d9a9656a3a4b h1:AP/Y7sqYicnjGDfD5VcY4CIfh1hRXBUavxrvELjTiOE= github.com/bmizerany/perks v0.0.0-20141205001514-d9a9656a3a4b/go.mod h1:ac9efd0D1fsDb3EJvhqgXRbFx7bs2wqZ10HQPeU8U/Q= +github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= github.com/bombsimon/wsl/v3 v3.1.0 h1:E5SRssoBgtVFPcYWUOFJEcgaySgdtTNYzsSKDOY7ss8= github.com/bombsimon/wsl/v3 v3.1.0/go.mod h1:st10JtZYLE4D5sC7b8xV4zTKZwAQjCH/Hy2Pm1FNZIc= github.com/c2h5oh/datasize v0.0.0-20171227191756-4eba002a5eae/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M= @@ -171,6 +173,7 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:ma github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= 
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/cznic/mathutil v0.0.0-20180504122225-ca4c9f2c1369 h1:XNT/Zf5l++1Pyg08/HV04ppB0gKxAqtZQBRYiYrUuYk= github.com/cznic/mathutil v0.0.0-20180504122225-ca4c9f2c1369/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM= github.com/daixiang0/gci v0.0.0-20200727065011-66f1df783cb2 h1:3Lhhps85OdA8ezsEKu+IA1hE+DBTjt/fjd7xNCrHbVA= github.com/daixiang0/gci v0.0.0-20200727065011-66f1df783cb2/go.mod h1:+AV8KmHTGxxwp/pY84TLQfFKp2vuKXXJVzF3kD/hfR4= @@ -647,6 +650,8 @@ github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA= github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= github.com/jmoiron/sqlx v1.2.1-0.20190826204134-d7d95172beb5 h1:lrdPtrORjGv1HbbEvKWDUAy97mPpFm4B8hp77tcCUJY= github.com/jmoiron/sqlx v1.2.1-0.20190826204134-d7d95172beb5/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= +github.com/johannesboyne/gofakes3 v0.0.0-20200716060623-6b2b4cb092cc h1:JJPhSHowepOF2+ElJVyb9jgt5ZyBkPMkPuhS0uODSFs= +github.com/johannesboyne/gofakes3 v0.0.0-20200716060623-6b2b4cb092cc/go.mod h1:fNiSoOiEI5KlkWXn26OwKnNe58ilTIkpBlgOrt7Olu8= github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc= github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= @@ -927,6 +932,7 @@ github.com/quasilyte/regex/syntax v0.0.0-20200407221936-30656e2c4a95/go.mod h1:r github.com/rakyll/statik v0.1.7 h1:OF3QCZUuyPxuGEP7B4ypUa7sB/iHtqOTDYZXGM8KOdQ= github.com/rakyll/statik v0.1.7/go.mod h1:AlZONWzMtEnMs7W4e/1LURLiI49pIMmp6V9Unghqrcc= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= +github.com/remyoudompheng/bigfft v0.0.0-20190728182440-6a916e37a237 h1:HQagqIiBmr8YXawX/le3+O26N+vPPC1PtjaF3mwnook= github.com/remyoudompheng/bigfft v0.0.0-20190728182440-6a916e37a237/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= @@ -944,14 +950,20 @@ github.com/ryancurrah/gomodguard v1.1.0/go.mod h1:4O8tr7hBODaGE6VIhfJDHcwzh5GUcc github.com/ryanrolds/sqlclosecheck v0.3.0 h1:AZx+Bixh8zdUBxUA1NxbxVAS78vTPq4rCb8OUZI9xFw= github.com/ryanrolds/sqlclosecheck v0.3.0/go.mod h1:1gREqxyTGR3lVtpngyFo3hZAgk0KCtEdgEkHwDbigdA= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 h1:GHRpF1pTW19a8tTFrMLUcfWwyC0pnifVo2ClaLq+hP8= +github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46/go.mod h1:uAQ5PCi+MFsC7HjREoAz1BU+Mq60+05gifQSsHSDG/8= github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/schollz/progressbar/v3 v3.3.4 h1:nMinx+JaEm/zJz4cEyClQeAw5rsYSB5th3xv+5lV6Vg= github.com/schollz/progressbar/v3 v3.3.4/go.mod h1:Rp5lZwpgtYmlvmGo1FyDwXMqagyRBQYSDwzlP9QDu84= github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZKJ48R1S7H23Ji7oFO5Bw= +github.com/scritchley/orc v0.0.0-20200625081059-e6fcbf41b2c2 h1:k8dU3BcOjYXXoRknJFVHF3XkWQblkFfqTlN4A37mZ2Y= 
+github.com/scritchley/orc v0.0.0-20200625081059-e6fcbf41b2c2/go.mod h1:U4h1RViHcbDQl9stSaImdd7N3/ZnUkZ2yombj5cSgEY= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/securego/gosec/v2 v2.4.0 h1:ivAoWcY5DMs9n04Abc1VkqZBO0FL0h4ShTcVsC53lCE= github.com/securego/gosec/v2 v2.4.0/go.mod h1:0/Q4cjmlFDfDUj1+Fib61sc+U5IQb2w+Iv9/C3wPVko= +github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63 h1:J6qvD6rbmOil46orKqJaRPG+zTpoGlBTUdyv8ki63L0= +github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63/go.mod h1:n+VKSARF5y/tS9XFSP7vWDfS+GUC5vs/YT7M5XDTUEM= github.com/shazow/go-diff v0.0.0-20160112020656-b6b7b6733b8c h1:W65qqJCIOVP4jpqPQ0YvHYKwcMEMVWIzWC5iNQQfBTU= github.com/shazow/go-diff v0.0.0-20160112020656-b6b7b6733b8c/go.mod h1:/PevMnwAxekIXwN8qQyfc5gl2NlkB3CQlkizAbOkeBs= github.com/shirou/gopsutil v0.0.0-20190901111213-e4ec7b275ada/go.mod h1:WWnYX4lzhCH5h/3YBfyVA3VbLYjlMZZAQcW9ojMexNc= @@ -989,6 +1001,7 @@ github.com/sourcegraph/go-diff v0.5.3 h1:lhIKJ2nXLZZ+AfbHpYxTn0pXpNTTui0DX7DO3xe github.com/sourcegraph/go-diff v0.5.3/go.mod h1:v9JDtjCE4HHHCZGId75rg8gkKKa98RVjBcBGsVmMmak= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/afero v1.2.1/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/afero v1.3.2/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4= @@ -1199,6 +1212,7 @@ golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190310074541-c10a0554eabf/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190320064053-1272bf9dcd53/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -1259,6 +1273,7 @@ golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190310054646-10058d7d4faa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190321052220-f7bb7a8bee54/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1328,6 +1343,7 @@ golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools 
v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190221204921-83362c3779f5/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190308174544-00c44ba9c14f/go.mod h1:25r3+/G6/xytQM8iWZKq3Hn0kr0rgFKPUNVEL/dr3z4= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190311215038-5c2858a9cfe5/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= @@ -1545,6 +1561,7 @@ gopkg.in/jcmturner/dnsutils.v1 v1.0.1/go.mod h1:m3v+5svpVOhtFAP/wSz+yzh4Mc0Fg7eR gopkg.in/jcmturner/goidentity.v3 v3.0.0/go.mod h1:oG2kH0IvSYNIu80dVAyu/yoefjq1mNfM5bm88whjWx4= gopkg.in/jcmturner/gokrb5.v7 v7.3.0/go.mod h1:l8VISx+WGYp+Fp7KRbsiUuXTTOnxIc3Tuvyavf11/WM= gopkg.in/jcmturner/rpc.v1 v1.1.0/go.mod h1:YIdkC4XfD6GXbzje11McwsDuOlZQSb9W4vfLvuNnlv8= +gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/square/go-jose.v2 v2.5.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= @@ -1583,6 +1600,7 @@ modernc.org/fileutil v1.0.0/go.mod h1:JHsWpkrk/CnVV1H/eGlFf85BEpfkrp56ro8nojIq9Q modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= modernc.org/internal v1.0.0/go.mod h1:VUD/+JAkhCpvkUitlEOnhpVxCgsBI90oTzSCRcqQVSM= modernc.org/lldb v1.0.0/go.mod h1:jcRvJGWfCGodDZz8BPwiKMJxGJngQ/5DrRapkQnLob8= +modernc.org/mathutil v1.0.0 h1:93vKjrJopTPrtTNpZ8XIovER7iCIH1QU7wNbOQXC60I= modernc.org/mathutil v1.0.0/go.mod h1:wU0vUrJsVWBZ4P6e7xtFJEhFSNsfRLJ8H458uRjg03k= modernc.org/ql v1.0.0/go.mod h1:xGVyrLIatPcO2C1JvI/Co8c0sr6y91HKFNy4pt9JXEY= modernc.org/sortutil v1.1.0/go.mod h1:ZyL98OQHJgH9IEfN71VsamvJgrtRX9Dj2gX+vH86L1k= diff --git a/inventory/s3/orc_reader.go b/inventory/s3/orc_reader.go new file mode 100644 index 00000000000..e899627fb33 --- /dev/null +++ b/inventory/s3/orc_reader.go @@ -0,0 +1,136 @@ +package s3 + +import ( + "context" + "reflect" + "time" + + "github.com/go-openapi/swag" + "github.com/hashicorp/go-multierror" + "github.com/scritchley/orc" +) + +type OrcInventoryFileReader struct { + reader *orc.Reader + cursor *orc.Cursor + ctx context.Context + orcSelect *OrcSelect + orcFile *OrcFile +} + +type OrcField struct { + IndexInFile int + IndexInSelect int +} + +type OrcSelect struct { + SelectFields []string // the list of fields to select from the file + IndexInSelect map[string]int // for each field, its index in the select query + IndexInFile map[string]int // for each field, its index in the original file +} + +func getOrcSelect(typeDescription *orc.TypeDescription) *OrcSelect { + relevantFields := []string{"bucket", "key", "size", "last_modified_date", "e_tag", "is_delete_marker", "is_latest"} + res := &OrcSelect{ + SelectFields: nil, + IndexInFile: make(map[string]int), + IndexInSelect: make(map[string]int), + } + for i, field := range typeDescription.Columns() { + res.IndexInFile[field] = i + } + j := 0 + for _, field := range relevantFields { + if _, ok := res.IndexInFile[field]; ok { + res.SelectFields = append(res.SelectFields, field) + 
res.IndexInSelect[field] = j + j++ + } + } + return res +} + +func (r *OrcInventoryFileReader) inventoryObjectFromRow(rowData []interface{}) InventoryObject { + var size *int64 + if sizeIdx, ok := r.orcSelect.IndexInSelect["size"]; ok && rowData[sizeIdx] != nil { + size = swag.Int64(rowData[sizeIdx].(int64)) + } + var lastModified *int64 + if lastModifiedIdx, ok := r.orcSelect.IndexInSelect["last_modified_date"]; ok && rowData[lastModifiedIdx] != nil { + lastModified = swag.Int64(rowData[lastModifiedIdx].(time.Time).Unix()) + } + var eTag *string + if eTagIdx, ok := r.orcSelect.IndexInSelect["e_tag"]; ok && rowData[eTagIdx] != nil { + eTag = swag.String(rowData[eTagIdx].(string)) + } + var isLatest *bool + if isLatestIdx, ok := r.orcSelect.IndexInSelect["is_latest"]; ok && rowData[isLatestIdx] != nil { + isLatest = swag.Bool(rowData[isLatestIdx].(bool)) + } + var isDeleteMarker *bool + if isDeleteMarkerIdx, ok := r.orcSelect.IndexInSelect["is_delete_marker"]; ok && rowData[isDeleteMarkerIdx] != nil { + isDeleteMarker = swag.Bool(rowData[isDeleteMarkerIdx].(bool)) + } + return InventoryObject{ + Bucket: rowData[r.orcSelect.IndexInSelect["bucket"]].(string), + Key: rowData[r.orcSelect.IndexInSelect["key"]].(string), + Size: size, + LastModified: lastModified, + Checksum: eTag, + IsLatest: isLatest, + IsDeleteMarker: isDeleteMarker, + } +} + +func (r *OrcInventoryFileReader) Read(dstInterface interface{}) error { + num := reflect.ValueOf(dstInterface).Elem().Len() + res := make([]InventoryObject, 0, num) + for { + select { + case <-r.ctx.Done(): + return r.ctx.Err() + default: + } + if !r.cursor.Next() { + if !r.cursor.Stripes() { + break + } + if !r.cursor.Next() { + break + } + } + res = append(res, r.inventoryObjectFromRow(r.cursor.Row())) + if len(res) == num { + break + } + } + + reflect.ValueOf(dstInterface).Elem().Set(reflect.ValueOf(res)) + return nil +} + +func (r *OrcInventoryFileReader) GetNumRows() int64 { + return int64(r.reader.NumRows()) +} + +func (r *OrcInventoryFileReader) Close() error { + var combinedErr error + if err := r.cursor.Close(); err != nil { + combinedErr = multierror.Append(combinedErr, err) + } + if err := r.reader.Close(); err != nil { + combinedErr = multierror.Append(combinedErr, err) + } + if err := r.orcFile.Close(); err != nil { + combinedErr = multierror.Append(combinedErr, err) + } + return combinedErr +} + +func (r *OrcInventoryFileReader) FirstObjectKey() string { + return *r.reader.Metadata().StripeStats[0].GetColStats()[r.orcSelect.IndexInFile["key"]+1].StringStatistics.Minimum +} + +func (r *OrcInventoryFileReader) LastObjectKey() string { + return *r.reader.Metadata().StripeStats[0].GetColStats()[r.orcSelect.IndexInFile["key"]+1].StringStatistics.Maximum +} diff --git a/inventory/s3/orc_utils.go b/inventory/s3/orc_utils.go new file mode 100644 index 00000000000..49036af4ccf --- /dev/null +++ b/inventory/s3/orc_utils.go @@ -0,0 +1,137 @@ +package s3 + +import ( + "context" + "fmt" + "io" + "io/ioutil" + "os" + "path" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/aws/aws-sdk-go/service/s3/s3iface" + "github.com/aws/aws-sdk-go/service/s3/s3manager" + "github.com/go-openapi/swag" + gproto "github.com/golang/protobuf/proto" //nolint:staticcheck // orc lib uses old proto + "github.com/scritchley/orc/proto" + "github.com/treeverse/lakefs/logging" +) + +const ( + maxPostScriptSize = 256 // from the ORC specification: https://orc.apache.org/specification/ORCv1/ + orcInitialReadSize = 16000 +) + +// 
getTailLength reads the ORC postscript from the given file, returning the full tail length. +// The tail length equals (footer + metadata + postscript + 1) bytes. +func getTailLength(f *os.File) (int, error) { + stat, err := f.Stat() + if err != nil { + return 0, err + } + fileSize := stat.Size() + psPlusByte := int64(maxPostScriptSize + 1) + if psPlusByte > fileSize { + psPlusByte = fileSize + } + // Read the last 256 bytes into buffer to get postscript + postScriptBytes := make([]byte, psPlusByte) + sr := io.NewSectionReader(f, fileSize-psPlusByte, psPlusByte) + _, err = io.ReadFull(sr, postScriptBytes) + if err != nil { + return 0, err + } + psLen := int(postScriptBytes[len(postScriptBytes)-1]) + psOffset := len(postScriptBytes) - 1 - psLen + postScript := &proto.PostScript{} + err = gproto.Unmarshal(postScriptBytes[psOffset:psOffset+psLen], postScript) + if err != nil { + return 0, err + } + footerLength := int(postScript.GetFooterLength()) + metadataLength := int(postScript.GetMetadataLength()) + return footerLength + metadataLength + psLen + 1, nil +} + +func downloadRange(ctx context.Context, svc s3iface.S3API, logger logging.Logger, bucket string, key string, fromByte int64) (*os.File, error) { + f, err := ioutil.TempFile("", path.Base(key)) + if err != nil { + return nil, err + } + defer func() { + if err := os.Remove(f.Name()); err != nil { + logger.Errorf("failed to remove orc file after download. file=%s, err=%w", f.Name(), err) + } + }() + downloader := s3manager.NewDownloaderWithClient(svc) + var rng *string + if fromByte > 0 { + rng = aws.String(fmt.Sprintf("bytes=%d-", fromByte)) + } + logger.Debugf("start downloading %s[%s] to local file %s", key, swag.StringValue(rng), f.Name()) + _, err = downloader.DownloadWithContext(ctx, f, &s3.GetObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + Range: rng, + }) + if err != nil { + return nil, err + } + logger.Debugf("finished downloading %s to local file %s", key, f.Name()) + return f, nil +} + +// DownloadOrc downloads a file from s3 and returns a ReaderSeeker to it. +// If tailOnly is set to true, download only the tail (metadata+footer) by trying the last `orcInitialReadSize` bytes of the file. +// Then, check the last byte to see if the whole tail was downloaded. If not, download again with the actual tail length. +func DownloadOrc(ctx context.Context, svc s3iface.S3API, logger logging.Logger, bucket string, key string, tailOnly bool) (*OrcFile, error) { + var size int64 + if tailOnly { + headObject, err := svc.HeadObject(&s3.HeadObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + }) + if err != nil { + return nil, err + } + size = *headObject.ContentLength + } + f, err := downloadRange(ctx, svc, logger, bucket, key, size-orcInitialReadSize) + if err != nil { + return nil, err + } + if tailOnly { + tailLength, err := getTailLength(f) + if err != nil { + return nil, err + } + if tailLength > orcInitialReadSize { + // tail didn't fit in initially downloaded file + if err = f.Close(); err != nil { + logger.Errorf("failed to close orc file. 
file=%s, err=%w", f.Name(), err) + } + f, err = downloadRange(ctx, svc, logger, bucket, key, size-int64(tailLength)) + if err != nil { + return nil, err + } + } + } + return &OrcFile{f}, nil +} + +type OrcFile struct { + *os.File +} + +func (or *OrcFile) Size() int64 { + stats, err := or.Stat() + if err != nil { + return 0 + } + return stats.Size() +} + +func (or *OrcFile) Close() error { + return or.File.Close() +} diff --git a/inventory/s3/parquet_reader.go b/inventory/s3/parquet_reader.go new file mode 100644 index 00000000000..88509f68c59 --- /dev/null +++ b/inventory/s3/parquet_reader.go @@ -0,0 +1,20 @@ +package s3 + +import "github.com/xitongsys/parquet-go/reader" + +type ParquetInventoryFileReader struct { + reader.ParquetReader +} + +func (p *ParquetInventoryFileReader) Close() error { + p.ReadStop() + return p.PFile.Close() +} + +func (p *ParquetInventoryFileReader) FirstObjectKey() string { + return string(p.Footer.RowGroups[0].Columns[0].GetMetaData().GetStatistics().GetMinValue()) +} + +func (p *ParquetInventoryFileReader) LastObjectKey() string { + return string(p.Footer.RowGroups[0].Columns[0].GetMetaData().GetStatistics().GetMaxValue()) +} diff --git a/inventory/s3/reader.go b/inventory/s3/reader.go new file mode 100644 index 00000000000..3aebbe62348 --- /dev/null +++ b/inventory/s3/reader.go @@ -0,0 +1,115 @@ +package s3 + +import ( + "context" + "errors" + "fmt" + + "github.com/aws/aws-sdk-go/service/s3/s3iface" + "github.com/scritchley/orc" + "github.com/treeverse/lakefs/logging" + s3parquet "github.com/xitongsys/parquet-go-source/s3" + "github.com/xitongsys/parquet-go/reader" +) + +const ( + OrcFormatName = "ORC" + ParquetFormatName = "Parquet" +) + +var ( + ErrUnsupportedInventoryFormat = errors.New("unsupported inventory type. 
supported types: parquet, orc") +) + +type IReader interface { + GetFileReader(format string, bucket string, key string) (FileReader, error) + GetMetadataReader(format string, bucket string, key string) (MetadataReader, error) +} + +type InventoryObject struct { + Bucket string `parquet:"name=bucket, type=UTF8"` + Key string `parquet:"name=key, type=UTF8"` + IsLatest *bool `parquet:"name=is_latest, type=BOOLEAN"` + IsDeleteMarker *bool `parquet:"name=is_delete_marker, type=BOOLEAN"` + Size *int64 `parquet:"name=size, type=INT_64"` + LastModified *int64 `parquet:"name=last_modified_date, type=TIMESTAMP_MILLIS"` + Checksum *string `parquet:"name=e_tag, type=UTF8"` +} + +func (o *InventoryObject) GetPhysicalAddress() string { + return "s3://" + o.Bucket + "/" + o.Key +} + +type Reader struct { + ctx context.Context + svc s3iface.S3API + logger logging.Logger +} + +type MetadataReader interface { + GetNumRows() int64 + Close() error + FirstObjectKey() string + LastObjectKey() string +} + +type FileReader interface { + MetadataReader + Read(dstInterface interface{}) error +} + +func NewReader(ctx context.Context, svc s3iface.S3API, logger logging.Logger) IReader { + return &Reader{ctx: ctx, svc: svc, logger: logger} +} + +func (o *Reader) GetFileReader(format string, bucket string, key string) (FileReader, error) { + switch format { + case OrcFormatName: + return o.getOrcReader(bucket, key, false) + case ParquetFormatName: + return o.getParquetReader(bucket, key) + default: + return nil, ErrUnsupportedInventoryFormat + } +} + +func (o *Reader) GetMetadataReader(format string, bucket string, key string) (MetadataReader, error) { + switch format { + case OrcFormatName: + return o.getOrcReader(bucket, key, true) + default: + return o.GetFileReader(format, bucket, key) + } +} + +func (o *Reader) getParquetReader(bucket string, key string) (FileReader, error) { + pf, err := s3parquet.NewS3FileReaderWithClient(o.ctx, o.svc, bucket, key) + if err != nil { + return nil, fmt.Errorf("failed to create parquet file reader: %w", err) + } + var rawObject InventoryObject + pr, err := reader.NewParquetReader(pf, &rawObject, 4) + if err != nil { + return nil, fmt.Errorf("failed to create parquet reader: %w", err) + } + return &ParquetInventoryFileReader{ParquetReader: *pr}, nil +} + +func (o *Reader) getOrcReader(bucket string, key string, tailOnly bool) (FileReader, error) { + orcFile, err := DownloadOrc(o.ctx, o.svc, o.logger, bucket, key, tailOnly) + if err != nil { + return nil, err + } + orcReader, err := orc.NewReader(orcFile) + if err != nil { + return nil, err + } + orcSelect := getOrcSelect(orcReader.Schema()) + return &OrcInventoryFileReader{ + ctx: o.ctx, + reader: orcReader, + orcFile: orcFile, + orcSelect: orcSelect, + cursor: orcReader.Select(orcSelect.SelectFields...), + }, nil +} diff --git a/inventory/s3/reader_test.go b/inventory/s3/reader_test.go new file mode 100644 index 00000000000..6ea2e43e0c7 --- /dev/null +++ b/inventory/s3/reader_test.go @@ -0,0 +1,195 @@ +package s3 + +import ( + "context" + "fmt" + "io/ioutil" + "net/http/httptest" + "os" + "testing" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/aws/aws-sdk-go/service/s3/s3iface" + "github.com/aws/aws-sdk-go/service/s3/s3manager" + "github.com/cznic/mathutil" + "github.com/go-openapi/swag" + "github.com/johannesboyne/gofakes3" + "github.com/johannesboyne/gofakes3/backend/s3mem" + 
"github.com/scritchley/orc" + "github.com/treeverse/lakefs/logging" +) + +const inventoryBucketName = "inventory-bucket" + +func generateOrc(t *testing.T, objs <-chan *InventoryObject) string { + f, err := ioutil.TempFile("", "orctest") + if err != nil { + t.Fatal(err) + } + defer func() { + _ = f.Close() + }() + schema, err := orc.ParseSchema("struct") + if err != nil { + t.Fatal(err) + } + w, err := orc.NewWriter(f, orc.SetSchema(schema), orc.SetStripeTargetSize(100)) + if err != nil { + t.Fatal(err) + } + for o := range objs { + err = w.Write(o.Bucket, o.Key, *o.Size, time.Unix(*o.LastModified, 0), *o.Checksum) + if err != nil { + t.Fatal(err) + } + } + err = w.Close() + if err != nil { + t.Fatal(err) + } + return f.Name() +} + +func getS3Fake(t *testing.T) (s3iface.S3API, *httptest.Server) { + backend := s3mem.New() + faker := gofakes3.New(backend) + ts := httptest.NewServer(faker.Server()) + // configure S3 client + s3Config := &aws.Config{ + Credentials: credentials.NewStaticCredentials("YOUR-ACCESSKEYID", "YOUR-SECRETACCESSKEY", ""), + Endpoint: aws.String(ts.URL), + Region: aws.String("eu-central-1"), + DisableSSL: aws.Bool(true), + S3ForcePathStyle: aws.Bool(true), + } + newSession, err := session.NewSession(s3Config) + if err != nil { + t.Fatal(err) + } + return s3.New(newSession), ts +} + +func objs(num int) <-chan *InventoryObject { + out := make(chan *InventoryObject) + go func() { + defer close(out) + for i := 0; i < num; i++ { + out <- &InventoryObject{ + Bucket: inventoryBucketName, + Key: fmt.Sprintf("f%05d", i), + Size: swag.Int64(500), + LastModified: swag.Int64(time.Now().Unix()), + Checksum: swag.String("abcdefg"), + } + } + }() + return out +} + +func uploadFile(t *testing.T, s3 s3iface.S3API, inventoryBucket string, inventoryFilename string, objs <-chan *InventoryObject) { + localOrcFile := generateOrc(t, objs) + f, err := os.Open(localOrcFile) + if err != nil { + t.Fatal(err) + } + defer func() { + _ = f.Close() + }() + uploader := s3manager.NewUploaderWithClient(s3) + _, err = uploader.Upload(&s3manager.UploadInput{ + Bucket: aws.String(inventoryBucket), + Key: aws.String(inventoryFilename), + Body: f, + }) + if err != nil { + t.Fatal(err) + } +} + +func TestInventoryReader(t *testing.T) { + svc, testServer := getS3Fake(t) + defer testServer.Close() + _, err := svc.CreateBucket(&s3.CreateBucketInput{ + Bucket: aws.String(inventoryBucketName), + }) + if err != nil { + t.Fatal(err) + } + testdata := []struct { + ObjectNum int + ExpectedReadObjects int + ExpectedMaxValue string + ExpectedMinValue string + }{ + { + ObjectNum: 2, + ExpectedReadObjects: 2, + ExpectedMinValue: "f00000", + ExpectedMaxValue: "f00001", + }, + { + ObjectNum: 12500, + ExpectedReadObjects: 12500, + ExpectedMinValue: "f00000", + ExpectedMaxValue: "f12499", + }, + { + ObjectNum: 100, + ExpectedReadObjects: 100, + ExpectedMinValue: "f00000", + ExpectedMaxValue: "f00099", + }, + } + + for _, test := range testdata { + + uploadFile(t, svc, inventoryBucketName, "myFile.orc", objs(test.ObjectNum)) + reader := NewReader(context.Background(), svc, logging.Default()) + fileReader, err := reader.GetFileReader("ORC", inventoryBucketName, "myFile.orc") + if err != nil { + t.Fatal(err) + } + numRowsResult := int(fileReader.GetNumRows()) + if test.ObjectNum != numRowsResult { + t.Fatalf("unexpected result from GetNumRows. 
expected=%d, got=%d", test.ObjectNum, numRowsResult) + } + minValueResult := fileReader.FirstObjectKey() + if test.ExpectedMinValue != minValueResult { + t.Fatalf("unexpected result from FirstObjectKey. expected=%s, got=%s", test.ExpectedMinValue, minValueResult) + } + maxValueResult := fileReader.LastObjectKey() + if test.ExpectedMaxValue != maxValueResult { + t.Fatalf("unexpected result from LastObjectKey. expected=%s, got=%s", test.ExpectedMaxValue, maxValueResult) + } + readBatchSize := 1000 + res := make([]InventoryObject, readBatchSize) + offset := 0 + readCount := 0 + for { + err = fileReader.Read(&res) + for i := offset; i < mathutil.Min(offset+readBatchSize, test.ObjectNum); i++ { + if res[i-offset].Key != fmt.Sprintf("f%05d", i) { + t.Fatalf("result in index %d different than expected. expected=%s, got=%s (batch #%d, index %d)", i, fmt.Sprintf("f%05d", i), res[i-offset].Key, offset/readBatchSize, i-offset) + } + } + offset += len(res) + readCount += len(res) + if err != nil { + t.Fatal(err) + } + if len(res) != readBatchSize { + break + } + } + if test.ExpectedReadObjects != readCount { + t.Fatalf("read unexpected number of keys from inventory. expected=%d, got=%d", test.ExpectedReadObjects, readCount) + } + if fileReader.Close() != nil { + t.Fatalf("failed to close file reader") + } + } +} diff --git a/onboard/catalog_actions.go b/onboard/catalog_actions.go index 8c85f90f4f1..9d5cadee033 100644 --- a/onboard/catalog_actions.go +++ b/onboard/catalog_actions.go @@ -4,13 +4,19 @@ import ( "context" "errors" "fmt" + "sync" "time" "github.com/treeverse/lakefs/catalog" "github.com/treeverse/lakefs/db" + "github.com/treeverse/lakefs/logging" ) -const DefaultWriteBatchSize = 100000 +const ( + DefaultWriteBatchSize = 25000 + DefaultWorkerCount = 16 + TaskChannelCapacity = 2 * DefaultWorkerCount +) type RepoActions interface { ApplyImport(ctx context.Context, it Iterator, dryRun bool) (*InventoryImportStats, error) @@ -23,19 +29,39 @@ type CatalogRepoActions struct { cataloger catalog.Cataloger repository string committer string + logger logging.Logger +} + +func NewCatalogActions(cataloger catalog.Cataloger, repository string, committer string, logger logging.Logger) RepoActions { + return &CatalogRepoActions{cataloger: cataloger, repository: repository, committer: committer, logger: logger} } -func NewCatalogActions(cataloger catalog.Cataloger, repository string, committer string) RepoActions { - return &CatalogRepoActions{cataloger: cataloger, repository: repository, committer: committer} +type task struct { + f func() error + err *error +} + +func worker(wg *sync.WaitGroup, tasks <-chan *task) { + for task := range tasks { + *task.err = task.f() + } + wg.Done() } func (c *CatalogRepoActions) ApplyImport(ctx context.Context, it Iterator, dryRun bool) (*InventoryImportStats, error) { var stats InventoryImportStats + var wg sync.WaitGroup batchSize := DefaultWriteBatchSize if c.WriteBatchSize > 0 { batchSize = c.WriteBatchSize } + errs := make([]*error, 0) + tasksChan := make(chan *task, TaskChannelCapacity) currentBatch := make([]catalog.Entry, 0, batchSize) + for w := 0; w < DefaultWorkerCount; w++ { + go worker(&wg, tasksChan) + } + wg.Add(DefaultWorkerCount) for it.Next() { diffObj := it.Get() obj := diffObj.Obj @@ -64,15 +90,26 @@ func (c *CatalogRepoActions) ApplyImport(ctx context.Context, it Iterator, dryRu if dryRun { continue } - err := c.cataloger.CreateEntries(ctx, c.repository, DefaultBranchName, previousBatch) - if err != nil { - return nil, fmt.Errorf("failed to create 
batch of %d entries (%w)", len(currentBatch), err) + tsk := &task{ + f: func() error { + return c.cataloger.CreateEntries(ctx, c.repository, DefaultBranchName, previousBatch) + }, + err: new(error), } + errs = append(errs, tsk.err) + tasksChan <- tsk } } + close(tasksChan) + wg.Wait() if it.Err() != nil { return nil, it.Err() } + for _, err := range errs { + if *err != nil { + return nil, *err + } + } if len(currentBatch) > 0 && !dryRun { err := c.cataloger.CreateEntries(ctx, c.repository, DefaultBranchName, currentBatch) if err != nil { diff --git a/onboard/catalog_actions_test.go b/onboard/catalog_actions_test.go index 57b79fe236b..5748a822fff 100644 --- a/onboard/catalog_actions_test.go +++ b/onboard/catalog_actions_test.go @@ -2,9 +2,13 @@ package onboard_test import ( "context" + "sync" + "sync/atomic" "testing" + "github.com/go-openapi/swag" "github.com/treeverse/lakefs/catalog" + "github.com/treeverse/lakefs/logging" "github.com/treeverse/lakefs/onboard" ) @@ -13,24 +17,31 @@ type mockCataloger struct { } var catalogCallData = struct { - addedEntries []catalog.Entry - deletedEntries []string - callLog map[string]int + addedEntries map[string]bool + deletedEntries map[string]bool + callLog map[string]*int32 + mux sync.Mutex }{} func (m mockCataloger) CreateEntries(_ context.Context, _, _ string, entries []catalog.Entry) error { - catalogCallData.addedEntries = append(catalogCallData.addedEntries, entries...) - catalogCallData.callLog["CreateEntries"]++ + catalogCallData.mux.Lock() + defer catalogCallData.mux.Unlock() + for _, e := range entries { + catalogCallData.addedEntries[e.Path] = true + } + atomic.AddInt32(catalogCallData.callLog["CreateEntries"], 1) return nil } func (m mockCataloger) DeleteEntry(_ context.Context, _, _ string, path string) error { - catalogCallData.deletedEntries = append(catalogCallData.deletedEntries, path) - catalogCallData.callLog["DeleteEntry"]++ + catalogCallData.mux.Lock() + defer catalogCallData.mux.Unlock() + catalogCallData.deletedEntries[path] = true + atomic.AddInt32(catalogCallData.callLog["DeleteEntry"], 1) return nil } func TestCreateAndDeleteRows(t *testing.T) { - c := onboard.NewCatalogActions(mockCataloger{}, "example-repo", "committer") + c := onboard.NewCatalogActions(mockCataloger{}, "example-repo", "committer", logging.Default()) c.(*onboard.CatalogRepoActions).WriteBatchSize = 5 catalogActions, ok := c.(*onboard.CatalogRepoActions) if !ok { @@ -39,8 +50,8 @@ func TestCreateAndDeleteRows(t *testing.T) { testdata := []struct { AddedRows []string DeletedRows []string - ExpectedAddCalls int - ExpectedDeleteCalls int + ExpectedAddCalls int32 + ExpectedDeleteCalls int32 }{ { AddedRows: []string{"a1", "b2", "c3"}, @@ -87,9 +98,11 @@ func TestCreateAndDeleteRows(t *testing.T) { } for _, dryRun := range []bool{true, false} { for _, test := range testdata { - catalogCallData.addedEntries = []catalog.Entry{} - catalogCallData.deletedEntries = []string{} - catalogCallData.callLog = make(map[string]int) + catalogCallData.addedEntries = make(map[string]bool) + catalogCallData.deletedEntries = make(map[string]bool) + catalogCallData.callLog = make(map[string]*int32) + catalogCallData.callLog["DeleteEntry"] = swag.Int32(0) + catalogCallData.callLog["CreateEntries"] = swag.Int32(0) stats, err := catalogActions.ApplyImport(context.Background(), onboard.NewDiffIterator( &mockInventoryIterator{rows: rows(test.DeletedRows...)}, &mockInventoryIterator{rows: rows(test.AddedRows...)}), dryRun) @@ -102,11 +115,11 @@ func TestCreateAndDeleteRows(t *testing.T) { 
expectedAddCalls = 0 expectedDeleteCalls = 0 } - if catalogCallData.callLog["CreateEntries"] != expectedAddCalls { - t.Fatalf("unexpected number of CreateEntries calls. expected=%d, got=%d", expectedAddCalls, catalogCallData.callLog["CreateEntries"]) + if *catalogCallData.callLog["CreateEntries"] != expectedAddCalls { + t.Fatalf("unexpected number of CreateEntries calls. expected=%d, got=%d", expectedAddCalls, *catalogCallData.callLog["CreateEntries"]) } - if catalogCallData.callLog["DeleteEntry"] != expectedDeleteCalls { - t.Fatalf("unexpected number of DeleteEntries calls. expected=%d, got=%d", expectedDeleteCalls, catalogCallData.callLog["DeleteEntry"]) + if *catalogCallData.callLog["DeleteEntry"] != expectedDeleteCalls { + t.Fatalf("unexpected number of DeleteEntries calls. expected=%d, got=%d", expectedDeleteCalls, *catalogCallData.callLog["DeleteEntry"]) } if stats.AddedOrChanged != len(test.AddedRows) { t.Fatalf("unexpected number of added entries in returned stats. expected=%d, got=%d", len(test.AddedRows), stats.AddedOrChanged) @@ -120,17 +133,17 @@ func TestCreateAndDeleteRows(t *testing.T) { if len(catalogCallData.addedEntries) != len(test.AddedRows) { t.Fatalf("unexpected number of added entries. expected=%d, got=%d", len(test.AddedRows), len(catalogCallData.addedEntries)) } - for i, entry := range catalogCallData.addedEntries { - if entry.Path != test.AddedRows[i] { - t.Fatalf("unexpected added entry at index %d: expected=%s, got=%s", i, test.AddedRows[i], entry.Path) + for _, path := range test.AddedRows { + if _, ok := catalogCallData.addedEntries[path]; !ok { + t.Fatalf("expected entry not added: %s", path) } } if len(catalogCallData.deletedEntries) != len(test.DeletedRows) { t.Fatalf("unexpected number of deleted entries. expected=%d, got=%d", len(test.DeletedRows), len(catalogCallData.deletedEntries)) } - for i, path := range catalogCallData.deletedEntries { - if path != test.DeletedRows[i] { - t.Fatalf("unexpected deleted entry at index %d: expected=%s, got=%s", i, test.AddedRows[i], path) + for _, path := range test.DeletedRows { + if _, ok := catalogCallData.deletedEntries[path]; !ok { + t.Fatalf("expected entry not deleted: %s", path) } } } diff --git a/onboard/import.go b/onboard/import.go index e82350fc636..b3db8b6382c 100644 --- a/onboard/import.go +++ b/onboard/import.go @@ -22,8 +22,17 @@ type Importer struct { inventory block.Inventory CatalogActions RepoActions logger logging.Logger + previousCommit *catalog.CommitLog } +type ImporterConfig struct { + CommitUsername string + InventoryURL string + Repository string + InventoryGenerator block.InventoryGenerator + Cataloger catalog.Cataloger + CatalogActions RepoActions +} type InventoryImportStats struct { AddedOrChanged int Deleted int @@ -34,17 +43,25 @@ type InventoryImportStats struct { var ErrNoInventoryURL = errors.New("no inventory_url in commit Metadata") -func CreateImporter(ctx context.Context, logger logging.Logger, cataloger catalog.Cataloger, inventoryGenerator block.InventoryGenerator, username string, inventoryURL string, repository string) (importer *Importer, err error) { +func CreateImporter(ctx context.Context, logger logging.Logger, config *ImporterConfig) (importer *Importer, err error) { res := &Importer{ - repository: repository, - inventoryGenerator: inventoryGenerator, + repository: config.Repository, + inventoryGenerator: config.InventoryGenerator, logger: logger, + CatalogActions: config.CatalogActions, + } + if res.CatalogActions == nil { + res.CatalogActions = 
NewCatalogActions(config.Cataloger, config.Repository, config.CommitUsername, logger) } - res.inventory, err = inventoryGenerator.GenerateInventory(ctx, logger, inventoryURL) + previousCommit, err := res.CatalogActions.GetPreviousCommit(ctx) if err != nil { - return nil, fmt.Errorf("failed to create inventory: %w", err) + return nil, fmt.Errorf("failed to get previous commit: %w", err) + } + res.previousCommit = previousCommit + res.inventory, err = config.InventoryGenerator.GenerateInventory(ctx, logger, config.InventoryURL, res.previousCommit != nil) + if err != nil { + return nil, fmt.Errorf("failed to read inventory: %w", err) } - res.CatalogActions = NewCatalogActions(cataloger, repository, username) return res, nil } @@ -53,7 +70,7 @@ func (s *Importer) diffIterator(ctx context.Context, commit catalog.CommitLog) ( if previousInventoryURL == "" { return nil, fmt.Errorf("%w. commit_ref=%s", ErrNoInventoryURL, commit.Reference) } - previousInv, err := s.inventoryGenerator.GenerateInventory(ctx, s.logger, previousInventoryURL) + previousInv, err := s.inventoryGenerator.GenerateInventory(ctx, s.logger, previousInventoryURL, true) if err != nil { return nil, fmt.Errorf("failed to create inventory for previous state: %w", err) } @@ -63,17 +80,14 @@ func (s *Importer) diffIterator(ctx context.Context, commit catalog.CommitLog) ( } func (s *Importer) Import(ctx context.Context, dryRun bool) (*InventoryImportStats, error) { - previousCommit, err := s.CatalogActions.GetPreviousCommit(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get previous commit: %w", err) - } var dataToImport Iterator - if previousCommit == nil { + var err error + if s.previousCommit == nil { // no previous commit, add whole inventory it := s.inventory.Iterator() dataToImport = NewInventoryIterator(it) } else { - dataToImport, err = s.diffIterator(ctx, *previousCommit) + dataToImport, err = s.diffIterator(ctx, *s.previousCommit) if err != nil { return nil, err } @@ -83,9 +97,9 @@ func (s *Importer) Import(ctx context.Context, dryRun bool) (*InventoryImportSta return nil, err } stats.DryRun = dryRun - if previousCommit != nil { - stats.PreviousImportDate = previousCommit.CreationDate - stats.PreviousInventoryURL = previousCommit.Metadata["inventory_url"] + if s.previousCommit != nil { + stats.PreviousImportDate = s.previousCommit.CreationDate + stats.PreviousInventoryURL = s.previousCommit.Metadata["inventory_url"] } if !dryRun { commitMetadata := CreateCommitMetadata(s.inventory, *stats) diff --git a/onboard/import_test.go b/onboard/import_test.go index f6ca8830ac0..57ae5188d43 100644 --- a/onboard/import_test.go +++ b/onboard/import_test.go @@ -52,10 +52,10 @@ func TestImport(t *testing.T) { // do nothing, expect no errors }, { - NewInventory: []string{"a1", "a2", "a3", "a4", "a5", "a6", "a7"}, - PreviousInventory: []string{"a1", "a2", "a4", "a8", "a9", "a10"}, - ExpectedDeleted: []string{"a8", "a9", "a10"}, - ExpectedAdded: []string{"a3", "a5", "a6", "a7"}, + NewInventory: []string{"a01", "a02", "a03", "a04", "a05", "a06", "a07"}, + PreviousInventory: []string{"a01", "a02", "a04", "a08", "a09", "a10"}, + ExpectedDeleted: []string{"a08", "a09", "a10"}, + ExpectedAdded: []string{"a03", "a05", "a06", "a07"}, }, { NewInventory: []string{"a1", "a2", "a3", "a4", "a5", "a6", "a7"}, @@ -78,17 +78,24 @@ func TestImport(t *testing.T) { previousCommitInventory: previousInventoryURL, } } - importer, err := onboard.CreateImporter(context.TODO(), logging.Default(), nil, &mockInventoryGenerator{ + inventoryGenerator := 
&mockInventoryGenerator{ newInventoryURL: newInventoryURL, previousInventoryURL: previousInventoryURL, newInventory: test.NewInventory, previousInventory: test.PreviousInventory, sourceBucket: "example-repo", - }, "committer", newInventoryURL, "example-repo") + } + config := &onboard.ImporterConfig{ + CommitUsername: "committer", + InventoryURL: newInventoryURL, + Repository: "example-repo", + InventoryGenerator: inventoryGenerator, + CatalogActions: &catalogActionsMock, + } + importer, err := onboard.CreateImporter(context.TODO(), logging.Default(), config) if err != nil { t.Fatalf("failed to create importer: %v", err) } - importer.CatalogActions = &catalogActionsMock stats, err := importer.Import(context.Background(), dryRun) if err != nil { if !test.ExpectedErr { diff --git a/onboard/utils_test.go b/onboard/utils_test.go index 27cfec66064..d2157c3038a 100644 --- a/onboard/utils_test.go +++ b/onboard/utils_test.go @@ -3,6 +3,7 @@ package onboard_test import ( "context" "errors" + "sort" "github.com/treeverse/lakefs/block" "github.com/treeverse/lakefs/catalog" @@ -19,6 +20,7 @@ type mockInventory struct { rows []string inventoryURL string sourceBucket string + shouldSort bool } type objectActions struct { @@ -40,12 +42,12 @@ type mockInventoryGenerator struct { sourceBucket string } -func (m mockInventoryGenerator) GenerateInventory(_ context.Context, _ logging.Logger, inventoryURL string) (block.Inventory, error) { +func (m mockInventoryGenerator) GenerateInventory(_ context.Context, _ logging.Logger, inventoryURL string, shouldSort bool) (block.Inventory, error) { if inventoryURL == m.newInventoryURL { - return &mockInventory{rows: m.newInventory, inventoryURL: inventoryURL, sourceBucket: m.sourceBucket}, nil + return &mockInventory{rows: m.newInventory, inventoryURL: inventoryURL, sourceBucket: m.sourceBucket, shouldSort: shouldSort}, nil } if inventoryURL == m.previousInventoryURL { - return &mockInventory{rows: m.previousInventory, inventoryURL: inventoryURL, sourceBucket: m.sourceBucket}, nil + return &mockInventory{rows: m.previousInventory, inventoryURL: inventoryURL, sourceBucket: m.sourceBucket, shouldSort: shouldSort}, nil } return nil, errors.New("failed to create inventory") } @@ -118,6 +120,9 @@ func (m *mockInventoryIterator) Get() *block.InventoryObject { } func (m *mockInventory) Iterator() block.InventoryIterator { + if m.shouldSort { + sort.Strings(m.rows) + } return &mockInventoryIterator{ rows: rows(m.rows...), } From d333a486417aeda13f384ac2589d4257781538ba Mon Sep 17 00:00:00 2001 From: itaiad200 Date: Thu, 10 Sep 2020 17:09:14 +0300 Subject: [PATCH 14/39] Benchmark tests design (#573) --- design/benchmark_tests.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 design/benchmark_tests.md diff --git a/design/benchmark_tests.md b/design/benchmark_tests.md new file mode 100644 index 00000000000..4f83f1ec65c --- /dev/null +++ b/design/benchmark_tests.md @@ -0,0 +1,39 @@ +# Benchmarks Tests + +## Requirements +1. Measure the latency percentiles (50th, 90th, 95th, 99th 99.9) for a batch of repeated basic operations (e.g. 10K put requests), + at a predefined parallelism level. +2. Report the number of failed requests per operation. +3. Show trends of benchmarks tests execution times across different versions. +4. Portability: The tests don't assume anything on the environment where the lakeFS & DB are running. + lakeFS address is the only **required** argument for running the tests. + +## Non-Requirements +1. 
An active gatekeeper - single slow running cycle shouldn't block the build. +2. Expect 100% reliability of all actions - when performing thousands of operations, + some (exact percent TBD) will be allowed to fail. When measuring an external app, + it's the app responsibility to retry on transient errors. +3. Avoid repeating unit/integration/system tests scenarios. + +## Solution +### Test Execution +1. For each parallelism level, the flow will create a repository and will PUT 10k files in it. + a. Each filename starts with 1 of the common shared prefixes (~100 common prefixes) +2. 10k Read requests of those files. +3. Read each one of the common prefixes. +4. Commit the files. +5. Branch out and repeat 1-4. +6. Delete all files. + +### CI +Github workflow is triggered on a merge to master. +The workflow will deploy a temporary environment that includes the latest lakeFS app, and an RDS instance. +Workflow collects and structures the tests results from the lakeFS instance, +and sends them to a dedicated `prometheus` server. +The deployed environment is deleted after each collection of the results. + +### Results +A `Grafana` dashboard will display the benchmark tests results + +## Future metrics +Collect and display metrics from the RDS instance for additional analysis of potential bottlenecks. \ No newline at end of file From 84e0a0948f2cfe0834c1f53015ca2e37db82d34a Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Thu, 10 Sep 2020 15:10:46 +0100 Subject: [PATCH 15/39] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7701b000698..1f05a367a5f 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ For more information see the [Official Documentation](https://docs.lakefs.io). ## Capabilities -**Developer Environment for Data** +**Development Environment for Data** * **Experimentation** - try tools, upgrade versions and evaluate code changes in isolation. * **Reproducibility** - go back to any point of time to a consistent version of your data lake. From 0349cdf3ec1106f557406c60fd2141137b9237c2 Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Thu, 10 Sep 2020 15:11:41 +0100 Subject: [PATCH 16/39] Update index.md --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index b0321cc877c..dec8668f60b 100755 --- a/docs/index.md +++ b/docs/index.md @@ -30,7 +30,7 @@ Since lakeFS is compatible with the S3 API, all popular applications will work w ## Here's why you need it: -### Developer Environment for Data +### Development Environment for Data * **Experimentation** - try tools, upgrade versions and evaluate code changes in isolation. By creating a branch of the lake you get an isolated snapshot of the lake you can experiment with while others are not exposed. You can compare branches with different experiments or compare your branch to the main branch of the lake. * **Reproducibility** - go back to any point in time to a consistent version of your data lake. By making commit history available for a configurable duration - you can read from the lake at any given point in time, compare changes made, and safely roll back if necessary. 
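
As a rough, illustrative sketch of the benchmark flow outlined in design/benchmark_tests.md above (measuring latency percentiles and failed-request counts for a batch of PUT operations at a fixed parallelism level): the endpoint, credentials, repository/branch addressing, batch sizes, and helper code below are assumptions for illustration only and are not part of this patch series.

// benchmark_sketch.go — a minimal, hypothetical sketch of the benchmark flow.
package main

import (
	"bytes"
	"fmt"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/credentials"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
)

// percentile returns an approximate nearest-rank percentile from a sorted slice.
func percentile(sorted []time.Duration, p float64) time.Duration {
	if len(sorted) == 0 {
		return 0
	}
	return sorted[int(float64(len(sorted)-1)*p)]
}

func main() {
	const (
		totalRequests = 10000 // size of the measured batch (assumed)
		parallelism   = 16    // predefined parallelism level (assumed)
		prefixes      = 100   // number of common shared prefixes (assumed)
	)
	// Point an S3 client at the lakeFS S3 gateway (endpoint and keys are placeholders).
	sess := session.Must(session.NewSession(&aws.Config{
		Endpoint:         aws.String("http://s3.local.lakefs.io:8000"),
		Region:           aws.String("us-east-1"),
		Credentials:      credentials.NewStaticCredentials("ACCESS_KEY", "SECRET_KEY", ""),
		S3ForcePathStyle: aws.Bool(true),
	}))
	svc := s3.New(sess)

	latencies := make([]time.Duration, totalRequests)
	var failed int64
	var wg sync.WaitGroup
	jobs := make(chan int)
	for w := 0; w < parallelism; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := range jobs {
				// Keys start with one of the common shared prefixes.
				key := fmt.Sprintf("master/prefix-%03d/file-%05d", i%prefixes, i)
				start := time.Now()
				_, err := svc.PutObject(&s3.PutObjectInput{
					Bucket: aws.String("benchmark-repo"), // assumed repository name
					Key:    aws.String(key),
					Body:   bytes.NewReader([]byte("data")),
				})
				latencies[i] = time.Since(start) // failed requests are still timed in this sketch
				if err != nil {
					atomic.AddInt64(&failed, 1)
				}
			}
		}()
	}
	for i := 0; i < totalRequests; i++ {
		jobs <- i
	}
	close(jobs)
	wg.Wait()

	sort.Slice(latencies, func(i, j int) bool { return latencies[i] < latencies[j] })
	for _, p := range []float64{0.5, 0.9, 0.95, 0.99, 0.999} {
		fmt.Printf("p%g: %v\n", p*100, percentile(latencies, p))
	}
	fmt.Printf("failed requests: %d\n", failed)
}

The same loop structure would be repeated for the read, commit, and branch steps of the flow; the percentile calculation here is deliberately simple and would be replaced by the instrumented metrics scraped into Prometheus in the actual CI setup.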
From e8edf6ba8bd385069e6da3c7f754a3c97ddc0e4e Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Fri, 11 Sep 2020 13:15:47 +0100 Subject: [PATCH 17/39] Update setup.md --- docs/deploying/setup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/deploying/setup.md b/docs/deploying/setup.md index 9bb90bb0214..d316f29e534 100644 --- a/docs/deploying/setup.md +++ b/docs/deploying/setup.md @@ -31,4 +31,4 @@ Once we have lakeFS configured and running, open `https:// Date: Sun, 13 Sep 2020 11:38:44 +0300 Subject: [PATCH 18/39] Docs let jekyll-seo plugin set the documentation header title (#602) --- docs/_config.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/_config.yml b/docs/_config.yml index b719a3284a7..a452a31d903 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -64,4 +64,5 @@ footer_bottom_links: image: '/assets/img/shared-image.png' plugins: - - jekyll-redirect-from \ No newline at end of file + - jekyll-redirect-from + - jekyll-seo-tag From ad68dc873f8702095cc8a62d20a9c226d607ae21 Mon Sep 17 00:00:00 2001 From: itaiad200 Date: Sun, 13 Sep 2020 12:25:39 +0300 Subject: [PATCH 19/39] AWS AccountID to github secret (#596) --- .github/workflows/nessie.yaml | 4 ++++ nessie/ops/.env | 2 +- nessie/ops/docker-compose.yaml | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nessie.yaml b/.github/workflows/nessie.yaml index 5e0c2407858..c95d3aec4e2 100644 --- a/.github/workflows/nessie.yaml +++ b/.github/workflows/nessie.yaml @@ -84,6 +84,8 @@ jobs: - name: Run lakeFS S3 env: TAG: ${{ steps.version.outputs.tag }} + # Setting Account_ID as a secret as a way to avoid specifying it here + REPO: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.us-east-1.amazonaws.com LAKEFS_STATS_ENABLED: "false" LAKEFS_BLOCKSTORE_TYPE: s3 LAKEFS_GATEWAYS_S3_DOMAIN_NAME: s3.local.lakefs.io:8000 @@ -112,6 +114,8 @@ jobs: - name: Run lakeFS GS env: TAG: ${{ steps.version.outputs.tag }} + # Setting Account_ID as a secret as a way to avoid specifying it here + REPO: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.us-east-1.amazonaws.com LAKEFS_STATS_ENABLED: "false" LAKEFS_BLOCKSTORE_TYPE: gs LAKEFS_GATEWAYS_S3_DOMAIN_NAME: s3.local.lakefs.io:8000 diff --git a/nessie/ops/.env b/nessie/ops/.env index 0df73a320ae..2b9332cac60 100644 --- a/nessie/ops/.env +++ b/nessie/ops/.env @@ -1,3 +1,3 @@ TAG=dev LAKEFS_BLOCKSTORE_TYPE=local -DOCKER_REG=977611293394.dkr.ecr.us-east-1.amazonaws.com \ No newline at end of file +REPO=https://hub.docker.com/treeverse \ No newline at end of file diff --git a/nessie/ops/docker-compose.yaml b/nessie/ops/docker-compose.yaml index 1b99fde832b..83213655225 100644 --- a/nessie/ops/docker-compose.yaml +++ b/nessie/ops/docker-compose.yaml @@ -1,7 +1,7 @@ version: '3' services: lakefs: - image: "${DOCKER_REG}/lakefs:${TAG}" + image: "${REPO}/lakefs:${TAG}" ports: - "8000:8000" depends_on: From 47c41f92b52bffcd461198f619bbcd69a3c93bdc Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 13 Sep 2020 15:13:40 +0300 Subject: [PATCH 20/39] reference for google storage --- docs/reference/configuration.md | 32 +++++++++++++++++++++++++++++++- docs/reference/retention.md | 4 ++++ docs/roadmap.md | 8 +++----- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 6b2f35bc29a..1e85d67275a 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -38,8 +38,10 @@ This reference uses `.` to denote the 
nesting of values. **Note:** It is best to keep this somewhere safe such as KMS or Hashicorp Vault, and provide it to the system at run time {: .note } -* `blockstore.type` `(one of ["local", "s3", "mem"]: "mem")` - Block adapter to use. This controls where the underlying data will be stored +* `blockstore.type` `(one of ["local", "s3", "gs", "mem"]: "mem")` - Block adapter to use. This controls where the underlying data will be stored * `blockstore.local.path` `(string: "~/lakefs/data")` - When using the local Block Adapter, which directory to store files in +* `blockstore.gs.credentials_file` `(string : )` - If specified will be used as a file path of the JSON file that contains your Google service account key +* `blockstore.gs.credentials_json` `(string : )` - If specified will be used as JSON string that contains your Google service account key (when credentials_file is not set) * `blockstore.s3.region` `(string : "us-east-1")` - When using the S3 block adapter, AWS region to use * `blockstore.s3.profile` `(string : )` - If specified, will be used as a [named credentials profile](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html) * `blockstore.s3.credentials_file` `(string : )` - If specified, will be used as a [credentials file](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) @@ -140,3 +142,31 @@ gateways: ``` [aws-s3-batch-permissions]: https://docs.aws.amazon.com/AmazonS3/latest/dev/batch-ops-iam-role-policies.html + + +## Example: Google Storage Deployment + +```yaml +--- +logging: + format: json + level: WARN + output: "-" + +database: + connection_string: "postgres://user:pass@lakefs.rds.amazonaws.com:5432/postgres" + +auth: + encrypt: + secret_key: "10a718b3f285d89c36e9864494cdd1507f3bc85b342df24736ea81f9a1134bcc" + +blockstore: + type: gs + gs: + credentials_file: /secrets/lakefs-service-account.json + +gateways: + s3: + domain_name: s3.my-company.com + region: us-east-1 +``` diff --git a/docs/reference/retention.md b/docs/reference/retention.md index c22179d5dbf..97ba1dc572f 100644 --- a/docs/reference/retention.md +++ b/docs/reference/retention.md @@ -13,6 +13,10 @@ configuration][s3-lifecycle], however not all properties exist. Most notably current support is only for object expiration and not storage class transition. + + **Note:** Currently only S3 block adapter supports retention + {: .note } + # Retention ## System configuration diff --git a/docs/roadmap.md b/docs/roadmap.md index e462eacbf18..d823d68c3a0 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -8,15 +8,13 @@ has_children: false # Roadmap - [ ] **General**{: .label } Improved Monitoring -- [ ] **Block Adapter**{: .label } Additional storage Adapters (GCS?) -- [ ] **Integrations**{: .label } native Hadoop/Spark Filesystem & OutputCommitter -- [ ] **Integrations**{: .label } Python SDK +- [ ] **Block Adapter**{: .label } Additional storage adapters (Azure storage?) 
+- [ ] **Integrations**{: .label } Native Hadoop/Spark Filesystem & OutputCommitter - [ ] **Integrations**{: .label } Java SDK - [ ] **Integrations**{: .label } Airflow Operator - [ ] **Auth**{: .label } Support instance Profiles - [ ] **Auth**{: .label } IAM/KMS integration -- [ ] **Ops**{: .label } Kubernetes support, Helm chart - [ ] **CI**{: .label } Web Hook support - [ ] **CI**{: .label } Protected branches -- [ ] **UI**{: .label } better pagination and data exploration +- [ ] **UI**{: .label } Better pagination and data exploration From 4cf9f02021937b12251139e4a64e03b9fb0c5955 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 13 Sep 2020 15:26:00 +0300 Subject: [PATCH 21/39] use google storage without deployment --- docs/reference/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 1e85d67275a..054c52f44b4 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -144,7 +144,7 @@ gateways: [aws-s3-batch-permissions]: https://docs.aws.amazon.com/AmazonS3/latest/dev/batch-ops-iam-role-policies.html -## Example: Google Storage Deployment +## Example: Google Storage ```yaml --- From 34e6bc2d1e6a1d189241cee39b44b231dbc4d6d9 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Mon, 14 Sep 2020 10:51:37 +0300 Subject: [PATCH 22/39] add the $ for bash code (#598) --- docs/_sass/custom/custom.scss | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/_sass/custom/custom.scss b/docs/_sass/custom/custom.scss index 2706e7dbd33..d7d62eb26d7 100644 --- a/docs/_sass/custom/custom.scss +++ b/docs/_sass/custom/custom.scss @@ -743,3 +743,13 @@ footer { .highlighter-rouge { position: relative; } + +.language-shell.highlighter-rouge pre.highlight::before, +.language-sh.highlighter-rouge pre.highlight::before, +.language-bash.highlighter-rouge pre.highlight::before{ + content: "$ "; + color: $teal-green; + font-weight: 400; + font-family: monospace; + font-size: 0.75em; +} \ No newline at end of file From 85e87e46e7a86a9fa6ba0ee9d4b6d113d7f3b12e Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Mon, 14 Sep 2020 15:33:51 +0300 Subject: [PATCH 23/39] fix bug: wrong timestamp after ORC import + testing (#607) --- block/inventory.go | 3 +- block/s3/inventory_iterator.go | 5 +- block/s3/inventory_test.go | 39 +++++++---- inventory/s3/orc_reader.go | 18 +++--- inventory/s3/reader.go | 14 ++-- inventory/s3/reader_test.go | 23 ++++--- onboard/catalog_actions.go | 3 +- onboard/catalog_actions_test.go | 31 ++++++--- onboard/inventory.go | 6 +- onboard/inventory_test.go | 111 ++++++++++++++++++++++++++++---- onboard/utils_test.go | 30 ++++++--- 11 files changed, 212 insertions(+), 71 deletions(-) diff --git a/block/inventory.go b/block/inventory.go index 93def4eeb4c..0dbbe845a1e 100644 --- a/block/inventory.go +++ b/block/inventory.go @@ -2,6 +2,7 @@ package block import ( "context" + "time" "github.com/treeverse/lakefs/logging" ) @@ -21,7 +22,7 @@ type InventoryObject struct { Bucket string Key string Size int64 - LastModified int64 + LastModified time.Time Checksum string PhysicalAddress string } diff --git a/block/s3/inventory_iterator.go b/block/s3/inventory_iterator.go index f4a085762ac..f14034bbd87 100644 --- a/block/s3/inventory_iterator.go +++ b/block/s3/inventory_iterator.go @@ -2,6 +2,7 @@ package s3 import ( "errors" + "time" "github.com/treeverse/lakefs/block" inventorys3 "github.com/treeverse/lakefs/inventory/s3" @@ -100,8 +101,8 @@ func (it *InventoryIterator) 
nextFromBuffer() *block.InventoryObject { if obj.Size != nil { res.Size = *obj.Size } - if obj.LastModified != nil { - res.LastModified = *obj.LastModified + if obj.LastModifiedMillis != nil { + res.LastModified = time.Unix(*obj.LastModifiedMillis/int64(time.Second/time.Millisecond), 0) } if obj.Checksum != nil { res.Checksum = *obj.Checksum diff --git a/block/s3/inventory_test.go b/block/s3/inventory_test.go index 4438215f7b9..e715a4d41d7 100644 --- a/block/s3/inventory_test.go +++ b/block/s3/inventory_test.go @@ -5,14 +5,15 @@ import ( "errors" "fmt" "io/ioutil" - "reflect" "regexp" "strings" "testing" + "time" s3sdk "github.com/aws/aws-sdk-go/service/s3" "github.com/aws/aws-sdk-go/service/s3/s3iface" "github.com/go-openapi/swag" + "github.com/treeverse/lakefs/block" "github.com/treeverse/lakefs/block/s3" inventorys3 "github.com/treeverse/lakefs/inventory/s3" "github.com/treeverse/lakefs/logging" @@ -20,7 +21,7 @@ import ( var ErrReadFile = errors.New("error reading file") -func rows(keys ...string) []*inventorys3.InventoryObject { +func rows(keys []string, lastModified map[string]time.Time) []*inventorys3.InventoryObject { if keys == nil { return nil } @@ -31,6 +32,9 @@ func rows(keys ...string) []*inventorys3.InventoryObject { res[i].Key = key res[i].IsLatest = swag.Bool(!strings.Contains(key, "_expired")) res[i].IsDeleteMarker = swag.Bool(strings.Contains(key, "_del")) + if lastModified != nil { + res[i].LastModifiedMillis = swag.Int64(lastModified[key].Unix() * 1000) + } } } return res @@ -65,6 +69,13 @@ var fileContents = map[string][]string{ } func TestIterator(t *testing.T) { + now := time.Now() + lastModified := make(map[string]time.Time) + for _, rows := range fileContents { + for i, r := range rows { + lastModified[r] = now.Add(time.Hour * time.Duration(-i)) + } + } testdata := []struct { InventoryFiles []string ExpectedObjects []string @@ -151,13 +162,12 @@ func TestIterator(t *testing.T) { ErrExpected: s3.ErrInventoryFilesRangesOverlap, }, } - manifestURL := "s3://example-bucket/manifest1.json" for _, test := range testdata { s3api := &mockS3Client{ FilesByManifestURL: map[string][]string{manifestURL: test.InventoryFiles}, } - reader := &mockInventoryReader{openFiles: make(map[string]bool)} + reader := &mockInventoryReader{openFiles: make(map[string]bool), lastModified: lastModified} inv, err := s3.GenerateInventory(logging.Default(), manifestURL, s3api, reader, test.ShouldSort) if err != nil { if errors.Is(err, test.ErrExpected) { @@ -166,9 +176,9 @@ func TestIterator(t *testing.T) { t.Fatalf("error: %v", err) } it := inv.Iterator() - objects := make([]string, 0, len(test.ExpectedObjects)) + objects := make([]*block.InventoryObject, 0, len(test.ExpectedObjects)) for it.Next() { - objects = append(objects, it.Get().Key) + objects = append(objects, it.Get()) } if len(reader.openFiles) != 0 { t.Errorf("some files stayed open: %v", reader.openFiles) @@ -182,14 +192,21 @@ func TestIterator(t *testing.T) { if len(objects) != len(test.ExpectedObjects) { t.Fatalf("unexpected number of objects in inventory. expected=%d, got=%d", len(test.ExpectedObjects), len(objects)) } - if !reflect.DeepEqual(objects, test.ExpectedObjects) { - t.Fatalf("objects in inventory differrent than expected. 
expected=%v, got=%v", test.ExpectedObjects, objects) + for i, obj := range objects { + if obj.Key != test.ExpectedObjects[i] { + t.Fatalf("at index %d: expected=%s, got=%s", i, test.ExpectedObjects[i], obj.Key) + } + expectedLastModified := lastModified[obj.Key].Truncate(time.Second) + if obj.LastModified != expectedLastModified { + t.Fatalf("last modified for object in index %d different than expected. expected=%v, got=%v", i, expectedLastModified, obj.LastModified) + } } } } type mockInventoryReader struct { - openFiles map[string]bool + openFiles map[string]bool + lastModified map[string]time.Time } type mockInventoryFileReader struct { @@ -249,12 +266,12 @@ func (m *mockInventoryFileReader) GetNumRows() int64 { func (m *mockInventoryReader) GetFileReader(_ string, _ string, key string) (inventorys3.FileReader, error) { m.openFiles[key] = true - return &mockInventoryFileReader{rows: rows(fileContents[key]...), inventoryReader: m, key: key}, nil + return &mockInventoryFileReader{rows: rows(fileContents[key], m.lastModified), inventoryReader: m, key: key}, nil } func (m *mockInventoryReader) GetMetadataReader(_ string, _ string, key string) (inventorys3.MetadataReader, error) { m.openFiles[key] = true - return &mockInventoryFileReader{rows: rows(fileContents[key]...), inventoryReader: m, key: key}, nil + return &mockInventoryFileReader{rows: rows(fileContents[key], m.lastModified), inventoryReader: m, key: key}, nil } func (m *mockS3Client) GetObject(input *s3sdk.GetObjectInput) (*s3sdk.GetObjectOutput, error) { output := s3sdk.GetObjectOutput{} diff --git a/inventory/s3/orc_reader.go b/inventory/s3/orc_reader.go index e899627fb33..bf5accacff9 100644 --- a/inventory/s3/orc_reader.go +++ b/inventory/s3/orc_reader.go @@ -55,9 +55,9 @@ func (r *OrcInventoryFileReader) inventoryObjectFromRow(rowData []interface{}) I if sizeIdx, ok := r.orcSelect.IndexInSelect["size"]; ok && rowData[sizeIdx] != nil { size = swag.Int64(rowData[sizeIdx].(int64)) } - var lastModified *int64 + var lastModifiedMillis *int64 if lastModifiedIdx, ok := r.orcSelect.IndexInSelect["last_modified_date"]; ok && rowData[lastModifiedIdx] != nil { - lastModified = swag.Int64(rowData[lastModifiedIdx].(time.Time).Unix()) + lastModifiedMillis = swag.Int64(rowData[lastModifiedIdx].(time.Time).UnixNano() / int64(time.Millisecond)) } var eTag *string if eTagIdx, ok := r.orcSelect.IndexInSelect["e_tag"]; ok && rowData[eTagIdx] != nil { @@ -72,13 +72,13 @@ func (r *OrcInventoryFileReader) inventoryObjectFromRow(rowData []interface{}) I isDeleteMarker = swag.Bool(rowData[isDeleteMarkerIdx].(bool)) } return InventoryObject{ - Bucket: rowData[r.orcSelect.IndexInSelect["bucket"]].(string), - Key: rowData[r.orcSelect.IndexInSelect["key"]].(string), - Size: size, - LastModified: lastModified, - Checksum: eTag, - IsLatest: isLatest, - IsDeleteMarker: isDeleteMarker, + Bucket: rowData[r.orcSelect.IndexInSelect["bucket"]].(string), + Key: rowData[r.orcSelect.IndexInSelect["key"]].(string), + Size: size, + LastModifiedMillis: lastModifiedMillis, + Checksum: eTag, + IsLatest: isLatest, + IsDeleteMarker: isDeleteMarker, } } diff --git a/inventory/s3/reader.go b/inventory/s3/reader.go index 3aebbe62348..15278fb1a37 100644 --- a/inventory/s3/reader.go +++ b/inventory/s3/reader.go @@ -27,13 +27,13 @@ type IReader interface { } type InventoryObject struct { - Bucket string `parquet:"name=bucket, type=UTF8"` - Key string `parquet:"name=key, type=UTF8"` - IsLatest *bool `parquet:"name=is_latest, type=BOOLEAN"` - IsDeleteMarker *bool 
`parquet:"name=is_delete_marker, type=BOOLEAN"` - Size *int64 `parquet:"name=size, type=INT_64"` - LastModified *int64 `parquet:"name=last_modified_date, type=TIMESTAMP_MILLIS"` - Checksum *string `parquet:"name=e_tag, type=UTF8"` + Bucket string `parquet:"name=bucket, type=UTF8"` + Key string `parquet:"name=key, type=UTF8"` + IsLatest *bool `parquet:"name=is_latest, type=BOOLEAN"` + IsDeleteMarker *bool `parquet:"name=is_delete_marker, type=BOOLEAN"` + Size *int64 `parquet:"name=size, type=INT_64"` + LastModifiedMillis *int64 `parquet:"name=last_modified_date, type=TIMESTAMP_MILLIS"` + Checksum *string `parquet:"name=e_tag, type=UTF8"` } func (o *InventoryObject) GetPhysicalAddress() string { diff --git a/inventory/s3/reader_test.go b/inventory/s3/reader_test.go index 6ea2e43e0c7..b19eac67088 100644 --- a/inventory/s3/reader_test.go +++ b/inventory/s3/reader_test.go @@ -42,7 +42,7 @@ func generateOrc(t *testing.T, objs <-chan *InventoryObject) string { t.Fatal(err) } for o := range objs { - err = w.Write(o.Bucket, o.Key, *o.Size, time.Unix(*o.LastModified, 0), *o.Checksum) + err = w.Write(o.Bucket, o.Key, *o.Size, time.Unix(*o.LastModifiedMillis/1000, 0), *o.Checksum) if err != nil { t.Fatal(err) } @@ -73,17 +73,17 @@ func getS3Fake(t *testing.T) (s3iface.S3API, *httptest.Server) { return s3.New(newSession), ts } -func objs(num int) <-chan *InventoryObject { +func objs(num int, lastModified []time.Time) <-chan *InventoryObject { out := make(chan *InventoryObject) go func() { defer close(out) for i := 0; i < num; i++ { out <- &InventoryObject{ - Bucket: inventoryBucketName, - Key: fmt.Sprintf("f%05d", i), - Size: swag.Int64(500), - LastModified: swag.Int64(time.Now().Unix()), - Checksum: swag.String("abcdefg"), + Bucket: inventoryBucketName, + Key: fmt.Sprintf("f%05d", i), + Size: swag.Int64(500), + LastModifiedMillis: swag.Int64(lastModified[i%len(lastModified)].Unix() * 1000), + Checksum: swag.String("abcdefg"), } } }() @@ -146,8 +146,9 @@ func TestInventoryReader(t *testing.T) { } for _, test := range testdata { - - uploadFile(t, svc, inventoryBucketName, "myFile.orc", objs(test.ObjectNum)) + now := time.Now() + lastModified := []time.Time{now, now.Add(-1 * time.Hour), now.Add(-2 * time.Hour), now.Add(-3 * time.Hour)} + uploadFile(t, svc, inventoryBucketName, "myFile.orc", objs(test.ObjectNum, lastModified)) reader := NewReader(context.Background(), svc, logging.Default()) fileReader, err := reader.GetFileReader("ORC", inventoryBucketName, "myFile.orc") if err != nil { @@ -175,6 +176,10 @@ func TestInventoryReader(t *testing.T) { if res[i-offset].Key != fmt.Sprintf("f%05d", i) { t.Fatalf("result in index %d different than expected. expected=%s, got=%s (batch #%d, index %d)", i, fmt.Sprintf("f%05d", i), res[i-offset].Key, offset/readBatchSize, i-offset) } + expectedLastModified := lastModified[i%len(lastModified)].Unix() * 1000 + if *res[i-offset].LastModifiedMillis != expectedLastModified { + t.Fatalf("unexpected timestamp for result in index %d. 
expected=%d, got=%d (batch #%d, index %d)", i, expectedLastModified, *res[i-offset].LastModifiedMillis, offset/readBatchSize, i-offset) + } } offset += len(res) readCount += len(res) diff --git a/onboard/catalog_actions.go b/onboard/catalog_actions.go index 9d5cadee033..b4376401c41 100644 --- a/onboard/catalog_actions.go +++ b/onboard/catalog_actions.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "sync" - "time" "github.com/treeverse/lakefs/catalog" "github.com/treeverse/lakefs/db" @@ -78,7 +77,7 @@ func (c *CatalogRepoActions) ApplyImport(ctx context.Context, it Iterator, dryRu entry := catalog.Entry{ Path: obj.Key, PhysicalAddress: obj.PhysicalAddress, - CreationDate: time.Unix(0, obj.LastModified*int64(time.Millisecond)), + CreationDate: obj.LastModified, Size: obj.Size, Checksum: obj.Checksum, } diff --git a/onboard/catalog_actions_test.go b/onboard/catalog_actions_test.go index 5748a822fff..a6397e748d2 100644 --- a/onboard/catalog_actions_test.go +++ b/onboard/catalog_actions_test.go @@ -5,6 +5,7 @@ import ( "sync" "sync/atomic" "testing" + "time" "github.com/go-openapi/swag" "github.com/treeverse/lakefs/catalog" @@ -17,7 +18,7 @@ type mockCataloger struct { } var catalogCallData = struct { - addedEntries map[string]bool + addedEntries map[string]catalog.Entry deletedEntries map[string]bool callLog map[string]*int32 mux sync.Mutex @@ -27,7 +28,7 @@ func (m mockCataloger) CreateEntries(_ context.Context, _, _ string, entries []c catalogCallData.mux.Lock() defer catalogCallData.mux.Unlock() for _, e := range entries { - catalogCallData.addedEntries[e.Path] = true + catalogCallData.addedEntries[e.Path] = e } atomic.AddInt32(catalogCallData.callLog["CreateEntries"], 1) return nil @@ -98,14 +99,23 @@ func TestCreateAndDeleteRows(t *testing.T) { } for _, dryRun := range []bool{true, false} { for _, test := range testdata { - catalogCallData.addedEntries = make(map[string]bool) + catalogCallData.addedEntries = make(map[string]catalog.Entry) catalogCallData.deletedEntries = make(map[string]bool) catalogCallData.callLog = make(map[string]*int32) catalogCallData.callLog["DeleteEntry"] = swag.Int32(0) catalogCallData.callLog["CreateEntries"] = swag.Int32(0) - stats, err := catalogActions.ApplyImport(context.Background(), onboard.NewDiffIterator( - &mockInventoryIterator{rows: rows(test.DeletedRows...)}, - &mockInventoryIterator{rows: rows(test.AddedRows...)}), dryRun) + now := time.Now() + lastModified := []time.Time{now, now.Add(-1 * time.Hour), now.Add(-2 * time.Hour)} + leftInv := &mockInventory{ + keys: test.DeletedRows, + lastModified: lastModified, + } + rightInv := &mockInventory{ + keys: test.AddedRows, + lastModified: lastModified, + } + stats, err := catalogActions.ApplyImport(context.Background(), + onboard.NewDiffIterator(leftInv.Iterator(), rightInv.Iterator()), dryRun) if err != nil { t.Fatalf("failed to create/delete objects: %v", err) } @@ -133,8 +143,13 @@ func TestCreateAndDeleteRows(t *testing.T) { if len(catalogCallData.addedEntries) != len(test.AddedRows) { t.Fatalf("unexpected number of added entries. expected=%d, got=%d", len(test.AddedRows), len(catalogCallData.addedEntries)) } - for _, path := range test.AddedRows { - if _, ok := catalogCallData.addedEntries[path]; !ok { + for i, path := range test.AddedRows { + expectedLastModified := lastModified[i%len(lastModified)].Truncate(time.Second) + if e, ok := catalogCallData.addedEntries[path]; ok { + if e.CreationDate.Truncate(time.Second) != expectedLastModified { + t.Fatalf("entry added with unexpected creation-date. 
expected=%v, got=%v", expectedLastModified, e.CreationDate.Truncate(time.Second)) + } + } else { t.Fatalf("expected entry not added: %s", path) } } diff --git a/onboard/inventory.go b/onboard/inventory.go index ccf39aad4b3..d8ee024efed 100644 --- a/onboard/inventory.go +++ b/onboard/inventory.go @@ -19,6 +19,7 @@ type InventoryDiff struct { type ImportObject struct { Obj block.InventoryObject IsDeleted bool + IsChanged bool } type Iterator interface { @@ -82,7 +83,10 @@ func (d *DiffIterator) Next() bool { return true case d.leftInv.Get().Key == d.rightInv.Get().Key: if d.leftInv.Get().Checksum != d.rightInv.Get().Checksum { - d.value = ImportObject{Obj: *d.rightInv.Get()} + d.value = ImportObject{Obj: *d.rightInv.Get(), IsChanged: true} + d.leftNext = d.leftInv.Next() + d.rightNext = d.rightInv.Next() + return true } d.leftNext = d.leftInv.Next() d.rightNext = d.rightInv.Next() diff --git a/onboard/inventory_test.go b/onboard/inventory_test.go index 3006649c648..318517e6e64 100644 --- a/onboard/inventory_test.go +++ b/onboard/inventory_test.go @@ -1,18 +1,27 @@ package onboard_test import ( - "reflect" "testing" + "time" + "github.com/treeverse/lakefs/block" "github.com/treeverse/lakefs/onboard" ) +func generateLastModified(keys []string, times map[string]time.Time) []time.Time { + res := make([]time.Time, len(keys)) + for i, k := range keys { + res[i] = times[k] + } + return res +} func TestDiff(t *testing.T) { data := []struct { LeftInv []string RightInv []string ExpectedDiffAdded []string ExpectedDiffDeleted []string + ChangedFiles []string }{ { LeftInv: []string{"a1", "a2", "a3"}, @@ -62,28 +71,108 @@ func TestDiff(t *testing.T) { ExpectedDiffAdded: []string{}, ExpectedDiffDeleted: []string{"b1", "b2"}, }, + { + LeftInv: []string{"a1", "a2", "a3"}, + RightInv: []string{"a1", "a3", "b4"}, + ExpectedDiffAdded: []string{"b4"}, + ExpectedDiffDeleted: []string{"a2"}, + ChangedFiles: []string{"a3"}, + }, + { + LeftInv: []string{"a1", "a2", "a3"}, + RightInv: []string{"a1", "a2", "a3"}, + ExpectedDiffAdded: []string{}, + ExpectedDiffDeleted: []string{}, + ChangedFiles: []string{"a2"}, + }, + { + LeftInv: []string{"a1", "a2", "a3", "a4", "a5", "b1", "b2", "b3"}, + RightInv: []string{"b3"}, + ExpectedDiffAdded: []string{}, + ExpectedDiffDeleted: []string{"a1", "a2", "a3", "a4", "a5", "b1", "b2"}, + ChangedFiles: []string{"b3"}, + }, + { + LeftInv: []string{"a1"}, + RightInv: []string{"a1", "a2", "a3", "a4", "a5", "b1", "b2", "b3"}, + ExpectedDiffAdded: []string{"a2", "a3", "a4", "a5", "b1", "b2", "b3"}, + ExpectedDiffDeleted: []string{}, + ChangedFiles: []string{"a1"}, + }, } for _, test := range data { - rightInv := &mockInventory{rows: test.RightInv} - leftInv := &mockInventory{rows: test.LeftInv} + now := time.Now() + times := map[string]time.Time{ + "a1": now, + "a2": now.Add(-1 * time.Hour), + "a3": now.Add(-2 * time.Hour), + "a4": now.Add(-3 * time.Hour), + "a5": now.Add(-4 * time.Hour), + "b1": now, + "b2": now.Add(-1 * time.Hour), + "b3": now.Add(-2 * time.Hour), + "b4": now.Add(-3 * time.Hour), + "b5": now.Add(-4 * time.Hour), + "b6": now.Add(-5 * time.Hour), + } + rightInvChecksum := func(s string) string { + for _, changed := range test.ChangedFiles { + if changed == s { + return "abcde" + s + } + } + return s + } + rightInv := &mockInventory{keys: test.RightInv, lastModified: generateLastModified(test.RightInv, times), checksum: rightInvChecksum} + leftInv := &mockInventory{keys: test.LeftInv, lastModified: generateLastModified(test.LeftInv, times)} leftIt := leftInv.Iterator() 
rightIt := rightInv.Iterator() it := onboard.NewDiffIterator(leftIt, rightIt) - actualAdded := make([]string, 0, len(test.ExpectedDiffAdded)) - actualDeleted := make([]string, 0, len(test.ExpectedDiffDeleted)) + actualAdded := make([]*block.InventoryObject, 0, len(test.ExpectedDiffAdded)) + actualDeleted := make([]*block.InventoryObject, 0, len(test.ExpectedDiffDeleted)) + actualChanged := make([]*block.InventoryObject, 0, len(test.ChangedFiles)) for it.Next() { o := it.Get() if o.IsDeleted { - actualDeleted = append(actualDeleted, o.Obj.Key) + actualDeleted = append(actualDeleted, &o.Obj) + } else if o.IsChanged { + actualChanged = append(actualChanged, &o.Obj) } else { - actualAdded = append(actualAdded, o.Obj.Key) + actualAdded = append(actualAdded, &o.Obj) + } + } + if len(actualAdded) != len(test.ExpectedDiffAdded) { + t.Fatalf("number of added objects in diff different than expected. expected: %d, got: %d", len(test.ExpectedDiffAdded), len(actualAdded)) + } + if len(actualDeleted) != len(test.ExpectedDiffDeleted) { + t.Fatalf("number of deleted objects in diff different than expected. expected: %d, got: %d", len(test.ExpectedDiffDeleted), len(actualDeleted)) + } + if len(actualChanged) != len(test.ChangedFiles) { + t.Fatalf("number of changed objects in diff different than expected. expected: %d, got: %d", len(test.ChangedFiles), len(actualChanged)) + } + for i, expectedAdded := range test.ExpectedDiffAdded { + if actualAdded[i].Key != expectedAdded { + t.Fatalf("added object in diff index %d different than expected. expected: %s, got: %s", i, expectedAdded, actualAdded[i].Key) + } + if actualAdded[i].LastModified != times[expectedAdded] { + t.Fatalf("modified time for key %s different than expected. expected: %v, got: %v", expectedAdded, times[expectedAdded], actualAdded[i].LastModified) } } - if !reflect.DeepEqual(actualAdded, test.ExpectedDiffAdded) { - t.Fatalf("diff added object different than expected. expected: %v, got: %v", test.ExpectedDiffAdded, actualAdded) + for i, expectedDeleted := range test.ExpectedDiffDeleted { + if actualDeleted[i].Key != expectedDeleted { + t.Fatalf("deleted object in diff index %d different than expected. expected: %s, got: %s", i, expectedDeleted, actualDeleted[i].Key) + } + if actualDeleted[i].LastModified != times[expectedDeleted] { + t.Fatalf("modified time for key %s different than expected. expected: %v, got: %v", expectedDeleted, times[expectedDeleted], actualDeleted[i].LastModified) + } } - if !reflect.DeepEqual(actualDeleted, test.ExpectedDiffDeleted) { - t.Fatalf("diff deleted object different than expected. expected: %v, got: %v", test.ExpectedDiffDeleted, actualDeleted) + for i, expectedChanged := range test.ChangedFiles { + if actualChanged[i].Key != expectedChanged { + t.Fatalf("changed object in diff index %d different than expected. expected: %s, got: %s", i, expectedChanged, actualChanged[i].Key) + } + if actualChanged[i].LastModified != times[expectedChanged] { + t.Fatalf("modified time for key %s different than expected. 
expected: %v, got: %v", expectedChanged, times[expectedChanged], actualChanged[i].LastModified) + } } } } diff --git a/onboard/utils_test.go b/onboard/utils_test.go index d2157c3038a..43582a24b7c 100644 --- a/onboard/utils_test.go +++ b/onboard/utils_test.go @@ -4,6 +4,7 @@ import ( "context" "errors" "sort" + "time" "github.com/treeverse/lakefs/block" "github.com/treeverse/lakefs/catalog" @@ -17,10 +18,12 @@ const ( ) type mockInventory struct { - rows []string + keys []string inventoryURL string sourceBucket string shouldSort bool + lastModified []time.Time + checksum func(string) string } type objectActions struct { @@ -44,21 +47,25 @@ type mockInventoryGenerator struct { func (m mockInventoryGenerator) GenerateInventory(_ context.Context, _ logging.Logger, inventoryURL string, shouldSort bool) (block.Inventory, error) { if inventoryURL == m.newInventoryURL { - return &mockInventory{rows: m.newInventory, inventoryURL: inventoryURL, sourceBucket: m.sourceBucket, shouldSort: shouldSort}, nil + return &mockInventory{keys: m.newInventory, inventoryURL: inventoryURL, sourceBucket: m.sourceBucket, shouldSort: shouldSort}, nil } if inventoryURL == m.previousInventoryURL { - return &mockInventory{rows: m.previousInventory, inventoryURL: inventoryURL, sourceBucket: m.sourceBucket, shouldSort: shouldSort}, nil + return &mockInventory{keys: m.previousInventory, inventoryURL: inventoryURL, sourceBucket: m.sourceBucket, shouldSort: shouldSort}, nil } return nil, errors.New("failed to create inventory") } -func rows(keys ...string) []block.InventoryObject { - if keys == nil { +func (m *mockInventory) rows() []block.InventoryObject { + if m.keys == nil { return nil } - res := make([]block.InventoryObject, 0, len(keys)) - for _, key := range keys { - res = append(res, block.InventoryObject{Key: key}) + res := make([]block.InventoryObject, 0, len(m.keys)) + if m.checksum == nil { + m.checksum = func(s string) string { return s } + } + for i, key := range m.keys { + + res = append(res, block.InventoryObject{Key: key, LastModified: m.lastModified[i%len(m.lastModified)], Checksum: m.checksum(key)}) } return res } @@ -121,10 +128,13 @@ func (m *mockInventoryIterator) Get() *block.InventoryObject { func (m *mockInventory) Iterator() block.InventoryIterator { if m.shouldSort { - sort.Strings(m.rows) + sort.Strings(m.keys) + } + if m.lastModified == nil { + m.lastModified = []time.Time{time.Now()} } return &mockInventoryIterator{ - rows: rows(m.rows...), + rows: m.rows(), } } From bdb71d227678dbd1956ba1325cfe54561b8fc516 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Mon, 14 Sep 2020 17:21:20 +0300 Subject: [PATCH 24/39] Feature/diff pagination (#583) --- api/api_controller.go | 80 ++- api/client.go | 13 +- api/transform.go | 36 +- catalog/cataloger.go | 11 +- catalog/cataloger_diff.go | 106 ++-- catalog/cataloger_diff_test.go | 123 +++- catalog/cataloger_diff_uncommitted.go | 23 +- catalog/cataloger_diff_uncommitted_test.go | 65 ++- catalog/cataloger_list_commits_test.go | 20 +- catalog/cataloger_merge.go | 28 +- catalog/cataloger_merge_test.go | 552 +++++++++++++----- catalog/db.go | 2 +- catalog/db_test.go | 4 +- catalog/model.go | 5 + cmd/lakectl/cmd/merge.go | 48 +- docs/assets/js/swagger.yml | 51 +- nessie/sanity_api_test.go | 10 +- swagger.yml | 51 +- webui/src/actions/api.js | 15 +- webui/src/actions/refs.js | 17 +- webui/src/components/Changes.js | 164 ++++++ webui/src/components/ChangesPage.js | 242 ++++++++ webui/src/components/ComparePage.js | 60 +- .../src/components/RepositoryExplorerPage.js | 9 
+- webui/src/components/TreePage.js | 191 +----- webui/src/index.css | 9 +- webui/src/store/refs.js | 4 +- 27 files changed, 1321 insertions(+), 618 deletions(-) create mode 100644 webui/src/components/Changes.js create mode 100644 webui/src/components/ChangesPage.js diff --git a/api/api_controller.go b/api/api_controller.go index cb2bc61ee71..2d7157e6558 100644 --- a/api/api_controller.go +++ b/api/api_controller.go @@ -749,28 +749,17 @@ func (c *Controller) MergeMergeIntoBranchHandler() refs.MergeIntoBranchHandler { message, metadata) - // convert merge differences into merge results - var mergeResults []*models.MergeResult - if res != nil { - mergeResults = make([]*models.MergeResult, len(res.Differences)) - for i, d := range res.Differences { - mergeResults[i] = transformDifferenceToMergeResult(d) - } - } - switch err { case nil: - pl := new(refs.MergeIntoBranchOKBody) - pl.Results = mergeResults - return refs.NewMergeIntoBranchOK().WithPayload(pl) + payload := newMergeResultFromCatalog(res) + return refs.NewMergeIntoBranchOK().WithPayload(payload) case catalog.ErrUnsupportedRelation: return refs.NewMergeIntoBranchDefault(http.StatusInternalServerError).WithPayload(responseError("branches have no common base")) case catalog.ErrBranchNotFound: return refs.NewMergeIntoBranchDefault(http.StatusInternalServerError).WithPayload(responseError("a branch does not exist ")) case catalog.ErrConflictFound: - pl := new(refs.MergeIntoBranchConflictBody) - pl.Results = mergeResults - return refs.NewMergeIntoBranchConflict().WithPayload(pl) + payload := newMergeResultFromCatalog(res) + return refs.NewMergeIntoBranchConflict().WithPayload(payload) case catalog.ErrNoDifferenceWasFound: return refs.NewMergeIntoBranchDefault(http.StatusInternalServerError).WithPayload(responseError("no difference was found")) default: @@ -779,6 +768,30 @@ func (c *Controller) MergeMergeIntoBranchHandler() refs.MergeIntoBranchHandler { }) } +func newMergeResultFromCatalog(res *catalog.MergeResult) *models.MergeResult { + if res == nil { + return nil + } + var summary models.MergeResultSummary + for k, v := range res.Summary { + val := int64(v) + switch k { + case catalog.DifferenceTypeAdded: + summary.Added = val + case catalog.DifferenceTypeChanged: + summary.Changed = val + case catalog.DifferenceTypeRemoved: + summary.Removed = val + case catalog.DifferenceTypeConflict: + summary.Conflict = val + } + } + return &models.MergeResult{ + Reference: res.Reference, + Summary: &summary, + } +} + func (c *Controller) BranchesDiffBranchHandler() branches.DiffBranchHandler { return branches.DiffBranchHandlerFunc(func(params branches.DiffBranchParams, user *models.User) middleware.Responder { deps, err := c.setupRequest(user, params.HTTPRequest, []permissions.Permission{ @@ -792,7 +805,9 @@ func (c *Controller) BranchesDiffBranchHandler() branches.DiffBranchHandler { } deps.LogAction("diff_workspace") cataloger := deps.Cataloger - diff, err := cataloger.DiffUncommitted(c.Context(), params.Repository, params.Branch) + limit := int(swag.Int64Value(params.Amount)) + after := swag.StringValue(params.After) + diff, hasMore, err := cataloger.DiffUncommitted(c.Context(), params.Repository, params.Branch, limit, after) if err != nil { return branches.NewDiffBranchDefault(http.StatusInternalServerError). 
WithPayload(responseError("could not diff branch: %s", err))
@@ -802,8 +817,19 @@ func (c *Controller) BranchesDiffBranchHandler() branches.DiffBranchHandler {
 		for i, d := range diff {
 			results[i] = transformDifferenceToDiff(d)
 		}
-
-		return branches.NewDiffBranchOK().WithPayload(&branches.DiffBranchOKBody{Results: results})
+		var nextOffset string
+		if hasMore && len(diff) > 0 {
+			nextOffset = diff[len(diff)-1].Path
+		}
+		return branches.NewDiffBranchOK().WithPayload(&branches.DiffBranchOKBody{
+			Results: results,
+			Pagination: &models.Pagination{
+				NextOffset: nextOffset,
+				HasMore:    swag.Bool(hasMore),
+				Results:    swag.Int64(int64(len(diff))),
+				MaxPerPage: swag.Int64(MaxResultsPerPage),
+			},
+		})
 	})
 }
 
@@ -820,7 +846,9 @@ func (c *Controller) RefsDiffRefsHandler() refs.DiffRefsHandler {
 		}
 		deps.LogAction("diff_refs")
 		cataloger := deps.Cataloger
-		diff, err := cataloger.Diff(c.Context(), params.Repository, params.LeftRef, params.RightRef)
+		limit := int(swag.Int64Value(params.Amount))
+		after := swag.StringValue(params.After)
+		diff, hasMore, err := cataloger.Diff(c.Context(), params.Repository, params.LeftRef, params.RightRef, limit, after)
 		if errors.Is(err, catalog.ErrFeatureNotSupported) {
 			return refs.NewDiffRefsDefault(http.StatusNotImplemented).WithPayload(responseError(err.Error()))
 		}
@@ -833,7 +861,19 @@ func (c *Controller) RefsDiffRefsHandler() refs.DiffRefsHandler {
 		for i, d := range diff {
 			results[i] = transformDifferenceToDiff(d)
 		}
-		return refs.NewDiffRefsOK().WithPayload(&refs.DiffRefsOKBody{Results: results})
+		var nextOffset string
+		if hasMore && len(diff) > 0 {
+			nextOffset = diff[len(diff)-1].Path
+		}
+		return refs.NewDiffRefsOK().WithPayload(&refs.DiffRefsOKBody{
+			Results: results,
+			Pagination: &models.Pagination{
+				NextOffset: nextOffset,
+				HasMore:    swag.Bool(hasMore),
+				Results:    swag.Int64(int64(len(diff))),
+				MaxPerPage: swag.Int64(MaxResultsPerPage),
+			},
+		})
 	})
 }
 
diff --git a/api/client.go b/api/client.go
index 3440712f266..aa193f1aef1 100644
--- a/api/client.go
+++ b/api/client.go
@@ -76,7 +76,7 @@ type RepositoryClient interface {
 	DeleteObject(ctx context.Context, repository, branchID, path string) error
 
 	DiffRefs(ctx context.Context, repository, leftRef, rightRef string) ([]*models.Diff, error)
-	Merge(ctx context.Context, repository, leftRef, rightRef string) ([]*models.MergeResult, error)
+	Merge(ctx context.Context, repository, leftRef, rightRef string) (*models.MergeResult, error)
 
 	DiffBranch(ctx context.Context, repository, branch string) ([]*models.Diff, error)
 
@@ -539,7 +539,7 @@ func (c *client) DiffRefs(ctx context.Context, repository, leftRef, rightRef str
 	return diff.GetPayload().Results, nil
 }
 
-func (c *client) Merge(ctx context.Context, repository, leftRef, rightRef string) ([]*models.MergeResult, error) {
+func (c *client) Merge(ctx context.Context, repository, leftRef, rightRef string) (*models.MergeResult, error) {
 	statusOK, err := c.remote.Refs.MergeIntoBranch(&refs.MergeIntoBranchParams{
 		DestinationRef: leftRef,
 		SourceRef:      rightRef,
@@ -548,14 +548,13 @@ func (c *client) Merge(ctx context.Context, repository, leftRef, rightRef string
 	}, c.auth)
 
 	if err == nil {
-		return statusOK.Payload.Results, nil
+		return statusOK.Payload, nil
 	}
 	conflict, ok := err.(*refs.MergeIntoBranchConflict)
-	if ok {
-		return conflict.Payload.Results, catalog.ErrConflictFound
-	} else {
-		return nil, err
+	if ok {
+		return conflict.Payload, catalog.ErrConflictFound
 	}
+	return nil, err
 }
 
 func (c *client) DiffBranch(ctx context.Context, repoID, branch string)
([]*models.Diff, error) { diff --git a/api/transform.go b/api/transform.go index e149acd7101..a2990636db1 100644 --- a/api/transform.go +++ b/api/transform.go @@ -7,44 +7,26 @@ import ( "github.com/treeverse/lakefs/catalog" ) -func transformDifferenceToMergeResult(difference catalog.Difference) *models.MergeResult { - mr := &models.MergeResult{ - Path: difference.Path, - } - switch difference.Type { +func transformDifferenceTypeToString(d catalog.DifferenceType) string { + switch d { case catalog.DifferenceTypeAdded: - mr.Type = models.MergeResultTypeAdded + return models.DiffTypeAdded case catalog.DifferenceTypeRemoved: - mr.Type = models.MergeResultTypeRemoved + return models.DiffTypeRemoved case catalog.DifferenceTypeChanged: - mr.Type = models.MergeResultTypeChanged + return models.DiffTypeChanged case catalog.DifferenceTypeConflict: - mr.Type = models.MergeResultTypeConflict - } - - if strings.HasSuffix(difference.Path, catalog.DefaultPathDelimiter) { - mr.PathType = models.MergeResultPathTypeCommonPrefix - } else { - mr.PathType = models.MergeResultPathTypeObject + return models.DiffTypeConflict + default: + return "" } - return mr } func transformDifferenceToDiff(difference catalog.Difference) *models.Diff { d := &models.Diff{ Path: difference.Path, } - switch difference.Type { - case catalog.DifferenceTypeAdded: - d.Type = models.DiffTypeAdded - case catalog.DifferenceTypeRemoved: - d.Type = models.DiffTypeRemoved - case catalog.DifferenceTypeChanged: - d.Type = models.DiffTypeChanged - case catalog.DifferenceTypeConflict: - d.Type = models.DiffTypeConflict - } - + d.Type = transformDifferenceTypeToString(difference.Type) if strings.HasSuffix(difference.Path, catalog.DefaultPathDelimiter) { d.PathType = models.DiffPathTypeCommonPrefix } else { diff --git a/catalog/cataloger.go b/catalog/cataloger.go index 44deaae47fb..e7325b848f2 100644 --- a/catalog/cataloger.go +++ b/catalog/cataloger.go @@ -144,17 +144,12 @@ type Committer interface { } type Differ interface { - Diff(ctx context.Context, repository, leftBranch string, rightBranch string) (Differences, error) - DiffUncommitted(ctx context.Context, repository, branch string) (Differences, error) -} - -type MergeResult struct { - Differences Differences - Reference string + Diff(ctx context.Context, repository, leftBranch string, rightBranch string, limit int, after string) (Differences, bool, error) + DiffUncommitted(ctx context.Context, repository, branch string, limit int, after string) (Differences, bool, error) } type Merger interface { - Merge(ctx context.Context, repository, sourceBranch, destinationBranch string, committer string, message string, metadata Metadata) (*MergeResult, error) + Merge(ctx context.Context, repository, leftBranch, rightBranch, committer, message string, metadata Metadata) (*MergeResult, error) } type Cataloger interface { diff --git a/catalog/cataloger_diff.go b/catalog/cataloger_diff.go index 7c6e10f5c35..28558af4983 100644 --- a/catalog/cataloger_diff.go +++ b/catalog/cataloger_diff.go @@ -10,17 +10,25 @@ import ( "github.com/treeverse/lakefs/logging" ) -const diffResultsTableName = "catalog_diff_results" +const ( + DiffMaxLimit = 1000 -func (c *cataloger) Diff(ctx context.Context, repository string, leftBranch string, rightBranch string) (Differences, error) { + diffResultsTableName = "catalog_diff_results" +) + +func (c *cataloger) Diff(ctx context.Context, repository string, leftBranch string, rightBranch string, limit int, after string) (Differences, bool, error) { if err := 
Validate(ValidateFields{ {Name: "repository", IsValid: ValidateRepositoryName(repository)}, {Name: "leftBranch", IsValid: ValidateBranchName(leftBranch)}, {Name: "rightBranch", IsValid: ValidateBranchName(rightBranch)}, }); err != nil { - return nil, err + return nil, false, err + } + + if limit < 0 || limit > DiffMaxLimit { + limit = DiffMaxLimit } - differences, err := c.db.Transact(func(tx db.Tx) (interface{}, error) { + res, err := c.db.Transact(func(tx db.Tx) (interface{}, error) { leftID, err := c.getBranchIDCache(tx, repository, leftBranch) if err != nil { return nil, fmt.Errorf("left branch: %w", err) @@ -29,23 +37,29 @@ func (c *cataloger) Diff(ctx context.Context, repository string, leftBranch stri if err != nil { return nil, fmt.Errorf("right branch: %w", err) } - return c.doDiff(tx, leftID, rightID) + err = c.doDiff(tx, leftID, rightID) + if err != nil { + return nil, err + } + return getDiffDifferences(tx, limit+1, after) }, c.txOpts(ctx)...) if err != nil { - return nil, err + return nil, false, err } - return differences.(Differences), nil + differences := res.(Differences) + hasMore := paginateSlice(&differences, limit) + return differences, hasMore, nil } -func (c *cataloger) doDiff(tx db.Tx, leftID, rightID int64) (Differences, error) { +func (c *cataloger) doDiff(tx db.Tx, leftID, rightID int64) error { relation, err := getBranchesRelationType(tx, leftID, rightID) if err != nil { - return nil, err + return err } return c.doDiffByRelation(tx, relation, leftID, rightID) } -func (c *cataloger) doDiffByRelation(tx db.Tx, relation RelationType, leftID, rightID int64) (Differences, error) { +func (c *cataloger) doDiffByRelation(tx db.Tx, relation RelationType, leftID, rightID int64) error { switch relation { case RelationTypeFromParent: return c.diffFromParent(tx, leftID, rightID) @@ -59,21 +73,37 @@ func (c *cataloger) doDiffByRelation(tx db.Tx, relation RelationType, leftID, ri "left_id": leftID, "right_id": rightID, }).Debug("Diff by relation - unsupported type") - return nil, ErrFeatureNotSupported + return ErrFeatureNotSupported + } +} + +func (c *cataloger) getDiffSummary(tx db.Tx) (map[DifferenceType]int, error) { + var results []struct { + DiffType int `db:"diff_type"` + Count int `db:"count"` + } + err := tx.Select(&results, "SELECT diff_type, count(diff_type) as count FROM "+diffResultsTableName+" GROUP BY diff_type") + if err != nil { + return nil, fmt.Errorf("count diff resutls by type: %w", err) + } + m := make(map[DifferenceType]int, len(results)) + for _, res := range results { + m[DifferenceType(res.DiffType)] = res.Count } + return m, nil } -func (c *cataloger) diffFromParent(tx db.Tx, parentID, childID int64) (Differences, error) { +func (c *cataloger) diffFromParent(tx db.Tx, parentID, childID int64) error { // get the last child commit number of the last parent merge // if there is none - then it is the first merge var maxChildMerge CommitID childLineage, err := getLineage(tx, childID, UncommittedID) if err != nil { - return nil, fmt.Errorf("child lineage failed: %w", err) + return fmt.Errorf("child lineage failed: %w", err) } parentLineage, err := getLineage(tx, parentID, CommittedID) if err != nil { - return nil, fmt.Errorf("parent lineage failed: %w", err) + return fmt.Errorf("parent lineage failed: %w", err) } maxChildQuery, args, err := sq.Select("MAX(commit_id) as max_child_commit"). From("catalog_commits"). @@ -81,34 +111,44 @@ func (c *cataloger) diffFromParent(tx db.Tx, parentID, childID int64) (Differenc PlaceholderFormat(sq.Dollar). 
ToSql() if err != nil { - return nil, fmt.Errorf("get child last commit sql: %w", err) + return fmt.Errorf("get child last commit sql: %w", err) } err = tx.Get(&maxChildMerge, maxChildQuery, args...) if err != nil { - return nil, fmt.Errorf("get child last commit failed: %w", err) + return fmt.Errorf("get child last commit failed: %w", err) } diffFromParentSQL, args, err := sqDiffFromParentV(parentID, childID, maxChildMerge, parentLineage, childLineage). Prefix(`CREATE TEMP TABLE ` + diffResultsTableName + " ON COMMIT DROP AS "). PlaceholderFormat(sq.Dollar). ToSql() if err != nil { - return nil, fmt.Errorf("diff from parent sql: %w", err) + return fmt.Errorf("diff from parent sql: %w", err) } if _, err := tx.Exec(diffFromParentSQL, args...); err != nil { - return nil, fmt.Errorf("select diff from parent: %w", err) + return fmt.Errorf("select diff from parent: %w", err) } - return diffReadDifferences(tx) + return nil } -func diffReadDifferences(tx db.Tx) (Differences, error) { +func getDiffDifferences(tx db.Tx, limit int, after string) (Differences, error) { var result Differences - if err := tx.Select(&result, "SELECT diff_type, path FROM "+diffResultsTableName); err != nil { + query, args, err := psql.Select("diff_type", "path"). + From(diffResultsTableName). + Where(sq.Gt{"path": after}). + OrderBy("path"). + Limit(uint64(limit)). + ToSql() + if err != nil { + return nil, fmt.Errorf("format diff results query: %w", err) + } + err = tx.Select(&result, query, args...) + if err != nil { return nil, fmt.Errorf("select diff results: %w", err) } return result, nil } -func (c *cataloger) diffFromChild(tx db.Tx, childID, parentID int64) (Differences, error) { +func (c *cataloger) diffFromChild(tx db.Tx, childID, parentID int64) error { // read last merge commit numbers from commit table // if it is the first child-to-parent commit, than those commit numbers are calculated as follows: // the child is 0, as any change in the child was never merged to the parent. @@ -127,12 +167,12 @@ func (c *cataloger) diffFromChild(tx db.Tx, childID, parentID int64) (Difference PlaceholderFormat(sq.Dollar). ToSql() if err != nil { - return nil, fmt.Errorf("effective commits sql: %w", err) + return fmt.Errorf("effective commits sql: %w", err) } err = tx.Get(&effectiveCommits, effectiveCommitsQuery, args...) effectiveCommitsNotFound := errors.Is(err, db.ErrNotFound) if err != nil && !effectiveCommitsNotFound { - return nil, fmt.Errorf("select effective commit: %w", err) + return fmt.Errorf("select effective commit: %w", err) } if effectiveCommitsNotFound { effectiveCommits.ChildEffectiveCommit = 1 // we need all commits from the child. so any small number will do @@ -143,21 +183,21 @@ func (c *cataloger) diffFromChild(tx db.Tx, childID, parentID int64) (Difference Limit(1). ToSql() if err != nil { - return nil, fmt.Errorf("parent effective commit sql: %w", err) + return fmt.Errorf("parent effective commit sql: %w", err) } err = tx.Get(&effectiveCommits.ParentEffectiveCommit, parentEffectiveQuery, args...) 
if err != nil { - return nil, fmt.Errorf("select parent effective commit: %w", err) + return fmt.Errorf("select parent effective commit: %w", err) } } parentLineage, err := getLineage(tx, parentID, UncommittedID) if err != nil { - return nil, fmt.Errorf("parent lineage failed: %w", err) + return fmt.Errorf("parent lineage failed: %w", err) } childLineage, err := getLineage(tx, childID, CommittedID) if err != nil { - return nil, fmt.Errorf("child lineage failed: %w", err) + return fmt.Errorf("child lineage failed: %w", err) } childLineageValues := getLineageAsValues(childLineage, childID, MaxCommitID) @@ -167,18 +207,18 @@ func (c *cataloger) diffFromChild(tx db.Tx, childID, parentID int64) (Difference PlaceholderFormat(sq.Dollar). ToSql() if err != nil { - return nil, fmt.Errorf("diff from child sql: %w", err) + return fmt.Errorf("diff from child sql: %w", err) } if _, err := tx.Exec(diffFromChildSQL, args...); err != nil { - return nil, fmt.Errorf("exec diff from child: %w", err) + return fmt.Errorf("exec diff from child: %w", err) } - return diffReadDifferences(tx) + return nil } -func (c *cataloger) diffNonDirect(_ db.Tx, leftID, rightID int64) (Differences, error) { +func (c *cataloger) diffNonDirect(_ db.Tx, leftID, rightID int64) error { c.log.WithFields(logging.Fields{ "left_id": leftID, "right_id": rightID, }).Debug("Diff not direct - feature not supported") - return nil, ErrFeatureNotSupported + return ErrFeatureNotSupported } diff --git a/catalog/cataloger_diff_test.go b/catalog/cataloger_diff_test.go index dd148a21797..139c6acccde 100644 --- a/catalog/cataloger_diff_test.go +++ b/catalog/cataloger_diff_test.go @@ -5,10 +5,92 @@ import ( "strconv" "testing" - "github.com/davecgh/go-spew/spew" + "github.com/go-test/deep" "github.com/treeverse/lakefs/testutil" ) +func TestCataloger_Diff(t *testing.T) { + ctx := context.Background() + c := testCataloger(t) + repository := testCatalogerRepo(t, ctx, c, "repo", "master") + + // create N files and commit + commitChanges := func(n int, msg, branch string) { + for i := 0; i < n; i++ { + testCatalogerCreateEntry(t, ctx, c, repository, branch, "/file"+strconv.Itoa(i), nil, branch) + } + _, err := c.Commit(ctx, repository, branch, msg, "tester", nil) + testutil.MustDo(t, msg, err) + } + commitChanges(10, "Changes on master", "master") + testCatalogerBranch(t, ctx, c, repository, "branch1", "master") + commitChanges(20, "Changes on branch1", "branch1") + // delete some files and commit + for i := 0; i < 5; i++ { + testutil.MustDo(t, "delete file from branch", + c.DeleteEntry(ctx, repository, "branch1", "/file"+strconv.Itoa(i))) + } + _, err := c.Commit(ctx, repository, "branch1", "delete some files", "tester", nil) + testutil.MustDo(t, "delete some files from branch1", err) + + const limit = 3 + var after string + var differences Differences + for { + res, hasMore, err := c.Diff(ctx, repository, "branch1", "master", limit, after) + testutil.MustDo(t, "list diff changes", err) + if len(res) > limit { + t.Fatalf("Diff() result length=%d, expected no more than %d", len(res), limit) + } + differences = append(differences, res...) 
+ if !hasMore { + break + } + after = res[len(res)-1].Path + } + + const expectedDifferencesLen = 20 + if len(differences) != expectedDifferencesLen { + t.Fatalf("Differences len=%d, expected=%d", len(differences), expectedDifferencesLen) + } + for i := 0; i < expectedDifferencesLen; i++ { + // lookup item in diff + name := "/file" + strconv.Itoa(i) + var d *Difference + for diffIdx := range differences { + if differences[diffIdx].Path == name { + d = &differences[diffIdx] + } + } + // verify diff record + if d == nil { + t.Fatalf("Missing diff for path=%s", name) + } + var expectedType DifferenceType + switch { + case i < 5: + expectedType = DifferenceTypeRemoved + case i >= 10: + expectedType = DifferenceTypeAdded + default: + expectedType = DifferenceTypeChanged + } + if d.Type != expectedType { + t.Fatalf("Path '%s' diff type=%d, expected=%d", d.Path, d.Type, expectedType) + } + } + + // check the case of 0 amount + res, hasMore, err := c.Diff(ctx, repository, "branch1", "master", 0, "") + testutil.MustDo(t, "list diff changes with 0 limit", err) + if !hasMore { + t.Error("Diff() limit 0 hasMore should be true") + } + if len(res) != 0 { + t.Errorf("Diff() limit 0 len results is %d, expected none", len(res)) + } +} + func TestCataloger_Diff_FromChildThreeBranches(t *testing.T) { ctx := context.Background() c := testCataloger(t) @@ -55,20 +137,35 @@ func TestCataloger_Diff_FromChildThreeBranches(t *testing.T) { if !IsValidReference(res.Reference) { t.Errorf("Merge reference = %s, expected a valid reference", res.Reference) } - expectedDifferences := Differences{ - Difference{Type: DifferenceTypeChanged, Path: "/file2"}, - Difference{Type: DifferenceTypeAdded, Path: "/file3"}, - Difference{Type: DifferenceTypeAdded, Path: "/file4"}, - Difference{Type: DifferenceTypeAdded, Path: "/file5"}, - Difference{Type: DifferenceTypeAdded, Path: "/file555"}, - Difference{Type: DifferenceTypeAdded, Path: "/file6"}, - Difference{Type: DifferenceTypeAdded, Path: "/file7"}, - Difference{Type: DifferenceTypeAdded, Path: "/file8"}, - Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, + commitLog, err := c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") } - if !res.Differences.Equal(expectedDifferences) { - t.Errorf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeRemoved: 1, + DifferenceTypeChanged: 1, + DifferenceTypeAdded: 7, + }); diff != nil { + t.Fatal("Merge Summary", diff) } + // TODO(barak): enable test after diff between commits is supported + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeChanged, Path: "/file2"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file3"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file4"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file5"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file555"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file6"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file7"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file8"}, + // Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, + //} + //if !differences.Equal(expectedDifferences) { + // 
t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} testVerifyEntries(t, ctx, c, repository, "master:HEAD", []testEntryInfo{ {Path: "/file1", Deleted: true}, diff --git a/catalog/cataloger_diff_uncommitted.go b/catalog/cataloger_diff_uncommitted.go index 662fce70b34..29940cb87a9 100644 --- a/catalog/cataloger_diff_uncommitted.go +++ b/catalog/cataloger_diff_uncommitted.go @@ -9,14 +9,18 @@ import ( "github.com/treeverse/lakefs/db" ) -func (c *cataloger) DiffUncommitted(ctx context.Context, repository, branch string) (Differences, error) { +func (c *cataloger) DiffUncommitted(ctx context.Context, repository, branch string, limit int, after string) (Differences, bool, error) { if err := Validate(ValidateFields{ {Name: "repository", IsValid: ValidateRepositoryName(repository)}, {Name: "branch", IsValid: ValidateBranchName(branch)}, }); err != nil { - return nil, err + return nil, false, err } - differences, err := c.db.Transact(func(tx db.Tx) (interface{}, error) { + + if limit < 0 || limit > DiffMaxLimit { + limit = DiffMaxLimit + } + res, err := c.db.Transact(func(tx db.Tx) (interface{}, error) { branchID, err := c.getBranchIDCache(tx, repository, branch) if err != nil { return nil, err @@ -32,7 +36,12 @@ func (c *cataloger) DiffUncommitted(ctx context.Context, repository, branch stri JoinClause( sqEntriesLineageV(branchID, CommittedID, lineage). Prefix("LEFT JOIN (").Suffix(") AS v ON v.path=e.path")). - Where(sq.Eq{"e.branch_id": branchID, "e.is_committed": false}) + Where(sq.And{ + sq.Eq{"e.branch_id": branchID, "e.is_committed": false}, + sq.Gt{"e.path": after}, + }). + Limit(uint64(limit + 1)). + OrderBy("path") sql, args, err := q.ToSql() if err != nil { return nil, fmt.Errorf("build sql: %w", err) @@ -45,7 +54,9 @@ func (c *cataloger) DiffUncommitted(ctx context.Context, repository, branch stri return result, nil }, c.txOpts(ctx, db.ReadOnly())...) if err != nil { - return nil, err + return nil, false, err } - return differences.(Differences), nil + differences := res.(Differences) + hasMore := paginateSlice(&differences, limit) + return differences, hasMore, nil } diff --git a/catalog/cataloger_diff_uncommitted_test.go b/catalog/cataloger_diff_uncommitted_test.go index 782abc7a5b2..810a30d699a 100644 --- a/catalog/cataloger_diff_uncommitted_test.go +++ b/catalog/cataloger_diff_uncommitted_test.go @@ -5,11 +5,56 @@ import ( "strconv" "testing" - "github.com/davecgh/go-spew/spew" - + "github.com/go-test/deep" "github.com/treeverse/lakefs/testutil" ) +func TestCataloger_DiffUncommitted_Pagination(t *testing.T) { + ctx := context.Background() + c := testCataloger(t) + repository := testCatalogerRepo(t, ctx, c, "repo", "master") + + const numOfFiles = 10 + var expectedDifferences Differences + for i := 0; i < numOfFiles; i++ { + p := "/file" + strconv.Itoa(i) + testCatalogerCreateEntry(t, ctx, c, repository, "master", p, nil, "") + expectedDifferences = append(expectedDifferences, Difference{Type: DifferenceTypeAdded, Path: p}) + } + const changesPerPage = 3 + var differences Differences + var after string + for { + res, hasMore, err := c.DiffUncommitted(ctx, repository, "master", changesPerPage, after) + testutil.MustDo(t, "diff uncommitted changes", err) + if err != nil { + t.Fatalf("DiffUncommitted err=%s, expected none", err) + } + if len(res) > changesPerPage { + t.Fatalf("DiffUncommitted() result length %d, expected equal or less than %d", len(res), changesPerPage) + } + differences = append(differences, res...) 
+ if !hasMore { + break + } + after = res[len(res)-1].Path + } + if diff := deep.Equal(differences, expectedDifferences); diff != nil { + t.Fatal("DiffUncommitted", diff) + } + + // check the case where we ask for 0 amount + res, hasMore, err := c.DiffUncommitted(ctx, repository, "master", 0, "") + testutil.MustDo(t, "diff uncommitted with 0 limit", err) + if !hasMore { + t.Error("DiffUncommitted() has more should be true") + } + if len(res) != 0 { + t.Errorf("DiffUncommitted() has %d items in result when expected none", len(res)) + } + +} + func TestCataloger_DiffUncommitted_Changes(t *testing.T) { ctx := context.Background() c := testCataloger(t) @@ -32,7 +77,7 @@ func TestCataloger_DiffUncommitted_Changes(t *testing.T) { testCatalogerCreateEntry(t, ctx, c, repository, "master", overFilename, nil, "seed1") // verify that diff uncommitted show the above change - differences, err := c.DiffUncommitted(ctx, repository, "master") + differences, _, err := c.DiffUncommitted(ctx, repository, "master", -1, "") if err != nil { t.Fatalf("DiffUncommitted err = %s, expected none", err) } @@ -42,8 +87,8 @@ func TestCataloger_DiffUncommitted_Changes(t *testing.T) { Difference{Type: DifferenceTypeChanged, Path: "/file2"}, Difference{Type: DifferenceTypeAdded, Path: "/file5"}, } - if !changes.Equal(differences) { - t.Fatalf("DiffUncommitted differences = %s, expected = %s", spew.Sdump(differences), spew.Sdump(changes)) + if diff := deep.Equal(differences, changes); diff != nil { + t.Fatal("DiffUncommitted", diff) } } @@ -60,13 +105,15 @@ func TestCataloger_DiffUncommitted_NoChance(t *testing.T) { testutil.MustDo(t, "commit to master", err) // verify that diff uncommitted show the above change - differences, err := c.DiffUncommitted(ctx, repository, "master") + differences, hasMore, err := c.DiffUncommitted(ctx, repository, "master", -1, "") if err != nil { t.Fatalf("DiffUncommitted err = %s, expected none", err) } - changes := Differences{} - if !changes.Equal(differences) { - t.Fatalf("DiffUncommitted differences = %s, expected = %s", spew.Sdump(differences), spew.Sdump(changes)) + if len(differences) != 0 { + t.Fatalf("DiffUncommitted differences len=%d, expected 0", len(differences)) + } + if hasMore { + t.Fatal("DiffUncommitted hadMore is true, expected false") } } diff --git a/catalog/cataloger_list_commits_test.go b/catalog/cataloger_list_commits_test.go index de31c517105..53a12d34c36 100644 --- a/catalog/cataloger_list_commits_test.go +++ b/catalog/cataloger_list_commits_test.go @@ -357,9 +357,23 @@ func TestCataloger_ListCommits_LineageFromChild(t *testing.T) { } merge2, err := c.Merge(ctx, repository, "br_2_1", "master", "tester", "merge br_2_1 to master", nil) testutil.MustDo(t, "merge br_2_1 into master", err) - if merge2.Differences[0].Type != DifferenceTypeChanged || merge2.Differences[0].Path != "master-file" { - t.Error("merge br_2_1 into master with unexpected results", merge2.Differences[0]) - } + commitLog, err := c.GetCommit(ctx, repository, merge2.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(merge2.Summary, map[DifferenceType]int{ + DifferenceTypeChanged: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + // + //if 
differences[0].Type != DifferenceTypeChanged || differences[0].Path != "master-file" { + // t.Error("merge br_2_1 into master with unexpected results", differences[0]) + //} masterList, _, err = c.ListCommits(ctx, repository, "master", "", 100) testutil.MustDo(t, "list master commits", err) diff --git a/catalog/cataloger_merge.go b/catalog/cataloger_merge.go index 459f71843e1..17fb66047b1 100644 --- a/catalog/cataloger_merge.go +++ b/catalog/cataloger_merge.go @@ -11,7 +11,7 @@ import ( "github.com/treeverse/lakefs/logging" ) -func (c *cataloger) Merge(ctx context.Context, repository, leftBranch, rightBranch string, committer string, message string, metadata Metadata) (*MergeResult, error) { +func (c *cataloger) Merge(ctx context.Context, repository, leftBranch, rightBranch, committer, message string, metadata Metadata) (*MergeResult, error) { if err := Validate(ValidateFields{ {Name: "repository", IsValid: ValidateRepositoryName(repository)}, {Name: "leftBranch", IsValid: ValidateBranchName(leftBranch)}, @@ -21,7 +21,7 @@ func (c *cataloger) Merge(ctx context.Context, repository, leftBranch, rightBran return nil, err } - var result *MergeResult + mergeResult := &MergeResult{} _, err := c.db.Transact(func(tx db.Tx) (interface{}, error) { leftID, err := getBranchID(tx, repository, leftBranch, LockTypeUpdate) if err != nil { @@ -36,18 +36,24 @@ func (c *cataloger) Merge(ctx context.Context, repository, leftBranch, rightBran return nil, fmt.Errorf("branch relation: %w", err) } - differences, err := c.doDiffByRelation(tx, relation, leftID, rightID) + err = c.doDiffByRelation(tx, relation, leftID, rightID) if err != nil { return nil, err } - result = &MergeResult{ - Differences: differences, + mergeResult.Summary, err = c.getDiffSummary(tx) + if err != nil { + return nil, err } - diffCounts := result.Differences.CountByType() - if diffCounts[DifferenceTypeConflict] > 0 { + // check for conflicts + if mergeResult.Summary[DifferenceTypeConflict] > 0 { return nil, ErrConflictFound } - if len(diffCounts) == 0 { + // check for changes + var total int + for _, c := range mergeResult.Summary { + total += c + } + if total == 0 { leftCommitAdvanced, err := checkZeroDiffCommit(tx, leftID, rightID) if err != nil { return nil, err @@ -64,10 +70,10 @@ func (c *cataloger) Merge(ctx context.Context, repository, leftBranch, rightBran if err != nil { return nil, err } - result.Reference = MakeReference(rightBranch, commitID) - return nil, err + mergeResult.Reference = MakeReference(rightBranch, commitID) + return nil, nil }, c.txOpts(ctx)...) - return result, err + return mergeResult, err } // checkZeroDiffCommit - Checks if the current commit id of source branch advanced since last merge. 
diff --git a/catalog/cataloger_merge_test.go b/catalog/cataloger_merge_test.go index d82c8eb3a8e..82140672fba 100644 --- a/catalog/cataloger_merge_test.go +++ b/catalog/cataloger_merge_test.go @@ -6,6 +6,8 @@ import ( "strconv" "testing" + "github.com/go-test/deep" + "github.com/davecgh/go-spew/spew" "github.com/treeverse/lakefs/testutil" ) @@ -63,14 +65,29 @@ func TestCataloger_Merge_FromParentNoChangesInChild(t *testing.T) { {Path: delFilename, Deleted: true}, }) - expectedDifferences := Differences{ - Difference{Type: DifferenceTypeChanged, Path: "/file2"}, - Difference{Type: DifferenceTypeAdded, Path: "/file5"}, - Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + commitLog, err := c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeRemoved: 1, + DifferenceTypeChanged: 1, + DifferenceTypeAdded: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeChanged, Path: "/file2"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file5"}, + // Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, + //} + //if !differences.Equal(expectedDifferences) { + // t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} } func TestCataloger_Merge_FromParentConflicts(t *testing.T) { @@ -113,22 +130,25 @@ func TestCataloger_Merge_FromParentConflicts(t *testing.T) { // merge should identify conflicts on pending changes res, err := c.Merge(ctx, repository, "master", "branch1", "tester", "", nil) - // expected to find 2 conflicts on the files we update/created with the same path + // expected to find 2 conflicts on the files we update/created with the same path if !errors.Is(err, ErrConflictFound) { t.Errorf("Merge err = %s, expected conflict with err = %s", err, ErrConflictFound) } - if res == nil { - t.Errorf("Result is nil") - } else if IsValidReference(res.Reference) { - t.Errorf("Merge reference = %s, expected valid reference", res.Reference) + if res.Reference != "" { + t.Errorf("Merge reference = %s, expected to be empty", res.Reference) } expectedDifferences := Differences{ Difference{Type: DifferenceTypeConflict, Path: "/file2"}, Difference{Type: DifferenceTypeConflict, Path: "/file5"}, } - if !res.Differences.Equal(expectedDifferences) { - t.Errorf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) + if res.Summary[DifferenceTypeConflict] != len(expectedDifferences) { + t.Fatalf("Merge summary conflicts=%d, expected %d", res.Summary[DifferenceTypeConflict], len(expectedDifferences)) + } + differences, _, err := c.Diff(ctx, repository, "master", "branch1", -1, "") + testutil.MustDo(t, "diff merge changes", err) + if !differences.Equal(expectedDifferences) { + t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) } } @@ -145,9 +165,17 @@ func 
TestCataloger_Merge_FromParentNoChangesInParent(t *testing.T) { if IsValidReference(res.Reference) { t.Errorf("Merge reference = %s, expected valid reference", res.Reference) } - if len(res.Differences) != 0 { - t.Errorf("Merge differences len=%d, expected 0", len(res.Differences)) - } + // TODO(barak): enable test after diff between commits is supported + //commitLog, err := c.GetCommit(ctx, repository, reference) + //testutil.MustDo(t, "get merge commit reference", err) + //if len(commitLog.Parents) != 2 { + // t.Fatal("merge commit log should have two parents") + //} + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if len(differences) != 0 { + // t.Errorf("Merge differences len=%d, expected 0", len(differences)) + //} } func TestCataloger_Merge_FromParentChangesInBoth(t *testing.T) { @@ -197,14 +225,30 @@ func TestCataloger_Merge_FromParentChangesInBoth(t *testing.T) { if !IsValidReference(res.Reference) { t.Errorf("Merge reference = %s, expected a reference commit number", res.Reference) } - expectedDifferences := Differences{ - Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, - Difference{Type: DifferenceTypeChanged, Path: "/file2"}, - Difference{Type: DifferenceTypeAdded, Path: "/file5"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Errorf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + commitLog, err := c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeRemoved: 1, + DifferenceTypeChanged: 1, + DifferenceTypeAdded: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + + // TODO(barak): enable test after diff between commits is supported + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, + // Difference{Type: DifferenceTypeChanged, Path: "/file2"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file5"}, + //} + //if !differences.Equal(expectedDifferences) { + // t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} testVerifyEntries(t, ctx, c, repository, "branch1", []testEntryInfo{ {Path: newFilename}, @@ -263,14 +307,30 @@ func TestCataloger_Merge_FromParentThreeBranches(t *testing.T) { if !IsValidReference(res.Reference) { t.Errorf("Merge reference = %s, expected a valid reference", res.Reference) } - expectedDifferences := Differences{ - Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, - Difference{Type: DifferenceTypeChanged, Path: "/file2"}, - Difference{Type: DifferenceTypeAdded, Path: "/file555"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Errorf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + commitLog, err := c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeRemoved: 1, + DifferenceTypeChanged: 1, + 
DifferenceTypeAdded: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + + // TODO(barak): enable test after diff between commits is supported + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, + // Difference{Type: DifferenceTypeChanged, Path: "/file2"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file555"}, + //} + //if !differences.Equal(expectedDifferences) { + // t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} testVerifyEntries(t, ctx, c, repository, "branch2", []testEntryInfo{ {Path: newFilename}, @@ -300,7 +360,9 @@ func TestCataloger_Merge_FromChildNoChanges(t *testing.T) { if !errors.Is(err, expectedErr) { t.Fatalf("Merge from branch1 to master err=%s, expected=%s", err, expectedErr) } - if res.Reference != "" { + if res == nil { + t.Fatal("Merge result is nil, expected to have a diff result") + } else if res.Reference != "" { t.Fatalf("Merge reference = %s, expected none", res.Reference) } } @@ -345,21 +407,35 @@ func TestCataloger_Merge_FromChildChangesOnChild(t *testing.T) { if !IsValidReference(res.Reference) { t.Fatalf("Merge reference = %s, expected valid reference", res.Reference) } - testVerifyEntries(t, ctx, c, repository, "master", []testEntryInfo{ {Path: newFilename}, {Path: overFilename, Seed: "seed1"}, {Path: delFilename, Deleted: true}, }) - expectedDifferences := Differences{ - Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, - Difference{Type: DifferenceTypeChanged, Path: "/file2"}, - Difference{Type: DifferenceTypeAdded, Path: "/file5"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + commitLog, err := c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeRemoved: 1, + DifferenceTypeChanged: 1, + DifferenceTypeAdded: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, + // Difference{Type: DifferenceTypeChanged, Path: "/file2"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file5"}, + //} + //if !differences.Equal(expectedDifferences) { + // t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} } func TestCataloger_Merge_FromChildThreeBranches(t *testing.T) { @@ -405,17 +481,32 @@ func TestCataloger_Merge_FromChildThreeBranches(t *testing.T) { if !IsValidReference(res.Reference) { t.Errorf("Merge reference = %s, expected a valid reference", res.Reference) } - expectedDifferences := Differences{ - Difference{Type: DifferenceTypeChanged, Path: "/file2"}, - Difference{Type: DifferenceTypeAdded, Path: "/file555"}, - Difference{Type: DifferenceTypeAdded, Path: "/file6"}, - Difference{Type: DifferenceTypeAdded, Path: "/file7"}, - Difference{Type: 
DifferenceTypeAdded, Path: "/file8"}, - Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Errorf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + commitLog, err := c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeRemoved: 1, + DifferenceTypeChanged: 1, + DifferenceTypeAdded: 4, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeChanged, Path: "/file2"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file555"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file6"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file7"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file8"}, + // Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, + //} + //if !differences.Equal(expectedDifferences) { + // t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} testVerifyEntries(t, ctx, c, repository, "branch1:HEAD", []testEntryInfo{ {Path: "/file1", Deleted: true}, @@ -434,20 +525,30 @@ func TestCataloger_Merge_FromChildThreeBranches(t *testing.T) { if !IsValidReference(res.Reference) { t.Errorf("Merge reference = %s, expected valid reference", res.Reference) } - expectedDifferences = Differences{ - Difference{Type: DifferenceTypeChanged, Path: "/file2"}, - Difference{Type: DifferenceTypeAdded, Path: "/file3"}, - Difference{Type: DifferenceTypeAdded, Path: "/file4"}, - Difference{Type: DifferenceTypeAdded, Path: "/file5"}, - Difference{Type: DifferenceTypeAdded, Path: "/file555"}, - Difference{Type: DifferenceTypeAdded, Path: "/file6"}, - Difference{Type: DifferenceTypeAdded, Path: "/file7"}, - Difference{Type: DifferenceTypeAdded, Path: "/file8"}, - Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Errorf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeRemoved: 1, + DifferenceTypeChanged: 1, + DifferenceTypeAdded: 7, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //differences, _, err := c.Diff(ctx, repository, "master", "tester", -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeChanged, Path: "/file2"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file3"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file4"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file5"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file555"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file6"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file7"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file8"}, + // Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, + //} + //if !differences.Equal(expectedDifferences) 
{ + // t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} testVerifyEntries(t, ctx, c, repository, "master", []testEntryInfo{ {Path: "/file1", Deleted: true}, @@ -481,12 +582,25 @@ func TestCataloger_Merge_FromChildNewDelSameEntry(t *testing.T) { t.Fatalf("Merge reference = %s, expected valid reference", res.Reference) } testVerifyEntries(t, ctx, c, repository, "master", []testEntryInfo{{Path: "/file0"}}) - expectedDifferences := Differences{ - Difference{Type: DifferenceTypeAdded, Path: "/file0"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + commitLog, err := c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeAdded: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeAdded, Path: "/file0"}, + //} + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if !differences.Equal(expectedDifferences) { + // t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} // delete file on branch and commit testutil.MustDo(t, "Delete file0 from branch", @@ -503,12 +617,28 @@ func TestCataloger_Merge_FromChildNewDelSameEntry(t *testing.T) { t.Fatalf("Merge reference = %s, expected valid reference", res.Reference) } testVerifyEntries(t, ctx, c, repository, "master", []testEntryInfo{{Path: "/file0", Deleted: true}}) - expectedDifferences = Differences{ - Difference{Type: DifferenceTypeRemoved, Path: "/file0"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + commitLog, err = c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeRemoved: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //expectedDifferences = Differences{ + // Difference{Type: DifferenceTypeRemoved, Path: "/file0"}, + //} + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if !differences.Equal(expectedDifferences) { + // t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} + //if !differences.Equal(expectedDifferences) { + // t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} } func TestCataloger_Merge_FromChildNewEntrySameEntry(t *testing.T) { @@ -531,12 +661,26 @@ func TestCataloger_Merge_FromChildNewEntrySameEntry(t *testing.T) { t.Fatalf("Merge reference = %s, expected valid reference", res.Reference) } testVerifyEntries(t, ctx, c, repository, "master", []testEntryInfo{{Path: "/file0"}}) - 
expectedDifferences := Differences{ - Difference{Type: DifferenceTypeAdded, Path: "/file0"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + + commitLog, err := c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeAdded: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeAdded, Path: "/file0"}, + //} + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if !differences.Equal(expectedDifferences) { + // t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} // create same file and commit to branch testCatalogerCreateEntry(t, ctx, c, repository, "branch1", "/file0", nil, "") @@ -551,10 +695,21 @@ func TestCataloger_Merge_FromChildNewEntrySameEntry(t *testing.T) { if !IsValidReference(res.Reference) { t.Fatalf("Merge reference = %s, expected valid reference", res.Reference) } - expectedDifferences = Differences{} - if !res.Differences.Equal(expectedDifferences) { - t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + commitLog, err = c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{}); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //expectedDifferences = Differences{} + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if !differences.Equal(expectedDifferences) { + // t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} } func TestCataloger_Merge_FromChildDelModifyGrandparentFiles(t *testing.T) { @@ -592,13 +747,27 @@ func TestCataloger_Merge_FromChildDelModifyGrandparentFiles(t *testing.T) { {Path: "/file0", Deleted: true}, {Path: "/file1", Seed: "seed1"}, }) - expectedDifferences := Differences{ - Difference{Type: DifferenceTypeRemoved, Path: "/file0"}, - Difference{Type: DifferenceTypeChanged, Path: "/file1"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + commitLog, err := c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeRemoved: 1, + DifferenceTypeChanged: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeRemoved, Path: "/file0"}, + // Difference{Type: 
DifferenceTypeChanged, Path: "/file1"}, + //} + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if !differences.Equal(expectedDifferences) { + // t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} } func TestCataloger_Merge_FromChildConflicts(t *testing.T) { @@ -627,15 +796,30 @@ func TestCataloger_Merge_FromChildConflicts(t *testing.T) { if !errors.Is(err, ErrConflictFound) { t.Fatalf("Merge from branch1 to master err=%s, expected conflict", err) } - if res.Reference != "" { + if res == nil { + t.Fatal("Merge result is nil, expected to have value in case of conflict") + } else if res.Reference != "" { t.Fatalf("Merge reference = %s, expected none", res.Reference) } - expectedDifferences := Differences{ - Difference{Type: DifferenceTypeConflict, Path: "/file0"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeConflict: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeConflict, Path: "/file0"}, + //} + //commitLog, err := c.GetCommit(ctx, repository, reference) + //testutil.MustDo(t, "get merge commit reference", err) + //if len(commitLog.Parents) != 2 { + // t.Fatal("merge commit log should have two parents") + //} + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if !differences.Equal(expectedDifferences) { + // t.Fatalf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} } func TestCataloger_Merge_FromParentThreeBranchesExtended1(t *testing.T) { @@ -685,14 +869,29 @@ func TestCataloger_Merge_FromParentThreeBranchesExtended1(t *testing.T) { if !IsValidReference(res.Reference) { t.Errorf("Merge reference = %s, expected a valid reference", res.Reference) } - expectedDifferences := Differences{ - Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, - Difference{Type: DifferenceTypeChanged, Path: "/file2"}, - Difference{Type: DifferenceTypeAdded, Path: "/file555"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Errorf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + commitLog, err := c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeRemoved: 1, + DifferenceTypeChanged: 1, + DifferenceTypeAdded: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //expectedDifferences := Differences{ + // Difference{Type: DifferenceTypeRemoved, Path: "/file1"}, + // Difference{Type: DifferenceTypeChanged, Path: "/file2"}, + // Difference{Type: DifferenceTypeAdded, Path: "/file555"}, + //} + //differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if 
!differences.Equal(expectedDifferences) { + // t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} testVerifyEntries(t, ctx, c, repository, "branch2", []testEntryInfo{ {Path: newFilename}, @@ -731,23 +930,47 @@ func TestCataloger_Merge_FromParentThreeBranchesExtended1(t *testing.T) { _, _ = c.Commit(ctx, repository, "master", "commit file0 creation", "tester", nil) res, err = c.Merge(ctx, repository, "master", "branch1", "tester", "", nil) testutil.MustDo(t, "merge master to branch1", err) - if res == nil { - t.Fatal("No merge results") - } - expectedDifferences = Differences{ - Difference{Type: DifferenceTypeAdded, Path: "/file0"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Errorf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + if res.Reference == "" { + t.Fatal("No merge reference") + } + commitLog, err = c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeAdded: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //expectedDifferences = Differences{ + // Difference{Type: DifferenceTypeAdded, Path: "/file0"}, + //} + //differences, _, err = c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if !differences.Equal(expectedDifferences) { + // t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} res, err = c.Merge(ctx, repository, "branch1", "branch2", "tester", "", nil) testutil.MustDo(t, "merge branch1 to branch2", err) - if res == nil { + if res.Reference == "" { t.Fatal("No merge results") } - if len(res.Differences) != 0 { - t.Errorf("unexpected Merge differences = %s", spew.Sdump(res.Differences)) + commitLog, err = c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{}); diff != nil { + t.Fatal("Merge Summary", diff) } + // TODO(barak): enable test after diff between commits is supported + //differences, _, err = c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if len(differences) != 0 { + // t.Errorf("unexpected Merge differences = %s", spew.Sdump(differences)) + //} // deletion in master will force physically delete in grandchild testutil.MustDo(t, "delete committed file on master", @@ -756,22 +979,30 @@ func TestCataloger_Merge_FromParentThreeBranchesExtended1(t *testing.T) { testutil.MustDo(t, "commit file0 delete", err) res, err = c.Merge(ctx, repository, "master", "branch1", "tester", "bubling /file0 deletion up", nil) testutil.MustDo(t, "merge master to branch1", err) - if res == nil { - t.Fatal("No merge results") + if res.Reference == "" { + t.Fatal("No merge reference") } res, err = c.Merge(ctx, repository, "branch1", "branch2", "tester", "forcing file0 on branch2 to delete", nil) testutil.MustDo(t, "merge master to branch1", err) - if res == nil { - t.Fatal("No merge results") + if res.Reference == "" { + t.Fatal("No merge reference") } - 
expectedDifferences = Differences{ - Difference{Type: DifferenceTypeRemoved, Path: "/file0"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Errorf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) + commitLog, err = c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") } + // TODO(barak): enable test after diff between commits is supported + //expectedDifferences = Differences{ + // Difference{Type: DifferenceTypeRemoved, Path: "/file0"}, + //} + //differences, _, err = c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if !differences.Equal(expectedDifferences) { + // t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} //identical entries created in child and grandparent do not create conflict - even when grandparent is uncommitted _, err = c.Merge(ctx, repository, "branch2", "branch1", "tester", "empty updates", nil) @@ -802,27 +1033,50 @@ func TestCataloger_Merge_FromParentThreeBranchesExtended1(t *testing.T) { res, err = c.Merge(ctx, repository, "branch1", "branch2", "tester", "delete /file111 up", nil) testutil.MustDo(t, "merge branch1 to branch2", err) - if res == nil { + if res.Reference == "" { t.Fatal("No merge results") } - expectedDifferences = Differences{ - Difference{Type: DifferenceTypeRemoved, Path: "/file111"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Errorf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + commitLog, err = c.GetCommit(ctx, repository, res.Reference) + testutil.MustDo(t, "get merge commit reference", err) + if len(commitLog.Parents) != 2 { + t.Fatal("merge commit log should have two parents") + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeRemoved: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //expectedDifferences = Differences{ + // Difference{Type: DifferenceTypeRemoved, Path: "/file111"}, + //} + //differences, _, err = c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if !differences.Equal(expectedDifferences) { + // t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} res, err = c.Merge(ctx, repository, "branch1", "master", "tester", "try delete /file111 . 
get conflict", nil) if !errors.Is(err, ErrConflictFound) { t.Fatalf("Expected to get conflict error, got err=%+v", err) } if res == nil { - t.Fatal("No merge results") - } - expectedDifferences = Differences{ - Difference{Type: DifferenceTypeConflict, Path: "/file111"}, - } - if !res.Differences.Equal(expectedDifferences) { - t.Errorf("Merge differences = %s, expected %s", spew.Sdump(res.Differences), spew.Sdump(expectedDifferences)) - } + t.Fatal("Expected merge result, got none") + } else if res.Reference != "" { + t.Fatalf("Expected empty reference, got %s", res.Reference) + } + if diff := deep.Equal(res.Summary, map[DifferenceType]int{ + DifferenceTypeConflict: 1, + }); diff != nil { + t.Fatal("Merge Summary", diff) + } + // TODO(barak): enable test after diff between commits is supported + //expectedDifferences = Differences{ + // Difference{Type: DifferenceTypeConflict, Path: "/file111"}, + //} + //differences, _, err = c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "") + //testutil.MustDo(t, "diff merge changes", err) + //if !differences.Equal(expectedDifferences) { + // t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences)) + //} } diff --git a/catalog/db.go b/catalog/db.go index d6e06de6853..636c8c20e89 100644 --- a/catalog/db.go +++ b/catalog/db.go @@ -107,7 +107,7 @@ func getBranchesRelationType(tx db.Tx, sourceBranchID, destinationBranchID int64 // paginateSlice take slice address, resize and return 'has more' when needed func paginateSlice(s interface{}, limit int) bool { - if limit <= 0 { + if limit < 0 { return false } v := reflect.ValueOf(s) diff --git a/catalog/db_test.go b/catalog/db_test.go index a83225d8f3a..d7de60dcb47 100644 --- a/catalog/db_test.go +++ b/catalog/db_test.go @@ -60,8 +60,8 @@ func Test_paginateSlice(t *testing.T) { s: []string{"one", "two", "three"}, limit: 0, }, - want: []string{"one", "two", "three"}, - wantHasMore: false, + want: []string{}, + wantHasMore: true, }, } for _, tt := range tests { diff --git a/catalog/model.go b/catalog/model.go index 15550143e4a..7e4dd75f5d9 100644 --- a/catalog/model.go +++ b/catalog/model.go @@ -35,6 +35,11 @@ type CommitLog struct { Parents []string } +type MergeResult struct { + Summary map[DifferenceType]int + Reference string +} + type commitLogRaw struct { BranchName string `db:"branch_name"` CommitID CommitID `db:"commit_id"` diff --git a/cmd/lakectl/cmd/merge.go b/cmd/lakectl/cmd/merge.go index 1cc658195b3..d0335e6df91 100644 --- a/cmd/lakectl/cmd/merge.go +++ b/cmd/lakectl/cmd/merge.go @@ -4,13 +4,10 @@ import ( "context" "errors" "fmt" - "os" "github.com/treeverse/lakefs/catalog" - "github.com/jedib0t/go-pretty/text" "github.com/spf13/cobra" - "github.com/treeverse/lakefs/api/gen/models" "github.com/treeverse/lakefs/uri" ) @@ -43,57 +40,16 @@ var mergeCmd = &cobra.Command{ result, err := client.Merge(context.Background(), leftRefURI.Repository, leftRefURI.Ref, rightRefURI.Ref) if errors.Is(err, catalog.ErrConflictFound) { - _, _ = os.Stdout.WriteString("Conflicts:\n") - for _, line := range result { - if line.Type == models.DiffTypeConflict { - FmtMerge(line) - } - } + _, _ = fmt.Printf("Conflicts: %d\n", result.Summary.Conflict) return } if err != nil { DieErr(err) } - var added, changed, removed int - for _, r := range result { - switch r.Type { - case models.DiffTypeAdded: - added++ - case models.DiffTypeChanged: - changed++ - case models.DiffTypeRemoved: - removed++ - } - } - _, _ = os.Stdout.WriteString(fmt.Sprintf("new: %d 
modified: %d removed: %d\n", added, changed, removed)) + _, _ = fmt.Printf("new: %d modified: %d removed: %d\n", result.Summary.Added, result.Summary.Changed, result.Summary.Removed) }, } -func FmtMerge(diff *models.MergeResult) { - var color text.Color - var action string - - switch diff.Type { - case models.DiffTypeAdded: - color = text.FgGreen - action = "+ added" - case models.DiffTypeRemoved: - color = text.FgRed - action = "- removed" - case models.DiffTypeChanged: - color = text.FgYellow - action = "~ modified" - case models.DiffTypeConflict: - color = text.FgHiYellow - action = "* conflict" - default: - color = text.FgHiRed - action = ". other" - } - - _, _ = os.Stdout.WriteString(color.Sprintf(" %s %s\n", action, diff.Path)) -} - //nolint:gochecknoinits func init() { rootCmd.AddCommand(mergeCmd) diff --git a/docs/assets/js/swagger.yml b/docs/assets/js/swagger.yml index f3f7eebfee4..6c0066b9909 100644 --- a/docs/assets/js/swagger.yml +++ b/docs/assets/js/swagger.yml @@ -69,14 +69,19 @@ definitions: merge_result: type: object properties: - type: - type: string - enum: [added, removed, changed, conflict] - path: - type: string - path_type: + summary: + type: object + properties: + added: + type: integer + removed: + type: integer + changed: + type: integer + conflict: + type: integer + reference: type: string - enum: [common_prefix, object] repository_creation: type: object @@ -1535,12 +1540,7 @@ paths: 200: description: merge completed schema: - type: object - properties: - results: - type: array - items: - $ref: "#/definitions/merge_result" + $ref: "#/definitions/merge_result" 401: description: Unauthorized schema: @@ -1552,12 +1552,7 @@ paths: 409: description: conflict schema: - type: object - properties: - results: - type: array - items: - $ref: "#/definitions/merge_result" + $ref: "#/definitions/merge_result" default: description: generic error response schema: @@ -1573,6 +1568,12 @@ paths: name: branch required: true type: string + - in: query + name: after + type: string + - in: query + name: amount + type: integer get: tags: - branches @@ -1584,6 +1585,8 @@ paths: schema: type: object properties: + pagination: + $ref: "#/definitions/pagination" results: type: array items: @@ -1618,6 +1621,12 @@ paths: required: true type: string description: a reference (could be either a branch or a commit ID) to compare against + - in: query + name: after + type: string + - in: query + name: amount + type: integer get: tags: - refs @@ -1629,6 +1638,8 @@ paths: schema: type: object properties: + pagination: + $ref: "#/definitions/pagination" results: type: array items: @@ -2008,4 +2019,4 @@ paths: description: check that the API server is up and running responses: 204: - description: NoContent \ No newline at end of file + description: NoContent diff --git a/nessie/sanity_api_test.go b/nessie/sanity_api_test.go index 1690549a474..3efcd752529 100644 --- a/nessie/sanity_api_test.go +++ b/nessie/sanity_api_test.go @@ -121,7 +121,8 @@ func TestSanityAPI(t *testing.T) { diffResp, err = client.Refs.DiffRefs(refs.NewDiffRefsParamsWithContext(ctx). WithRepository(repo). WithLeftRef("branch1"). - WithRightRef(masterBranch), nil) + WithRightRef(masterBranch). + WithAmount(swag.Int64(-1)), nil) require.NoError(t, err, "diff between branch1 and master") require.ElementsMatch(t, diffResp.Payload.Results, []*models.Diff{ {Path: "file0", PathType: "object", Type: "changed"}, @@ -135,11 +136,8 @@ func TestSanityAPI(t *testing.T) { WithSourceRef("branch1"). 
WithDestinationRef(masterBranch), nil) require.NoError(t, err, "merge branch1 to master") - require.ElementsMatch(t, mergeResp.Payload.Results, []*models.MergeResult{ - {Path: "file0", PathType: "object", Type: "changed"}, - {Path: "file1", PathType: "object", Type: "removed"}, - {Path: "fileX", PathType: "object", Type: "added"}, - }) + require.NotEmpty(t, mergeResp.Payload.Reference, "merge should return a commit reference") + require.Equal(t, mergeResp.Payload.Summary, &models.MergeResultSummary{Added: 1, Changed: 1, Conflict: 0, Removed: 1}, "merge summary") log.Debug("branch1 - diff after merge") diffResp, err = client.Refs.DiffRefs(refs.NewDiffRefsParamsWithContext(ctx). diff --git a/swagger.yml b/swagger.yml index 484c1d40c1d..4517c99bca4 100644 --- a/swagger.yml +++ b/swagger.yml @@ -69,14 +69,19 @@ definitions: merge_result: type: object properties: - type: - type: string - enum: [added, removed, changed, conflict] - path: - type: string - path_type: + summary: + type: object + properties: + added: + type: integer + removed: + type: integer + changed: + type: integer + conflict: + type: integer + reference: type: string - enum: [common_prefix, object] repository_creation: type: object @@ -1536,12 +1541,7 @@ paths: 200: description: merge completed schema: - type: object - properties: - results: - type: array - items: - $ref: "#/definitions/merge_result" + $ref: "#/definitions/merge_result" 401: description: Unauthorized schema: @@ -1553,12 +1553,7 @@ paths: 409: description: conflict schema: - type: object - properties: - results: - type: array - items: - $ref: "#/definitions/merge_result" + $ref: "#/definitions/merge_result" default: description: generic error response schema: @@ -1574,6 +1569,12 @@ paths: name: branch required: true type: string + - in: query + name: after + type: string + - in: query + name: amount + type: integer get: tags: - branches @@ -1585,6 +1586,8 @@ paths: schema: type: object properties: + pagination: + $ref: "#/definitions/pagination" results: type: array items: @@ -1619,6 +1622,12 @@ paths: required: true type: string description: a reference (could be either a branch or a commit ID) to compare against + - in: query + name: after + type: string + - in: query + name: amount + type: integer get: tags: - refs @@ -1630,6 +1639,8 @@ paths: schema: type: object properties: + pagination: + $ref: "#/definitions/pagination" results: type: array items: @@ -2009,4 +2020,4 @@ paths: description: check that the API server is up and running responses: 204: - description: NoContent \ No newline at end of file + description: NoContent diff --git a/webui/src/actions/api.js b/webui/src/actions/api.js index 0aa3366a339..40f207d7787 100644 --- a/webui/src/actions/api.js +++ b/webui/src/actions/api.js @@ -414,7 +414,7 @@ class Branches { } } - async list(repoId, after, amount) { + async list(repoId, after, amount = DEFAULT_LISTING_AMOUNT) { const query = qs({after, amount}); const response = await apiRequest(`/repositories/${repoId}/branches?${query}`); if (response.status !== 200) { @@ -424,7 +424,7 @@ class Branches { } - async filter(repoId, from, amount) { + async filter(repoId, from, amount = DEFAULT_LISTING_AMOUNT) { if (!from) { return this.list(repoId, from, amount); } @@ -456,7 +456,7 @@ class Branches { class Objects { - async list(repoId, ref, tree, after = "", amount = 300, readUncommitted = true) { + async list(repoId, ref, tree, after = "", amount = DEFAULT_LISTING_AMOUNT, readUncommitted = true) { const query = qs({prefix:tree, amount, after, 
readUncommitted}); const response = await apiRequest(`/repositories/${repoId}/refs/${ref}/objects/ls?${query}`); if (response.status !== 200) { @@ -502,7 +502,7 @@ class Objects { } class Commits { - async log(repoId, branchId, after, amount) { + async log(repoId, branchId, after, amount = DEFAULT_LISTING_AMOUNT) { const query = qs({after, amount}); const response = await apiRequest(`/repositories/${repoId}/branches/${branchId}/commits?${query}`); if (response.status !== 200) { @@ -524,12 +524,13 @@ class Commits { } class Refs { - async diff(repoId, leftRef, rightRef) { + async diff(repoId, leftRef, rightRef, after, amount = DEFAULT_LISTING_AMOUNT) { + const query = qs({after, amount}); let response; if (leftRef === rightRef) { - response = await apiRequest(`/repositories/${repoId}/branches/${leftRef}/diff`); + response = await apiRequest(`/repositories/${repoId}/branches/${leftRef}/diff?${query}`); } else { - response = await apiRequest(`/repositories/${repoId}/refs/${leftRef}/diff/${rightRef}`); + response = await apiRequest(`/repositories/${repoId}/refs/${leftRef}/diff/${rightRef}?${query}`); } if (response.status !== 200) { throw new Error(await extractError(response)); diff --git a/webui/src/actions/refs.js b/webui/src/actions/refs.js index b4955891e94..7b69a8b9ec5 100644 --- a/webui/src/actions/refs.js +++ b/webui/src/actions/refs.js @@ -1,13 +1,20 @@ import * as api from "./api"; import {AsyncActionType} from "./request"; -export const - DIFF_REFS = new AsyncActionType('DIFF_REFS'), - MERGE_REFS = new AsyncActionType('MERGE_REFS'); +export const PAGINATION_AMOUNT = 300; +export const DIFF_REFS = new AsyncActionType('DIFF_REFS'); +export const DIFF_REFS_PAGINATE = new AsyncActionType('DIFF_REFS_PAGINATE'); +export const MERGE_REFS = new AsyncActionType('MERGE_REFS'); -export const diff = (repoId, leftRef, rightRef) => { +export const diff = (repoId, leftRef, rightRef, amount = PAGINATION_AMOUNT) => { return DIFF_REFS.execute(async () => { - return await api.refs.diff(repoId, leftRef, rightRef); + return await api.refs.diff(repoId, leftRef, rightRef, '', amount); + }); +}; + +export const diffPaginate = (repoId, leftRef, rightRef, after, amount = PAGINATION_AMOUNT) => { + return DIFF_REFS_PAGINATE.execute(async () => { + return await api.refs.diff(repoId, leftRef, rightRef, after, amount); }); }; diff --git a/webui/src/components/Changes.js b/webui/src/components/Changes.js new file mode 100644 index 00000000000..6024fe48380 --- /dev/null +++ b/webui/src/components/Changes.js @@ -0,0 +1,164 @@ +import React from "react"; +import Alert from "react-bootstrap/Alert"; +import Table from "react-bootstrap/Table"; +import { + HistoryIcon, + PencilIcon, + PlusIcon, + CircleSlashIcon, + TrashcanIcon +} from "@primer/octicons-react"; +import Button from "react-bootstrap/Button"; +import OverlayTrigger from "react-bootstrap/OverlayTrigger"; +import Tooltip from "react-bootstrap/Tooltip"; +import {connect} from "react-redux"; +import {listBranches} from "../actions/branches"; +import Card from "react-bootstrap/Card"; +import {resetRevertBranch, revertBranch} from "../actions/branches"; + +const ChangeRowActions = connect( + ({ branches }) => ({ revert: branches.revert }), + ({ revertBranch, resetRevertBranch }) +)(({repo, refId, entry, revertBranch, revert}) => { + return ( + revert change)}> + + + ); +}); + +const ChangeEntryRow = ({repo, refId, entry, showActions}) => { + let rowClass = 'tree-row '; + switch (entry.type) { + case 'changed': + rowClass += 'diff-changed'; + break; + case 
'added': + rowClass += 'diff-added'; + break; + case 'removed': + rowClass += 'diff-removed'; + break; + case 'conflict': + rowClass += 'diff-conflict'; + break; + default: + break; + } + + const pathText = entry.path; + + let diffIndicator; + switch (entry.type) { + case 'removed': + diffIndicator = ( + removed)}> + + + + + ); + break; + case 'added': + diffIndicator = ( + added)}> + + + + + ); + break; + case 'changed': + diffIndicator = ( + changed)}> + + + + + ); + break; + case 'conflict': + diffIndicator = ( + conflict)}> + + + + + ); + break; + default: + break; + } + + let entryActions; + if (showActions && entry.path_type === 'object') { + entryActions = ; + } + + return ( + <> + + + {diffIndicator} + + + {pathText} + + + {entryActions} + + + + ); +}; + +const Changes = ({list, repo, refId, showActions}) => { + const results = list.payload ? list.payload.results : []; + let body; + if (list.loading) { + body = (Loading...); + } else if (list.error) { + body = {list.error} + } else if (results.length === 0) { + body = (No changes); + } else { + body = ( + + + {results.map(entry => ( + + ))} + +
+ ); + } + return ( +
+ + {body} + +
+ ); +}; + +export default connect( + ({branches}) => ({ + listBranchesState: branches.list, + }), + ({listBranches}) +)(Changes); diff --git a/webui/src/components/ChangesPage.js b/webui/src/components/ChangesPage.js new file mode 100644 index 00000000000..d039bd4ff4c --- /dev/null +++ b/webui/src/components/ChangesPage.js @@ -0,0 +1,242 @@ +import React, {useCallback, useEffect, useRef, useState} from "react"; +import {useLocation, useHistory} from "react-router-dom"; +import {HistoryIcon, GitCommitIcon, PlusIcon, SyncIcon, XIcon} from "@primer/octicons-react"; +import RefDropdown from "./RefDropdown"; +import Changes from "./Changes"; +import {connect} from "react-redux"; +import {diff, diffPaginate, PAGINATION_AMOUNT} from "../actions/refs"; +import {Button, ButtonToolbar, Col, Form, Modal, OverlayTrigger, Row, Tooltip} from "react-bootstrap"; +import {resetRevertBranch, revertBranch} from "../actions/branches"; +import ConfirmationModal from "./ConfirmationModal"; +import {doCommit, resetCommit} from "../actions/commits"; +import Alert from "react-bootstrap/Alert"; + +const CommitButton = connect( + ({ commits }) => ({ commitState: commits.commit }), + ({ doCommit, resetCommit }) +)(({ repo, refId, commitState, doCommit, resetCommit, disabled }) => { + + const textRef = useRef(null); + + const [show, setShow] = useState(false); + const [metadataFields, setMetadataFields] = useState([]); + + const commitDisabled = commitState.inProgress; + + const onHide = () => { + if (commitDisabled) return; + setShow(false); + setMetadataFields([]); + }; + + useEffect(() => { + if (commitState.done) { + setShow(false); + setMetadataFields([]); + resetCommit(); + } + }, [resetCommit, commitState.done]); + + const onSubmit = () => { + if (commitDisabled) return; + const message = textRef.current.value; + const metadata = {}; + metadataFields.forEach(pair => { + if (pair.key.length > 0) + metadata[pair.key] = pair.value; + }); + doCommit(repo.id, refId.id, message, metadata); + }; + + if (!refId || refId.type !== 'branch') { + return ; + } + + return ( + <> + + + Commit Changes + + +
{ + onSubmit(); + e.preventDefault(); + }}> + + + + + {metadataFields.map((f, i) => { + return ( + + + + { + metadataFields[i].key = e.currentTarget.value; + setMetadataFields(metadataFields); + }}/> + + + { + metadataFields[i].value = e.currentTarget.value; + setMetadataFields(metadataFields); + }}/> + + + + + + + + + ) + })} + + +
+ {(!!commitState.error) ? ({commitState.error}) : ()} +
+ + + + +
+ + + ); +}); + +const RevertButton = connect( + ({ branches }) => ({ status: branches.revert }), + ({ revertBranch, resetRevertBranch }) +)(({ repo, refId, status, revertBranch, resetRevertBranch, disabled }) => { + if (!refId || refId.type !== 'branch') { + return null; + } + const [show, setShow] = useState(false); + const submitDisabled = status.inProgress; + + const onHide = () => { + if (submitDisabled) return; + setShow(false); + }; + + useEffect(() => { + if (status.error) { + window.alert(status.error); + resetRevertBranch(); + } else if (status.done) { + setShow(false); + resetRevertBranch(); + } + }, [status, resetRevertBranch]); + + const onSubmit = () => { + if (submitDisabled) return; + revertBranch(repo.id, refId.id, {type: "reset"}); + setShow(false); + }; + + return ( + <> + + + + ); +}); + +const ChangesPage = ({repo, refId, path, diff, diffPaginate, diffResults, commitState, revertState}) => { + const history = useHistory(); + const location = useLocation(); + + const refreshData = useCallback(() => { + diff(repo.id, refId.id, refId.id); + }, [repo.id, refId.id, diff]); + + useEffect(() => { + refreshData(); + },[repo.id, refId.id, refreshData, commitState.done, revertState.done]); + + const paginator =(!diffResults.loading && !!diffResults.payload && diffResults.payload.pagination && diffResults.payload.pagination.has_more); + const hasNoChanges = !diffResults.payload || !diffResults.payload.results || diffResults.payload.results.length === 0; + return( + <> +
+
+ + { + const params = new URLSearchParams(location.search); + if (ref.type === 'branch') { + params.set('branch', ref.id); + params.delete('commit'); // if we explicitly selected a branch, remove an existing commit if any + } else { + params.set('commit', ref.id); + params.delete('branch'); // if we explicitly selected a commit, remove an existing branch if any + } + history.push({...location, search: params.toString()}) + }}/> + + + + Refresh}> + + + + + + + +
+ + <> + + + {paginator && +

+ +

+ } + +
+ + + ); +}; + + +export default connect( + ({refs, commits, branches}) => ({ + diffResults: refs.diff, + commitState: commits.commit, + revertState: branches.revert, + }), + ({diff, diffPaginate}) +)(ChangesPage); diff --git a/webui/src/components/ComparePage.js b/webui/src/components/ComparePage.js index 6f6ad10b715..ee0666866d7 100644 --- a/webui/src/components/ComparePage.js +++ b/webui/src/components/ComparePage.js @@ -3,21 +3,19 @@ import {useHistory, useLocation} from "react-router-dom"; import {connect} from "react-redux"; import {Alert, ButtonToolbar, Button, OverlayTrigger, Tooltip} from "react-bootstrap"; import {SyncIcon, GitMergeIcon} from "@primer/octicons-react"; -import {PAGINATION_AMOUNT, listTree, listTreePaginate} from "../actions/objects"; -import {diff, resetDiff, merge, resetMerge} from "../actions/refs"; +import {diff, diffPaginate, merge, resetMerge} from "../actions/refs"; import RefDropdown from "./RefDropdown"; -import Tree from "./Tree"; +import Changes from "./Changes"; import ConfirmationModal from "./ConfirmationModal"; - -const readUncommitted = false; +import {PAGINATION_AMOUNT} from "../actions/refs"; const MergeButton = connect( ({ refs }) => ({ mergeState: refs.merge, diffResults: refs.diff, }), - ({ merge, resetMerge, resetDiff }) -)(({ repo, refId, compare, merge, mergeState, resetMerge, resetDiff, diffResults }) => { + ({ merge, resetMerge }) +)(({ repo, refId, compare, merge, mergeState, resetMerge, diffResults }) => { if (!refId || refId.type !== 'branch' || !compare || compare.type !== 'branch') { return null; } @@ -53,10 +51,11 @@ const MergeButton = connect( if (mergeState.error) { window.alert(mergeState.error); resetMerge(); - } else if (mergeState.payload && mergeState.payload.results.length > 0) { - resetDiff(); + // TODO(barak): test if we need to reset and refresh diff after merge?! 
+ // } else if (mergeState.payload && mergeState.payload.results.length > 0) { + // resetDiff(); } - }, [resetMerge, mergeState, resetDiff]); + }, [resetMerge, mergeState]); const onSubmit = () => { if (disabled) return; @@ -142,34 +141,28 @@ const CompareToolbar = ({repo, refId, compare, refresh}) => { ); }; -const ComparePage = ({repo, refId, compareRef, path, list, listTree, listTreePaginate, diff, resetDiff, diffResults, resetMerge, mergeResults }) => { - const history = useHistory(); - const location = useLocation(); - +const ComparePage = ({repo, refId, compareRef, diff, diffPaginate, diffResults, resetMerge, mergeResults }) => { const refreshData = useCallback(() => { - listTree(repo.id, refId.id, path, PAGINATION_AMOUNT, readUncommitted); if (compareRef) { diff(repo.id, refId.id, compareRef.id); - } else { - resetDiff(); } - }, [repo.id, refId.id, path, listTree, diff, resetDiff, compareRef]); + }, [repo.id, refId.id, diff, compareRef]); useEffect(() => { refreshData(); - }, [refreshData, repo.id, refId.id, path, listTree, diff, resetDiff, compareRef]); + }, [refreshData, repo.id, refId.id, diff, diffPaginate, compareRef]); - const paginator =(!list.loading && !!list.payload && list.payload.pagination && list.payload.pagination.has_more); + const paginator =(!diffResults.loading && !!diffResults.payload && diffResults.payload.pagination && diffResults.payload.pagination.has_more); const showMergeCompleted = !!(mergeResults && mergeResults.payload); - const compareWithSelf = (compareRef && refId.type === compareRef.type && refId.id === compareRef.id); - const alertText = list.error || diffResults.error || ''; + const compareWith = !compareRef || (compareRef && refId.type === compareRef.type && refId.id === compareRef.id); + const alertText = diffResults.error || ''; return (
- + There isn’t anything to compare. You’ll need to use two different sources to get a valid comparison. @@ -182,25 +175,19 @@ const ComparePage = ({repo, refId, compareRef, path, list, listTree, listTreePag {alertText} - {!(compareWithSelf || alertText) && + {!(compareWith || alertText) && <> - { - const params = new URLSearchParams(location.search); - params.set('path', path); - history.push({...location, search: params.toString()}); - }} - diffResults={diffResults} - list={list} - path={path}/> + list={diffResults} + /> {paginator &&

@@ -213,10 +200,9 @@ const ComparePage = ({repo, refId, compareRef, path, list, listTree, listTreePag }; export default connect( - ({ objects, refs }) => ({ - list: objects.list, + ({ refs }) => ({ diffResults: refs.diff, mergeResults: refs.merge, }), - ({ listTree, listTreePaginate, diff, resetDiff, resetMerge }) + ({ diff, diffPaginate, resetMerge }) )(ComparePage); diff --git a/webui/src/components/RepositoryExplorerPage.js b/webui/src/components/RepositoryExplorerPage.js index aa1a9b18f10..46d6f993a1c 100644 --- a/webui/src/components/RepositoryExplorerPage.js +++ b/webui/src/components/RepositoryExplorerPage.js @@ -11,9 +11,10 @@ import { } from "react-router-dom"; import Breadcrumb from "react-bootstrap/Breadcrumb"; -import {GitCommitIcon, DatabaseIcon, GitBranchIcon, GitCompareIcon, SettingsIcon} from "@primer/octicons-react"; +import {FileDiffIcon, GitCommitIcon, DatabaseIcon, GitBranchIcon, GitCompareIcon, SettingsIcon} from "@primer/octicons-react"; import TreePage from './TreePage'; +import ChangesPage from './ChangesPage'; import CommitsPage from './CommitsPage'; import {connect} from "react-redux"; import {getRepository} from "../actions/repositories"; @@ -63,6 +64,9 @@ const RepositoryTabs = () => { Objects + + Changes + Commits @@ -185,6 +189,9 @@ const RepositoryExplorerPage = ({ repo, getRepository }) => { + + + diff --git a/webui/src/components/TreePage.js b/webui/src/components/TreePage.js index 8d21fad3c1e..2b28857dc91 100644 --- a/webui/src/components/TreePage.js +++ b/webui/src/components/TreePage.js @@ -1,57 +1,13 @@ import React, {useCallback, useEffect, useRef, useState} from "react"; import {useHistory, useLocation} from "react-router-dom"; import {connect} from "react-redux"; -import {Button, ButtonToolbar, Col, Form, Modal, OverlayTrigger, Row, Tooltip} from "react-bootstrap"; -import {GitCommitIcon, PlusIcon, SyncIcon, XIcon} from "@primer/octicons-react"; +import {Button, ButtonToolbar, Form, Modal, OverlayTrigger, Tooltip} from "react-bootstrap"; +import {GitCommitIcon, SyncIcon} from "@primer/octicons-react"; import {deleteObject, listTree, listTreePaginate, upload, uploadDone} from "../actions/objects"; -import {diff, resetDiff} from "../actions/refs"; import RefDropdown from "./RefDropdown"; import Tree from "./Tree"; -import {doCommit, resetCommit} from "../actions/commits"; -import {listBranches, resetRevertBranch, revertBranch} from "../actions/branches"; +import {listBranches} from "../actions/branches"; import Alert from "react-bootstrap/Alert"; -import ConfirmationModal from "./ConfirmationModal"; - -const RevertButton = connect( - ({ branches }) => ({ status: branches.revert }), - ({ revertBranch, resetRevertBranch }) -)(({ repo, refId, changes, status, revertBranch, resetRevertBranch }) => { - if (!refId || refId.type !== 'branch') { - return null; - } - const [show, setShow] = useState(false); - const disabled = status.inProgress || changes === 0; - - const onHide = () => { - if (disabled) return; - setShow(false); - }; - - useEffect(() => { - if (status.error) { - window.alert(status.error); - resetRevertBranch(); - } else if (status.done) { - setShow(false); - resetRevertBranch(); - } - }, [status, resetRevertBranch]); - - const onSubmit = () => { - if (disabled) return; - revertBranch(repo.id, refId.id, {type: "reset"}); - setShow(false); - }; - - return ( - <> - - - - ); -}); const UploadButton = connect( ({ objects }) => ({ uploadState: objects.upload }), @@ -123,144 +79,19 @@ const UploadButton = connect( ); }); - -const CommitButton = 
connect( - ({ commits }) => ({ commitState: commits.commit }), - ({ doCommit, resetCommit }) -)(({ repo, refId, commitState, doCommit, resetCommit, changes }) => { - - const textRef = useRef(null); - - const [show, setShow] = useState(false); - const [metadataFields, setMetadataFields] = useState([]); - - const disabled = commitState.inProgress; - - const onHide = () => { - if (disabled) return; - setShow(false); - setMetadataFields([]); - }; - - useEffect(() => { - if (commitState.done) { - setShow(false); - setMetadataFields([]); - resetCommit(); - } - }, [resetCommit, commitState.done]); - - const onSubmit = () => { - if (disabled) return; - const message = textRef.current.value; - const metadata = {}; - metadataFields.forEach(pair => { - if (pair.key.length > 0) - metadata[pair.key] = pair.value; - }); - doCommit(repo.id, refId.id, message, metadata); - }; - - if (!refId || refId.type !== 'branch') { - return ; - } - - let commitDisabled = true; - let commitVariant = 'secondary'; - if (changes > 0) { - commitDisabled = false; - commitVariant = 'success'; - } - - return ( - <> - - - Commit Changes - - -

{ - onSubmit(); - e.preventDefault(); - }}> - - - - - {metadataFields.map((f, i) => { - return ( - - - - { - metadataFields[i].key = e.currentTarget.value; - setMetadataFields(metadataFields); - }}/> - - - { - metadataFields[i].value = e.currentTarget.value; - setMetadataFields(metadataFields); - }}/> - - - - - - - - - ) - })} - - -
- {(!!commitState.error) ? ({commitState.error}) : ()} - - - - - - - - - ); -}); - -const TreePage = ({repo, refId, path, list, listTree, listTreePaginate, diff, resetDiff, diffResults, uploadState, deleteObject, deleteState, commitState, revertState, importState, setShowImportModal}) => { +const TreePage = ({repo, refId, path, list, listTree, listTreePaginate, uploadState, deleteObject, deleteState, importState, setShowImportModal}) => { const history = useHistory(); const location = useLocation(); const[showUploadModal, setShowUploadModal] = useState(false) const refreshData = useCallback(() => { listTree(repo.id, refId.id, path); - if (refId.type === 'branch') { - diff(repo.id, refId.id, refId.id); - } else { - resetDiff(); - } - }, [repo.id, refId, path, listTree, diff, resetDiff]); + }, [repo.id, refId, path, listTree]); useEffect(() => { refreshData(); - }, [refreshData, repo.id, refId, path, listTree, diff, resetDiff, uploadState.done, commitState.done, deleteState.done, revertState.done, importState.done]); + }, [refreshData, repo.id, refId, path, listTree, uploadState.done, deleteState.done, importState.done]); const paginator = (!list.loading && !!list.payload && list.payload.pagination && list.payload.pagination.has_more); - const changes = diffResults.payload ? diffResults.payload.results.length : 0; return (
@@ -290,9 +121,7 @@ const TreePage = ({repo, refId, path, list, listTree, listTreePaginate, diff, re - -
@@ -308,7 +137,6 @@ const TreePage = ({repo, refId, path, list, listTree, listTreePaginate, diff, re onDelete={(entry) => { deleteObject(repo.id, refId.id, entry.path); }} - diffResults={diffResults} list={list} path={path} setShowUploadModal={setShowUploadModal} @@ -327,15 +155,12 @@ const TreePage = ({repo, refId, path, list, listTree, listTreePaginate, diff, re }; export default connect( - ({objects, refs, commits, branches}) => ({ + ({objects}) => ({ list: objects.list, - diffResults: refs.diff, uploadState: objects.upload, deleteState: objects.delete, - commitState: commits.commit, - revertState: branches.revert, importState: objects.import, importDryRunState: objects.importDryRun, }), - ({listTree, listTreePaginate, diff, resetDiff, deleteObject, listBranches}) + ({listTree, listTreePaginate, deleteObject, listBranches}) )(TreePage); diff --git a/webui/src/index.css b/webui/src/index.css index 7cbcd398806..c6238f4a104 100644 --- a/webui/src/index.css +++ b/webui/src/index.css @@ -184,9 +184,8 @@ color: #808080; } - .diff-changed { - background-color: #ffeeba; + background-color: #d1ecf1; } .diff-added { @@ -197,6 +196,10 @@ background-color: #ffdce0; } +.diff-conflict { + background-color: #fff3cd; +} + .tree-row-actions { width: 30px; padding:0; @@ -294,4 +297,4 @@ td .form-group { .btn { text-transform: none; -} \ No newline at end of file +} diff --git a/webui/src/store/refs.js b/webui/src/store/refs.js index 7343bf37fa2..a3d60070c68 100644 --- a/webui/src/store/refs.js +++ b/webui/src/store/refs.js @@ -1,5 +1,5 @@ import * as async from "./async"; -import {MERGE_REFS, DIFF_REFS} from "../actions/refs"; +import {MERGE_REFS, DIFF_REFS, DIFF_REFS_PAGINATE} from "../actions/refs"; const initialState = { diff: async.initialState, @@ -13,6 +13,8 @@ export default (state = initialState, action) => { merge: async.reduce(MERGE_REFS, state.merge, action), }; + state.diff = async.reducePaginate(DIFF_REFS_PAGINATE, state.diff, action); + switch (action.type) { default: return state; From 1aff5ce7a2911d2948fdeb54821040e7e177c473 Mon Sep 17 00:00:00 2001 From: arielshaqed Date: Mon, 14 Sep 2020 17:41:24 +0300 Subject: [PATCH 25/39] "make build" so the exact copy of swagger.yml is copied (#611) --- docs/assets/js/swagger.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/assets/js/swagger.yml b/docs/assets/js/swagger.yml index 6c0066b9909..4517c99bca4 100644 --- a/docs/assets/js/swagger.yml +++ b/docs/assets/js/swagger.yml @@ -230,10 +230,11 @@ definitions: setup: type: object properties: - display_name: + username: + description: an identifier for the user (e.g. jane.doe) type: string required: - - display_name + - username credentials: type: object From aedb34235c7001f9d106325373d0a55b41f797db Mon Sep 17 00:00:00 2001 From: "Ariel Shaqed (Scolnicov)" Date: Mon, 14 Sep 2020 13:20:25 +0300 Subject: [PATCH 26/39] Create repository using time on DB Using system time [causes weirdness](https://youtu.be/-w0WPkB3XJ4) when there are multiple lakefs daemons. 
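For readers following along, this change (and the follow-up patch that does the same for branches, commits and merges) moves timestamping from the Go process clock into PostgreSQL. A minimal, self-contained sketch of the pattern is below; the `insertCommit` helper, the `commits` table and its columns are illustrative assumptions for the sketch only and use plain `database/sql` rather than the project's own `db` package — the real catalog queries are in the diffs that follow.

```go
package example

import (
	"database/sql"
	"fmt"
	"time"
)

// insertCommit is an illustrative helper (not part of this patch). It lets
// PostgreSQL stamp the row via transaction_timestamp() and reads the chosen
// value back with RETURNING, instead of passing a time.Time computed on the
// application server.
func insertCommit(tx *sql.Tx, branchID int64, committer, message string) (time.Time, error) {
	var creationDate time.Time
	err := tx.QueryRow(
		`INSERT INTO commits (branch_id, committer, message, creation_date)
		 VALUES ($1, $2, $3, transaction_timestamp())
		 RETURNING creation_date`,
		branchID, committer, message,
	).Scan(&creationDate)
	if err != nil {
		return time.Time{}, fmt.Errorf("insert commit: %w", err)
	}
	return creationDate, nil
}
```

The point is that `transaction_timestamp()` is evaluated by the database at the start of the transaction, so multiple lakefs daemons with skewed clocks still record times taken from a single clock, and `RETURNING` hands that value back to the caller without a second query.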
--- catalog/cataloger_create_repository.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/catalog/cataloger_create_repository.go b/catalog/cataloger_create_repository.go index 91be91133f8..19ebeb4e08c 100644 --- a/catalog/cataloger_create_repository.go +++ b/catalog/cataloger_create_repository.go @@ -37,9 +37,8 @@ func (c *cataloger) CreateRepository(ctx context.Context, repository string, sto } // create repository with ref to branch - creationDate := c.clock.Now() if _, err := tx.Exec(`INSERT INTO catalog_repositories (id,name,storage_namespace,creation_date,default_branch) - VALUES ($1,$2,$3,$4,$5)`, repoID, repository, storageNamespace, creationDate, branchID); err != nil { + VALUES ($1,$2,$3,transaction_timestamp(),$4)`, repoID, repository, storageNamespace, branchID); err != nil { return nil, fmt.Errorf("insert repository: %w", err) } @@ -51,8 +50,8 @@ func (c *cataloger) CreateRepository(ctx context.Context, repository string, sto // create initial commit _, err := tx.Exec(`INSERT INTO catalog_commits (branch_id,commit_id,committer,message,creation_date,previous_commit_id) - VALUES ($1,nextval('catalog_commit_id_seq'),$2,$3,$4,0)`, - branchID, CatalogerCommitter, createRepositoryCommitMessage, creationDate) + VALUES ($1,nextval('catalog_commit_id_seq'),$2,$3,transaction_timestamp(),0)`, + branchID, CatalogerCommitter, createRepositoryCommitMessage) if err != nil { return nil, fmt.Errorf("insert commit: %w", err) } From 84212bee3651530c6216546afff0cd88649fe80e Mon Sep 17 00:00:00 2001 From: "Ariel Shaqed (Scolnicov)" Date: Mon, 14 Sep 2020 14:54:39 +0300 Subject: [PATCH 27/39] Create branches, commits, merges using time on DB --- catalog/cataloger.go | 9 --------- catalog/cataloger_commit.go | 13 ++++++++----- catalog/cataloger_commit_test.go | 11 ++++++----- catalog/cataloger_create_branch.go | 25 ++++++++++++------------- catalog/cataloger_get_commit_test.go | 16 ++++++---------- catalog/cataloger_merge.go | 8 ++++---- 6 files changed, 36 insertions(+), 46 deletions(-) diff --git a/catalog/cataloger.go b/catalog/cataloger.go index e7325b848f2..1da7a8e81de 100644 --- a/catalog/cataloger.go +++ b/catalog/cataloger.go @@ -7,7 +7,6 @@ import ( "sync" "time" - "github.com/benbjohnson/clock" "github.com/treeverse/lakefs/catalog/params" "github.com/treeverse/lakefs/db" "github.com/treeverse/lakefs/logging" @@ -181,7 +180,6 @@ type CacheConfig struct { // cataloger main catalog implementation based on mvcc type cataloger struct { params.Catalog - clock clock.Clock log logging.Logger db db.Database wg sync.WaitGroup @@ -194,12 +192,6 @@ type cataloger struct { type CatalogerOption func(*cataloger) -func WithClock(newClock clock.Clock) CatalogerOption { - return func(c *cataloger) { - c.clock = newClock - } -} - func WithCacheEnabled(b bool) CatalogerOption { return func(c *cataloger) { c.Cache.Enabled = b @@ -247,7 +239,6 @@ func WithParams(p params.Catalog) CatalogerOption { func NewCataloger(db db.Database, options ...CatalogerOption) Cataloger { c := &cataloger{ - clock: clock.New(), log: logging.Default().WithField("service_name", "cataloger"), db: db, dedupCh: make(chan *dedupRequest, dedupChannelSize), diff --git a/catalog/cataloger_commit.go b/catalog/cataloger_commit.go index abf08c85141..b26a3d238b6 100644 --- a/catalog/cataloger_commit.go +++ b/catalog/cataloger_commit.go @@ -3,6 +3,7 @@ package catalog import ( "context" "fmt" + "time" "github.com/jmoiron/sqlx" "github.com/treeverse/lakefs/db" @@ -57,11 +58,13 @@ func (c *cataloger) Commit(ctx 
context.Context, repository, branch string, messa } // insert commit record - creationDate := c.clock.Now() - _, err = tx.Exec(`INSERT INTO catalog_commits (branch_id,commit_id,committer,message,creation_date,metadata,merge_type,previous_commit_id) - VALUES ($1,$2,$3,$4,$5,$6,$7,$8)`, - branchID, commitID, committer, message, creationDate, metadata, RelationTypeNone, lastCommitID) - if err != nil { + var creationDate time.Time + if err = tx.Get(&creationDate, + `INSERT INTO catalog_commits (branch_id,commit_id,committer,message,creation_date,metadata,merge_type,previous_commit_id) + VALUES ($1,$2,$3,$4,transaction_timestamp(),$5,$6,$7) + RETURNING creation_date`, + branchID, commitID, committer, message, metadata, RelationTypeNone, lastCommitID, + ); err != nil { return nil, err } reference := MakeReference(branch, commitID) diff --git a/catalog/cataloger_commit_test.go b/catalog/cataloger_commit_test.go index ac16a095ce9..e0a7d38fecd 100644 --- a/catalog/cataloger_commit_test.go +++ b/catalog/cataloger_commit_test.go @@ -12,16 +12,13 @@ import ( "github.com/davecgh/go-spew/spew" - "github.com/benbjohnson/clock" "github.com/treeverse/lakefs/testutil" ) func TestCataloger_Commit(t *testing.T) { ctx := context.Background() - now := time.Now().Round(time.Minute) - fakeClock := clock.NewMock() - fakeClock.Set(now) - c := testCataloger(t, WithClock(fakeClock)) + now := time.Now().Truncate(time.Minute) + c := testCataloger(t) defer func() { _ = c.Close() }() repository := testCatalogerRepo(t, ctx, c, "repository", "master") meta := Metadata{"key1": "val1", "key2": "val2"} @@ -34,6 +31,7 @@ func TestCataloger_Commit(t *testing.T) { PhysicalAddress: fileAddr, Size: int64(i) + 1, Metadata: meta, + CreationDate: now, }, CreateEntryParams{}); err != nil { t.Fatal("create entry for testing", fileName, err) } @@ -97,6 +95,9 @@ func TestCataloger_Commit(t *testing.T) { t.Errorf("Commit() error = %v, wantErr %v", err, tt.wantErr) return } + if got != nil { + got.CreationDate = got.CreationDate.Truncate(time.Minute) + } if !reflect.DeepEqual(got, tt.want) { t.Errorf("Commit() got = %s, want = %s", spew.Sdump(got), spew.Sdump(tt.want)) } diff --git a/catalog/cataloger_create_branch.go b/catalog/cataloger_create_branch.go index 9aeae7a087e..bf3c238b79a 100644 --- a/catalog/cataloger_create_branch.go +++ b/catalog/cataloger_create_branch.go @@ -3,6 +3,7 @@ package catalog import ( "context" "fmt" + "time" "github.com/treeverse/lakefs/db" ) @@ -51,23 +52,21 @@ func (c *cataloger) CreateBranch(ctx context.Context, repository, branch string, return nil, fmt.Errorf("insert branch: %w", err) } - // create initial commit - creationDate := c.clock.Now() - insertReturns := struct { - CommitID CommitID `db:"commit_id"` - MergeSourceCommit CommitID `db:"merge_source_commit"` + CommitID CommitID `db:"commit_id"` + MergeSourceCommit CommitID `db:"merge_source_commit"` + TransactionTimestamp time.Time `db:"transaction_timestamp"` }{} commitMsg := fmt.Sprintf(createBranchCommitMessageFormat, branch, sourceBranch) err = tx.Get(&insertReturns, `INSERT INTO catalog_commits (branch_id,commit_id,previous_commit_id,committer,message, creation_date,merge_source_branch,merge_type,lineage_commits,merge_source_commit) - VALUES ($1,nextval('catalog_commit_id_seq'),0,$2,$3,$4,$5,'from_parent', - (select (select max(commit_id) from catalog_commits where branch_id=$5)|| - (select distinct on (branch_id) lineage_commits from catalog_commits - where branch_id=$5 and merge_type='from_parent' order by branch_id,commit_id desc)) - ,(select 
max(commit_id) from catalog_commits where branch_id=$5 )) - RETURNING commit_id,merge_source_commit`, - branchID, CatalogerCommitter, commitMsg, creationDate, sourceBranchID) + VALUES ($1,nextval('catalog_commit_id_seq'),0,$2,$3,transaction_timestamp(),$4,'from_parent', + (select (select max(commit_id) from catalog_commits where branch_id=$4) || + (select distinct on (branch_id) lineage_commits from catalog_commits + where branch_id=$4 and merge_type='from_parent' order by branch_id,commit_id desc)) + ,(select max(commit_id) from catalog_commits where branch_id=$4 )) + RETURNING commit_id,merge_source_commit,transaction_timestamp()`, + branchID, CatalogerCommitter, commitMsg, sourceBranchID) if err != nil { return nil, fmt.Errorf("insert commit: %w", err) } @@ -77,7 +76,7 @@ func (c *cataloger) CreateBranch(ctx context.Context, repository, branch string, commitLog := &CommitLog{ Committer: CatalogerCommitter, Message: commitMsg, - CreationDate: creationDate, + CreationDate: insertReturns.TransactionTimestamp, Reference: reference, Parents: []string{parentReference}, } diff --git a/catalog/cataloger_get_commit_test.go b/catalog/cataloger_get_commit_test.go index d102a3a3cf7..075a0158c4c 100644 --- a/catalog/cataloger_get_commit_test.go +++ b/catalog/cataloger_get_commit_test.go @@ -8,18 +8,14 @@ import ( "testing" "time" - "github.com/benbjohnson/clock" - "github.com/davecgh/go-spew/spew" "github.com/treeverse/lakefs/testutil" ) func TestCataloger_GetCommit(t *testing.T) { ctx := context.Background() - now := time.Now().Round(time.Minute) - mockClock := clock.NewMock() - mockClock.Set(now) - c := testCataloger(t, WithClock(mockClock)) + now := time.Now().Truncate(time.Minute) + c := testCataloger(t) defer func() { _ = c.Close() }() // test data @@ -97,6 +93,9 @@ func TestCataloger_GetCommit(t *testing.T) { t.Errorf("GetCommit() error = %s, wantErr %t", err, tt.wantErr) return } + if got != nil { + got.CreationDate = got.CreationDate.Truncate(time.Minute) + } if !reflect.DeepEqual(got, tt.want) { t.Errorf("GetCommit() got = %s, want %s", spew.Sdump(got), spew.Sdump(tt.want)) } @@ -106,10 +105,7 @@ func TestCataloger_GetCommit(t *testing.T) { func TestCataloger_GetMergeCommit(t *testing.T) { ctx := context.Background() - now := time.Now().Round(time.Minute) - mockClock := clock.NewMock() - mockClock.Set(now) - c := testCataloger(t, WithClock(mockClock)) + c := testCataloger(t) defer func() { _ = c.Close() }() repo := testCatalogerRepo(t, ctx, c, "repo", "master") diff --git a/catalog/cataloger_merge.go b/catalog/cataloger_merge.go index 17fb66047b1..8e96ceeb228 100644 --- a/catalog/cataloger_merge.go +++ b/catalog/cataloger_merge.go @@ -168,8 +168,8 @@ func (c *cataloger) mergeFromParent(tx db.Tx, previousMaxCommitID, nextCommitID _, err = tx.Exec(`INSERT INTO catalog_commits (branch_id, commit_id, previous_commit_id,committer, message, creation_date, metadata, merge_type, merge_source_branch, merge_source_commit, lineage_commits) - VALUES ($1,$2,$3,$4,$5,$6,$7,'from_parent',$8,$9,string_to_array($10,',')::bigint[])`, - childID, nextCommitID, previousMaxCommitID, committer, msg, c.clock.Now(), metadata, parentID, parentLastCommitID, childNewLineage) + VALUES ($1,$2,$3,$4,$5,transaction_timestamp(),$6,'from_parent',$7,$8,string_to_array($9,',')::bigint[])`, + childID, nextCommitID, previousMaxCommitID, committer, msg, metadata, parentID, parentLastCommitID, childNewLineage) if err != nil { return err } @@ -209,8 +209,8 @@ func (c *cataloger) mergeFromChild(tx db.Tx, previousMaxCommitID, 
nextCommitID C return err } _, err = tx.Exec(`INSERT INTO catalog_commits (branch_id,commit_id,previous_commit_id,committer,message,creation_date,metadata,merge_type,merge_source_branch,merge_source_commit) - VALUES ($1,$2,$3,$4,$5,$6,$7,'from_child',$8,$9)`, - parentID, nextCommitID, previousMaxCommitID, committer, msg, c.clock.Now(), metadata, childID, childLastCommitID) + VALUES ($1,$2,$3,$4,$5,transaction_timestamp(),$6,'from_child',$7,$8)`, + parentID, nextCommitID, previousMaxCommitID, committer, msg, metadata, childID, childLastCommitID) return err } From b994757e19ae3242acc990d440293274026ccdd9 Mon Sep 17 00:00:00 2001 From: "Ariel Shaqed (Scolnicov)" Date: Mon, 14 Sep 2020 18:06:22 +0300 Subject: [PATCH 28/39] [CR] Compute close time matches Just truncating fails when execution spans a rounding boundary. Meanwhile comparing test time to test DB time is reasonably safe because the DB container is started as part of the test -- no significant time skew is expected. (But yes, a test that loads a database is a component rather than a system test.) --- catalog/cataloger_commit_test.go | 21 +++++++++++++++++---- catalog/cataloger_get_commit_test.go | 13 +++++++++---- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/catalog/cataloger_commit_test.go b/catalog/cataloger_commit_test.go index e0a7d38fecd..7d6ea246ae7 100644 --- a/catalog/cataloger_commit_test.go +++ b/catalog/cataloger_commit_test.go @@ -15,9 +15,16 @@ import ( "github.com/treeverse/lakefs/testutil" ) +func timeDifference(a, b time.Time) time.Duration { + diff := a.Sub(b) + if diff < time.Duration(0) { + return -diff + } + return diff +} + func TestCataloger_Commit(t *testing.T) { ctx := context.Background() - now := time.Now().Truncate(time.Minute) c := testCataloger(t) defer func() { _ = c.Close() }() repository := testCatalogerRepo(t, ctx, c, "repository", "master") @@ -31,7 +38,7 @@ func TestCataloger_Commit(t *testing.T) { PhysicalAddress: fileAddr, Size: int64(i) + 1, Metadata: meta, - CreationDate: now, + CreationDate: time.Now(), }, CreateEntryParams{}); err != nil { t.Fatal("create entry for testing", fileName, err) } @@ -57,7 +64,7 @@ func TestCataloger_Commit(t *testing.T) { Reference: "~KJ8Wd1Rs96Z", Committer: "tester", Message: "Simple commit", - CreationDate: now, + CreationDate: time.Now(), Metadata: meta, Parents: []string{"~KJ8Wd1Rs96Y"}, }, @@ -90,13 +97,19 @@ func TestCataloger_Commit(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + now := time.Now() got, err := c.Commit(ctx, tt.args.repository, tt.args.branch, tt.args.message, tt.args.committer, tt.args.metadata) if (err != nil) != tt.wantErr { t.Errorf("Commit() error = %v, wantErr %v", err, tt.wantErr) return } if got != nil { - got.CreationDate = got.CreationDate.Truncate(time.Minute) + if timeDifference(got.CreationDate, now) > 10*time.Second { + t.Errorf("expected creation time %s, got very different %s", got.CreationDate, now) + } + if tt.want != nil { + got.CreationDate = tt.want.CreationDate + } } if !reflect.DeepEqual(got, tt.want) { t.Errorf("Commit() got = %s, want = %s", spew.Sdump(got), spew.Sdump(tt.want)) diff --git a/catalog/cataloger_get_commit_test.go b/catalog/cataloger_get_commit_test.go index 075a0158c4c..3f728de33c5 100644 --- a/catalog/cataloger_get_commit_test.go +++ b/catalog/cataloger_get_commit_test.go @@ -14,7 +14,6 @@ import ( func TestCataloger_GetCommit(t *testing.T) { ctx := context.Background() - now := time.Now().Truncate(time.Minute) c := testCataloger(t) defer func() { _ 
= c.Close() }() @@ -48,7 +47,7 @@ func TestCataloger_GetCommit(t *testing.T) { Reference: "~KJ8Wd1Rs96Z", Committer: "tester0", Message: "Commit0", - CreationDate: now, + CreationDate: time.Now(), Metadata: Metadata{"k0": "v0"}, Parents: []string{"~KJ8Wd1Rs96Y"}, }, @@ -61,7 +60,7 @@ func TestCataloger_GetCommit(t *testing.T) { Reference: "~KJ8Wd1Rs96a", Committer: "tester1", Message: "Commit1", - CreationDate: now, + CreationDate: time.Now(), Metadata: Metadata{"k1": "v1"}, Parents: []string{"~KJ8Wd1Rs96Z"}, }, @@ -88,13 +87,19 @@ func TestCataloger_GetCommit(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + now := time.Now() got, err := c.GetCommit(ctx, repository, tt.reference) if (err != nil) != tt.wantErr { t.Errorf("GetCommit() error = %s, wantErr %t", err, tt.wantErr) return } if got != nil { - got.CreationDate = got.CreationDate.Truncate(time.Minute) + if timeDifference(got.CreationDate, now) > 10*time.Second { + t.Errorf("expected creation time %s, got very different %s", got.CreationDate, now) + } + if tt.want != nil { + got.CreationDate = tt.want.CreationDate + } } if !reflect.DeepEqual(got, tt.want) { t.Errorf("GetCommit() got = %s, want %s", spew.Sdump(got), spew.Sdump(tt.want)) From aa21c37f7c1d07e6dd17f8c2b3076d0f1c1c1ff2 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Mon, 14 Sep 2020 18:22:03 +0300 Subject: [PATCH 29/39] docker-compose use lakefs stats enabled from env if needed (#612) --- docker-compose.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index cca85a8bad0..998eb16b4c8 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -14,6 +14,7 @@ services: - LAKEFS_GATEWAYS_S3_DOMAIN_NAME=s3.local.lakefs.io:8000 - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_SECRET_KEY=${AWS_SECRET_ACCESS_KEY} + - LAKEFS_STATS_ENABLED - LAKEFS_LOGGING_LEVEL entrypoint: ["/app/wait-for", "postgres:5432", "--", "/app/lakefs", "run"] postgres: From a48d5132b0d8c47e29463a74798a5a332494e8f8 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Tue, 15 Sep 2020 17:16:15 +0300 Subject: [PATCH 30/39] Bugfix/installation id not set and collector posting data is dropped (#616) --- api/api_controller.go | 11 -------- api/handler_test.go | 2 -- auth/metadata.go | 56 ++++++++++++++++++++----------------- cmd/lakefs/cmd/init.go | 2 +- cmd/lakefs/cmd/run.go | 30 +++++++------------- config/config.go | 4 ++- gateway/playback_test.go | 2 -- loadtest/local_load_test.go | 2 -- stats/collector.go | 12 -------- 9 files changed, 44 insertions(+), 77 deletions(-) diff --git a/api/api_controller.go b/api/api_controller.go index 2d7157e6558..96e58336ee2 100644 --- a/api/api_controller.go +++ b/api/api_controller.go @@ -249,17 +249,6 @@ func (c *Controller) SetupLakeFSHandler() setupop.SetupLakeFSHandler { }) } - // write metadata - metadata, err := c.deps.Meta.Write() - if err != nil { - return setupop.NewSetupLakeFSDefault(http.StatusInternalServerError). 
- WithPayload(&models.Error{ - Message: err.Error(), - }) - } - - c.deps.Collector.SetInstallationID(metadata["installation_id"]) - c.deps.Collector.CollectMetadata(metadata) c.deps.Collector.CollectEvent("global", "init") // setup admin user diff --git a/api/handler_test.go b/api/handler_test.go index d37b3dd1382..c441863b7c6 100644 --- a/api/handler_test.go +++ b/api/handler_test.go @@ -71,8 +71,6 @@ func createDefaultAdminUser(authService auth.Service, t *testing.T) *authmodel.C type mockCollector struct{} -func (m *mockCollector) SetInstallationID(_ string) {} - func (m *mockCollector) CollectMetadata(_ map[string]string) {} func (m *mockCollector) CollectEvent(_, _ string) {} diff --git a/auth/metadata.go b/auth/metadata.go index 1caebff4fcf..f659066c0a8 100644 --- a/auth/metadata.go +++ b/auth/metadata.go @@ -1,7 +1,6 @@ package auth import ( - "errors" "runtime" "time" @@ -12,7 +11,6 @@ import ( ) type MetadataManager interface { - InstallationID() (string, error) SetupTimestamp() (time.Time, error) UpdateSetupTimestamp(time.Time) error Write() (map[string]string, error) @@ -35,6 +33,23 @@ func NewDBMetadataManager(version string, database db.Database) *DBMetadataManag } } +func insertOrGetInstallationID(tx db.Tx) (string, error) { + newInstallationID := uuid.New().String() + res, err := tx.Exec(`INSERT INTO auth_installation_metadata (key_name, key_value) + VALUES ($1,$2) + ON CONFLICT DO NOTHING`, + InstallationIDKeyName, newInstallationID) + if err != nil { + return "", err + } + if affected, err := res.RowsAffected(); err != nil { + return "", err + } else if affected == 1 { + return newInstallationID, nil + } + return getInstallationID(tx) +} + func getInstallationID(tx db.Tx) (string, error) { var installationID string err := tx.Get(&installationID, `SELECT key_value FROM auth_installation_metadata WHERE key_name = $1`, @@ -66,16 +81,6 @@ func writeMetadata(tx sqlx.Execer, items map[string]string) error { return nil } -func (d *DBMetadataManager) InstallationID() (string, error) { - installationID, err := d.db.Transact(func(tx db.Tx) (interface{}, error) { - return getInstallationID(tx) - }, db.WithLogger(logging.Dummy()), db.ReadOnly()) - if err != nil { - return "", err - } - return installationID.(string), nil -} - func (d *DBMetadataManager) UpdateSetupTimestamp(ts time.Time) error { _, err := d.db.Transact(func(tx db.Tx) (interface{}, error) { return nil, writeMetadata(tx, map[string]string{ @@ -107,25 +112,24 @@ func (d *DBMetadataManager) Write() (map[string]string, error) { metadata[k] = v } } - - // see if we have existing metadata or we need to generate one _, err = d.db.Transact(func(tx db.Tx) (interface{}, error) { - // get installation ID - if we don't have one we'll generate one - _, err := getInstallationID(tx) - if err != nil && !errors.Is(err, db.ErrNotFound) { + // write metadata + err = writeMetadata(tx, metadata) + if err != nil { return nil, err } - - if err != nil { // i.e. err is db.ErrNotFound - // we don't have an installation ID - let's write one. 
- installationID := uuid.Must(uuid.NewUUID()).String() - metadata["installation_id"] = installationID - metadata["setup_time"] = time.Now().UTC().Format(time.RFC3339) + // write installation id + installationID, err := insertOrGetInstallationID(tx) + if err == nil { + metadata[InstallationIDKeyName] = installationID } - err = writeMetadata(tx, metadata) - return nil, err + // get setup timestamp + setupTS, err := getSetupTimestamp(tx) + if err == nil { + metadata[SetupTimestampKeyName] = setupTS.UTC().Format(time.RFC3339) + } + return nil, nil }, db.WithLogger(logging.Dummy())) - return metadata, err } diff --git a/cmd/lakefs/cmd/init.go b/cmd/lakefs/cmd/init.go index 5379f5ff185..aac1c7c6ded 100644 --- a/cmd/lakefs/cmd/init.go +++ b/cmd/lakefs/cmd/init.go @@ -57,7 +57,7 @@ var initCmd = &cobra.Command{ ctx, cancelFn := context.WithCancel(context.Background()) processID, bufferedCollectorArgs := cfg.GetStatsBufferedCollectorArgs() - stats := stats.NewBufferedCollector(metadata["installation_id"], processID, bufferedCollectorArgs...) + stats := stats.NewBufferedCollector(metadata[auth.InstallationIDKeyName], processID, bufferedCollectorArgs...) go stats.Run(ctx) stats.CollectMetadata(metadata) stats.CollectEvent("global", "init") diff --git a/cmd/lakefs/cmd/run.go b/cmd/lakefs/cmd/run.go index e9e4f411de8..8afa7c37ca1 100644 --- a/cmd/lakefs/cmd/run.go +++ b/cmd/lakefs/cmd/run.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "math/rand" "net/http" "os" "os/signal" @@ -78,12 +77,17 @@ var runCmd = &cobra.Command{ meta := auth.NewDBMetadataManager(config.Version, dbPool) - installationID, err := meta.InstallationID() + processID, bufferedCollectorArgs := cfg.GetStatsBufferedCollectorArgs() + + // collect and write metadata + metadata, err := meta.Write() if err != nil { - installationID = "" // no installation ID is available + logger.WithError(err).Debug("failed to collect account metadata") } - processID, bufferedCollectorArgs := cfg.GetStatsBufferedCollectorArgs() - stats := stats.NewBufferedCollector(installationID, processID, bufferedCollectorArgs...) + + stats := stats.NewBufferedCollector(metadata[auth.InstallationIDKeyName], processID, bufferedCollectorArgs...) 
+ // send metadata + stats.CollectMetadata(metadata) dedupCleaner := dedup.NewCleaner(blockStore, cataloger.DedupReportChannel()) defer func() { @@ -122,22 +126,8 @@ var runCmd = &cobra.Command{ ctx, cancelFn := context.WithCancel(context.Background()) go stats.Run(ctx) - stats.CollectEvent("global", "run") - // stagger a bit and update metadata - go func() { - // avoid a thundering herd in case we have many lakeFS instances starting together - const maxSplay = 10 * time.Second - randSource := rand.New(rand.NewSource(time.Now().UnixNano())) //nolint:gosec - time.Sleep(time.Duration(randSource.Intn(int(maxSplay)))) - - metadata, err := meta.Write() - if err != nil { - logger.WithError(err).Trace("failed to collect account metadata") - return - } - stats.CollectMetadata(metadata) - }() + stats.CollectEvent("global", "run") logging.Default().WithField("listen_address", cfg.GetListenAddress()).Info("starting HTTP server") server := &http.Server{ diff --git a/config/config.go b/config/config.go index b73d020720f..59e0f727d28 100644 --- a/config/config.go +++ b/config/config.go @@ -299,9 +299,11 @@ func (c *Config) GetStatsFlushInterval() time.Duration { } func (c *Config) GetStatsBufferedCollectorArgs() (processID string, opts []stats.BufferedCollectorOpts) { - sender := stats.NewDummySender() + var sender stats.Sender if c.GetStatsEnabled() && Version != UnreleasedVersion { sender = stats.NewHTTPSender(c.GetStatsAddress(), time.Now) + } else { + sender = stats.NewDummySender() } return uuid.Must(uuid.NewUUID()).String(), []stats.BufferedCollectorOpts{ diff --git a/gateway/playback_test.go b/gateway/playback_test.go index 4d4e6c8a0af..7fe82a94d38 100644 --- a/gateway/playback_test.go +++ b/gateway/playback_test.go @@ -93,8 +93,6 @@ func TestMain(m *testing.M) { os.Exit(code) } -func (m *mockCollector) SetInstallationID(installationID string) {} - func (m *mockCollector) CollectMetadata(accountMetadata map[string]string) {} func (m *mockCollector) CollectEvent(class, action string) {} diff --git a/loadtest/local_load_test.go b/loadtest/local_load_test.go index 489f841ba87..4248fc2997d 100644 --- a/loadtest/local_load_test.go +++ b/loadtest/local_load_test.go @@ -45,8 +45,6 @@ func TestMain(m *testing.M) { type mockCollector struct{} -func (m *mockCollector) SetInstallationID(_ string) {} - func (m *mockCollector) CollectMetadata(_ map[string]string) {} func (m *mockCollector) CollectEvent(_, _ string) {} diff --git a/stats/collector.go b/stats/collector.go index b4fb119731f..6197dbee34e 100644 --- a/stats/collector.go +++ b/stats/collector.go @@ -3,7 +3,6 @@ package stats import ( "context" "fmt" - "sync" "time" "github.com/treeverse/lakefs/logging" @@ -16,7 +15,6 @@ const ( ) type Collector interface { - SetInstallationID(installationID string) CollectEvent(class, action string) CollectMetadata(accountMetadata map[string]string) } @@ -79,7 +77,6 @@ type BufferedCollector struct { sendTimeout time.Duration flushTicker FlushTicker done chan bool - mutex *sync.RWMutex installationID string processID string } @@ -125,7 +122,6 @@ func NewBufferedCollector(installationID, processID string, opts ...BufferedColl sendTimeout: DefaultSendTimeout, flushTicker: &TimeTicker{ticker: time.NewTicker(DefaultFlushInterval)}, installationID: installationID, - mutex: &sync.RWMutex{}, processID: processID, } @@ -136,8 +132,6 @@ func NewBufferedCollector(installationID, processID string, opts ...BufferedColl return s } func (s *BufferedCollector) getInstallationID() string { - s.mutex.RLock() - defer 
s.mutex.RUnlock() return s.installationID } @@ -207,12 +201,6 @@ func makeMetrics(counters keyIndex) []Metric { return metrics } -func (s *BufferedCollector) SetInstallationID(installationID string) { - s.mutex.Lock() - defer s.mutex.Unlock() - s.installationID = installationID -} - func (s *BufferedCollector) CollectMetadata(accountMetadata map[string]string) { entries := make([]MetadataEntry, len(accountMetadata)) i := 0 From a1dae12436b4d84a0f78dc4c3481ed7ec238c856 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Tue, 15 Sep 2020 17:22:48 +0300 Subject: [PATCH 31/39] fix lakectl client pagination (#614) --- api/client.go | 26 +++++++++++------- cmd/lakectl/cmd/diff.go | 58 ++++++++++++++++++++++++++++------------- 2 files changed, 57 insertions(+), 27 deletions(-) diff --git a/api/client.go b/api/client.go index aa193f1aef1..27dfbb18fec 100644 --- a/api/client.go +++ b/api/client.go @@ -75,10 +75,10 @@ type RepositoryClient interface { UploadObject(ctx context.Context, repository, branchID, path string, r io.Reader) (*models.ObjectStats, error) DeleteObject(ctx context.Context, repository, branchID, path string) error - DiffRefs(ctx context.Context, repository, leftRef, rightRef string) ([]*models.Diff, error) + DiffRefs(ctx context.Context, repository, leftRef, rightRef string, after string, amount int) ([]*models.Diff, *models.Pagination, error) Merge(ctx context.Context, repository, leftRef, rightRef string) (*models.MergeResult, error) - DiffBranch(ctx context.Context, repository, branch string) ([]*models.Diff, error) + DiffBranch(ctx context.Context, repository, branch string, after string, amount int) ([]*models.Diff, *models.Pagination, error) GetRetentionPolicy(ctx context.Context, repository string) (*models.RetentionPolicyWithCreationDate, error) UpdateRetentionPolicy(ctx context.Context, repository string, policy *models.RetentionPolicy) error @@ -526,17 +526,20 @@ func (c *client) GetCommitLog(ctx context.Context, repository, branchID, after s return resp.GetPayload().Results, resp.GetPayload().Pagination, nil } -func (c *client) DiffRefs(ctx context.Context, repository, leftRef, rightRef string) ([]*models.Diff, error) { +func (c *client) DiffRefs(ctx context.Context, repository, leftRef, rightRef, after string, amount int) ([]*models.Diff, *models.Pagination, error) { diff, err := c.remote.Refs.DiffRefs(&refs.DiffRefsParams{ + After: swag.String(after), + Amount: swag.Int64(int64(amount)), LeftRef: leftRef, - RightRef: rightRef, Repository: repository, + RightRef: rightRef, Context: ctx, }, c.auth) if err != nil { - return nil, err + return nil, nil, err } - return diff.GetPayload().Results, nil + payload := diff.GetPayload() + return payload.Results, payload.Pagination, nil } func (c *client) Merge(ctx context.Context, repository, leftRef, rightRef string) (*models.MergeResult, error) { @@ -557,17 +560,21 @@ func (c *client) Merge(ctx context.Context, repository, leftRef, rightRef string return nil, err } -func (c *client) DiffBranch(ctx context.Context, repoID, branch string) ([]*models.Diff, error) { +func (c *client) DiffBranch(ctx context.Context, repoID, branch string, after string, amount int) ([]*models.Diff, *models.Pagination, error) { diff, err := c.remote.Branches.DiffBranch(&branches.DiffBranchParams{ + After: swag.String(after), + Amount: swag.Int64(int64(amount)), Branch: branch, Repository: repoID, Context: ctx, }, c.auth) if err != nil { - return nil, err + return nil, nil, err } - return diff.GetPayload().Results, nil + payload := diff.GetPayload() + 
return payload.Results, payload.Pagination, nil } + func (c *client) Symlink(ctx context.Context, repoID, branch, path string) (string, error) { resp, err := c.remote.Metadata.CreateSymlink(&metadata.CreateSymlinkParams{ Location: swag.String(path), @@ -580,6 +587,7 @@ func (c *client) Symlink(ctx context.Context, repoID, branch, path string) (stri } return resp.GetPayload(), nil } + func (c *client) GetRetentionPolicy(ctx context.Context, repository string) (*models.RetentionPolicyWithCreationDate, error) { policy, err := c.remote.Retention.GetRetentionPolicy(&retention.GetRetentionPolicyParams{ Repository: repository, diff --git a/cmd/lakectl/cmd/diff.go b/cmd/lakectl/cmd/diff.go index f09c4422e1f..185031d8d42 100644 --- a/cmd/lakectl/cmd/diff.go +++ b/cmd/lakectl/cmd/diff.go @@ -4,8 +4,10 @@ import ( "context" "os" + "github.com/go-openapi/swag" "github.com/jedib0t/go-pretty/text" "github.com/spf13/cobra" + "github.com/treeverse/lakefs/api" "github.com/treeverse/lakefs/api/gen/models" "github.com/treeverse/lakefs/uri" ) @@ -13,6 +15,7 @@ import ( const ( diffCmdMinArgs = 1 diffCmdMaxArgs = 2 + diffPageSize = 100 ) var diffCmd = &cobra.Command{ @@ -26,8 +29,6 @@ var diffCmd = &cobra.Command{ Run: func(cmd *cobra.Command, args []string) { client := getClient() - var diff []*models.Diff - var err error const diffWithOtherArgsCount = 2 if len(args) == diffWithOtherArgsCount { if err := IsRefURI(1)(args); err != nil { @@ -35,31 +36,52 @@ var diffCmd = &cobra.Command{ } leftRefURI := uri.Must(uri.Parse(args[0])) rightRefURI := uri.Must(uri.Parse(args[1])) - if leftRefURI.Repository != rightRefURI.Repository { Die("both references must belong to the same repository", 1) } - - diff, err = client.DiffRefs(context.Background(), leftRefURI.Repository, leftRefURI.Ref, rightRefURI.Ref) - if err != nil { - DieErr(err) - } - for _, line := range diff { - FmtDiff(line, true) - } + printDiffRefs(client, leftRefURI.Repository, leftRefURI.Ref, rightRefURI.Ref) } else { branchURI := uri.Must(uri.Parse(args[0])) - diff, err = client.DiffBranch(context.Background(), branchURI.Repository, branchURI.Ref) - if err != nil { - DieErr(err) - } - for _, line := range diff { - FmtDiff(line, false) - } + printDiffBranch(client, branchURI.Repository, branchURI.Ref) } }, } +func printDiffBranch(client api.Client, repository string, branch string) { + var after string + for { + diff, pagination, err := client.DiffBranch(context.Background(), repository, branch, after, diffPageSize) + if err != nil { + DieErr(err) + } + for _, line := range diff { + FmtDiff(line, false) + } + if !swag.BoolValue(pagination.HasMore) { + break + } + after = pagination.NextOffset + } +} + +func printDiffRefs(client api.Client, repository string, leftRef string, rightRef string) { + var after string + for { + diff, pagination, err := client.DiffRefs(context.Background(), repository, leftRef, rightRef, + after, diffPageSize) + if err != nil { + DieErr(err) + } + for _, line := range diff { + FmtDiff(line, true) + } + if !swag.BoolValue(pagination.HasMore) { + break + } + after = pagination.NextOffset + } +} + func FmtDiff(diff *models.Diff, withDirection bool) { var color text.Color var action string From 080bae035f77519ce8525791e6c2b8f3e58eded0 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Thu, 17 Sep 2020 11:35:55 +0300 Subject: [PATCH 32/39] Fix #619 commit tobstone min commit should point to the new commit --- catalog/cataloger_commit.go | 13 +++++++------ catalog/cataloger_commit_test.go | 32 ++++++++++++++++++++++++++++++++ 2 files 
changed, 39 insertions(+), 6 deletions(-) diff --git a/catalog/cataloger_commit.go b/catalog/cataloger_commit.go index b26a3d238b6..c25774a1145 100644 --- a/catalog/cataloger_commit.go +++ b/catalog/cataloger_commit.go @@ -39,16 +39,17 @@ func (c *cataloger) Commit(ctx context.Context, repository, branch string, messa return nil, fmt.Errorf("delete uncommitted tombstones: %w", err) } - affectedTombstone, err := commitTombstones(tx, branchID, lastCommitID) - if err != nil { - return nil, fmt.Errorf("commit tombstones: %w", err) - } - // uncommitted to committed entries commitID, err := getNextCommitID(tx) if err != nil { return nil, fmt.Errorf("next commit id: %w", err) } + + affectedTombstone, err := commitTombstones(tx, branchID, commitID) + if err != nil { + return nil, fmt.Errorf("commit tombstones: %w", err) + } + affectedNew, err := commitEntries(tx, branchID, commitID) if err != nil { return nil, fmt.Errorf("commit entries: %w", err) @@ -108,7 +109,7 @@ func commitDeleteUncommittedTombstones(tx sqlx.Execer, branchID int64, commitID } func commitTombstones(tx sqlx.Execer, branchID int64, commitID CommitID) (int64, error) { - res, err := tx.Exec(`UPDATE catalog_entries_v SET min_commit = $2, max_commit = $2 WHERE branch_id = $1 AND NOT is_committed AND is_deleted`, + res, err := tx.Exec(`UPDATE catalog_entries_v SET min_commit = $2 WHERE branch_id = $1 AND NOT is_committed AND is_tombstone`, branchID, commitID) if err != nil { return 0, err diff --git a/catalog/cataloger_commit_test.go b/catalog/cataloger_commit_test.go index 7d6ea246ae7..a56052e5721 100644 --- a/catalog/cataloger_commit_test.go +++ b/catalog/cataloger_commit_test.go @@ -254,5 +254,37 @@ func TestCataloger_Commit_Scenario(t *testing.T) { t.Errorf("Delete should left no entries, got %d", len(entries)) } }) +} + +func TestCataloger_CommitTombstoneShouldNotChangeHistory(t *testing.T) { + ctx := context.Background() + c := testCataloger(t) + defer func() { _ = c.Close() }() + repository := testCatalogerRepo(t, ctx, c, "repository", "master") + // create file + testCatalogerCreateEntry(t, ctx, c, repository, "master", "file42", nil, "") + _, err := c.Commit(ctx, repository, "master", "commit new file", "tester", nil) + testutil.MustDo(t, "commit new file", err) + + // create branch + branchCommit, err := c.CreateBranch(ctx, repository, "branch1", "master") + testutil.MustDo(t, "create branch", err) + + // delete file on branch (with commit) - should create tombstone + err = c.DeleteEntry(ctx, repository, "branch1", "file42") + testutil.MustDo(t, "delete entry", err) + + // commit the delete - should create tombstone + _, err = c.Commit(ctx, repository, "branch1", "commit delete file", "tester", nil) + testutil.MustDo(t, "commit delete file", err) + + // verify that the file is deleted + ent, err := c.GetEntry(ctx, repository, branchCommit.Reference, "file42", GetEntryParams{}) + testutil.MustDo(t, "get entry from create branch commit - branch1", err) + + checksumFile42 := testCreateEntryCalcChecksum("file42", "") + if ent.Checksum != checksumFile42 { + t.Fatalf("get entry from branch commit checksum=%s, expected, %s", ent.Checksum, checksumFile42) + } } From aae7cae133277724832d2d5ffbc34d8c6e265192 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Thu, 17 Sep 2020 15:04:27 +0300 Subject: [PATCH 33/39] remove reduntant update --- catalog/cataloger_commit.go | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/catalog/cataloger_commit.go b/catalog/cataloger_commit.go index 
c25774a1145..e96e2176678 100644 --- a/catalog/cataloger_commit.go +++ b/catalog/cataloger_commit.go @@ -45,16 +45,12 @@ func (c *cataloger) Commit(ctx context.Context, repository, branch string, messa return nil, fmt.Errorf("next commit id: %w", err) } - affectedTombstone, err := commitTombstones(tx, branchID, commitID) - if err != nil { - return nil, fmt.Errorf("commit tombstones: %w", err) - } - + // commit entries (include the tombstones) affectedNew, err := commitEntries(tx, branchID, commitID) if err != nil { return nil, fmt.Errorf("commit entries: %w", err) } - if (affectedNew + affectedTombstone + committedAffected) == 0 { + if (affectedNew + committedAffected) == 0 { return nil, ErrNothingToCommit } @@ -108,17 +104,8 @@ func commitDeleteUncommittedTombstones(tx sqlx.Execer, branchID int64, commitID return res.RowsAffected() } -func commitTombstones(tx sqlx.Execer, branchID int64, commitID CommitID) (int64, error) { - res, err := tx.Exec(`UPDATE catalog_entries_v SET min_commit = $2 WHERE branch_id = $1 AND NOT is_committed AND is_tombstone`, - branchID, commitID) - if err != nil { - return 0, err - } - return res.RowsAffected() -} - func commitEntries(tx sqlx.Execer, branchID int64, commitID CommitID) (int64, error) { - res, err := tx.Exec(`UPDATE catalog_entries_v SET min_commit = $2 WHERE branch_id = $1 AND NOT is_committed AND NOT is_deleted`, + res, err := tx.Exec(`UPDATE catalog_entries_v SET min_commit = $2 WHERE branch_id = $1 AND NOT is_committed`, branchID, commitID) if err != nil { return 0, err From e0410fd0bc0737b71ca5f9eba626b9830194c07b Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Mon, 21 Sep 2020 11:34:36 +0300 Subject: [PATCH 34/39] nessie control stats and ignore all dev versions when post stats (#626) --- config/config.go | 2 +- nessie/ops/docker-compose.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/config/config.go b/config/config.go index 59e0f727d28..a67d4048d04 100644 --- a/config/config.go +++ b/config/config.go @@ -300,7 +300,7 @@ func (c *Config) GetStatsFlushInterval() time.Duration { func (c *Config) GetStatsBufferedCollectorArgs() (processID string, opts []stats.BufferedCollectorOpts) { var sender stats.Sender - if c.GetStatsEnabled() && Version != UnreleasedVersion { + if c.GetStatsEnabled() && !strings.HasPrefix(Version, UnreleasedVersion) { sender = stats.NewHTTPSender(c.GetStatsAddress(), time.Now) } else { sender = stats.NewDummySender() diff --git a/nessie/ops/docker-compose.yaml b/nessie/ops/docker-compose.yaml index 83213655225..74935077a69 100644 --- a/nessie/ops/docker-compose.yaml +++ b/nessie/ops/docker-compose.yaml @@ -16,6 +16,7 @@ services: - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_SECRET_KEY=${AWS_SECRET_ACCESS_KEY} - LAKEFS_LOGGING_LEVEL=DEBUG - LAKEFS_BLOCKSTORE_GS_CREDENTIALS_JSON + - LAKEFS_STATS_ENABLED entrypoint: ["/app/wait-for", "postgres:5432", "--", "/app/lakefs", "run"] postgres: image: "postgres:11" From 7ae334ca1350a2e40e7121772d53afb335b74233 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Mon, 21 Sep 2020 15:47:33 +0300 Subject: [PATCH 35/39] hacktoberfest badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1f05a367a5f..4b280e98c7a 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@

+[![Hacktoberfest](https://badgen.net/badge/hacktoberfest/friendly/pink)](CODE_OF_CONDUCT.md) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://raw.githubusercontent.com/treeverse/lakeFS/master/LICENSE) [![Go](https://github.com/treeverse/lakeFS/workflows/Go/badge.svg?branch=master)](https://github.com/treeverse/lakeFS/actions?query=workflow%3AGo+branch%3Amaster++) [![Node](https://github.com/treeverse/lakeFS/workflows/Node/badge.svg?branch=master)](https://github.com/treeverse/lakeFS/actions?query=workflow%3ANode+branch%3Amaster++) From 9cbe657a29614c287f71ab59d891db2ac9efa1cf Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Mon, 21 Sep 2020 15:48:31 +0300 Subject: [PATCH 36/39] reference contributing doc --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4b280e98c7a..2d11bda48ef 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@

-[![Hacktoberfest](https://badgen.net/badge/hacktoberfest/friendly/pink)](CODE_OF_CONDUCT.md) +[![Hacktoberfest](https://badgen.net/badge/hacktoberfest/friendly/pink)](docs/contributing.md) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://raw.githubusercontent.com/treeverse/lakeFS/master/LICENSE) [![Go](https://github.com/treeverse/lakeFS/workflows/Go/badge.svg?branch=master)](https://github.com/treeverse/lakeFS/actions?query=workflow%3AGo+branch%3Amaster++) [![Node](https://github.com/treeverse/lakeFS/workflows/Node/badge.svg?branch=master)](https://github.com/treeverse/lakeFS/actions?query=workflow%3ANode+branch%3Amaster++) From 27e3c96c96a1deb22dafcef64064bff6bb14f20f Mon Sep 17 00:00:00 2001 From: arielshaqed Date: Mon, 21 Sep 2020 16:59:02 +0300 Subject: [PATCH 37/39] Add Hacktoberfest information --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 2d11bda48ef..4472494dcda 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,12 @@ lakeFS supports AWS S3 or Google Cloud Storage as its underlying storage service For more information see the [Official Documentation](https://docs.lakefs.io). +## Get ready for Hacktoberfest! + +Contribute to open-source projects throughout October 2020 by stepping up to Digital Ocean's annual tradition of hosting [Hacktoberfest](https://hacktoberfest.digitalocean.com/). We _always_ welcome pull requests to lakeFS - but throughout October your pull requests to open source projects can get you some cool schwag. Check out our [contributing guide](https://docs.lakefs.io/contributing), which tells you everything you need to know including how to contact us. Or read the instructions for how to get all your October PRs to count in the [Hacktoberfest resources guide](https://hacktoberfest.digitalocean.com/details/). + +Not sure what to do? We marked some issues that could get you started quickly on our [Hacktoberfest list](https://github.com/treeverse/lakeFS/issues?q=is%3Aissue+is%3Aopen+label%3Ahacktoberfest). + ## Capabilities **Development Environment for Data** From 58521600eebfe9ac9ee04006165f92e8429695d5 Mon Sep 17 00:00:00 2001 From: YaelRiv <67264175+YaelRiv@users.noreply.github.com> Date: Mon, 21 Sep 2020 15:13:34 +0100 Subject: [PATCH 38/39] Update README.md This is great! Made some small changes and moved it after the community section. --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4472494dcda..15e864d4a94 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,6 @@ lakeFS supports AWS S3 or Google Cloud Storage as its underlying storage service For more information see the [Official Documentation](https://docs.lakefs.io). -## Get ready for Hacktoberfest! - -Contribute to open-source projects throughout October 2020 by stepping up to Digital Ocean's annual tradition of hosting [Hacktoberfest](https://hacktoberfest.digitalocean.com/). We _always_ welcome pull requests to lakeFS - but throughout October your pull requests to open source projects can get you some cool schwag. Check out our [contributing guide](https://docs.lakefs.io/contributing), which tells you everything you need to know including how to contact us. Or read the instructions for how to get all your October PRs to count in the [Hacktoberfest resources guide](https://hacktoberfest.digitalocean.com/details/). - -Not sure what to do? 
We marked some issues that could get you started quickly on our [Hacktoberfest list](https://github.com/treeverse/lakeFS/issues?q=is%3Aissue+is%3Aopen+label%3Ahacktoberfest). ## Capabilities @@ -88,6 +83,12 @@ Keep up to date and get lakeFS support via: - [YouTube](https://www.youtube.com/channel/UCZiDUd28ex47BTLuehb1qSA) (learn from video tutorials) - [Contact us](https://lakefs.io/contact-us/) (for anything) +## Get ready for Hacktoberfest! + +Contribute to open-source projects throughout October 2020 by stepping up to Digital Ocean's annual tradition of hosting [Hacktoberfest](https://hacktoberfest.digitalocean.com/). We _always_ welcome pull requests to lakeFS - but throughout October your pull requests to open source projects can get you some cool swag. Check out our [contributing guide](https://docs.lakefs.io/contributing) and join our [slack channel](https://join.slack.com/t/lakefs/shared_invite/zt-g86mkroy-186GzaxR4xOar1i1Us0bzw) for help, more ideas, and discussions. + +Not sure what to do? We marked some issues that could get you started quickly on our [Hacktoberfest list](https://github.com/treeverse/lakeFS/issues?q=is%3Aissue+is%3Aopen+label%3Ahacktoberfest). + ## More information - [lakeFS documentation](https://docs.lakefs.io) From 328a3bec12cf693cdc178594eb4df0d5358a2ea4 Mon Sep 17 00:00:00 2001 From: Itai Admi Date: Tue, 22 Sep 2020 11:30:23 +0300 Subject: [PATCH 39/39] First benchmark test --- .env | 4 +- benchmarks/main_test.go | 236 ++++++++++++++++++++++++++++++++++++++++ go.mod | 2 + go.sum | 5 + nessie/main_test.go | 113 +------------------ testutil/setup.go | 120 ++++++++++++++++++++ 6 files changed, 370 insertions(+), 110 deletions(-) create mode 100644 benchmarks/main_test.go create mode 100644 testutil/setup.go diff --git a/.env b/.env index 862650c0efc..f430c1a4067 100644 --- a/.env +++ b/.env @@ -1,2 +1,2 @@ -TAG=latest -LAKEFS_BLOCKSTORE_TYPE=local \ No newline at end of file +TAG=0.10.2 +LAKEFS_BLOCKSTORE_TYPE=local diff --git a/benchmarks/main_test.go b/benchmarks/main_test.go new file mode 100644 index 00000000000..4e4ef12b072 --- /dev/null +++ b/benchmarks/main_test.go @@ -0,0 +1,236 @@ +package benchmarks + +import ( + "bytes" + "context" + "fmt" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/go-openapi/runtime" + "github.com/go-openapi/swag" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/prom2json" + "github.com/spf13/viper" + "github.com/stretchr/testify/require" + "github.com/thanhpk/randstr" + genclient "github.com/treeverse/lakefs/api/gen/client" + "github.com/treeverse/lakefs/api/gen/client/objects" + "github.com/treeverse/lakefs/api/gen/client/repositories" + "github.com/treeverse/lakefs/api/gen/models" + "github.com/treeverse/lakefs/logging" + "github.com/treeverse/lakefs/testutil" + "net/http" + "os" + "strconv" + "strings" + "sync" + "sync/atomic" + "testing" + "time" +) + +var ( + logger logging.Logger + client *genclient.Lakefs + svc *s3.S3 +) + +func TestMain(m *testing.M) { + //benchmarkTests := flag.Bool("benchmark-tests", false, "Run benchmark tests") + //flag.Parse() + //if !*benchmarkTests { + // os.Exit(0) + //} + + viper.SetDefault("parallelism_level", 500) + viper.SetDefault("files_amount", 10000) + viper.SetDefault("global_timeout", 30*time.Minute) + + logger, client, svc = testutil.SetupTestingEnv("benchmark", "lakefs-benchmarking") + logger.Info("Setup succeeded, running the tests") + + if code := m.Run(); code != 0 { + logger.Info("Tests run failed") + os.Exit(code) + } + + scrapePrometheus() +} + 
+var monitoredOps = map[string]bool{ + "getObject": true, + "uploadObject": true, +} + +func scrapePrometheus() { + lakefsEndpoint := viper.GetString("endpoint_url") + resp, err := http.DefaultClient.Get(lakefsEndpoint + "/metrics") + if err != nil { + panic(err) + } + + ch := make(chan *dto.MetricFamily) + go func() { _ = prom2json.ParseResponse(resp, ch) }() + metrics := []*dto.Metric{} + + for { + a, ok := <-ch + if !ok { + break + } + + if *a.Name == "api_request_duration_seconds" { + for _, m := range a.Metric { + for _, label := range m.Label { + if *label.Name == "operation" && monitoredOps[*label.Value] { + metrics = append(metrics, m) + } + } + } + } + } + + for _, m := range metrics { + fmt.Printf("%v\n", *m) + } +} + +const ( + contentSuffixLength = 32 + //contentLength = 128 * 1024 + contentLength = 1 * 1024 +) + +func TestBenchmarkLakeFS(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), viper.GetDuration("global_timeout")) + defer cancel() + + ns := viper.GetString("storage_namespace") + repoName := strings.ToLower(t.Name()) + logger.WithFields(logging.Fields{ + "repository": repoName, + "storage_namespace": ns, + "name": repoName, + }).Debug("Create repository for test") + _, err := client.Repositories.CreateRepository(repositories.NewCreateRepositoryParamsWithContext(ctx). + WithRepository(&models.RepositoryCreation{ + DefaultBranch: "master", + ID: swag.String(repoName), + StorageNamespace: swag.String(ns), + }), nil) + require.NoErrorf(t, err, "failed to create repository '%s', storage '%s'", t.Name(), ns) + + parallelism := viper.GetInt("parallelism_level") + filesAmount := viper.GetInt("files_amount") + + contentPrefix := randstr.Hex(contentLength - contentSuffixLength) + failed := doInParallel(ctx, repoName, parallelism, filesAmount, contentPrefix, uploader) + logger.WithField("failedCount", failed).Info("Finished uploading files") + + failed = doInParallel(ctx, repoName, parallelism, filesAmount, "", reader) + logger.WithField("failedCount", failed).Info("Finished reading files") + +} + +func doInParallel(ctx context.Context, repoName string, level, filesAmount int, contentPrefix string, do func(context.Context, chan string, string, string) int) int { + filesCh := make(chan string, level) + wg := sync.WaitGroup{} + var failed int64 + + for i := 0; i < level; i++ { + go func() { + wg.Add(1) + fail := do(ctx, filesCh, repoName, contentPrefix) + atomic.AddInt64(&failed, int64(fail)) + wg.Done() + }() + } + + for i := 1; i <= filesAmount; i++ { + filesCh <- strconv.Itoa(i) + } + + close(filesCh) + wg.Wait() + + return int(failed) +} + +func uploader(ctx context.Context, ch chan string, repoName, contentPrefix string) int { + failed := 0 + for { + select { + case <-ctx.Done(): + return failed + case file, ok := <-ch: + if !ok { + // channel closed + return failed + } + + // Making sure content isn't duplicated to avoid dedup mechanisms in lakeFS + content := contentPrefix + randstr.Hex(contentSuffixLength) + contentReader := runtime.NamedReader("content", strings.NewReader(content)) + + if err := linearRetry(func() error { + _, err := client.Objects.UploadObject( + objects.NewUploadObjectParamsWithContext(ctx). + WithRepository(repoName). + WithBranch("master"). + WithPath(file). 
+ WithContent(contentReader), nil) + return err + }); err != nil { + failed++ + logger.WithField("fileNum", file).Error("Failed uploading file") + } + } + } +} + +func reader(ctx context.Context, ch chan string, repoName, _ string) int { + failed := 0 + for { + select { + case <-ctx.Done(): + return failed + case file, ok := <-ch: + if !ok { + // channel closed + return failed + } + + if err := linearRetry(func() error { + var b bytes.Buffer + _, err := client.Objects.GetObject( + objects.NewGetObjectParamsWithContext(ctx). + WithRepository(repoName). + WithRef("master"). + WithPath(file), nil, &b) + return err + }); err != nil { + failed++ + logger.WithField("fileNum", file).Error("Failed reading file") + } + } + } +} + +const ( + tries = 3 + retryTimeout = 200 * time.Millisecond +) + +func linearRetry(do func() error) error { + var err error + for i := 1; i <= tries; i++ { + if err = do(); err == nil { + return nil + } + + if i != tries { + // skip sleep in the last iteration + time.Sleep(retryTimeout) + } + } + return err +} diff --git a/go.mod b/go.mod index 4b41e7fc04d..bf44db9b6ef 100644 --- a/go.mod +++ b/go.mod @@ -55,7 +55,9 @@ require ( github.com/opencontainers/go-digest v1.0.0 // indirect github.com/ory/dockertest/v3 v3.6.0 github.com/prometheus/client_golang v1.7.1 + github.com/prometheus/client_model v0.2.0 github.com/prometheus/common v0.11.1 // indirect + github.com/prometheus/prom2json v1.3.0 github.com/rakyll/statik v0.1.7 github.com/rs/xid v1.2.1 github.com/schollz/progressbar/v3 v3.3.4 diff --git a/go.sum b/go.sum index 63c90368014..5876301c008 100644 --- a/go.sum +++ b/go.sum @@ -74,9 +74,11 @@ github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= github.com/alecthomas/jsonschema v0.0.0-20180308105923-f2c93856175a/go.mod h1:qpebaTNSsyUn5rPSJMsfqEtDw71TTggXM6stUDI16HA= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d h1:UQZhZ2O0vMHr2cI+DC1Mbh0TJxzA3RcLoMsFw+aXw7E= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/apache/arrow/go/arrow v0.0.0-20200601151325-b2287a20f230/go.mod h1:QNYViu/X0HXDHw7m3KXzWSVXIbfUvJqBFe6Gj8/pYA0= @@ -923,6 +925,8 @@ github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsT github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.1.3 h1:F0+tqvhOksq22sc6iCHF5WGlWjdwj92p0udFh1VFBS8= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= +github.com/prometheus/prom2json v1.3.0 h1:BlqrtbT9lLH3ZsOVhXPsHzFrApCTKRifB7gjJuypu6Y= +github.com/prometheus/prom2json v1.3.0/go.mod h1:rMN7m0ApCowcoDlypBHlkNbp5eJQf/+1isKykIP5ZnM= 
github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/quasilyte/go-consistent v0.0.0-20190521200055-c6f3937de18c/go.mod h1:5STLWrekHfjyYwxBRVRXNOSewLJ3PWfDJd1VyTS21fI= github.com/quasilyte/go-ruleguard v0.1.2-0.20200318202121-b00d7a75d3d8 h1:DvnesvLtRPQOvaUbfXfh0tpMHg29by0H7F2U+QIkSu8= @@ -1536,6 +1540,7 @@ google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGj google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U= +gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/nessie/main_test.go b/nessie/main_test.go index 409a274d3ff..f2878c8ee1c 100644 --- a/nessie/main_test.go +++ b/nessie/main_test.go @@ -1,27 +1,13 @@ package nessie import ( - "context" - "errors" "flag" - "net/url" - "os" - "testing" - "time" - - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/credentials" - "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/s3" - httptransport "github.com/go-openapi/runtime/client" - "github.com/go-openapi/strfmt" - "github.com/go-openapi/swag" - "github.com/rs/xid" - "github.com/spf13/viper" genclient "github.com/treeverse/lakefs/api/gen/client" - "github.com/treeverse/lakefs/api/gen/client/setup" - "github.com/treeverse/lakefs/api/gen/models" "github.com/treeverse/lakefs/logging" + "github.com/treeverse/lakefs/testutil" + "os" + "testing" ) var ( @@ -36,99 +22,10 @@ func TestMain(m *testing.M) { if !*systemTests { os.Exit(0) } - logger = logging.Default() - - viper.SetDefault("setup_lakefs", true) - viper.SetDefault("setup_lakefs_timeout", 5*time.Minute) - viper.SetDefault("endpoint_url", "http://localhost:8000") - viper.SetDefault("s3_endpoint", "s3.local.lakefs.io:8000") - viper.SetDefault("access_key_id", "") - viper.SetDefault("secret_access_key", "") - viper.SetDefault("storage_namespace", "s3://nessie-system-testing/"+xid.New().String()) - - viper.AddConfigPath(".") - viper.SetEnvPrefix("NESSIE") - viper.SetConfigName("nessie") - viper.AutomaticEnv() - - err := viper.ReadInConfig() - if err != nil && !errors.As(err, &viper.ConfigFileNotFoundError{}) { - logger.WithError(err).Fatal("Failed to read configuration") - } - - ctx := context.Background() - - // initialize the env/repo - logger = logging.Default() - logger.WithField("settings", viper.AllSettings()).Info("Starting nessie") - - endpointURL := viper.GetString("endpoint_url") - u, err := url.Parse(endpointURL) - if err != nil { - logger.WithError(err).Fatal("Failed to parse endpoint URL", endpointURL) - } - - apiBasePath := genclient.DefaultBasePath - if u.Path != "" { - apiBasePath = u.Path - } - r := httptransport.New(u.Host, apiBasePath, []string{u.Scheme}) - client = genclient.New(r, strfmt.Default) - if err := waitUntilLakeFSRunning(ctx, client); err != nil { - logger.WithError(err).Fatal("Waiting for lakeFS") - } - - setupLakeFS := viper.GetBool("setup_lakefs") - if setupLakeFS { - // first setup of lakeFS - const adminUserName = "nessie" - res, err := 
client.Setup.SetupLakeFS(&setup.SetupLakeFSParams{ - User: &models.Setup{ - Username: swag.String(adminUserName), - }, - Context: ctx, - }) - if err != nil { - logger.WithError(err).Fatal("Failed to setup lakeFS") - } - logger.Info("Cluster setup successfully") - viper.Set("access_key_id", res.Payload.AccessKeyID) - viper.Set("secret_access_key", res.Payload.AccessSecretKey) - } - r.DefaultAuthentication = httptransport.BasicAuth(viper.GetString("access_key_id"), viper.GetString("secret_access_key")) - - s3Endpoint := viper.GetString("s3_endpoint") - awsSession := session.Must(session.NewSession()) - svc = s3.New(awsSession, - aws.NewConfig(). - WithRegion("us-east-1"). - WithEndpoint(s3Endpoint). - WithDisableSSL(true). - WithCredentials(credentials.NewCredentials( - &credentials.StaticProvider{ - Value: credentials.Value{ - AccessKeyID: viper.GetString("access_key_id"), - SecretAccessKey: viper.GetString("secret_access_key"), - }}))) + logger, client, svc = testutil.SetupTestingEnv("nessie", "nessie-system-testing") logger.Info("Setup succeeded, running the tests") + os.Exit(m.Run()) } -func waitUntilLakeFSRunning(ctx context.Context, cl *genclient.Lakefs) error { - setupCtx, cancel := context.WithTimeout(ctx, viper.GetDuration("setup_lakefs_timeout")) - defer cancel() - for { - _, err := cl.HealthCheck.HealthCheck(nil) - if err == nil { - return nil - } - logger.WithError(err).Info("Setup failed") - - select { - case <-setupCtx.Done(): - return setupCtx.Err() - case <-time.After(5 * time.Second): - } - } -} diff --git a/testutil/setup.go b/testutil/setup.go new file mode 100644 index 00000000000..59e8a986f33 --- /dev/null +++ b/testutil/setup.go @@ -0,0 +1,120 @@ +package testutil + +import ( + "context" + "errors" + "fmt" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" + httptransport "github.com/go-openapi/runtime/client" + "github.com/go-openapi/strfmt" + "github.com/go-openapi/swag" + "github.com/rs/xid" + "github.com/spf13/viper" + genclient "github.com/treeverse/lakefs/api/gen/client" + "github.com/treeverse/lakefs/api/gen/client/setup" + "github.com/treeverse/lakefs/api/gen/models" + "github.com/treeverse/lakefs/logging" + "net/url" + "strings" + "time" +) + +func SetupTestingEnv(name, storageNS string) (logging.Logger, *genclient.Lakefs, *s3.S3) { + logger := logging.Default() + + viper.SetDefault("setup_lakefs", true) + viper.SetDefault("setup_lakefs_timeout", 5*time.Minute) + viper.SetDefault("endpoint_url", "http://internal-benchmark-load-balancer-1759434086.us-east-1.elb.amazonaws.com:8000") + viper.SetDefault("s3_endpoint", "s3.local.lakefs.io:8000") + viper.SetDefault("access_key_id", "") + viper.SetDefault("secret_access_key", "") + viper.SetDefault("storage_namespace", fmt.Sprintf("s3://%s/%s", storageNS, xid.New().String())) + + viper.AddConfigPath(".") + viper.SetEnvPrefix(strings.ToUpper(name)) + viper.SetConfigName(strings.ToLower(name)) + viper.AutomaticEnv() + + err := viper.ReadInConfig() + if err != nil && !errors.As(err, &viper.ConfigFileNotFoundError{}) { + logger.WithError(err).Fatal("Failed to read configuration") + } + + ctx := context.Background() + + // initialize the env/repo + logger = logging.Default() + logger.WithField("settings", viper.AllSettings()).Info(fmt.Sprintf("Starting %s", name)) + + endpointURL := viper.GetString("endpoint_url") + u, err := url.Parse(endpointURL) + if err != nil { + logger.WithError(err).Fatal("Failed to parse 
endpoint URL", endpointURL) + } + + apiBasePath := genclient.DefaultBasePath + if u.Path != "" { + apiBasePath = u.Path + } + r := httptransport.New(u.Host, apiBasePath, []string{u.Scheme}) + client := genclient.New(r, strfmt.Default) + if err := waitUntilLakeFSRunning(ctx, logger, client); err != nil { + logger.WithError(err).Fatal("Waiting for lakeFS") + } + + setupLakeFS := viper.GetBool("setup_lakefs") + if setupLakeFS { + // first setup of lakeFS + adminUserName := name + res, err := client.Setup.SetupLakeFS(&setup.SetupLakeFSParams{ + User: &models.Setup{ + Username: swag.String(adminUserName), + }, + Context: ctx, + }) + if err != nil { + logger.WithError(err).Fatal("Failed to setup lakeFS") + } + logger.Info("Cluster setup successfully") + viper.Set("access_key_id", res.Payload.AccessKeyID) + viper.Set("secret_access_key", res.Payload.AccessSecretKey) + } + r.DefaultAuthentication = httptransport.BasicAuth(viper.GetString("access_key_id"), viper.GetString("secret_access_key")) + + s3Endpoint := viper.GetString("s3_endpoint") + awsSession := session.Must(session.NewSession()) + svc := s3.New(awsSession, + aws.NewConfig(). + WithRegion("us-east-1"). + WithEndpoint(s3Endpoint). + WithDisableSSL(true). + WithCredentials(credentials.NewCredentials( + &credentials.StaticProvider{ + Value: credentials.Value{ + AccessKeyID: viper.GetString("access_key_id"), + SecretAccessKey: viper.GetString("secret_access_key"), + }}))) + + return logger, client, svc +} + +func waitUntilLakeFSRunning(ctx context.Context, logger logging.Logger, cl *genclient.Lakefs) error { + setupCtx, cancel := context.WithTimeout(ctx, viper.GetDuration("setup_lakefs_timeout")) + defer cancel() + for { + _, err := cl.HealthCheck.HealthCheck(nil) + if err == nil { + return nil + } + logger.WithError(err).Info("Setup failed") + + select { + case <-setupCtx.Done(): + return setupCtx.Err() + case <-time.After(5 * time.Second): + } + } +}