Skip to content

Commit

Permalink
Enable fake GCS server in test and extend test coverage for (external…
Browse files Browse the repository at this point in the history
…) table creation
  • Loading branch information
gruuya committed Nov 24, 2023
1 parent cc6e16e commit 5fb7156
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 38 deletions.
15 changes: 4 additions & 11 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,13 @@ services:
test-minio/seafowl-test-bucket/table_with_ns_column.parquet; exit 0; "
fake-gcs:
image: fsouza/fake-gcs-server
image: tustvold/fake-gcs-server
command:
[
"-scheme",
"http",
"-port",
"4443",
"-external-url",
"http://[::]:4443",
"-backend",
"memory",
]
["-scheme", "http", "-backend", "memory", "-data", "test/", "-public-host", "localhost:4443"]
ports:
- "4443:4443"
volumes:
- ./tests/data:/test/test-data

create-fake-gcs-buckets:
image: curlimages/curl:8.00.1
Expand Down
61 changes: 37 additions & 24 deletions tests/statements/ddl.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
use crate::statements::*;

#[rstest]
#[tokio::test]
async fn test_create_table() {
let (context, _) = make_context_with_pg(ObjectStoreType::InMemory).await;
async fn test_create_table(
#[values(ObjectStoreType::InMemory, ObjectStoreType::Gcs)]
object_store_type: ObjectStoreType,
) {
let (context, _) = make_context_with_pg(object_store_type).await;

context
.plan_query(
Expand Down Expand Up @@ -490,42 +494,51 @@ async fn test_create_table_in_staging_schema() {
assert_eq!(err.to_string(), expected_err,);
}

// Test creating external table in the native object store type (`in`)
// that is stored in the external object store (`from`).
#[rstest]
#[case::minio_http(
#[case::in_mem_from_mock_http(None, "", ObjectStoreType::InMemory)]
#[case::in_mem_from_minio_http(
Some("http://localhost:9000/seafowl-test-bucket/table_with_ns_column.parquet"),
"",
ObjectStoreType::InMemory
)]
// Tests the case of inheriting the credentials from the underlying object store, since none are
// provided via the `OPTIONS` clause, so we must use `ObjectStoreType::S3`.
#[case::minio_s3(
// Tests the case of inheriting the credentials from the native object store, since none are
// provided via the `OPTIONS` clause.
#[case::in_minio_s3_from_minio_s3(
Some("s3://seafowl-test-bucket/table_with_ns_column.parquet"),
"",
ObjectStoreType::S3(None)
)]
#[case::in_gcs_from_gcs(
Some("gs://test-data/table_with_ns_column.parquet"),
"",
ObjectStoreType::Gcs
)]
// Tests the case of explicitly specifying the `OPTIONS` clause to construct a dynamic object store,
// so we can use anything other than `ObjectStoreType::S3`.
#[case::minio_s3_with_options(Some(
"s3://seafowl-test-bucket/table_with_ns_column.parquet"
), " OPTIONS ('access_key_id' 'minioadmin', 'secret_access_key' 'minioadmin', 'endpoint' 'http://127.0.0.1:9000') ", ObjectStoreType::InMemory)]
#[case::mock_http_server(None, "", ObjectStoreType::InMemory)]
// so we can use anything other than the native object store.
#[case::in_mem_from_minio_s3_with_options(
Some("s3://seafowl-test-bucket/table_with_ns_column.parquet"),
" OPTIONS ('access_key_id' 'minioadmin', 'secret_access_key' 'minioadmin', 'endpoint' 'http://127.0.0.1:9000') ",
ObjectStoreType::InMemory,
)]
#[case::in_gcs_from_minio_s3_with_options(
Some("s3://seafowl-test-bucket/table_with_ns_column.parquet"),
" OPTIONS ('access_key_id' 'minioadmin', 'secret_access_key' 'minioadmin', 'endpoint' 'http://127.0.0.1:9000') ",
ObjectStoreType::Gcs,
)]
#[case::in_minio_s3_from_gcs_with_options(
Some("gs://test-data/table_with_ns_column.parquet"),
&format!(" OPTIONS ('google_application_credentials' '{FAKE_GCS_CREDS_PATH}') "),
ObjectStoreType::S3(None),
)]
#[tokio::test]
async fn test_create_external_table(
#[case] minio_url: Option<&str>,
#[case] location: Option<&str>,
#[case] options: &str,
#[case] object_store_type: ObjectStoreType,
) {
/*
Test CREATE EXTERNAL TABLE works with an HTTP mock server and MinIO.
This also works with https + actual S3 (tested manually)
SELECT * FROM datafusion.public.supply_chains LIMIT 1 results in:
bytes_scanned{filename=seafowl-public.s3.eu-west-1.amazonaws.com/tutorial/trase-supply-chains.parquet}=232699
*/

let url = match minio_url {
let url = match location {
None => {
let (mock_server, _) = testutils::make_mock_parquet_server(true, true).await;
// Add a query string that's ignored by the mock (make sure DataFusion doesn't eat the whole URL)
Expand Down Expand Up @@ -590,7 +603,7 @@ async fn test_create_external_table(
.await
.unwrap();
let results = context.collect(plan).await.unwrap();
let expected = if minio_url.is_none() {
let expected = if location.is_none() {
vec![
"+-------+",
"| col_1 |",
Expand Down
8 changes: 5 additions & 3 deletions tests/statements/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ mod testutils;
mod time_travel;
mod vacuum;

// Path of the credentials JSON file for the fake GCS server. The `ObjectStoreType::Gcs`
// setup writes the file to this location, and tests can pass the same path through the
// `google_application_credentials` option of a `CREATE EXTERNAL TABLE ... OPTIONS` clause.
const FAKE_GCS_CREDS_PATH: &str = "/tmp/fake-gcs-server.json";

enum ObjectStoreType {
_Gcs, // TODO: GCS Bucket tests await github.com/fsouza/fake-gcs-server/issues/852
Gcs, // TODO: GCS bucket tests with multipart uploads await github.com/fsouza/fake-gcs-server/issues/852
Local,
InMemory,
// S3 object store with an optional path to the actual data folder
Expand Down Expand Up @@ -84,11 +86,11 @@ ttl = 30
),
None,
),
ObjectStoreType::_Gcs => {
ObjectStoreType::Gcs => {
let creds_json = json!({"gcs_base_url": "http://localhost:4443", "disable_oauth": true, "client_email": "", "private_key": ""});
// gcs_base_url should match docker-compose.yml:fake-gcs-server
let google_application_credentials_path =
std::path::Path::new("/tmp/fake-gcs-server.json");
std::path::Path::new(FAKE_GCS_CREDS_PATH);
std::fs::write(
google_application_credentials_path,
serde_json::to_vec(&creds_json).expect("Unable to serialize creds JSON"),
Expand Down

0 comments on commit 5fb7156

Please sign in to comment.