diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java index 183d3c110949cf..29d8a1ddf8f7ab 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java @@ -587,7 +587,11 @@ public RedirectStatus getRedirectStatus() { private void checkEndpoint(String endpoint) throws UserException { HttpURLConnection connection = null; try { - String urlStr = "http://" + endpoint; + String urlStr = endpoint; + // Add default protocol if not specified + if (!endpoint.startsWith("http://") && !endpoint.startsWith("https://")) { + urlStr = "http://" + endpoint; + } SecurityChecker.getInstance().startSSRFChecking(urlStr); URL url = new URL(urlStr); connection = (HttpURLConnection) url.openConnection(); @@ -599,7 +603,13 @@ private void checkEndpoint(String endpoint) throws UserException { if (e instanceof UserException) { msg = ((UserException) e).getDetailMessage(); } else { - msg = e.getMessage(); + msg = String.format("%s: %s", e.getClass().getSimpleName(), + e.getMessage() != null ? e.getMessage() : "Unknown error"); + if (e.getCause() != null) { + msg += String.format(" (Caused by: %s: %s)", + e.getCause().getClass().getSimpleName(), + e.getCause().getMessage() != null ? e.getCause().getMessage() : "Unknown cause"); + } } throw new UserException(InternalErrorCode.GET_REMOTE_DATA_ERROR, "Failed to access object storage, message=" + msg, e); @@ -638,6 +648,8 @@ public void checkS3Param() throws UserException { } public void checkWhiteList(String endpoint) throws UserException { + endpoint = endpoint.replaceFirst("^http://", ""); + endpoint = endpoint.replaceFirst("^https://", ""); List whiteList = new ArrayList<>(Arrays.asList(Config.s3_load_endpoint_white_list)); whiteList.removeIf(String::isEmpty); if (!whiteList.isEmpty() && !whiteList.contains(endpoint)) { diff --git a/regression-test/suites/load_p0/broker_load/test_domain_connection_and_ak_sk_correction.groovy b/regression-test/suites/load_p0/broker_load/test_domain_connection_and_ak_sk_correction.groovy index 927c267718ba94..3403d4f3fd3df1 100644 --- a/regression-test/suites/load_p0/broker_load/test_domain_connection_and_ak_sk_correction.groovy +++ b/regression-test/suites/load_p0/broker_load/test_domain_connection_and_ak_sk_correction.groovy @@ -79,6 +79,49 @@ suite("test_domain_connection_and_ak_sk_correction", "load_p0") { """ logger.info("the first sql result is {}", result) + def endpoint = getS3Endpoint().replace("http://", "").replace("https://", "") + def httpEndpoint = "http://" + endpoint + label = UUID.randomUUID().toString().replace("-", "") + result = sql """ + LOAD LABEL ${label} + ( + DATA INFILE("s3://${getS3BucketName()}/regression/tpch/sf1/part.tbl") + INTO TABLE ${tableName} + COLUMNS TERMINATED BY "|" + (p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp) + ) + WITH S3 + ( + "AWS_ENDPOINT" = "${httpEndpoint}", + "AWS_ACCESS_KEY" = "${getS3AK()}", + "AWS_SECRET_KEY" = "${getS3SK()}", + "AWS_REGION" = "${getS3Region()}", + "PROVIDER" = "${getS3Provider()}" + ); + """ + logger.info("the first sql result is {}", result) + + def httpsEndpoint = "https://" + endpoint + label = UUID.randomUUID().toString().replace("-", "") + result = sql """ + LOAD LABEL ${label} + ( + DATA INFILE("s3://${getS3BucketName()}/regression/tpch/sf1/part.tbl") + INTO TABLE ${tableName} + COLUMNS TERMINATED BY "|" + (p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp) + ) + WITH S3 + ( + "AWS_ENDPOINT" = "${httpsEndpoint}", + "AWS_ACCESS_KEY" = "${getS3AK()}", + "AWS_SECRET_KEY" = "${getS3SK()}", + "AWS_REGION" = "${getS3Region()}", + "PROVIDER" = "${getS3Provider()}" + ); + """ + logger.info("the first sql result is {}", result) + label = UUID.randomUUID().toString().replace("-", "") try { result = sql """