diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java index 8e808e3c097402..87d8e7332b6cdc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java @@ -614,7 +614,11 @@ public static TFileCompressType getFileCompressType(String compressType) { return TFileCompressType.UNKNOWN; } final String upperCaseType = compressType.toUpperCase(); - return TFileCompressType.valueOf(upperCaseType); + try { + return TFileCompressType.valueOf(upperCaseType); + } catch (IllegalArgumentException e) { + return TFileCompressType.UNKNOWN; + } } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/AvroFileFormatProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/AvroFileFormatProperties.java new file mode 100644 index 00000000000000..f5a5c34a3f4b6f --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/AvroFileFormatProperties.java @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.thrift.TFileAttributes;
+import org.apache.doris.thrift.TFileFormatType;
+import org.apache.doris.thrift.TFileTextScanRangeParams;
+import org.apache.doris.thrift.TResultFileSinkOptions;
+
+import java.util.Map;
+
+/**
+ * File format properties for the Avro format.
+ * This class parses no Avro-specific user properties.
+ */
+public class AvroFileFormatProperties extends FileFormatProperties {
+    public AvroFileFormatProperties() {
+        super(TFileFormatType.FORMAT_AVRO);
+    }
+
+    /** Intentionally empty: {@code formatProperties} is neither read nor modified. */
+    @Override
+    public void analyzeFileFormatProperties(Map formatProperties, boolean isRemoveOriginProperty)
+            throws AnalysisException {
+    }
+
+    /** Always returns {@code null}. */
+    @Override
+    public TResultFileSinkOptions toTResultFileSinkOptions() {
+        return null;
+    }
+
+    /** Builds file attributes that carry only a newly created, unpopulated text-params struct. */
+    @Override
+    public TFileAttributes toTFileAttributes() {
+        TFileAttributes attrs = new TFileAttributes();
+        attrs.setTextParams(new TFileTextScanRangeParams());
+        return attrs;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/CsvFileFormatProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/CsvFileFormatProperties.java
new file mode 100644
index 00000000000000..74a9a58aec7a26
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/CsvFileFormatProperties.java
@@ -0,0 +1,191 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.analysis.Separator;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.common.util.Util;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.thrift.TFileAttributes;
+import org.apache.doris.thrift.TFileFormatType;
+import org.apache.doris.thrift.TFileTextScanRangeParams;
+import org.apache.doris.thrift.TResultFileSinkOptions;
+import org.apache.doris.thrift.TTextSerdeType;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * File format properties for CSV-style text input.
+ *
+ * <p>{@link #analyzeFileFormatProperties} must be called once before the getters or
+ * {@link #toTFileAttributes()} are used; until then the fields hold their declared defaults.
+ */
+public class CsvFileFormatProperties extends FileFormatProperties {
+    public static final Logger LOG = LogManager.getLogger(CsvFileFormatProperties.class);
+
+    public static final String DEFAULT_COLUMN_SEPARATOR = "\t";
+    public static final String DEFAULT_HIVE_TEXT_COLUMN_SEPARATOR = "\001";
+    public static final String DEFAULT_LINE_DELIMITER = "\n";
+
+    public static final String PROP_COLUMN_SEPARATOR = "column_separator";
+    public static final String PROP_LINE_DELIMITER = "line_delimiter";
+
+    public static final String PROP_SKIP_LINES = "skip_lines";
+    public static final String PROP_CSV_SCHEMA = "csv_schema";
+    public static final String PROP_COMPRESS_TYPE = "compress_type";
+    public static final String PROP_TRIM_DOUBLE_QUOTES = "trim_double_quotes";
+
+    public static final String PROP_ENCLOSE = "enclose";
+
+    private String headerType = "";
+    private TTextSerdeType textSerdeType = TTextSerdeType.JSON_TEXT_SERDE;
+    private String columnSeparator = DEFAULT_COLUMN_SEPARATOR;
+    private String lineDelimiter = DEFAULT_LINE_DELIMITER;
+    private boolean trimDoubleQuotes;
+    private int skipLines;
+    // 0 means "no enclose character"; toTFileAttributes() only forwards non-zero values.
+    private byte enclose;
+
+    // used by tvf
+    // User specified csv columns, it will override columns got from file
+    private final List<Column> csvSchema = Lists.newArrayList();
+
+    // Separator used when the user does not specify PROP_COLUMN_SEPARATOR.
+    String defaultColumnSeparator = DEFAULT_COLUMN_SEPARATOR;
+
+    public CsvFileFormatProperties() {
+        super(TFileFormatType.FORMAT_CSV_PLAIN);
+    }
+
+    /**
+     * @param defaultColumnSeparator separator applied when the user specifies none
+     * @param textSerdeType serde type reported by {@link #getTextSerdeType()}
+     */
+    public CsvFileFormatProperties(String defaultColumnSeparator, TTextSerdeType textSerdeType) {
+        super(TFileFormatType.FORMAT_CSV_PLAIN);
+        this.defaultColumnSeparator = defaultColumnSeparator;
+        this.textSerdeType = textSerdeType;
+    }
+
+    /**
+     * @param headerType value passed unchanged to {@code TFileAttributes.setHeaderType}
+     */
+    public CsvFileFormatProperties(String headerType) {
+        super(TFileFormatType.FORMAT_CSV_PLAIN);
+        this.headerType = headerType;
+    }
+
+    /**
+     * Reads and validates the CSV properties.
+     *
+     * @param formatProperties properties specified by the user
+     * @param isRemoveOriginProperty if true, every property read here is removed from the map
+     * @throws AnalysisException if a separator is empty, enclose is not exactly one byte or is
+     *         byte 0, or skip_lines is negative
+     * @throws NumberFormatException if skip_lines is not a parsable integer
+     */
+    @Override
+    public void analyzeFileFormatProperties(Map formatProperties, boolean isRemoveOriginProperty)
+            throws AnalysisException {
+        try {
+            // analyze properties specified by user
+            columnSeparator = getOrDefault(formatProperties, PROP_COLUMN_SEPARATOR,
+                    defaultColumnSeparator, isRemoveOriginProperty);
+            if (Strings.isNullOrEmpty(columnSeparator)) {
+                throw new AnalysisException("column_separator can not be empty.");
+            }
+            columnSeparator = Separator.convertSeparator(columnSeparator);
+
+            lineDelimiter = getOrDefault(formatProperties, PROP_LINE_DELIMITER,
+                    DEFAULT_LINE_DELIMITER, isRemoveOriginProperty);
+            if (Strings.isNullOrEmpty(lineDelimiter)) {
+                throw new AnalysisException("line_delimiter can not be empty.");
+            }
+            lineDelimiter = Separator.convertSeparator(lineDelimiter);
+
+            String enclosedString = getOrDefault(formatProperties, PROP_ENCLOSE,
+                    "", isRemoveOriginProperty);
+            if (!Strings.isNullOrEmpty(enclosedString)) {
+                if (enclosedString.length() > 1) {
+                    throw new AnalysisException("enclose should not be longer than one byte.");
+                }
+                char encloseChar = enclosedString.charAt(0);
+                // A char above 0xFF does not fit in a byte; the cast below would silently keep only
+                // its low 8 bits and select a different enclose byte than the user asked for.
+                if (encloseChar > 0xFF) {
+                    throw new AnalysisException("enclose should not be longer than one byte.");
+                }
+                enclose = (byte) encloseChar;
+                if (enclose == 0) {
+                    throw new AnalysisException("enclose should not be byte [0].");
+                }
+            }
+
+            // An absent property yields "", which parses to false.
+            trimDoubleQuotes = Boolean.parseBoolean(getOrDefault(formatProperties,
+                    PROP_TRIM_DOUBLE_QUOTES, "", isRemoveOriginProperty));
+            skipLines = Integer.parseInt(getOrDefault(formatProperties,
+                    PROP_SKIP_LINES, "0", isRemoveOriginProperty));
+            if (skipLines < 0) {
+                throw new AnalysisException("skipLines should not be less than 0.");
+            }
+
+            String compressTypeStr = getOrDefault(formatProperties,
+                    PROP_COMPRESS_TYPE, "UNKNOWN", isRemoveOriginProperty);
+            compressionType = Util.getFileCompressType(compressTypeStr);
+
+        } catch (org.apache.doris.common.AnalysisException e) {
+            // Translate the common AnalysisException into the nereids one; only the message is kept.
+            throw new AnalysisException(e.getMessage());
+        }
+    }
+
+    @Override
+    public TResultFileSinkOptions toTResultFileSinkOptions() {
+        return null;
+    }
+
+    // The method `analyzeFileFormatProperties` must have been called once before this method
+    @Override
+    public TFileAttributes toTFileAttributes() {
+        TFileAttributes fileAttributes = new TFileAttributes();
+        TFileTextScanRangeParams fileTextScanRangeParams = new TFileTextScanRangeParams();
+        fileTextScanRangeParams.setColumnSeparator(this.columnSeparator);
+        fileTextScanRangeParams.setLineDelimiter(this.lineDelimiter);
+        if (this.enclose != 0) {
+            fileTextScanRangeParams.setEnclose(this.enclose);
+        }
+        fileAttributes.setTextParams(fileTextScanRangeParams);
+        fileAttributes.setHeaderType(headerType);
+        fileAttributes.setTrimDoubleQuotes(trimDoubleQuotes);
+        fileAttributes.setSkipLines(skipLines);
+        // NOTE(review): ConnectContext.get() is dereferenced without a null check - confirm this
+        // method is only reached on threads that have a connect context.
+        fileAttributes.setEnableTextValidateUtf8(
+                ConnectContext.get().getSessionVariable().enableTextValidateUtf8);
+        return fileAttributes;
+    }
+
+    public String getHeaderType() {
+        return headerType;
+    }
+
+    public TTextSerdeType getTextSerdeType() {
+        return textSerdeType;
+    }
+
+    public String getColumnSeparator() {
+        return columnSeparator;
+    }
+
+    public String getLineDelimiter() {
+        return lineDelimiter;
+    }
+
+    public boolean isTrimDoubleQuotes() {
+        return trimDoubleQuotes;
+    }
+
+    public int getSkipLines() {
+        return skipLines;
+    }
+
+    public byte getEnclose() {
+        return enclose;
+    }
+
+    public List<Column> getCsvSchema() {
+        return csvSchema;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/FileFormatProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/FileFormatProperties.java
new file mode 100644
index 00000000000000..ad51b06ed425e7
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/FileFormatProperties.java
@@ -0,0 +1,124 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.thrift.TFileAttributes;
+import org.apache.doris.thrift.TFileCompressType;
+import org.apache.doris.thrift.TFileFormatType;
+import org.apache.doris.thrift.TResultFileSinkOptions;
+import org.apache.doris.thrift.TTextSerdeType;
+
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * Base class for per-format file properties and factory for the concrete implementations.
+ */
+public abstract class FileFormatProperties {
+    public static final String PROP_FORMAT = "format";
+    public static final String FORMAT_PARQUET = "parquet";
+    public static final String FORMAT_CSV = "csv";
+    public static final String FORMAT_CSV_WITH_NAMES = "csv_with_names";
+    public static final String FORMAT_CSV_WITH_NAMES_AND_TYPES = "csv_with_names_and_types";
+    public static final String FORMAT_HIVE_TEXT = "hive_text";
+    public static final String FORMAT_ORC = "orc";
+    public static final String FORMAT_JSON = "json";
+    public static final String FORMAT_AVRO = "avro";
+    public static final String FORMAT_WAL = "wal";
+    public static final String FORMAT_ARROW = "arrow";
+    public static final String PROP_COMPRESS_TYPE = "compress_type";
+
+    protected TFileFormatType fileFormatType;
+
+    // Not assigned by this class; it stays null unless a subclass sets it.
+    protected TFileCompressType compressionType;
+
+    public FileFormatProperties(TFileFormatType fileFormatType) {
+        this.fileFormatType = fileFormatType;
+    }
+
+    /**
+     * Analyze user properties
+     * @param formatProperties properties specified by user
+     * @param isRemoveOriginProperty if this param is set to true, then this method would remove the origin property
+     * @throws AnalysisException if a property value is invalid for the format
+     */
+    public abstract void analyzeFileFormatProperties(
+            Map<String, String> formatProperties, boolean isRemoveOriginProperty)
+            throws AnalysisException;
+
+    /**
+     * generate TResultFileSinkOptions according to the properties of specified file format
+     * You must call method `analyzeFileFormatProperties` once before calling method `toTResultFileSinkOptions`
+     */
+    public abstract TResultFileSinkOptions toTResultFileSinkOptions();
+
+    /**
+     * generate TFileAttributes according to the properties of specified file format
+     * You must call method `analyzeFileFormatProperties` once before calling method `toTFileAttributes`
+     */
+    public abstract TFileAttributes toTFileAttributes();
+
+    /**
+     * Creates the properties object for a format name.
+     *
+     * @param formatString one of the {@code FORMAT_*} names; matched exactly, no case folding
+     * @return a new, not yet analyzed properties instance
+     * @throws AnalysisException if {@code formatString} is null or has no case below
+     */
+    public static FileFormatProperties createFileFormatProperties(String formatString) {
+        if (formatString == null) {
+            // switch on a null String throws NullPointerException; report it as unsupported instead.
+            throw unsupportedFormat(formatString);
+        }
+        switch (formatString) {
+            case FORMAT_CSV:
+                return new CsvFileFormatProperties();
+            case FORMAT_HIVE_TEXT:
+                return new CsvFileFormatProperties(CsvFileFormatProperties.DEFAULT_HIVE_TEXT_COLUMN_SEPARATOR,
+                        TTextSerdeType.HIVE_TEXT_SERDE);
+            case FORMAT_CSV_WITH_NAMES:
+                return new CsvFileFormatProperties(
+                        FORMAT_CSV_WITH_NAMES);
+            case FORMAT_CSV_WITH_NAMES_AND_TYPES:
+                return new CsvFileFormatProperties(
+                        FORMAT_CSV_WITH_NAMES_AND_TYPES);
+            case FORMAT_PARQUET:
+                return new ParquetFileFormatProperties();
+            case FORMAT_ORC:
+                return new OrcFileFormatProperties();
+            case FORMAT_JSON:
+                return new JsonFileFormatProperties();
+            case FORMAT_AVRO:
+                return new AvroFileFormatProperties();
+            case FORMAT_WAL:
+                return new WalFileFormatProperties();
+            default:
+                // FORMAT_ARROW has no case above, so it is rejected here as well.
+                throw unsupportedFormat(formatString);
+        }
+    }
+
+    /**
+     * Creates the properties object for the format named under {@link #PROP_FORMAT}.
+     * The name is lower-cased first; a missing or null entry is treated as the empty string.
+     *
+     * @throws AnalysisException if the format is not supported
+     */
+    public static FileFormatProperties createFileFormatProperties(Map<String, String> formatProperties)
+            throws AnalysisException {
+        String rawFormat = formatProperties.getOrDefault(PROP_FORMAT, "");
+        // Locale.ROOT keeps the result independent of the JVM default locale; under a Turkish
+        // locale "CSV_WITH_NAMES" would otherwise lower-case to a dotless i and match no case.
+        String formatString = rawFormat == null ? "" : rawFormat.toLowerCase(Locale.ROOT);
+        return createFileFormatProperties(formatString);
+    }
+
+    // Builds the exception raised for every unsupported format name.
+    private static AnalysisException unsupportedFormat(String formatString) {
+        return new AnalysisException("format:" + formatString + " is not supported.");
+    }
+
+    /**
+     * Looks up {@code key}, falling back to {@code defaultValue} when the key is absent.
+     *
+     * @param isRemove if true, the key is removed from {@code props} after the lookup
+     */
+    protected String getOrDefault(Map<String, String> props, String key, String defaultValue,
+            boolean isRemove) {
+        String value = props.getOrDefault(key, defaultValue);
+        if (isRemove) {
+            props.remove(key);
+        }
+        return value;
+    }
+
+    public TFileFormatType getFileFormatType() {
+        return fileFormatType;
+    }
+
+    public TFileCompressType getCompressionType() {
+        return compressionType;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/JsonFileFormatProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/JsonFileFormatProperties.java new file mode 100644 index 00000000000000..4ed03f455d5505 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/JsonFileFormatProperties.java @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.common.util.Util;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.thrift.TFileAttributes;
+import org.apache.doris.thrift.TFileFormatType;
+import org.apache.doris.thrift.TFileTextScanRangeParams;
+import org.apache.doris.thrift.TResultFileSinkOptions;
+
+import java.util.Map;
+
+/**
+ * File format properties for JSON input: root/path selectors plus four boolean parsing flags.
+ */
+public class JsonFileFormatProperties extends FileFormatProperties {
+    public static final String PROP_JSON_ROOT = "json_root";
+    public static final String PROP_JSON_PATHS = "jsonpaths";
+    public static final String PROP_STRIP_OUTER_ARRAY = "strip_outer_array";
+    public static final String PROP_READ_JSON_BY_LINE = "read_json_by_line";
+    public static final String PROP_NUM_AS_STRING = "num_as_string";
+    public static final String PROP_FUZZY_PARSE = "fuzzy_parse";
+
+    // from ExternalFileTableValuedFunction:
+    private String jsonRoot = "";
+    private String jsonPaths = "";
+    private boolean stripOuterArray;
+    private boolean readJsonByLine;
+    private boolean numAsString;
+    private boolean fuzzyParse;
+
+
+    public JsonFileFormatProperties() {
+        super(TFileFormatType.FORMAT_JSON);
+    }
+
+    /**
+     * Reads the JSON properties and the compress type from {@code formatProperties}.
+     * Absent string properties default to "", absent flags to false, and an absent compress
+     * type to "UNKNOWN".
+     */
+    @Override
+    public void analyzeFileFormatProperties(Map formatProperties, boolean isRemoveOriginProperty)
+            throws AnalysisException {
+        jsonRoot = getOrDefault(formatProperties, PROP_JSON_ROOT, "", isRemoveOriginProperty);
+        jsonPaths = getOrDefault(formatProperties, PROP_JSON_PATHS, "", isRemoveOriginProperty);
+
+        readJsonByLine = readFlag(formatProperties, PROP_READ_JSON_BY_LINE, isRemoveOriginProperty);
+        stripOuterArray = readFlag(formatProperties, PROP_STRIP_OUTER_ARRAY, isRemoveOriginProperty);
+        numAsString = readFlag(formatProperties, PROP_NUM_AS_STRING, isRemoveOriginProperty);
+        fuzzyParse = readFlag(formatProperties, PROP_FUZZY_PARSE, isRemoveOriginProperty);
+
+        compressionType = Util.getFileCompressType(
+                getOrDefault(formatProperties, PROP_COMPRESS_TYPE, "UNKNOWN", isRemoveOriginProperty));
+    }
+
+    // Parses one boolean property; a missing key is read as "" and therefore yields false.
+    private boolean readFlag(Map props, String key, boolean isRemove) {
+        return Boolean.parseBoolean(getOrDefault(props, key, "", isRemove));
+    }
+
+    @Override
+    public TResultFileSinkOptions toTResultFileSinkOptions() {
+        return null;
+    }
+
+    /** Copies the analyzed JSON settings into a new attributes struct with empty text params. */
+    @Override
+    public TFileAttributes toTFileAttributes() {
+        TFileAttributes attrs = new TFileAttributes();
+        attrs.setTextParams(new TFileTextScanRangeParams());
+        attrs.setJsonRoot(jsonRoot);
+        attrs.setJsonpaths(jsonPaths);
+        attrs.setReadJsonByLine(readJsonByLine);
+        attrs.setStripOuterArray(stripOuterArray);
+        attrs.setNumAsString(numAsString);
+        attrs.setFuzzyParse(fuzzyParse);
+        return attrs;
+    }
+
+    public String getJsonRoot() {
+        return jsonRoot;
+    }
+
+    public String getJsonPaths() {
+        return jsonPaths;
+    }
+
+    public boolean isStripOuterArray() {
+        return stripOuterArray;
+    }
+
+    public boolean isReadJsonByLine() {
+        return readJsonByLine;
+    }
+
+    public boolean isNumAsString() {
+        return numAsString;
+    }
+
+    public boolean isFuzzyParse() {
+        return fuzzyParse;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/OrcFileFormatProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/OrcFileFormatProperties.java
new file mode 100644
index 00000000000000..68081c275e4aee
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/OrcFileFormatProperties.java
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.thrift.TFileAttributes;
+import org.apache.doris.thrift.TFileCompressType;
+import org.apache.doris.thrift.TFileFormatType;
+import org.apache.doris.thrift.TFileTextScanRangeParams;
+import org.apache.doris.thrift.TResultFileSinkOptions;
+
+import java.util.Map;
+
+/**
+ * File format properties for the ORC format.
+ * No user properties are parsed; the ORC compression type stays at its ZLIB initial value.
+ */
+public class OrcFileFormatProperties extends FileFormatProperties {
+    private TFileCompressType orcCompressionType = TFileCompressType.ZLIB;
+
+    public OrcFileFormatProperties() {
+        super(TFileFormatType.FORMAT_ORC);
+    }
+
+    /** Intentionally empty: {@code formatProperties} is neither read nor modified. */
+    @Override
+    public void analyzeFileFormatProperties(Map formatProperties, boolean isRemoveOriginProperty)
+            throws AnalysisException {
+    }
+
+    /** Always returns {@code null}. */
+    @Override
+    public TResultFileSinkOptions toTResultFileSinkOptions() {
+        return null;
+    }
+
+    /** Builds file attributes that carry only a newly created, unpopulated text-params struct. */
+    @Override
+    public TFileAttributes toTFileAttributes() {
+        TFileAttributes attrs = new TFileAttributes();
+        attrs.setTextParams(new TFileTextScanRangeParams());
+        return attrs;
+    }
+
+    public TFileCompressType getOrcCompressionType() {
+        return orcCompressionType;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatProperties.java new file mode 100644 index 00000000000000..565df5ba22219e --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatProperties.java @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.thrift.TFileAttributes;
+import org.apache.doris.thrift.TFileFormatType;
+import org.apache.doris.thrift.TFileTextScanRangeParams;
+import org.apache.doris.thrift.TParquetCompressionType;
+import org.apache.doris.thrift.TParquetVersion;
+import org.apache.doris.thrift.TResultFileSinkOptions;
+
+import java.util.Map;
+
+/**
+ * File format properties for the Parquet format.
+ * No user properties are parsed; compression stays SNAPPY and the dictionary stays enabled.
+ */
+public class ParquetFileFormatProperties extends FileFormatProperties {
+    public static final String PARQUET_DISABLE_DICTIONARY = "disable_dictionary";
+    public static final TParquetVersion parquetVersion = TParquetVersion.PARQUET_1_0;
+    public static final String PARQUET_VERSION = "version";
+
+    private TParquetCompressionType parquetCompressionType = TParquetCompressionType.SNAPPY;
+    private boolean parquetDisableDictionary = false;
+
+    public ParquetFileFormatProperties() {
+        super(TFileFormatType.FORMAT_PARQUET);
+    }
+
+    /** Intentionally empty: {@code formatProperties} is neither read nor modified. */
+    @Override
+    public void analyzeFileFormatProperties(Map formatProperties, boolean isRemoveOriginProperty)
+            throws AnalysisException {
+    }
+
+    /** Always returns {@code null}. */
+    @Override
+    public TResultFileSinkOptions toTResultFileSinkOptions() {
+        return null;
+    }
+
+    /** Builds file attributes that carry only a newly created, unpopulated text-params struct. */
+    @Override
+    public TFileAttributes toTFileAttributes() {
+        TFileAttributes attrs = new TFileAttributes();
+        attrs.setTextParams(new TFileTextScanRangeParams());
+        return attrs;
+    }
+
+    public TParquetCompressionType getParquetCompressionType() {
+        return parquetCompressionType;
+    }
+
+    public boolean isParquetDisableDictionary() {
+        return parquetDisableDictionary;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/WalFileFormatProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/WalFileFormatProperties.java
new file mode 100644
index 00000000000000..ba37bcc8534ac4
---
/dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/fileformat/WalFileFormatProperties.java
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.thrift.TFileAttributes;
+import org.apache.doris.thrift.TFileFormatType;
+import org.apache.doris.thrift.TFileTextScanRangeParams;
+import org.apache.doris.thrift.TResultFileSinkOptions;
+
+import java.util.Map;
+
+public class WalFileFormatProperties extends FileFormatProperties {
+    public WalFileFormatProperties() {
+        super(TFileFormatType.FORMAT_WAL);
+    }
+
+    @Override
+    public TResultFileSinkOptions toTResultFileSinkOptions() {
+        return null; // this class produces no sink options
+    }
+
+    @Override
+    public TFileAttributes toTFileAttributes() {
+        // Only a newly created, unpopulated text-params struct is attached.
+        TFileAttributes attrs = new TFileAttributes();
+        attrs.setTextParams(new TFileTextScanRangeParams());
+        return attrs;
+    }
+
+    @Override
+    public void analyzeFileFormatProperties(Map formatProperties, boolean isRemoveOriginProperty)
+            throws AnalysisException { // no-op: formatProperties is neither read nor modified
+    }
+}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/AvroFileFormatPropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/AvroFileFormatPropertiesTest.java new file mode 100644 index 00000000000000..a7fc534e0de5cc --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/AvroFileFormatPropertiesTest.java @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.thrift.TFileFormatType;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+
+public class AvroFileFormatPropertiesTest {
+
+    private AvroFileFormatProperties avroFileFormatProperties;
+
+    @Before
+    public void setUp() {
+        avroFileFormatProperties = new AvroFileFormatProperties();
+    }
+
+    // Avro analysis is a no-op, so even with removal enabled no entry may be consumed,
+    // and the instance must still report the Avro format type.
+    @Test
+    public void testAnalyzeFileFormatProperties() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put("unrelated_key", "value");
+
+        avroFileFormatProperties.analyzeFileFormatProperties(properties, true);
+
+        Assert.assertEquals(1, properties.size());
+        Assert.assertEquals("value", properties.get("unrelated_key"));
+        Assert.assertEquals(TFileFormatType.FORMAT_AVRO, avroFileFormatProperties.getFileFormatType());
+    }
+}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/CsvFileFormatPropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/CsvFileFormatPropertiesTest.java
new file mode 100644
index 00000000000000..a496378b5e57ea
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/CsvFileFormatPropertiesTest.java
@@ -0,0 +1,224 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.thrift.TFileCompressType;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class CsvFileFormatPropertiesTest {
+    // Instance under test; setUp() replaces it with a fresh object before each test.
+    private CsvFileFormatProperties csvFileFormatProperties;
+
+    @Before
+    public void setUp() {
+        csvFileFormatProperties = new CsvFileFormatProperties();
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesValid() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_COLUMN_SEPARATOR, ",");
+        properties.put(CsvFileFormatProperties.PROP_LINE_DELIMITER, "\n");
+        properties.put(CsvFileFormatProperties.PROP_SKIP_LINES, "1");
+
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+
+        Assert.assertEquals(",", csvFileFormatProperties.getColumnSeparator());
+        Assert.assertEquals("\n", csvFileFormatProperties.getLineDelimiter());
+        Assert.assertEquals(1, csvFileFormatProperties.getSkipLines());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesInvalidSeparator() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_COLUMN_SEPARATOR, "");
+
+        Assert.assertThrows(AnalysisException.class, () -> {
+            csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        });
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesInvalidLineDelimiter() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_LINE_DELIMITER, "");
+
+        Assert.assertThrows(AnalysisException.class, () -> {
+            csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        });
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesInvalidEnclose() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_ENCLOSE, "invalid");
+
+        Assert.assertThrows(AnalysisException.class, () -> {
+            csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        });
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesValidEnclose() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_ENCLOSE, "\"");
+
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals((byte) '"', csvFileFormatProperties.getEnclose());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesSkipLinesNegative() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_SKIP_LINES, "-1");
+
+        Assert.assertThrows(AnalysisException.class, () -> {
+            csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        });
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesSkipLinesLargeValue() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_SKIP_LINES, "1000");
+
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals(1000, csvFileFormatProperties.getSkipLines());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesTrimDoubleQuotesTrue() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_TRIM_DOUBLE_QUOTES, "true");
+
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertTrue(csvFileFormatProperties.isTrimDoubleQuotes());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesTrimDoubleQuotesFalse() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_TRIM_DOUBLE_QUOTES, "false");
+
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertFalse(csvFileFormatProperties.isTrimDoubleQuotes());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesInvalidCompressType() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_COMPRESS_TYPE, "invalid"); // unrecognised name -> UNKNOWN
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals(TFileCompressType.UNKNOWN, csvFileFormatProperties.getCompressionType());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesValidCompressType() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_COMPRESS_TYPE, "gz");
+
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals(TFileCompressType.GZ, csvFileFormatProperties.getCompressionType());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesEmptyCsvSchema() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_CSV_SCHEMA, "");
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true); // smoke check: must not throw
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesValidEncloseMultipleCharacters() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_ENCLOSE, "\"\"");
+
+        Assert.assertThrows(AnalysisException.class, () -> { // rejected, despite "Valid" in the test name
+            csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        });
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesValidEncloseEmpty() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_ENCLOSE, "");
+
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals(0, csvFileFormatProperties.getEnclose());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesSkipLinesAsString() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_SKIP_LINES, "abc");
+
+        Assert.assertThrows(NumberFormatException.class, () -> { // raw parse error, not AnalysisException
+            csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        });
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesValidColumnSeparator() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_COLUMN_SEPARATOR, ";");
+
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals(";", csvFileFormatProperties.getColumnSeparator());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesLineDelimiterAsString() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_LINE_DELIMITER, "abc");
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true); // smoke check: must not throw
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesValidLineDelimiter() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_LINE_DELIMITER, "\r\n");
+
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals("\r\n", csvFileFormatProperties.getLineDelimiter());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesValidTrimDoubleQuotes() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_TRIM_DOUBLE_QUOTES, "true");
+
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertTrue(csvFileFormatProperties.isTrimDoubleQuotes());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesInvalidTrimDoubleQuotes() {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(CsvFileFormatProperties.PROP_TRIM_DOUBLE_QUOTES, "invalid"); // not a boolean literal
+
+        csvFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertFalse(csvFileFormatProperties.isTrimDoubleQuotes());
+    }
+}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/FileFormatPropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/FileFormatPropertiesTest.java
new file mode 100644
index 00000000000000..74d8d0db2ad19b
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/FileFormatPropertiesTest.java
@@ -0,0 +1,34 @@
+// 
Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Checks that {@code FileFormatProperties.createFileFormatProperties} rejects an unknown format name. */
+public class FileFormatPropertiesTest {
+
+    @Test
+    public void testCreateFileFormatPropertiesInvalidFormat() {
+        Assert.assertThrows(
+                AnalysisException.class,
+                () -> FileFormatProperties.createFileFormatProperties("invalid_format"));
+    }
+}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/JsonFileFormatPropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/JsonFileFormatPropertiesTest.java
new file mode 100644
index 00000000000000..f614d3223866f1
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/JsonFileFormatPropertiesTest.java
@@ -0,0 +1,199 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class JsonFileFormatPropertiesTest {
+    // Instance under test; setUp() replaces it with a fresh object before each test.
+    private JsonFileFormatProperties jsonFileFormatProperties;
+
+    @Before
+    public void setUp() {
+        jsonFileFormatProperties = new JsonFileFormatProperties();
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesEmpty() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        // With nothing supplied, string properties are expected to be empty and every flag false.
+        Assert.assertEquals("", jsonFileFormatProperties.getJsonRoot());
+        Assert.assertEquals("", jsonFileFormatProperties.getJsonPaths());
+        Assert.assertFalse(jsonFileFormatProperties.isStripOuterArray());
+        Assert.assertFalse(jsonFileFormatProperties.isReadJsonByLine());
+        Assert.assertFalse(jsonFileFormatProperties.isNumAsString());
+        Assert.assertFalse(jsonFileFormatProperties.isFuzzyParse());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesValidJsonRoot() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_JSON_ROOT, "data.items");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals("data.items", jsonFileFormatProperties.getJsonRoot());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesValidJsonPaths() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_JSON_PATHS,
+                "[\"$.name\", \"$.age\", \"$.city\"]");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals("[\"$.name\", \"$.age\", \"$.city\"]", jsonFileFormatProperties.getJsonPaths());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesStripOuterArrayTrue() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_STRIP_OUTER_ARRAY, "true");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertTrue(jsonFileFormatProperties.isStripOuterArray());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesStripOuterArrayFalse() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_STRIP_OUTER_ARRAY, "false");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertFalse(jsonFileFormatProperties.isStripOuterArray());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesReadJsonByLineTrue() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_READ_JSON_BY_LINE, "true");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertTrue(jsonFileFormatProperties.isReadJsonByLine());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesReadJsonByLineFalse() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_READ_JSON_BY_LINE, "false");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertFalse(jsonFileFormatProperties.isReadJsonByLine());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesNumAsStringTrue() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_NUM_AS_STRING, "true");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertTrue(jsonFileFormatProperties.isNumAsString());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesNumAsStringFalse() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_NUM_AS_STRING, "false");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertFalse(jsonFileFormatProperties.isNumAsString());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesFuzzyParseTrue() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_FUZZY_PARSE, "true");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertTrue(jsonFileFormatProperties.isFuzzyParse());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesFuzzyParseFalse() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_FUZZY_PARSE, "false");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertFalse(jsonFileFormatProperties.isFuzzyParse());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesInvalidBooleanValue() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_FUZZY_PARSE, "invalid"); // not a boolean literal
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertFalse(jsonFileFormatProperties.isFuzzyParse());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesAllProperties() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_JSON_ROOT, "data.records");
+        properties.put(JsonFileFormatProperties.PROP_JSON_PATHS, "[\"$.id\", \"$.name\"]");
+        properties.put(JsonFileFormatProperties.PROP_STRIP_OUTER_ARRAY, "true");
+        properties.put(JsonFileFormatProperties.PROP_READ_JSON_BY_LINE, "true");
+        properties.put(JsonFileFormatProperties.PROP_NUM_AS_STRING, "true");
+        properties.put(JsonFileFormatProperties.PROP_FUZZY_PARSE, "true");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+
+        Assert.assertEquals("data.records", jsonFileFormatProperties.getJsonRoot());
+        Assert.assertEquals("[\"$.id\", \"$.name\"]", jsonFileFormatProperties.getJsonPaths());
+        Assert.assertTrue(jsonFileFormatProperties.isStripOuterArray());
+        Assert.assertTrue(jsonFileFormatProperties.isReadJsonByLine());
+        Assert.assertTrue(jsonFileFormatProperties.isNumAsString());
+        Assert.assertTrue(jsonFileFormatProperties.isFuzzyParse());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesSpecialCharactersInJsonRoot() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_JSON_ROOT, "data.special@#$%^&*()");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals("data.special@#$%^&*()", jsonFileFormatProperties.getJsonRoot());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesComplexJsonPaths() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_JSON_PATHS,
+                "[\"$.deeply.nested[0].array[*].field\", \"$.complex.path[?(@.type=='value')]\"]");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals("[\"$.deeply.nested[0].array[*].field\", \"$.complex.path[?(@.type=='value')]\"]",
+                jsonFileFormatProperties.getJsonPaths());
+    }
+
+    @Test
+    public void testAnalyzeFileFormatPropertiesEmptyJsonPaths() throws AnalysisException {
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JsonFileFormatProperties.PROP_JSON_PATHS, "");
+
+        jsonFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals("", jsonFileFormatProperties.getJsonPaths());
+    }
+}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/OrcFileFormatPropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/OrcFileFormatPropertiesTest.java
new file mode 100644
index 00000000000000..2db57de674c42c
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/OrcFileFormatPropertiesTest.java
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.thrift.TFileCompressType;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class OrcFileFormatPropertiesTest {
+    // Instance under test; setUp() replaces it with a fresh object before each test.
+    private OrcFileFormatProperties orcFileFormatProperties;
+
+    @Before
+    public void setUp() {
+        orcFileFormatProperties = new OrcFileFormatProperties();
+    }
+
+    @Test
+    public void testAnalyzeFileFormatProperties() {
+        Map<String, String> properties = new HashMap<>();
+        // Nothing is supplied, so the ORC compression type is expected to come back as ZLIB.
+        orcFileFormatProperties.analyzeFileFormatProperties(properties, true);
+        Assert.assertEquals(TFileCompressType.ZLIB, orcFileFormatProperties.getOrcCompressionType());
+    }
+}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java
new file mode 100644
index 00000000000000..17b99dd7065c91
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.apache.doris.thrift.TParquetCompressionType;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class ParquetFileFormatPropertiesTest {
+    // Instance under test; setUp() replaces it with a fresh object before each test.
+    private ParquetFileFormatProperties parquetFileFormatProperties;
+
+    @Before
+    public void setUp() {
+        parquetFileFormatProperties = new ParquetFileFormatProperties();
+    }
+
+    @Test
+    public void testAnalyzeFileFormatProperties() {
+        Map<String, String> properties = new HashMap<>();
+        // Nothing is supplied: expect SNAPPY compression and the dictionary not disabled.
+        parquetFileFormatProperties.analyzeFileFormatProperties(properties, true);
+
+        Assert.assertEquals(TParquetCompressionType.SNAPPY, parquetFileFormatProperties.getParquetCompressionType());
+        Assert.assertFalse(parquetFileFormatProperties.isParquetDisableDictionary());
+    }
+}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/WalFileFormatPropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/WalFileFormatPropertiesTest.java
new file mode 100644
index 00000000000000..d94b49aca978f4
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/WalFileFormatPropertiesTest.java
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.fileformat;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class WalFileFormatPropertiesTest {
+    // Instance under test; setUp() replaces it with a fresh object before each test.
+    private WalFileFormatProperties walFileFormatProperties;
+
+    @Before
+    public void setUp() {
+        walFileFormatProperties = new WalFileFormatProperties();
+    }
+
+    @Test
+    public void testAnalyzeFileFormatProperties() {
+        Map<String, String> properties = new HashMap<>();
+        // No assertion on purpose: analyzing an empty property map only has to complete without throwing.
+        walFileFormatProperties.analyzeFileFormatProperties(properties, true);
+    }
+}