diff --git a/.gitignore b/.gitignore index 75849fa..fcb9609 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,4 @@ core.* heapdump.* javacore.* Snap.* +*.hprof diff --git a/build.gradle.kts b/build.gradle.kts index ac50baf..d439902 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -4,8 +4,8 @@ fun properties(key: String) = project.findProperty(key).toString() plugins { id("java") - id("org.jetbrains.kotlin.jvm") version "1.6.10" - id("org.jetbrains.intellij") version "1.4.0" + id("org.jetbrains.kotlin.jvm") version "1.7.21" + id("org.jetbrains.intellij") version "1.10.0" } group = properties("pluginGroup") @@ -17,25 +17,25 @@ repositories { dependencies { //Avro dependencies - implementation("org.apache.avro:avro:1.11.0") + implementation("org.apache.avro:avro:1.11.1") implementation("org.xerial.snappy:snappy-java:1.1.8.4") //Parquet dependencies - implementation("org.apache.parquet:parquet-avro:1.12.2") - implementation("org.apache.parquet:parquet-column:1.12.2") - implementation("org.apache.parquet:parquet-hadoop:1.12.2") - implementation("org.apache.parquet:parquet-format-structures:1.12.2") - implementation("org.apache.hadoop:hadoop-client:3.3.2") + implementation("org.apache.parquet:parquet-avro:1.12.3") + implementation("org.apache.parquet:parquet-column:1.12.3") + implementation("org.apache.parquet:parquet-hadoop:1.12.3") + implementation("org.apache.parquet:parquet-format-structures:1.12.3") + implementation("org.apache.hadoop:hadoop-client:3.3.4") //External dependencies - implementation("com.google.code.gson:gson:2.9.0") + implementation("com.google.code.gson:gson:2.10") implementation("com.google.guava:guava:31.1-jre") - implementation("com.fifesoft:rsyntaxtextarea:3.1.6") - implementation("com.github.wnameless.json:json-flattener:0.13.0") + implementation("com.fifesoft:rsyntaxtextarea:3.3.0") + implementation("com.github.wnameless.json:json-flattener:0.15.1") //Test dependencies - testImplementation("org.junit.jupiter:junit-jupiter-engine:5.8.2") - testImplementation("org.assertj:assertj-core:3.22.0") + testImplementation("org.junit.jupiter:junit-jupiter-engine:5.9.1") + testImplementation("org.assertj:assertj-core:3.23.1") } configurations.implementation { diff --git a/gradle.properties b/gradle.properties index 5475d5a..2da2796 100644 --- a/gradle.properties +++ b/gradle.properties @@ -12,6 +12,6 @@ platformType=IC platformVersion=2021.1.3 javaVersion=11 -gradleVersion=7.4 +gradleVersion=7.6 kotlin.stdlib.default.dependency=false diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 41d9927..943f0cb 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 41dfb87..f398c33 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.6-bin.zip +networkTimeout=10000 zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew index 1b6c787..65dcd68 100755 --- a/gradlew +++ b/gradlew @@ -55,7 +55,7 @@ # Darwin, MinGW, and NonStop. 
# # (3) This script is generated from the Groovy template -# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt # within the Gradle project. # # You can find Gradle at https://github.com/gradle/gradle/. @@ -80,10 +80,10 @@ do esac done -APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit - -APP_NAME="Gradle" +# This is normally unused +# shellcheck disable=SC2034 APP_BASE_NAME=${0##*/} +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' @@ -143,12 +143,16 @@ fi if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then case $MAX_FD in #( max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC3045 MAX_FD=$( ulimit -H -n ) || warn "Could not query maximum file descriptor limit" esac case $MAX_FD in #( '' | soft) :;; #( *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC3045 ulimit -n "$MAX_FD" || warn "Could not set maximum file descriptor limit to $MAX_FD" esac @@ -205,6 +209,12 @@ set -- \ org.gradle.wrapper.GradleWrapperMain \ "$@" +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + # Use "xargs" to parse quoted args. # # With -n1 it outputs one arg per line, with the quotes and backslashes removed. diff --git a/gradlew.bat b/gradlew.bat index 107acd3..93e3f59 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -14,7 +14,7 @@ @rem limitations under the License. @rem -@if "%DEBUG%" == "" @echo off +@if "%DEBUG%"=="" @echo off @rem ########################################################################## @rem @rem Gradle startup script for Windows @@ -25,7 +25,8 @@ if "%OS%"=="Windows_NT" setlocal set DIRNAME=%~dp0 -if "%DIRNAME%" == "" set DIRNAME=. +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused set APP_BASE_NAME=%~n0 set APP_HOME=%DIRNAME% @@ -40,7 +41,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome set JAVA_EXE=java.exe %JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto execute +if %ERRORLEVEL% equ 0 goto execute echo. echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. @@ -75,13 +76,15 @@ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar :end @rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd +if %ERRORLEVEL% equ 0 goto mainEnd :fail rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of rem the _cmd.exe /c_ return code! 
-if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% :mainEnd if "%OS%"=="Windows_NT" endlocal diff --git a/src/main/java/uk/co/hadoopathome/intellij/viewer/FileViewerToolWindow.java b/src/main/java/uk/co/hadoopathome/intellij/viewer/FileViewerToolWindow.java index 4e76fe8..146ed58 100644 --- a/src/main/java/uk/co/hadoopathome/intellij/viewer/FileViewerToolWindow.java +++ b/src/main/java/uk/co/hadoopathome/intellij/viewer/FileViewerToolWindow.java @@ -105,14 +105,6 @@ public synchronized void drop(DropTargetDropEvent evt) { File file = ((List) evt.getTransferable().getTransferData(DataFlavor.javaFileListFlavor)) .get(0); - String fileName = file.getName().toLowerCase(); - if (!fileName.contains("avro") && !fileName.contains("parquet")) { - JOptionPane.showMessageDialog( - null, - String.format( - "File name \"%s\" must contain either \"avro\" or \"parquet\"", fileName)); - return; - } String path = file.getPath(); schemaTextPane.setText(String.format("Processing file %s", path)); LOGGER.info(String.format("Received file %s", path)); @@ -186,10 +178,9 @@ private void populatePanes(File file, int numRecords) { protected Boolean doInBackground() { schemaTextPane.setText(String.format("Processing file %s...", file.getPath())); try { - Reader reader = - currentFile.getName().toLowerCase().contains("avro") - ? new AvroFileReader(currentFile) - : new ParquetFileReader(currentFile); + Reader reader = detectFileType(currentFile); + LOGGER.info( + String.format("Detected file %s as a %s", currentFile, reader.getClass())); List records = reader.getRecords(numRecords); int totalRecords = reader.getNumRecords(); configureDataPanes(records); @@ -216,6 +207,29 @@ protected Boolean doInBackground() { swingWorker.execute(); } + /** + * Identifies the file type of the dropped file by attempting to parse it with both readers - + * either Avro or Parquet. + * + * @param currentFile the file to be parsed + * @return the AvroFileReader or ParquetFileReader, else an exception if the file is not + * recognised by either + */ + private Reader detectFileType(File currentFile) throws IOException { + try { + return new AvroFileReader(currentFile); + } catch (Exception e) { + LOGGER.debug(String.format("File %s is not an Avro file", currentFile)); + } + try { + return new ParquetFileReader(currentFile); + } catch (Exception e) { + LOGGER.debug(String.format("File %s is not a Parquet file", currentFile)); + } + throw new IOException( + String.format("File %s is not recognised as either Parquet or Avro", currentFile)); + } + /** * Populates the raw and table data panes with records and configures the radio buttons. If * invalid JSON is found, the table pane is disabled and no data is loaded into it. 
diff --git a/src/main/java/uk/co/hadoopathome/intellij/viewer/fileformat/AvroFileReader.java b/src/main/java/uk/co/hadoopathome/intellij/viewer/fileformat/AvroFileReader.java index 2eb50ba..b558439 100644 --- a/src/main/java/uk/co/hadoopathome/intellij/viewer/fileformat/AvroFileReader.java +++ b/src/main/java/uk/co/hadoopathome/intellij/viewer/fileformat/AvroFileReader.java @@ -30,10 +30,11 @@ public class AvroFileReader implements Reader { private final File file; private final GenericDatumReader datumReader; - public AvroFileReader(File file) throws OutOfMemoryError { + public AvroFileReader(File file) throws OutOfMemoryError, IOException { this.file = file; GenericDataConfigurer.configureGenericData(); this.datumReader = new GenericDatumReader<>(null, null, GenericData.get()); + getRecords(1); } @Override diff --git a/src/main/java/uk/co/hadoopathome/intellij/viewer/fileformat/ParquetFileReader.java b/src/main/java/uk/co/hadoopathome/intellij/viewer/fileformat/ParquetFileReader.java index c22155c..ba5e435 100644 --- a/src/main/java/uk/co/hadoopathome/intellij/viewer/fileformat/ParquetFileReader.java +++ b/src/main/java/uk/co/hadoopathome/intellij/viewer/fileformat/ParquetFileReader.java @@ -20,11 +20,8 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Path; -import java.time.Instant; import java.util.ArrayList; import java.util.List; -import org.apache.avro.Conversion; -import org.apache.avro.LogicalType; import org.apache.avro.Schema; import org.apache.avro.data.TimeConversions; import org.apache.avro.generic.GenericData; @@ -47,11 +44,12 @@ public class ParquetFileReader implements Reader { private final Path path; private final Configuration conf; - public ParquetFileReader(File file) { + public ParquetFileReader(File file) throws IOException { this.path = file.toPath(); this.conf = new Configuration(); this.conf.set("parquet.avro.readInt96AsFixed", "true"); GenericDataConfigurer.configureGenericData(); + getRecords(1); } @Override @@ -117,7 +115,6 @@ public List getRecords(int numRecords) throws IOException, IllegalArgume * https://stackoverflow.com/a/52041154/729819. 
*/ private GenericRecord deserialize(Schema schema, byte[] data) throws IOException { - GenericData.get().addLogicalTypeConversion(new TimestampMillisConversion()); InputStream is = new ByteArrayInputStream(data); Decoder decoder = DecoderFactory.get().binaryDecoder(is, null); DatumReader reader = new GenericDatumReader<>(schema, schema, GenericData.get()); @@ -133,24 +130,4 @@ private byte[] toByteArray(Schema schema, GenericRecord genericRecord) throws IO encoder.flush(); return baos.toByteArray(); } - - public static class TimestampMillisConversion extends Conversion { - public TimestampMillisConversion() {} - - public Class getConvertedType() { - return String.class; - } - - public String getLogicalTypeName() { - return "timestamp-millis"; - } - - public String fromLong(Long millisFromEpoch, Schema schema, LogicalType type) { - return Instant.ofEpochMilli(millisFromEpoch).toString(); - } - - public Long toLong(String timestamp, Schema schema, LogicalType type) { - return new Long(timestamp); - } - } } diff --git a/src/test/java/uk/co/hadoopathome/intellij/viewer/fileformat/AvroFileReaderTest.java b/src/test/java/uk/co/hadoopathome/intellij/viewer/fileformat/AvroFileReaderTest.java index 7e35ec3..9a7b6c7 100644 --- a/src/test/java/uk/co/hadoopathome/intellij/viewer/fileformat/AvroFileReaderTest.java +++ b/src/test/java/uk/co/hadoopathome/intellij/viewer/fileformat/AvroFileReaderTest.java @@ -15,6 +15,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.fail; import java.io.File; import java.io.IOException; @@ -76,25 +77,28 @@ public void testComplexNesting() throws IOException { @Test @DisplayName("Assert that an Avro file with a decimal LogicalType is correctly parsed") - public void testDecimalLogicalType() throws IOException { - AvroFileReader avroFileReader = readRecords(DECIMAL_LOGICAL_TYPE); - int totalRecords = avroFileReader.getNumRecords(); - assertThat(totalRecords).isEqualTo(1); - List records = avroFileReader.getRecords(100); - assertThat(records).hasSize(1); - String firstRecord = records.get(0); - assertThat(firstRecord).contains("25.190000"); + public void testDecimalLogicalType() { + try { + AvroFileReader avroFileReader = readRecords(DECIMAL_LOGICAL_TYPE); + int totalRecords = avroFileReader.getNumRecords(); + assertThat(totalRecords).isEqualTo(1); + List records = avroFileReader.getRecords(100); + assertThat(records).hasSize(1); + String firstRecord = records.get(0); + assertThat(firstRecord).contains("25.190000"); + } catch (IOException e) { + fail(); + } } @Test @DisplayName("Assert that an invalid Avro file throws an exception") public void testInvalidFile() { File file = new File(getClass().getClassLoader().getResource(INVALID_AVRO_FILE).getFile()); - AvroFileReader avroFileReader = new AvroFileReader(file); - assertThrows(OutOfMemoryError.class, () -> avroFileReader.getRecords(5)); + assertThrows(OutOfMemoryError.class, () -> new AvroFileReader(file)); } - private AvroFileReader readRecords(String fileName) { + private AvroFileReader readRecords(String fileName) throws IOException { File file = new File(getClass().getClassLoader().getResource(fileName).getFile()); return new AvroFileReader(file); } diff --git a/src/test/java/uk/co/hadoopathome/intellij/viewer/fileformat/ParquetFileReaderTest.java b/src/test/java/uk/co/hadoopathome/intellij/viewer/fileformat/ParquetFileReaderTest.java index 89a009e..cb070c3 100644 --- 
a/src/test/java/uk/co/hadoopathome/intellij/viewer/fileformat/ParquetFileReaderTest.java +++ b/src/test/java/uk/co/hadoopathome/intellij/viewer/fileformat/ParquetFileReaderTest.java @@ -83,7 +83,7 @@ public void testList() throws IOException { } @Test - @DisplayName("Assert that a Parquet file with an INT96 column can still be displayed") + @DisplayName("Assert that a Parquet file with an INT96 column can be displayed") public void testInt96File() throws IOException { ParquetFileReader parquetFileReader = readRecords(INT96_PARQUET_FILE); int totalRecords = parquetFileReader.getNumRecords(); @@ -102,7 +102,7 @@ public void testInt96File() throws IOException { } @Test - @DisplayName("Assert that a Parquet file with a LogicalType date column can still be displayed") + @DisplayName("Assert that a Parquet file with a LogicalType date column can be displayed") public void testDateLogicalType() throws IOException { ParquetFileReader parquetFileReader = readRecords(LOGICAL_DATE_PARQUET_FILE); int totalRecords = parquetFileReader.getNumRecords(); @@ -115,8 +115,7 @@ public void testDateLogicalType() throws IOException { } @Test - @DisplayName( - "Assert that a Parquet file with a LogicalType decimal column can still be displayed") + @DisplayName("Assert that a Parquet file with a LogicalType decimal column can be displayed") public void testDecimalLogicalType() throws IOException { ParquetFileReader parquetFileReader = readRecords(LOGICAL_DECIMAL_PARQUET_FILE); int totalRecords = parquetFileReader.getNumRecords(); @@ -127,7 +126,7 @@ public void testDecimalLogicalType() throws IOException { assertThat(firstRecord).contains("{\"name\": \"ben\", \"score\": 1.15}"); } - private ParquetFileReader readRecords(String fileName) { + private ParquetFileReader readRecords(String fileName) throws IOException { File file = new File(getClass().getClassLoader().getResource(fileName).getFile()); return new ParquetFileReader(file); }
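On the Avro side of the Parquet reader, the patch also removes the hand-rolled TimestampMillisConversion and the addLogicalTypeConversion call that ran on every deserialize, while keeping the org.apache.avro.data.TimeConversions import, so the shared GenericData is presumably configured once in GenericDataConfigurer instead. The following is a minimal sketch of that kind of one-off setup using Avro's built-in conversion; it is an assumption about what GenericDataConfigurer might do, not code taken from it.

import org.apache.avro.data.TimeConversions;
import org.apache.avro.generic.GenericData;

public final class TimestampSetupSketch {

  public static void main(String[] args) {
    // Register the built-in timestamp-millis conversion once on the shared GenericData,
    // instead of re-registering a custom Conversion on every deserialize() call.
    GenericData genericData = GenericData.get();
    genericData.addLogicalTypeConversion(new TimeConversions.TimestampMillisConversion());
    // Fields with the timestamp-millis logical type are then materialised as java.time.Instant
    // and display as ISO-8601 timestamps (e.g. 1970-01-01T00:00:01Z) rather than raw epoch longs.
  }
}

Registering the conversion once at start-up avoids mutating the global GenericData on every record batch, which the removed per-call registration in deserialize used to do.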