Skip to content

Commit

Permalink
fix(tasks): proper classloader handling and file extension
Browse files (browse the repository at this point in the history)
  • Loading branch information
tchiotludo committed Sep 18, 2023
1 parent 75cc04d commit aa452ce
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions src/main/java/io/kestra/plugin/tika/Parse.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,6 +11,7 @@
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.*;
import lombok.experimental.SuperBuilder;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
Expand All @@ -28,10 +29,12 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.io.*;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -130,9 +133,9 @@ public class Parse extends Task implements RunnableTask<Parse.Output> {
public Parse.Output run(RunContext runContext) throws Exception {
Logger logger = runContext.logger();

TikaConfig config = TikaConfig.getDefaultConfig();
TikaConfig config = new TikaConfig(this.getClass().getClassLoader());

AutoDetectParser parser = new AutoDetectParser();
AutoDetectParser parser = new AutoDetectParser(config);
Metadata metadata = new Metadata();
EmbeddedDocumentExtractor embeddedDocumentExtractor = new EmbeddedDocumentExtractor(
config,
Expand Down Expand Up @@ -199,7 +202,7 @@ public Parse.Output run(RunContext runContext) throws Exception {
.build();

if (this.store) {
Path tempFile = runContext.tempFile();
Path tempFile = runContext.tempFile(".ion");
try (
OutputStream output = new FileOutputStream(tempFile.toFile());
) {
Expand Down Expand Up @@ -245,13 +248,14 @@ public boolean shouldParseEmbedded(Metadata metadata) {
return this.parseEmbedded;
}
@Override
public void parseEmbedded(InputStream stream, ContentHandler handler, Metadata metadata, boolean outputHtml) throws SAXException, IOException {
public void parseEmbedded(InputStream stream, ContentHandler handler, Metadata metadata, boolean outputHtml) throws IOException {
String name = this.fileName(stream, metadata);
String extension = FilenameUtils.getExtension(name);

logger.debug("Extracting file {}", name);

// Upload
Path path = runContext.tempFile();
Path path = runContext.tempFile("." + extension);
//noinspection ResultOfMethodCallIgnored
path.toFile().delete();

Expand Down

0 comments on commit aa452ce

Please sign in to comment.