Skip to content

Commit

Permalink
FINERACT-1794: Address improvement in file upload APIs
Browse files Browse the repository at this point in the history
  • Loading branch information
vidakovic committed Nov 22, 2022
1 parent 27489a6 commit 90f854b
Show file tree
Hide file tree
Showing 13 changed files with 389 additions and 13 deletions.
32 changes: 32 additions & 0 deletions buildSrc/src/main/groovy/org.apache.fineract.dependencies.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,38 @@ dependencyManagement {
dependency 'com.github.spullara.mustache.java:compiler:0.9.10'
dependency 'com.jayway.jsonpath:json-path:2.7.0'
dependency 'org.apache.tika:tika-core:2.4.1'
dependency ('org.apache.tika:tika-parser-microsoft-module:2.6.0') {
exclude 'org.bouncycastle:bcprov-jdk15on'
exclude 'org.bouncycastle:bcmail-jdk15on'
exclude 'commons-logging:commons-logging'
exclude 'org.apache.logging.log4j:log4j-api'
exclude 'org.slf4j:slf4j-api'
exclude 'commons-io:commons-io'
exclude 'commons-codec:commons-codec'
exclude 'org.apache.commons:commons-compress'
exclude 'org.apache.commons:commons-lang3'
exclude 'org.apache.poi:poi'
exclude 'org.apache.poi:poi-scratchpad'
exclude 'org.glassfish.jaxb:jaxb-runtime'
exclude 'org.apache.commons:commons-compress'
exclude 'xml-apis:xml-apis'
}
dependency ('org.apache.tika:tika-parser-miscoffice-module:2.6.0') {
exclude 'org.bouncycastle:bcprov-jdk15on'
exclude 'org.bouncycastle:bcmail-jdk15on'
exclude 'commons-logging:commons-logging'
exclude 'org.apache.logging.log4j:log4j-api'
exclude 'org.slf4j:slf4j-api'
exclude 'commons-io:commons-io'
exclude 'commons-codec:commons-codec'
exclude 'org.apache.commons:commons-compress'
exclude 'org.apache.commons:commons-lang3'
exclude 'org.apache.poi:poi'
exclude 'org.apache.poi:poi-scratchpad'
exclude 'org.glassfish.jaxb:jaxb-runtime'
exclude 'org.apache.commons:commons-compress'
exclude 'xml-apis:xml-apis'
}
dependency 'org.apache.httpcomponents:httpclient:4.5.13'
dependency 'jakarta.management.j2ee:jakarta.management.j2ee-api:1.1.4'
dependency 'jakarta.jms:jakarta.jms-api:2.0.3'
Expand Down
2 changes: 2 additions & 0 deletions fineract-provider/dependencies.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ dependencies {
'org.apache.poi:poi',
'org.apache.poi:poi-ooxml',
'org.apache.tika:tika-core',
'org.apache.tika:tika-parser-microsoft-module',
'org.apache.tika:tika-parser-miscoffice-module',

'org.liquibase:liquibase-core',

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.fineract.infrastructure.bulkimport.service;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
Expand Down Expand Up @@ -92,7 +93,7 @@ public Long importWorkbook(String entity, InputStream inputStream, FormDataConte
IOUtils.copy(inputStream, baos);
final byte[] bytes = baos.toByteArray();
InputStream clonedInputStream = new ByteArrayInputStream(bytes);
InputStream clonedInputStreamWorkbook = new ByteArrayInputStream(bytes);
final BufferedInputStream bis = new BufferedInputStream(new ByteArrayInputStream(bytes));
final Tika tika = new Tika();
final TikaInputStream tikaInputStream = TikaInputStream.get(clonedInputStream);
final String fileType = tika.detect(tikaInputStream);
Expand All @@ -104,7 +105,7 @@ public Long importWorkbook(String entity, InputStream inputStream, FormDataConte
"Uploaded file extension is not recognized.");

}
Workbook workbook = new HSSFWorkbook(clonedInputStreamWorkbook);
Workbook workbook = new HSSFWorkbook(clonedInputStream);
GlobalEntityType entityType = null;
int primaryColumn = 0;
if (entity.trim().equalsIgnoreCase(GlobalEntityType.CLIENTS_PERSON.toString())) {
Expand Down Expand Up @@ -169,7 +170,7 @@ public Long importWorkbook(String entity, InputStream inputStream, FormDataConte
throw new GeneralPlatformDomainRuleException("error.msg.unable.to.find.resource", "Unable to find requested resource");

}
return publishEvent(primaryColumn, fileDetail, clonedInputStreamWorkbook, entityType, workbook, locale, dateFormat);
return publishEvent(primaryColumn, fileDetail, bis, entityType, workbook, locale, dateFormat);
}
throw new GeneralPlatformDomainRuleException("error.msg.null", "One or more of the given parameters not found");
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.fineract.infrastructure.documentmanagement.contentrepository;

import java.io.BufferedInputStream;

public interface ContentPathSanitizer {

String sanitize(String path);

String sanitize(String path, BufferedInputStream is);
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,20 @@ public class ContentRepositoryFactory {
private final ApplicationContext applicationContext;
private final ExternalServicesPropertiesReadPlatformService externalServicesReadPlatformService;

private final FileSystemContentPathSanitizer contentPathSanitizer;

public ContentRepository getRepository() {
final ConfigurationDomainService configurationDomainServiceJpa = this.applicationContext.getBean("configurationDomainServiceJpa",
ConfigurationDomainService.class);
if (configurationDomainServiceJpa.isAmazonS3Enabled()) {
return createS3DocumentStore();
}
return new FileSystemContentRepository();
return new FileSystemContentRepository(contentPathSanitizer);
}

public ContentRepository getRepository(final StorageType documentStoreType) {
if (documentStoreType == StorageType.FILE_SYSTEM) {
return new FileSystemContentRepository();
return new FileSystemContentRepository(contentPathSanitizer);
}
return createS3DocumentStore();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.fineract.infrastructure.documentmanagement.contentrepository;

import java.io.BufferedInputStream;
import java.nio.file.Path;
import java.util.List;
import java.util.regex.Pattern;
import javax.annotation.PostConstruct;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.fineract.infrastructure.core.service.ThreadLocalContextUtil;
import org.apache.fineract.infrastructure.documentmanagement.exception.ContentManagementException;
import org.apache.tika.Tika;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

@Slf4j
@RequiredArgsConstructor
@Component
public class FileSystemContentPathSanitizer implements ContentPathSanitizer {

private static Pattern OVERWRITE_SIBLING_IMAGE = Pattern.compile(".*\\.\\./+[0-9]+/+.*");

@Value("${fineract.content.regex-whitelist-enabled}")
private boolean isRegexWhitelistEnabled;
@Value("${fineract.content.regex-whitelist}")
private List<String> regexWhitelist;
@Value("${fineract.content.mime-whitelist-enabled}")
private boolean isMimeWhitelistEnabled;
@Value("${fineract.content.mime-whitelist}")
private List<String> mimeWhitelist;
private List<Pattern> regexWhitelistPatterns;

@PostConstruct
public void init() {
regexWhitelistPatterns = regexWhitelist.stream().map(Pattern::compile).toList();
}

@Override
public String sanitize(String path) {
return sanitize(path, null);
}

@Override
public String sanitize(String path, BufferedInputStream is) {
try {
if (OVERWRITE_SIBLING_IMAGE.matcher(path).matches()) {
throw new RuntimeException(String.format("Trying to overwrite another resource's image: %s", path));
}

String sanitizedPath = Path.of(path).normalize().toString();

String fileName = FilenameUtils.getName(sanitizedPath).toLowerCase();

if (log.isDebugEnabled()) {
log.debug("Path: {} -> {} ({})", path, sanitizedPath, fileName);
}

if (isRegexWhitelistEnabled) {
boolean matches = regexWhitelistPatterns.stream().anyMatch(p -> p.matcher(fileName).matches());

if (!matches) {
throw new RuntimeException(String.format("File name not allowed: %s", fileName));
}
}

if (is != null && isMimeWhitelistEnabled) {
Tika tika = new Tika();
String extensionMimeType = tika.detect(fileName);

if (StringUtils.isEmpty(extensionMimeType)) {
throw new RuntimeException(String.format("Could not detect mime type for filename %s!", fileName));
}

if (!mimeWhitelist.contains(extensionMimeType)) {
throw new RuntimeException(
String.format("Detected mime type %s for filename %s not allowed!", extensionMimeType, fileName));
}

String contentMimeType = detectContentMimeType(is);

if (StringUtils.isEmpty(contentMimeType)) {
throw new RuntimeException(String.format("Could not detect content mime type for %s!", fileName));
}

if (!mimeWhitelist.contains(contentMimeType)) {
throw new RuntimeException(
String.format("Detected content mime type %s for %s not allowed!", contentMimeType, fileName));
}

if (!contentMimeType.equalsIgnoreCase(extensionMimeType)) {
throw new RuntimeException(String.format("Detected filename (%s) and content (%s) mime type do not match!",
extensionMimeType, contentMimeType));
}
}

Path target = Path.of(sanitizedPath);
Path rootFolder = Path.of(FileSystemContentRepository.FINERACT_BASE_DIR,
ThreadLocalContextUtil.getTenant().getName().replaceAll(" ", "").trim());

if (!target.startsWith(rootFolder)) {
throw new RuntimeException(String.format("Path traversal attempt: %s (%s)", target, rootFolder));
}

return sanitizedPath;
} catch (Exception e) {
throw new ContentManagementException(path, e.getMessage(), e);
}
}

private String detectContentMimeType(BufferedInputStream bis) throws Exception {
TikaInputStream tis = TikaInputStream.get(bis);
AutoDetectParser parser = new AutoDetectParser();
BodyContentHandler handler = new BodyContentHandler(-1);
Metadata metadata = new Metadata();
parser.parse(tis, handler, metadata);

return metadata.get("Content-Type");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.fineract.infrastructure.documentmanagement.contentrepository;

import com.google.common.io.Files;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
Expand All @@ -42,6 +43,12 @@ public class FileSystemContentRepository implements ContentRepository {

public static final String FINERACT_BASE_DIR = System.getProperty("user.home") + File.separator + ".fineract";

private final FileSystemContentPathSanitizer pathSanitizer;

public FileSystemContentRepository(final FileSystemContentPathSanitizer pathSanitizer) {
this.pathSanitizer = pathSanitizer;
}

@Override
public String saveFile(final InputStream uploadedInputStream, final DocumentCommand documentCommand) {
final String fileName = documentCommand.getFileName();
Expand Down Expand Up @@ -88,23 +95,29 @@ public void deleteFile(final String documentPath) {
}

private void deleteFileInternal(final String documentPath) {
final File fileToBeDeleted = new File(documentPath);
String path = pathSanitizer.sanitize(documentPath);

final File fileToBeDeleted = new File(path);
final boolean fileDeleted = fileToBeDeleted.delete();
if (!fileDeleted) {
// no need to throw an Error, what's a caller going to do about it, so simply log a warning
LOG.warn("Unable to delete file {}", documentPath);
LOG.warn("Unable to delete file {}", path);
}
}

@Override
public FileData fetchFile(final DocumentData documentData) {
final File file = new File(documentData.fileLocation());
return new FileData(Files.asByteSource(file), documentData.fileName(), documentData.contentType());
String path = pathSanitizer.sanitize(documentData.fileLocation());

final File file = new File(path);
return new FileData(Files.asByteSource(file), file.getName(), documentData.contentType());
}

@Override
public FileData fetchImage(final ImageData imageData) {
final File file = new File(imageData.location());
String path = pathSanitizer.sanitize(imageData.location());

final File file = new File(path);
return new FileData(Files.asByteSource(file), imageData.getEntityDisplayName(), imageData.contentType().getValue());
}

Expand Down Expand Up @@ -139,9 +152,10 @@ private void makeDirectories(final String uploadDocumentLocation) throws IOExcep
}

private void writeFileToFileSystem(final String fileName, final InputStream uploadedInputStream, final String fileLocation) {
try {
makeDirectories(fileLocation);
FileUtils.copyInputStreamToFile(uploadedInputStream, new File(fileLocation)); // NOSONAR
try (BufferedInputStream bis = new BufferedInputStream(uploadedInputStream)) {
String sanitizedPath = pathSanitizer.sanitize(fileLocation, bis);
makeDirectories(sanitizedPath);
FileUtils.copyInputStreamToFile(bis, new File(sanitizedPath)); // NOSONAR
} catch (final IOException ioException) {
LOG.warn("writeFileToFileSystem() IOException (logged because cause is not propagated in ContentManagementException)",
ioException);
Expand Down
5 changes: 5 additions & 0 deletions fineract-provider/src/main/resources/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ fineract.mode.batch-manager-enabled=${FINERACT_MODE_BATCH_MANAGER_ENABLED:true}
fineract.correlation.enabled=${FINERACT_LOGGING_HTTP_CORRELATION_ID_ENABLED:false}
fineract.correlation.header-name=${FINERACT_LOGGING_HTTP_CORRELATION_ID_HEADER_NAME:X-Correlation-ID}

fineract.content.regex-whitelist-enabled=${FINERACT_CONTENT_REGEX_WHITELIST_ENABLED:true}
fineract.content.regex-whitelist=${FINERACT_CONTENT_REGEX_WHITELIST:.*\\.pdf$,.*\\.doc,.*\\.docx,.*\\.xls,.*\\.xlsx,.*\\.jpg,.*\\.jpeg,.*\\.png}
fineract.content.mime-whitelist-enabled=${FINERACT_CONTENT_MIME_WHITELIST_ENABLED:true}
fineract.content.mime-whitelist=${FINERACT_CONTENT_MIME_WHITELIST:application/pdf,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,image/jpeg,image/png}

# Logging pattern for the console
logging.pattern.console=${CONSOLE_LOG_PATTERN:%clr(%d{yyyy-MM-dd HH:mm:ss.SSS}){faint} %clr(${LOG_LEVEL_PATTERN:-%5p}) %clr(${PID:- }){magenta} %clr(%replace([%X{correlationId}]){'\\[\\]', ''}) %clr(---){faint} %clr([%15.15t]){faint} %clr(%-40.40logger{39}){cyan} %clr(:){faint} %m%n${LOG_EXCEPTION_CONVERSION_WORD:%wEx}}

Expand Down
11 changes: 11 additions & 0 deletions fineract-provider/src/test/resources/application-test.properties
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,17 @@ fineract.mode.read-enabled=true
fineract.mode.write-enabled=true
fineract.mode.batch-enabled=true

fineract.content.regex-whitelist-enabled=true
fineract.content.regex-whitelist=.*\\.pdf$,.*\\.doc,.*\\.docx,.*\\.xls,.*\\.xlsx,.*\\.jpg,.*\\.jpeg,.*\\.png
fineract.content.mime-whitelist-enabled=true
fineract.content.mime-whitelist=application/pdf,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,image/jpeg,image/png
fineract.content.filesystem.enabled=true
fineract.content.filesystem.rootFolder=${user.home}/.fineract
fineract.content.s3.enabled=false
fineract.content.s3.bucketName=
fineract.content.s3.accessName=
fineract.content.s3.secretName=

management.health.jms.enabled=false

# FINERACT 1296
Expand Down
Loading

0 comments on commit 90f854b

Please sign in to comment.