Skip to content

Commit 5b9ea50

Browse files
committed
MIME type validation for dataset parts with type DB
- Updated `validateFile` method to handle DB-specific MIME type checks. - Enhanced `DatasetServiceImpl` logic to differentiate validation for DB and non-DB dataset parts. - Added integration tests for invalid MIME types for DB dataset parts in `createDatasetPart`, `updateDataset`, and `replaceDatasetPart`.
1 parent be36cd3 commit 5b9ea50

File tree

3 files changed

+248
-10
lines changed

3 files changed

+248
-10
lines changed

dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceIntegrationTest.kt

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ class DatasetServiceIntegrationTest() : CsmTestBase() {
8989
val CONNECTED_DEFAULT_USER = "test.user@cosmotech.com"
9090
val EMPTY_SOURCE_FILE_NAME = "emptyfile.csv"
9191
val CUSTOMER_SOURCE_FILE_NAME = "customers.csv"
92+
val CUSTOMER_ZIPPED_SOURCE_FILE_NAME = "customers.zip"
9293
val CUSTOMER_50K_SOURCE_FILE_NAME = "customers_50K.csv"
9394
val CUSTOMERS_WITH_QUOTES_SOURCE_FILE_NAME = "customerswithquotes.csv"
9495
val CUSTOMERS_WITH_DOUBLE_QUOTES_SOURCE_FILE_NAME = "customerswithdoublequotes.csv"
@@ -1567,6 +1568,48 @@ class DatasetServiceIntegrationTest() : CsmTestBase() {
15671568
exception.message)
15681569
}
15691570

1571+
/**
 * Verifies that `createDatasetPart` rejects a zip archive uploaded as a DB dataset part.
 *
 * The part is declared with [DatasetPartTypeEnum.DB] so that the DB-specific MIME type check is the
 * one exercised (a `File` part would go through the configurable dataset MIME type list instead).
 * The call is expected to fail with [CsmAccessForbiddenException] and the "not authorized" message.
 */
@Test
fun `test createDatasetPart DB with mimetype unsupported`() {
  val datasetCreateRequest = DatasetCreateRequest(name = "Dataset Test")

  val createDataset =
      datasetApiService.createDataset(
          organizationSaved.id, workspaceSaved.id, datasetCreateRequest, arrayOf())

  assertTrue(createDataset.parts.isEmpty())

  val resourceTestFile =
      resourceLoader.getResource("classpath:/$CUSTOMER_ZIPPED_SOURCE_FILE_NAME").file

  // Read the fixture inside `use` so the stream is closed even if reading fails.
  val fileContent = FileInputStream(resourceTestFile).use { IOUtils.toByteArray(it) }

  val mockMultipartFile =
      MockMultipartFile(
          "file",
          CUSTOMER_ZIPPED_SOURCE_FILE_NAME,
          MediaType.MULTIPART_FORM_DATA_VALUE,
          fileContent)

  val exception =
      assertThrows<CsmAccessForbiddenException> {
        datasetApiService.createDatasetPart(
            organizationSaved.id,
            workspaceSaved.id,
            createDataset.id,
            mockMultipartFile,
            DatasetPartCreateRequest(
                name = "Customer list",
                sourceName = CUSTOMER_ZIPPED_SOURCE_FILE_NAME,
                description = "List of customers",
                tags = mutableListOf("part", "public", "customers"),
                // Must be DB: this test targets the DB-only MIME type restriction.
                type = DatasetPartTypeEnum.DB))
      }
  assertEquals(
      "MIME type application/zip for file $CUSTOMER_ZIPPED_SOURCE_FILE_NAME is not authorized.",
      exception.message)
}
1612+
15701613
@Test
15711614
fun `test createDatasetPart with unallowed file name`() {
15721615

@@ -2386,6 +2429,116 @@ class DatasetServiceIntegrationTest() : CsmTestBase() {
23862429
constructFilePathForDatasetPart(updatedDataset, 0)))
23872430
}
23882431

2432+
/**
 * Verifies that `updateDataset` rejects a zip archive supplied for a new DB dataset part.
 *
 * A dataset is first created with a valid CSV `File` part, then updated with a request whose single
 * part is of type [DatasetPartTypeEnum.DB] and whose uploaded file is a zip archive. The update is
 * expected to fail with [CsmAccessForbiddenException] and the "not authorized" message.
 */
@Test
fun `test updateDataset with DB dataset part and mimetype unsupported`() {

  // Create a Dataset with dataset Part
  val datasetPartName = "Customers list"
  val datasetPartDescription = "List of customers"
  val datasetPartTags = mutableListOf("part", "public", "customers")
  val datasetPartAdditionalData =
      mutableMapOf("part" to "data", "complex" to mutableMapOf("nested" to "data"))
  val datasetPartCreateRequest =
      DatasetPartCreateRequest(
          name = datasetPartName,
          sourceName = CUSTOMER_SOURCE_FILE_NAME,
          description = datasetPartDescription,
          tags = datasetPartTags,
          additionalData = datasetPartAdditionalData,
          type = DatasetPartTypeEnum.File)

  val datasetName = "Customer Dataset"
  val datasetDescription = "Dataset for customers"
  val datasetTags = mutableListOf("dataset", "public", "customers")
  val datasetAdditionalData =
      mutableMapOf("dataset" to "data", "complex" to mutableMapOf("nested" to "data"))
  val datasetCreateRequest =
      DatasetCreateRequest(
          name = datasetName,
          description = datasetDescription,
          tags = datasetTags,
          additionalData = datasetAdditionalData,
          parts = mutableListOf(datasetPartCreateRequest))

  val resourceTestFile = resourceLoader.getResource("classpath:/$CUSTOMER_SOURCE_FILE_NAME").file

  // Read the fixture inside `use` so the stream is closed even if reading fails.
  val fileContent = FileInputStream(resourceTestFile).use { IOUtils.toByteArray(it) }

  val mockMultipartFile =
      MockMultipartFile(
          "files", CUSTOMER_SOURCE_FILE_NAME, MediaType.MULTIPART_FORM_DATA_VALUE, fileContent)

  val createdDataset =
      datasetApiService.createDataset(
          organizationSaved.id,
          workspaceSaved.id,
          datasetCreateRequest,
          arrayOf(mockMultipartFile))

  // Create a DatasetUpdateRequest with new dataset part
  val newDatasetPartName = "Product list"
  val newDatasetPartDescription = "List of Product"
  val newDatasetPartTags = mutableListOf("part", "public", "product")
  val newDatasetPartAdditionalData = mutableMapOf<String, Any>("part" to "new data")
  val newDatasetPartCreateRequest =
      DatasetPartCreateRequest(
          name = newDatasetPartName,
          sourceName = CUSTOMER_ZIPPED_SOURCE_FILE_NAME,
          description = newDatasetPartDescription,
          tags = newDatasetPartTags,
          additionalData = newDatasetPartAdditionalData,
          type = DatasetPartTypeEnum.DB)

  val newDatasetName = "Shop Dataset"
  val newDatasetDescription = "Dataset for shop"
  val newDatasetTags = mutableListOf("dataset", "public", "shop")
  val newDatasetAdditionalData = mutableMapOf<String, Any>("dataset" to "new data")
  val newDatasetSecurity =
      DatasetSecurity(
          default = ROLE_NONE,
          accessControlList =
              mutableListOf(
                  DatasetAccessControl(CONNECTED_ADMIN_USER, ROLE_ADMIN),
                  DatasetAccessControl(CONNECTED_DEFAULT_USER, ROLE_EDITOR)))
  val datasetUpdateRequest =
      DatasetUpdateRequest(
          name = newDatasetName,
          description = newDatasetDescription,
          tags = newDatasetTags,
          additionalData = newDatasetAdditionalData,
          parts = mutableListOf(newDatasetPartCreateRequest),
          security = newDatasetSecurity)

  val newDatasetPartTestFile =
      resourceLoader.getResource("classpath:/$CUSTOMER_ZIPPED_SOURCE_FILE_NAME").file

  // Same closing discipline for the zip fixture used as the (invalid) DB part content.
  val newDatasetPartFileContent =
      FileInputStream(newDatasetPartTestFile).use { IOUtils.toByteArray(it) }

  val newDatasetPartMockMultipartFile =
      MockMultipartFile(
          "files",
          CUSTOMER_ZIPPED_SOURCE_FILE_NAME,
          MediaType.MULTIPART_FORM_DATA_VALUE,
          newDatasetPartFileContent)

  val exception =
      assertThrows<CsmAccessForbiddenException> {
        datasetApiService.updateDataset(
            organizationSaved.id,
            workspaceSaved.id,
            createdDataset.id,
            datasetUpdateRequest,
            arrayOf(newDatasetPartMockMultipartFile))
      }

  assertEquals(
      "MIME type application/zip for file $CUSTOMER_ZIPPED_SOURCE_FILE_NAME is not authorized.",
      exception.message)
}
2541+
23892542
@Test
23902543
fun `test updateDataset with empty body`() {
23912544

@@ -3033,6 +3186,77 @@ class DatasetServiceIntegrationTest() : CsmTestBase() {
30333186
exception.message)
30343187
}
30353188

3189+
/**
 * Verifies that `replaceDatasetPart` rejects a zip archive as the new content of a DB dataset part.
 *
 * A dataset is created with a single [DatasetPartTypeEnum.DB] part backed by a CSV file; replacing
 * that part with a zip archive is expected to fail with [CsmAccessForbiddenException] and the
 * "not authorized" message.
 */
@Test
fun `test replaceDatasetPart with DB dataset part with mimetype unsupported`() {

  // Create a Dataset with a DB dataset part
  val datasetPartCreateRequest =
      DatasetPartCreateRequest(
          name = "Customers list",
          sourceName = CUSTOMER_SOURCE_FILE_NAME,
          description = "List of customers",
          tags = mutableListOf("part", "public", "customers"),
          type = DatasetPartTypeEnum.DB)

  val datasetCreateRequest =
      DatasetCreateRequest(
          name = "Customer Dataset",
          description = "Dataset for customers",
          tags = mutableListOf("dataset", "public", "customers"),
          parts = mutableListOf(datasetPartCreateRequest))

  val resourceTestFile = resourceLoader.getResource("classpath:/$CUSTOMER_SOURCE_FILE_NAME").file

  // Read the fixture inside `use` so the stream is closed even if reading fails.
  val fileContent = FileInputStream(resourceTestFile).use { IOUtils.toByteArray(it) }

  val mockMultipartFile =
      MockMultipartFile(
          "files", CUSTOMER_SOURCE_FILE_NAME, MediaType.MULTIPART_FORM_DATA_VALUE, fileContent)

  val createdDataset =
      datasetApiService.createDataset(
          organizationSaved.id,
          workspaceSaved.id,
          datasetCreateRequest,
          arrayOf(mockMultipartFile))

  // Create a DatasetPartUpdateRequest pointing at the zip archive
  val datasetPartUpdateRequest =
      DatasetPartUpdateRequest(
          sourceName = CUSTOMER_ZIPPED_SOURCE_FILE_NAME,
          description = "Dataset for shop",
          tags = mutableListOf("dataset", "public", "shop"))

  val wrongTypeTestFile =
      resourceLoader.getResource("classpath:/$CUSTOMER_ZIPPED_SOURCE_FILE_NAME").file

  // Same closing discipline for the zip fixture used as the (invalid) replacement content.
  val wrongTypeFileContent = FileInputStream(wrongTypeTestFile).use { IOUtils.toByteArray(it) }

  val wrongTypeMockMultipartFile =
      MockMultipartFile(
          "files",
          CUSTOMER_ZIPPED_SOURCE_FILE_NAME,
          MediaType.MULTIPART_FORM_DATA_VALUE,
          wrongTypeFileContent)

  val exception =
      assertThrows<CsmAccessForbiddenException> {
        datasetApiService.replaceDatasetPart(
            organizationSaved.id,
            workspaceSaved.id,
            createdDataset.id,
            createdDataset.parts[0].id,
            wrongTypeMockMultipartFile,
            datasetPartUpdateRequest)
      }
  assertEquals(
      "MIME type application/zip for file $CUSTOMER_ZIPPED_SOURCE_FILE_NAME is not authorized.",
      exception.message)
}
3259+
30363260
@Test
30373261
fun `test replaceDatasetPart with File dataset part with unallowed file name`() {
30383262

961 Bytes
Binary file not shown.

dataset/src/main/kotlin/com/cosmotech/dataset/service/DatasetServiceImpl.kt

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -847,7 +847,6 @@ class DatasetServiceImpl(
847847
datasetPartUpdateRequest: DatasetPartUpdateRequest?
848848
): DatasetPart {
849849
val dataset = getVerifiedDataset(organizationId, workspaceId, datasetId, PERMISSION_WRITE)
850-
validateFile(file)
851850
val datasetPart =
852851
datasetPartRepository
853852
.findBy(organizationId, workspaceId, datasetId, datasetPartId)
@@ -857,6 +856,7 @@ class DatasetServiceImpl(
857856
"workspace $workspaceId and dataset $datasetId")
858857
}
859858

859+
validateFile(datasetPart.type == DatasetPartTypeEnum.DB, file)
860860
val now = Instant.now().toEpochMilli()
861861
val userId = getCurrentAccountIdentifier(csmPlatformProperties)
862862
val editInfo = DatasetEditInfo(timestamp = now, userId = userId)
@@ -977,21 +977,25 @@ class DatasetServiceImpl(
977977
"You must upload a file with the same name as the Dataset Part sourceName. " +
978978
"You provided ${datasetPartCreateRequest.sourceName} and ${file.originalFilename} instead."
979979
}
980-
981-
validateFile(file)
980+
val isDBFile = datasetPartCreateRequest.type == DatasetPartTypeEnum.DB
981+
validateFile(isDBFile, file)
982982
}
983983

984-
private fun validateFile(file: MultipartFile) {
984+
/**
 * Validates an uploaded file before it is attached to a dataset part.
 *
 * The file name must be non-blank, must not contain `..` and must not start with `/`; otherwise an
 * [IllegalArgumentException] is raised by `require`. The file content is then handed to
 * `resourceScanner.scanMimeTypes` together with the list of MIME types accepted for this kind of
 * part.
 *
 * @param isDBFile `true` when the file backs a DB dataset part; only `text/csv` is accepted in that
 *   case. Otherwise the MIME types configured under
 *   `csmPlatformProperties.upload.authorizedMimeTypes.datasets` are used.
 * @param file the uploaded multipart file to check.
 */
private fun validateFile(isDBFile: Boolean, file: MultipartFile) {
  val originalFilename = file.originalFilename

  require(!originalFilename.isNullOrBlank()) { "File name must not be null or blank" }
  require(!(originalFilename.contains("..") || originalFilename.startsWith("/"))) {
    "Invalid filename: '${originalFilename}'. File name should neither contains '..' nor starts by '/'."
  }

  // DB parts are restricted to CSV; every other part type follows the platform configuration.
  val authorizedMimeTypes =
      if (isDBFile) listOf("text/csv")
      else csmPlatformProperties.upload.authorizedMimeTypes.datasets

  resourceScanner.scanMimeTypes(originalFilename, file.inputStream, authorizedMimeTypes)
}
9961000

9971001
private fun validDatasetCreateRequest(
@@ -1016,7 +1020,12 @@ class DatasetServiceImpl(
10161020
"Multipart file names: ${files.map { it.originalFilename }}. " +
10171021
"Dataset parts source names: ${datasetCreateRequest.parts?.map { it.sourceName }}."
10181022
}
1019-
files.forEach { file -> validateFile(file) }
1023+
val parts = datasetCreateRequest.parts
1024+
files.forEach { file ->
1025+
val isDBFile =
1026+
parts?.find { it.sourceName == file.originalFilename }?.type == DatasetPartTypeEnum.DB
1027+
validateFile(isDBFile, file)
1028+
}
10201029
}
10211030

10221031
private fun validDatasetUpdateRequest(
@@ -1047,7 +1056,12 @@ class DatasetServiceImpl(
10471056
"Dataset parts source names: ${datasetUpdateRequest.parts?.map { it.sourceName } ?: emptyList()}."
10481057
}
10491058

1050-
files.forEach { file -> validateFile(file) }
1059+
val parts = datasetUpdateRequest.parts
1060+
files.forEach { file ->
1061+
val isDBFile =
1062+
parts?.find { it.sourceName == file.originalFilename }?.type == DatasetPartTypeEnum.DB
1063+
validateFile(isDBFile, file)
1064+
}
10511065
}
10521066
}
10531067

0 commit comments

Comments
 (0)