From 3c9c18c0020ac38095512437b0f326a8527d55c5 Mon Sep 17 00:00:00 2001
From: reivilibre
Date: Thu, 16 Dec 2021 13:56:54 +0000
Subject: [PATCH] Use mmmagic for MIME type sniffing (#61)

---
 package.json          |  2 +-
 src/reporting.js      | 38 ++++++++++++++++++++++++++++++--------
 test/handlers-test.js | 22 +++++++++++++++++++++-
 3 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/package.json b/package.json
index 10293ad..d316ae4 100644
--- a/package.json
+++ b/package.json
@@ -14,9 +14,9 @@
     "cors": "^2.8.4",
     "express": "^4.16.3",
     "express-validation": "^1.0.2",
-    "file-type": "^10.5.0",
     "joi": "^13.3.0",
     "js-yaml": "^3.12.0",
+    "mmmagic": "^0.5.3",
     "rimraf": "^2.6.2",
     "simple-get": "^4.0.0",
     "tunnel": "0.0.6"
diff --git a/src/reporting.js b/src/reporting.js
index 80125a0..fc276e5 100644
--- a/src/reporting.js
+++ b/src/reporting.js
@@ -1,6 +1,6 @@
 /**
-
 Copyright 2018 New Vector Ltd.
+Copyright 2021 The Matrix.org Foundation C.I.C.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -28,7 +28,29 @@ const decryptFile = require('./decrypt-file.js');
 const crypto = require('crypto');
 const { getConfig } = require('./config.js');
 const {createProxyTunnel } = require('./proxy.js');
-const fileType = require('file-type');
+const mmmagic = require('mmmagic');
+
+/**
+ * Async function that determines the file type of a file, using libmagic's
+ * MIME type sniffing.
+ * There WILL be false detections but the quality is usually very good.
+ *
+ * @param {Buffer} fileData The data of the file.
+ *
+ * @returns {Promise} A promise that resolves with a string MIME type.
+ */
+function sniffFileType(fileData) {
+    return new Promise((resolve, reject) => {
+        const magic = new mmmagic.Magic(mmmagic.MAGIC_MIME_TYPE);
+        magic.detect(fileData, function(err, result) {
+            if (err) {
+                reject(err);
+            } else {
+                resolve(result);
+            }
+        });
+    });
+}
 
 // Generate a bas64 SHA 256 hash of the input string
 function base64sha256(s) {
@@ -331,12 +353,12 @@ async function generateReport(console, httpUrl, matrixFile, filePath, tempDir, s
 
         // Further validate the mimetype of the file from the decrypted content
         if (mimetypeArray) {
-            const mimetype = fileType(decryptedFileContents);
+            const mimetype = await sniffFileType(decryptedFileContents);
             if (mimetype === null) {
                 console.info(`Skipping unsupported decrypted file - unknown mimetype [${filePath}]`);
                 return {clean: false, info: 'File type not supported'};
-            } else if (!mimetypeArray.includes(mimetype.mime)) {
-                console.info(`Skipping unsupported decrypted file ${mimetype.mime} [${filePath}]`);
+            } else if (!mimetypeArray.includes(mimetype)) {
+                console.info(`Skipping unsupported decrypted file ${mimetype} [${filePath}]`);
                 return {clean: false, info: 'File type not supported'};
             }
         }
@@ -351,12 +373,12 @@ async function generateReport(console, httpUrl, matrixFile, filePath, tempDir, s
     } else {
         if (mimetypeArray) {
             const fileData = fs.readFileSync(filePath);
-            const mimetype = fileType(fileData);
+            const mimetype = await sniffFileType(fileData);
             if (mimetype === null) {
                 console.info(`Skipping unsupported file - unknown mimetype [${filePath}]`);
                 return {clean: false, info: 'File type not supported'};
-            } else if (!mimetypeArray.includes(mimetype.mime)) {
-                console.info(`Skipping unsupported file type ${mimetype.mime} [${filePath}]`);
+            } else if (!mimetypeArray.includes(mimetype)) {
+                console.info(`Skipping unsupported file type ${mimetype} [${filePath}]`);
                 return {clean: false, info: 'File type not supported'};
             }
         }
diff --git a/test/handlers-test.js b/test/handlers-test.js
index ac1bb6f..f32ae77 100644
--- a/test/handlers-test.js
+++ b/test/handlers-test.js
@@ -100,7 +100,7 @@ describe('handlers', () => {
             });
         });
 
-        it('responds with a scan report if the mimetype is accepted in the configuration file', async () => {
+        it('responds with a scan report if the (binary format) mimetype is accepted in the configuration file', async () => {
             setConfig({
                 scan: {
                     baseUrl: "https://matrix.org",
@@ -119,6 +119,26 @@ describe('handlers', () => {
                     assert(response.body.clean, true);
                 });
         });
+
+        it('responds with a scan report if the (textual format) mimetype is accepted in the configuration file', async () => {
+            setConfig({
+                scan: {
+                    baseUrl: "https://matrix.org",
+                    tempDirectory: "/tmp",
+                    script: "true"
+                },
+                altRemovalCmd: 'rm',
+                acceptedMimeType: ['text/plain']
+            });
+            const app = await createApp();
+            return request(app)
+                .get('/_matrix/media_proxy/unstable/scan/matrix.org/obKqrnKoYPggwCLnvewDUrih')
+                .expect('Content-Type', /json/)
+                .expect(200)
+                .then(response => {
+                    assert(response.body.clean, true);
+                });
+        });
     });
 
     describe('GET /_matrix/media_proxy/unstable/thumbnail/matrix.org/EawFuailhYTuSPSGDGsNFigt?width=100&height=100&method=scale', () => {