diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json
index 8235ab10b4..0b78a5bf21 100755
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@@ -59,7 +59,9 @@
"From Braille",
"Parse TLV",
"CSV to JSON",
- "JSON to CSV"
+ "JSON to CSV",
+ "Parse Mime",
+ "Decode Mime Encoded Words"
]
},
{
diff --git a/src/core/lib/ChrEnc.mjs b/src/core/lib/ChrEnc.mjs
index 02b2e9a2b8..e4fa04d192 100644
--- a/src/core/lib/ChrEnc.mjs
+++ b/src/core/lib/ChrEnc.mjs
@@ -56,3 +56,50 @@ export const IO_FORMAT = {
"Simplified Chinese GB18030 (54936)": 54936,
};
+/**
+ * MIME charset names (lowercase) mapped to the numeric code page each one is decoded with; aliases such as "gbk" and "gb2312" share a code page.
+ */
+export const MIME_FORMAT = {
+ "utf-8": 65001,
+ "utf-7": 65000,
+ "unicode": 1200,
+ "ibm500": 500,
+ "ebcdic-cp-us": 37,
+ "windows-874": 874,
+ "shift_jis": 932,
+ "gbk": 936,
+ "gb2312": 936,
+ "ks_c_5601-1987": 949,
+ "big5": 950,
+ "windows-1250": 1250,
+ "windows-1251": 1251,
+ "windows-1252": 1252,
+ "windows-1253": 1253,
+ "windows-1254": 1254,
+ "windows-1255": 1255,
+ "windows-1256": 1256,
+ "windows-1257": 1257,
+ "windows-1258": 1258,
+ "us-ascii": 20127,
+ "koi8-r": 20866,
+ "koi8-u": 21866,
+ "iso-8859-1": 28591,
+ "iso-8859-2": 28592,
+ "iso-8859-3": 28593,
+ "iso-8859-4": 28594,
+ "iso-8859-5": 28595,
+ "iso-8859-6": 28596,
+ "iso-8859-7": 28597,
+ "iso-8859-8": 28598,
+ "iso-8859-9": 28599,
+ "iso-8859-10": 28600,
+ "iso-8859-11": 28601,
+ "iso-8859-13": 28603,
+ "iso-8859-14": 28604,
+ "iso-8859-15": 28605,
+ "iso-8859-16": 28606,
+ "iso-2022": 50222,
+ "x-euc": 51932,
+ "euc-kr": 51949,
+ "gb18030": 54936,
+};
diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs
new file mode 100644
index 0000000000..56785b3732
--- /dev/null
+++ b/src/core/lib/Mime.mjs
@@ -0,0 +1,310 @@
+import OperationError from "../errors/OperationError";
+import cptable from "../vendor/js-codepage/cptable.js";
+import {decodeQuotedPrintable} from "../lib/QuotedPrintable";
+import {MIME_FORMAT} from "../lib/ChrEnc";
+import Utils from "../Utils";
+
+/**
+ * Parses MIME formatted messages into a tree of parts and decodes their headers and bodies.
+ *
+ * @author bwhitn [brian.m.whitney@outlook.com]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+class Mime {
+    /**
+     * Mime Constructor. Parses input and stores the resulting tree of parts in this.mimeObj.
+     *
+     * @param {string} input - raw MIME message text
+     */
+    constructor(input) {
+        this.mimeObj = Mime._parseMime(input);
+    }
+
+    /**
+     * Extracts data from the MIME parts and returns one object per visited part.
+     * Each entry of headerObjects is [outputName, headerField] or [outputName, headerField, parameter].
+     * extractData([["testa", "header", "subheader"], ["testb", "header"]]) returns objects shaped like
+     * {fields: {testa: "somestringornull", testb: "somestringornull"}, header: "somestringornull", body: "somestringornull"}
+     *
+     * @param {string[][]} headerObjects - fields to extract, or a falsy value to extract none
+     * @param {boolean} header - include the raw header text of each part
+     * @param {boolean} body - include the body of each part (null for multipart containers)
+     * @param {boolean} recursive - also visit the parts nested inside multipart bodies
+     * @returns {object[]}
+     * @throws {OperationError} if headerObjects is not an array of 2 or 3 element arrays
+     */
+    extractData(headerObjects, header=true, body=true, recursive=true) {
+        if (headerObjects && !Array.isArray(headerObjects)) {
+            throw new OperationError("Invalid extraction in headers. Not an Array.");
+        }
+        const output = [];
+        Mime.walkMime(this.mimeObj, (mimePart) => {
+            const outObj = {fields: {}};
+            if (body) {
+                const contType = Mime._extractField(mimePart, "content-type");
+                // A part without a Content-Type is plain text (RFC 2045 section 5.2), so
+                // only multipart containers, whose body is a list of parts, get no body.
+                const isMultipart = Boolean(contType) && contType.startsWith("multipart/");
+                outObj.body = isMultipart ? null : mimePart.body;
+            }
+            if (header) {
+                outObj.header = mimePart.rawHeader;
+            }
+            for (const obj of headerObjects || []) {
+                if (!Array.isArray(obj)) {
+                    throw new OperationError("Invalid extraction in headers Object. Not an Array.");
+                }
+                if (obj.length !== 2 && obj.length !== 3) {
+                    throw new OperationError("Invalid extraction in headers. Invalid Array size.");
+                }
+                const [name, field, subfield] = obj;
+                outObj.fields[name] = Mime._extractField(mimePart, field, subfield);
+            }
+            output.push(outObj);
+        }, recursive);
+        return output;
+    }
+
+    /**
+     * Decodes the Mime encoded words found in the raw header text of the parts.
+     * Only the rawHeader property is rewritten.
+     *
+     * @param {boolean} recursive - also process the parts nested inside multipart bodies
+     */
+    decodeHeaderWords(recursive=true) {
+        Mime.walkMime(this.mimeObj, (mimePart) => {
+            if (mimePart.rawHeader) {
+                mimePart.rawHeader = Mime.replaceEncodedWord(mimePart.rawHeader);
+            }
+        }, recursive);
+    }
+
+    /**
+     * Decodes the bodies of the parts in place, as described for decodeMimeMessage.
+     *
+     * @param {boolean} recursive - also process the parts nested inside multipart bodies
+     */
+    decodeMimeObjects(recursive=true) {
+        Mime.walkMime(this.mimeObj, (mimePart) => {
+            Mime.decodeMimeMessage(mimePart);
+        }, recursive);
+    }
+
+    /**
+     * Parses a MIME document into a Mime object of the form {rawHeader, body, header}.
+     * The body of a multipart part is replaced by an array of its parsed parts.
+     *
+     * @param {string} mimeData
+     * @returns {object}
+     * @throws {OperationError} if a multipart part does not declare a boundary
+     */
+    static _parseMime(mimeData) {
+        const mimeObj = Mime._splitParseHead(mimeData);
+        const contType = Mime._extractField(mimeObj, "content-type");
+        if (!mimeObj.body || !contType || !contType.startsWith("multipart/")) {
+            return mimeObj;
+        }
+        const boundary = Mime._extractField(mimeObj, "content-type", "boundary");
+        if (!boundary) {
+            throw new OperationError("Invalid multipart section no boundary");
+        }
+        const sections = Array.from(Mime._splitMultipart(mimeObj.body, boundary), (part) => Mime._parseMime(part));
+        if (sections.length) {
+            mimeObj.body = sections;
+        }
+        return mimeObj;
+    }
+
+    /**
+     * Calls a function on a Mime object and, when recursive, on every part nested
+     * inside a multipart body. The function may modify the object it is given.
+     *
+     * @param {Object} mimeObj
+     * @param {function} method - called with each visited Mime object
+     * @param {boolean} recursive
+     */
+    static walkMime(mimeObj, method, recursive=true) {
+        const contType = Mime._extractField(mimeObj, "content-type");
+        method(mimeObj);
+        if (recursive && Array.isArray(mimeObj.body) && contType && contType.startsWith("multipart/")) {
+            mimeObj.body.forEach((part) => Mime.walkMime(part, method, recursive));
+        }
+    }
+
+    /**
+     * Decodes the body of a single Mime object in place, using the transfer encoding and charset
+     * from its headers. Only string bodies of parts that have a Content-Transfer-Encoding header
+     * are decoded. Text parts that declare no charset are treated as us-ascii.
+     *
+     * @param {Object} mimeObj
+     */
+    static decodeMimeMessage(mimeObj) {
+        const contType = Mime._extractField(mimeObj, "content-type");
+        const contEnc = Mime._extractField(mimeObj, "content-transfer-encoding");
+        let charEnc = Mime._extractField(mimeObj, "content-type", "charset");
+        if (!charEnc && contType && contType.startsWith("text/")) {
+            charEnc = "us-ascii";
+        }
+        if (mimeObj.body && contEnc && typeof mimeObj.body === "string") {
+            mimeObj.body = Mime._decodeMimeData(mimeObj.body, charEnc, contEnc);
+        }
+    }
+
+    /**
+     * Replaces the Mime encoded words in a string with their decoded text.
+     * These take the form of:
+     * input "=?utf-8?Q?Hello_World!?="
+     * output "Hello World!"
+     * The encoding letter may be upper or lower case (RFC 2047 section 2).
+     *
+     * @param {string} input
+     * @returns {string}
+     */
+    static replaceEncodedWord(input) {
+        return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/gi, (word, charEnc, encoding, text) => {
+            if (encoding.toUpperCase() === "B") {
+                return Utils.byteArrayToUtf8(Mime._decodeMimeData(text, charEnc, "base64"));
+            }
+            // In the Q encoding an underscore stands for a space.
+            return Utils.byteArrayToUtf8(Mime._decodeMimeData(text.replace(/_/g, " "), charEnc, "quoted-printable"));
+        });
+    }
+
+    /**
+     * Splits a message at the first blank line into header and body and parses
+     * the header. The returned object holds the raw header text, the undecoded
+     * body (null when there is no blank line) and the parsed header object.
+     *
+     * @param {string} input
+     * @returns {object}
+     */
+    static _splitParseHead(input) {
+        const separator = /(?:\r?\n){2}/.exec(input);
+        if (!separator) {
+            return {rawHeader: input, body: null, header: Mime._parseHeader(input)};
+        }
+        const rawHeader = input.substring(0, separator.index);
+        const body = input.substring(separator.index + separator[0].length);
+        return {rawHeader: rawHeader, body: body, header: Mime._parseHeader(rawHeader)};
+    }
+
+    /**
+     * Parses raw header text into an object keyed by lowercase field name. Each
+     * value is an array with one entry per occurrence of the field, in which line
+     * breaks are replaced by spaces and Mime encoded words are decoded.
+     *
+     * @param {string} input
+     * @returns {object}
+     */
+    static _parseHeader(input) {
+        // A field name is any run of printable ASCII other than ":" (RFC 5322 section 2.2).
+        const sectionRegex = /([!-9;-~]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g;
+        // No prototype, so a field called "constructor" cannot clash with an inherited property.
+        const headerObj = Object.create(null);
+        let section;
+        while ((section = sectionRegex.exec(input))) {
+            const fieldName = section[1].toLowerCase();
+            const fieldValue = Mime.replaceEncodedWord(section[2].replace(/\n|\r/g, " "));
+            if (fieldName in headerObj) {
+                headerObj[fieldName].push(fieldValue);
+            } else {
+                headerObj[fieldName] = [fieldValue];
+            }
+        }
+        return headerObj;
+    }
+
+    /**
+     * Returns decoded MIME data given the character encoding and content encoding.
+     * The input is returned unchanged when neither encoding is recognised.
+     *
+     * @param {string} input
+     * @param {string} charEnc - charset name, looked up case-insensitively in MIME_FORMAT
+     * @param {string} contEnc - "base64" or "quoted-printable"; other values are not decoded
+     * @returns {string|byteArray}
+     * @throws {OperationError} if any of the decoders fails
+     */
+    static _decodeMimeData(input, charEnc, contEnc) {
+        try {
+            let decoded = input;
+            if (contEnc === "base64") {
+                decoded = Utils.convertToByteArray(input, "base64");
+            } else if (contEnc === "quoted-printable") {
+                decoded = decodeQuotedPrintable(input);
+            }
+            const charset = charEnc ? charEnc.toLowerCase() : "";
+            if (Object.prototype.hasOwnProperty.call(MIME_FORMAT, charset)) {
+                decoded = Utils.strToByteArray(cptable.utils.decode(MIME_FORMAT[charset], decoded));
+            }
+            return decoded;
+        } catch (err) {
+            throw new OperationError("Invalid Mime Format");
+        }
+    }
+
+    /**
+     * Looks up a header field of a Mime object, examining only its first occurrence.
+     * Without a subfield the value up to the first semicolon is returned in lowercase.
+     * With a subfield the value of that parameter is returned with surrounding quotes removed.
+     *
+     * @param {Object} mimeObj
+     * @param {string} field - header field name, matched case-insensitively
+     * @param {string} subfield - parameter name, matched case-insensitively
+     * @returns {string} the value, or null if the field or parameter is absent
+     */
+    static _extractField(mimeObj, field, subfield=null) {
+        const fieldName = String(field).toLowerCase();
+        if (!Object.prototype.hasOwnProperty.call(mimeObj.header, fieldName)) {
+            return null;
+        }
+        // Split on semicolons, with or without trailing whitespace, but not on
+        // semicolons inside a double quoted string such as boundary="a;b".
+        const segments = mimeObj.header[fieldName][0].match(/(?:"[^"]*"?|[^;"])+/g) || [];
+        if (!subfield) {
+            return segments.length ? segments[0].trim().toLowerCase() : null;
+        }
+        const wanted = subfield.toLowerCase();
+        for (const segment of segments) {
+            const eq = segment.indexOf("=");
+            if (eq < 0 || segment.substring(0, eq).trim().toLowerCase() !== wanted) {
+                continue;
+            }
+            const value = segment.substring(eq + 1).trim();
+            const quoted = /^(['"])(.+)\1$/.exec(value);
+            return quoted ? quoted[2] : value;
+        }
+        return null;
+    }
+
+    /**
+     * Splits a Mime document by the current boundaries and attempts to account
+     * for the current new line size which can be either the standard \r\n or \n.
+     *
+     * @param {string} input
+     * @param {string} boundary
+     * @yields {string} the text between two consecutive boundaries
+     */
+    static *_splitMultipart(input, boundary) {
+        const newline = input.indexOf("\r") >= 0 ? "\r\n" : "\n";
+        const boundaryStr = "--".concat(boundary);
+        const boundaryStrEnd = newline.concat(boundaryStr);
+        // Position of the closing boundary, or -1 if the document has none.
+        const last = input.indexOf(boundaryStrEnd.concat("--"));
+        let begin = 0;
+        for (let end = 0; end !== last; begin = end) {
+            begin = input.indexOf(boundaryStr, begin);
+            if (begin < 0) {
+                break;
+            }
+            begin += boundaryStr.length;
+            end = input.indexOf(boundaryStrEnd, begin);
+            if (end <= begin) {
+                break;
+            }
+            yield input.substring(begin, end);
+        }
+    }
+}
+
+export default Mime;
diff --git a/src/core/lib/QuotedPrintable.mjs b/src/core/lib/QuotedPrintable.mjs
new file mode 100644
index 0000000000..e7f7ece1ba
--- /dev/null
+++ b/src/core/lib/QuotedPrintable.mjs
@@ -0,0 +1,35 @@
+/**
+ * Some parts taken from mimelib (http://github.com/andris9/mimelib)
+ * @author Andris Reinman
+ * @license MIT
+ *
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+
+/**
+ * Decodes Quoted Printable text into an array of byte values.
+ *
+ * Soft line breaks ("=" at the end of a line or of the input) are dropped and every "=XX" hex
+ * escape becomes the byte XX. Any other character is emitted as its character code.
+ *
+ * @param {string} input
+ * @returns {byteArray}
+ */
+export function decodeQuotedPrintable(input) {
+    const unfolded = input.replace(/=(?:\r?\n|$)/g, "");
+    const bytes = [];
+    let pos = 0;
+    while (pos < unfolded.length) {
+        const pair = unfolded.substr(pos + 1, 2);
+        if (unfolded.charAt(pos) === "=" && /[\da-fA-F]{2}/.test(pair)) {
+            bytes.push(parseInt(pair, 16));
+            pos += 3;
+        } else {
+            bytes.push(unfolded.charCodeAt(pos));
+            pos++;
+        }
+    }
+    return bytes;
+}
diff --git a/src/core/operations/DecodeMimeEncodedWords.mjs b/src/core/operations/DecodeMimeEncodedWords.mjs
new file mode 100644
index 0000000000..9997d74043
--- /dev/null
+++ b/src/core/operations/DecodeMimeEncodedWords.mjs
@@ -0,0 +1,43 @@
+/**
+ * @author bwhitn [brian.m.whitney@outlook.com]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation";
+import Mime from "../lib/Mime";
+
+/**
+ * Operation for finding and replacing Mime encoded words.
+ */
+class DecodeMimeEncodedWords extends Operation {
+
+    /**
+     * DecodeMimeEncodedWords constructor
+     */
+    constructor() {
+        super();
+        this.name = "Decode Mime Encoded Words";
+        this.module = "Default";
+        this.description = ["Parser an IMF formatted messages following RFC5322.",
+            "<br>", "Decodes Mime encoded words that are found in IMF messages.",
+        ].join("\n");
+        this.infoURL = "https://tools.ietf.org/html/rfc2047";
+        this.inputType = "string";
+        this.outputType = "string";
+        this.args = [];
+    }
+
+    /**
+     * Replaces every Mime encoded word found in the input with its decoded text.
+     *
+     * @param {string} input
+     * @param {Object[]} args
+     * @returns {string}
+     */
+    run(input, args) {
+        return Mime.replaceEncodedWord(input);
+    }
+}
+
+export default DecodeMimeEncodedWords;
diff --git a/src/core/operations/FromQuotedPrintable.mjs b/src/core/operations/FromQuotedPrintable.mjs
index 61466e4eb7..9f24519d1e 100644
--- a/src/core/operations/FromQuotedPrintable.mjs
+++ b/src/core/operations/FromQuotedPrintable.mjs
@@ -9,6 +9,7 @@
*/
import Operation from "../Operation";
+import {decodeQuotedPrintable} from "../lib/QuotedPrintable";
/**
* From Quoted Printable operation
@@ -43,25 +44,7 @@ class FromQuotedPrintable extends Operation {
* @returns {byteArray}
*/
run(input, args) {
- const str = input.replace(/=(?:\r?\n|$)/g, "");
-
- const encodedBytesCount = (str.match(/=[\da-fA-F]{2}/g) || []).length,
- bufferLength = str.length - encodedBytesCount * 2,
- buffer = new Array(bufferLength);
- let chr, hex,
- bufferPos = 0;
-
- for (let i = 0, len = str.length; i < len; i++) {
- chr = str.charAt(i);
- if (chr === "=" && (hex = str.substr(i + 1, 2)) && /[\da-fA-F]{2}/.test(hex)) {
- buffer[bufferPos++] = parseInt(hex, 16);
- i += 2;
- continue;
- }
- buffer[bufferPos++] = chr.charCodeAt(0);
- }
-
- return buffer;
+ return decodeQuotedPrintable(input);
}
}
diff --git a/src/core/operations/ParseMime.mjs b/src/core/operations/ParseMime.mjs
new file mode 100644
index 0000000000..3d2a3c5c71
--- /dev/null
+++ b/src/core/operations/ParseMime.mjs
@@ -0,0 +1,117 @@
+/**
+ * @author bwhitn [brian.m.whitney@outlook.com]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation";
+import Mime from "../lib/Mime";
+import Utils from "../Utils";
+
+/**
+ * Parse Mime operation
+ */
+class ParseMime extends Operation {
+
+    /**
+     * ParseMime constructor
+     */
+    constructor() {
+        super();
+        this.name = "Parse Mime";
+        this.module = "Default";
+        this.description = ["Generic Mime Message parser that decodes Mime messages into files",
+            "<br>",
+            "The output will be the root header and the associated mime parts.",
+            "This includes Internet Message Format which are found in SMTP traffic."
+        ].join("\n");
+        this.infoURL = "https://tools.ietf.org/html/rfc2045";
+        this.inputType = "string";
+        this.outputType = "List<File>";
+        this.presentType = "html";
+        this.args = [
+            {
+                "name": "Decode Encoded-Words",
+                "type": "boolean",
+                "value": false
+            }
+        ];
+    }
+
+    /**
+     * Basic Email Parser that returns the root header and the decoded mime parts as files.
+     *
+     * @param {string} input
+     * @param {Object[]} args - args[0] is true to decode the encoded words in the root header
+     * @returns {File[]}
+     */
+    run(input, args) {
+        const [decodeWords] = args;
+        const eml = new Mime(input);
+        if (!eml.mimeObj) {
+            return [];
+        }
+        eml.decodeMimeObjects();
+        if (decodeWords) {
+            // Only the root header is output, so the nested headers are left as they are.
+            eml.decodeHeaderWords(false);
+        }
+        const fields = [["filename", "content-disposition", "filename"],
+            ["name", "content-type", "name"],
+            ["type", "content-type"],
+            ["subject", "subject"]];
+        const dataObj = eml.extractData(fields);
+        const retval = [];
+        let subject = null;
+        if (dataObj.length >= 1) {
+            // The first entry is the root part of the message.
+            subject = dataObj[0].fields.subject;
+            if (dataObj[0].header) {
+                retval.push(new File([dataObj[0].header], "Header.txt", {type: "text/plain"}));
+            }
+        }
+        dataObj.forEach((obj) => {
+            if (!obj.body) {
+                return;
+            }
+            const type = obj.fields.type || "text/plain";
+            // Prefer a declared file name and fall back to the subject plus an extension.
+            const name = obj.fields.filename || obj.fields.name ||
+                (subject || "Undefined").concat(ParseMime.getFileExt(type));
+            // Decoded bodies are byte arrays, bodies that were not decoded are strings.
+            const content = Array.isArray(obj.body) ? Uint8Array.from(obj.body) : obj.body;
+            retval.push(new File([content], name, {type: type}));
+        });
+        return retval;
+    }
+
+    /**
+     * Simple function to add a common file extension based on mime type string.
+     *
+     * @param {string} mimetype
+     * @returns {string}
+     */
+    static getFileExt(mimetype) {
+        switch (mimetype) {
+            case "text/plain":
+                return ".txt";
+            case "text/html":
+                return ".htm";
+            case "application/rtf":
+                return ".rtf";
+        }
+        return ".bin";
+    }
+
+    /**
+     * Displays the files in HTML for web apps.
+     *
+     * @param {File[]} files
+     * @returns {html}
+     */
+    async present(files) {
+        return await Utils.displayFilesAsHTML(files);
+    }
+}
+
+export default ParseMime;
diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs
index fb68ed9ca6..ffdd2c055b 100644
--- a/tests/operations/index.mjs
+++ b/tests/operations/index.mjs
@@ -57,6 +57,7 @@ import "./tests/JWTSign";
import "./tests/JWTVerify";
import "./tests/MS";
import "./tests/Magic";
+import "./tests/Mime";
import "./tests/MorseCode";
import "./tests/NetBIOS";
import "./tests/OTP";
diff --git a/tests/operations/tests/Mime.mjs b/tests/operations/tests/Mime.mjs
new file mode 100644
index 0000000000..05c2f5dac0
--- /dev/null
+++ b/tests/operations/tests/Mime.mjs
@@ -0,0 +1,22 @@
+/**
+ * Tests for the "Decode Mime Encoded Words" operation, covering a GBK base64 word and a UTF-8 quoted-printable word.
+ *
+ * @author bwhitn [brian.m.whitney@outlook.com]
+ * @copyright Crown Copyright 2018
+ * @license Apache-2.0
+ */
+import TestRegister from "../TestRegister";
+
+TestRegister.addTests([
+ {
+ name: "Decode Mime Encoded Words",
+ input: "This is a GBK base64 encoded word: =?GBK?B?zfjC57P4yqY=?=.\nThis is a Cyrillic UTF-8 quoted word: =?utf-8?Q?=d0=9a=d0=b8=d0=b1=d0=b5=d1=80_=d0=a8=d0=b5=d1=84?=.",
+ expectedOutput: "This is a GBK base64 encoded word: 网络厨师.\nThis is a Cyrillic UTF-8 quoted word: Кибер Шеф.",
+ recipeConfig: [
+ {
+ op: "Decode Mime Encoded Words",
+ args: []
+ }
+ ]
+ },
+]);