From 7ae0b08b4dbe34326209418932a91d07fbc17f6c Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Tue, 20 Nov 2018 06:40:43 -0500 Subject: [PATCH 01/30] Start of eml --- src/core/config/Categories.json | 3 +- src/core/lib/ChrEnc.mjs | 47 ++++++++ src/core/operations/ParseIMF.mjs | 181 +++++++++++++++++++++++++++++++ 3 files changed, 230 insertions(+), 1 deletion(-) create mode 100644 src/core/operations/ParseIMF.mjs diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index 1891c46003..32d64b0823 100755 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -54,7 +54,8 @@ "From MessagePack", "To Braille", "From Braille", - "Parse TLV" + "Parse TLV", + "Parse Internet Message Format" ] }, { diff --git a/src/core/lib/ChrEnc.mjs b/src/core/lib/ChrEnc.mjs index 02b2e9a2b8..e4fa04d192 100644 --- a/src/core/lib/ChrEnc.mjs +++ b/src/core/lib/ChrEnc.mjs @@ -56,3 +56,50 @@ export const IO_FORMAT = { "Simplified Chinese GB18030 (54936)": 54936, }; +/** + * Preferred MIME encoding format mappings. + */ +export const MIME_FORMAT = { + "utf-8": 65001, + "utf-7": 65000, + "unicode": 1200, + "ibm500": 500, + "ebcdic-cp-us": 37, + "windows-874": 874, + "shift_jis": 932, + "gbk": 936, + "gb2312": 936, + "ks_c_5601-1987": 949, + "big5": 950, + "windows-1250": 1250, + "windows-1251": 1251, + "windows-1252": 1252, + "windows-1253": 1253, + "windows-1254": 1254, + "windows-1255": 1255, + "windows-1256": 1256, + "windows-1257": 1257, + "windows-1258": 1258, + "us-ascii": 20127, + "koi8-r": 20866, + "koi8-u": 21866, + "iso-8859-1": 28591, + "iso-8859-2": 28592, + "iso-8859-3": 28593, + "iso-8859-4": 28594, + "iso-8859-5": 28595, + "iso-8859-6": 28596, + "iso-8859-7": 28597, + "iso-8859-8": 28598, + "iso-8859-9": 28599, + "iso-8859-10": 28600, + "iso-8859-11": 28601, + "iso-8859-13": 28603, + "iso-8859-14": 28604, + "iso-8859-15": 28605, + "iso-8859-16": 28606, + "iso-2022": 50222, + "x-euc": 51932, + "euc-kr": 51949, + "gb18030": 54936, +}; diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs new file mode 100644 index 0000000000..e0127de43b --- /dev/null +++ b/src/core/operations/ParseIMF.mjs @@ -0,0 +1,181 @@ +/** + * @author bwhitn [brian.m.whitney@outlook.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + + import Operation from "../Operation"; + import OperationError from "../errors/OperationError"; + import cptable from "../vendor/js-codepage/cptable.js"; + import {fromBase64} from "../lib/Base64"; + import {MIME_FORMAT} from "../lib/ChrEnc"; + + + //TODO: fix function header + /** + * Return the conetent encoding for a mime section from a header object. + * CONTENT_TYPE returns the content type of a mime header from a header object. + * Returns the filename from a mime header object. + * Returns the boundary value for the mime section from a header object. + * @constant + * @default + */ + const FIELD_ITEM = { + FILENAME: [/filename=".*?([^~#%&*\][\\:<>?/|]+)"/, "content-disposition"], + CONTENT_TYPE: [/\s*([^;\s]+)/, "content-type"], + BOUNDARY: [/boundary="(.+?)"/, "content-type"], + CHARSET: [/charset=([a-z0-9-]+)/, "content-type"], + TRANSER_ENCODING: [/\s*([A-Za-z0-9-]+)\s*/, "content-transfer-encoding"], + } + + /** + * @constant + * @default + */ + //TODO: should 8 bit and 7 bit be treated the same? + const DECODER = { + "base64": function (input) { + return Utils.fromBase64(input, Base64.ALPHABET, "string", true); + }, + "quoted-printable": function (input) { + return QuotedPrintable.mimeDecode(input); + }, + "7bit": function (input) { + return input; + }, + "8bit": function (input) { + return input; + }, + } + + + class ParseIMF extends Operation { + + /** + * Internet MessageFormat constructor + */ + constructor() { + super(); + this.name = "Parse Internet Message Format"; + this.module = "Default"; + this.description = ["Parser an IMF formatted messages following RFC5322.", + "

", + "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the headers and mime parts.", + ].join("\n"); + this.infoURL = "https://tools.ietf.org/html/rfc5322"; + this.inputType = "string"; + this.outputType = "string"; + this.args = []; + } + + /** + * Basic Email Parser that displays the header and mime sections as files. + * + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + if (!input) { + return; + } + let headerBody = Email._splitHeaderFromBody(input); + let header = headerBody[0]; + let headerArray = Email._parseHeader(header); + if (args[0]) { + header = Email._replaceDecodeWord(header); + } + return JSON.stringify(headerArray); + } + + /** + * Breaks the header from the body and returns [header, body] + * + * @param {string} input + * @returns {string[]} + */ + splitHeaderFromBody(input) { + const emlRegex = /^([\x20-\x7e\n\r\t]+?)(?:\r?\n){2}([\x20-\x7e\t\n\r]*)/; + let splitEmail = emlRegex.exec(input); + if (splitEmail) { + //TODO: Array splice vs shift? + splitEmail.shift(); + return splitEmail; + } + } + + /** + * Takes a string and decodes quoted words inside them + * These take the form of =?utf-8?Q?Hello?= + * + * @param {string} input + * @returns {string} + */ + replaceDecodeWord(input) { + return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) { + //TODO fix Q encoding as it isn't identical to quoted-printable. ie _=" " + contEnc = (contEnc === "B") ? "base64" : "quoted-printable"; + return this.decodeMimeData(input, charEnc, contEnc); + }); + } + + /** + * Breaks a header into a object to be used by other functions. + * It removes any line feeds or carriage returns from the values and + * replaces it with a space. + * + * @param {string} input + * @returns {object} + */ + parseHeader(input) { + const sectionRegex = /([A-Z-]+):\s+([\x20-\x7e\r\n\t]+?)(?=$|\r?\n\S)/gi; + let header = {}, section; + while ((section = sectionRegex.exec(input))) { + let fieldName = section[1].toLowerCase(); + let fieldValue = section[2].replace(/\n|\r/g, " "); + if (header[fieldName]) { + header[fieldName].push(fieldValue); + } else { + header[fieldName] = [fieldValue]; + } + } + return header; + } + + /** + * Return decoded MIME data given the character encoding and content encoding. + * + * @param {string} input + * @param {string} charEnc + * @param {string} contEnc + * @returns {string} + */ + decodeMimeData(input, charEnc, contEnc) { + //TODO: make exceptions for unknown charEnc and contEnc? + input = this.DECODER[contEnc](input); + if (charEnc) { + input = cptable.utils.decode(this.MIME_FORMAT[charEnc.toLowerCase()], input); + } + return input; + } + + /** + * Returns a header item given a header object, itemName, and index number. + * + * @param {object} header + * @param {object} FIELD_ITEM + * @param {integer} fieldNum + * @returns {string} + */ + getHeaderItem(header, fieldItem, fieldNum = 0){ + if (fieldItem[1] in header && header[fieldItem[1]].length > fieldNum) { + let field = header[fieldItem[1]][fieldNum], item; + if ((item = fieldItem[0].exec(field))) { + return item[1]; + } + } + } + +} + +export default ParseIMF From ed17ed2919e5eb03bcec0b96c76bb798571a382c Mon Sep 17 00:00:00 2001 From: bwhitn Date: Tue, 20 Nov 2018 22:12:38 -0500 Subject: [PATCH 02/30] Moved quotedprintable to own library, completed a little work on IMF parsing --- src/core/lib/QuotedPrintable.mjs | 37 +++++++++++++++++++++ src/core/operations/FromQuotedPrintable.mjs | 21 ++---------- src/core/operations/ParseIMF.mjs | 11 +++--- 3 files changed, 45 insertions(+), 24 deletions(-) create mode 100644 src/core/lib/QuotedPrintable.mjs diff --git a/src/core/lib/QuotedPrintable.mjs b/src/core/lib/QuotedPrintable.mjs new file mode 100644 index 0000000000..13e78df430 --- /dev/null +++ b/src/core/lib/QuotedPrintable.mjs @@ -0,0 +1,37 @@ +/** + * Some parts taken from mimelib (http://github.com/andris9/mimelib) + * @author Andris Reinman + * @license MIT + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; + +/** + * @param {string} input + * @returns {byteArray} + */ +export function decodeQuotedPrintable(input) { + const str = input.replace(/=(?:\r?\n|$)/g, ""); + + const encodedBytesCount = (str.match(/=[\da-fA-F]{2}/g) || []).length, + bufferLength = str.length - encodedBytesCount * 2, + buffer = new Array(bufferLength); + let chr, hex, + bufferPos = 0; + + for (let i = 0, len = str.length; i < len; i++) { + chr = str.charAt(i); + if (chr === "=" && (hex = str.substr(i + 1, 2)) && /[\da-fA-F]{2}/.test(hex)) { + buffer[bufferPos++] = parseInt(hex, 16); + i += 2; + continue; + } + buffer[bufferPos++] = chr.charCodeAt(0); + } + + return buffer; +} diff --git a/src/core/operations/FromQuotedPrintable.mjs b/src/core/operations/FromQuotedPrintable.mjs index ee079ec62d..10cedb3505 100644 --- a/src/core/operations/FromQuotedPrintable.mjs +++ b/src/core/operations/FromQuotedPrintable.mjs @@ -9,6 +9,7 @@ */ import Operation from "../Operation"; +import {decodeQuotedPrintable} from "../lib/QuotedPrintable" /** * From Quoted Printable operation @@ -43,25 +44,7 @@ class FromQuotedPrintable extends Operation { * @returns {byteArray} */ run(input, args) { - const str = input.replace(/=(?:\r?\n|$)/g, ""); - - const encodedBytesCount = (str.match(/=[\da-fA-F]{2}/g) || []).length, - bufferLength = str.length - encodedBytesCount * 2, - buffer = new Array(bufferLength); - let chr, hex, - bufferPos = 0; - - for (let i = 0, len = str.length; i < len; i++) { - chr = str.charAt(i); - if (chr === "=" && (hex = str.substr(i + 1, 2)) && /[\da-fA-F]{2}/.test(hex)) { - buffer[bufferPos++] = parseInt(hex, 16); - i += 2; - continue; - } - buffer[bufferPos++] = chr.charCodeAt(0); - } - - return buffer; + return decodeQuotedPrintable(input); } } diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index e0127de43b..803631c106 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -8,6 +8,7 @@ import OperationError from "../errors/OperationError"; import cptable from "../vendor/js-codepage/cptable.js"; import {fromBase64} from "../lib/Base64"; + import {decodeQuotedPrintable} from "../lib/QuotedPrintable" import {MIME_FORMAT} from "../lib/ChrEnc"; @@ -35,10 +36,10 @@ //TODO: should 8 bit and 7 bit be treated the same? const DECODER = { "base64": function (input) { - return Utils.fromBase64(input, Base64.ALPHABET, "string", true); + return fromBase64(input, Base64.ALPHABET, "string", true); }, "quoted-printable": function (input) { - return QuotedPrintable.mimeDecode(input); + return decodeQuotedPrintable(input); }, "7bit": function (input) { return input; @@ -79,11 +80,11 @@ if (!input) { return; } - let headerBody = Email._splitHeaderFromBody(input); + let headerBody = this.splitHeaderFromBody(input); let header = headerBody[0]; - let headerArray = Email._parseHeader(header); + let headerArray = this.parseHeader(header); if (args[0]) { - header = Email._replaceDecodeWord(header); + header = this.replaceDecodeWord(header); } return JSON.stringify(headerArray); } From 13b10a68a2bb53f555ac732daaaedd6fd8572fcd Mon Sep 17 00:00:00 2001 From: bwhitn Date: Tue, 20 Nov 2018 22:36:29 -0500 Subject: [PATCH 03/30] fixing indent --- src/core/operations/ParseIMF.mjs | 303 ++++++++++++++++--------------- 1 file changed, 152 insertions(+), 151 deletions(-) diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 803631c106..68932f97d6 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -4,178 +4,179 @@ * @license Apache-2.0 */ - import Operation from "../Operation"; - import OperationError from "../errors/OperationError"; - import cptable from "../vendor/js-codepage/cptable.js"; - import {fromBase64} from "../lib/Base64"; - import {decodeQuotedPrintable} from "../lib/QuotedPrintable" - import {MIME_FORMAT} from "../lib/ChrEnc"; +import Operation from "../Operation"; +import OperationError from "../errors/OperationError"; +import cptable from "../vendor/js-codepage/cptable.js"; +import {fromBase64} from "../lib/Base64"; +import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; +import {MIME_FORMAT} from "../lib/ChrEnc"; +import Utils from "../Utils"; - //TODO: fix function header - /** - * Return the conetent encoding for a mime section from a header object. - * CONTENT_TYPE returns the content type of a mime header from a header object. - * Returns the filename from a mime header object. - * Returns the boundary value for the mime section from a header object. - * @constant - * @default - */ - const FIELD_ITEM = { - FILENAME: [/filename=".*?([^~#%&*\][\\:<>?/|]+)"/, "content-disposition"], - CONTENT_TYPE: [/\s*([^;\s]+)/, "content-type"], - BOUNDARY: [/boundary="(.+?)"/, "content-type"], - CHARSET: [/charset=([a-z0-9-]+)/, "content-type"], - TRANSER_ENCODING: [/\s*([A-Za-z0-9-]+)\s*/, "content-transfer-encoding"], - } +// TODO: fix function header +/** + * Return the conetent encoding for a mime section from a header object. + * CONTENT_TYPE returns the content type of a mime header from a header object. + * Returns the filename from a mime header object. + * Returns the boundary value for the mime section from a header object. + * @constant + * @default + */ +const FIELD_ITEM = { + FILENAME: [/filename=".*?([^~#%&*\][\\:<>?/|]+)"/, "content-disposition"], + CONTENT_TYPE: [/\s*([^;\s]+)/, "content-type"], + BOUNDARY: [/boundary="(.+?)"/, "content-type"], + CHARSET: [/charset=([a-z0-9-]+)/, "content-type"], + TRANSER_ENCODING: [/\s*([A-Za-z0-9-]+)\s*/, "content-transfer-encoding"], +} - /** - * @constant - * @default - */ - //TODO: should 8 bit and 7 bit be treated the same? - const DECODER = { - "base64": function (input) { - return fromBase64(input, Base64.ALPHABET, "string", true); - }, - "quoted-printable": function (input) { - return decodeQuotedPrintable(input); - }, - "7bit": function (input) { - return input; - }, - "8bit": function (input) { - return input; - }, - } +/** + * @constant + * @default + */ +// TODO: should 8 bit and 7 bit be treated the same? +const DECODER = { + "base64": function (input) { + return fromBase64(input); + }, + "quoted-printable": function (input) { + return Utils.byteArrayToUtf8(decodeQuotedPrintable(input)); + }, + "7bit": function (input) { + return input; + }, + "8bit": function (input) { + return input; + }, +} - class ParseIMF extends Operation { +class ParseIMF extends Operation { - /** - * Internet MessageFormat constructor - */ - constructor() { - super(); - this.name = "Parse Internet Message Format"; - this.module = "Default"; - this.description = ["Parser an IMF formatted messages following RFC5322.", - "

", - "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the headers and mime parts.", - ].join("\n"); - this.infoURL = "https://tools.ietf.org/html/rfc5322"; - this.inputType = "string"; - this.outputType = "string"; - this.args = []; + /** + * Internet MessageFormat constructor + */ + constructor() { + super(); + this.name = "Parse Internet Message Format"; + this.module = "Default"; + this.description = ["Parser an IMF formatted messages following RFC5322.", + "

", + "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the headers and mime parts.", + ].join("\n"); + this.infoURL = "https://tools.ietf.org/html/rfc5322"; + this.inputType = "string"; + this.outputType = "string"; + this.args = []; } /** - * Basic Email Parser that displays the header and mime sections as files. - * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - run(input, args) { - if (!input) { - return; - } - let headerBody = this.splitHeaderFromBody(input); - let header = headerBody[0]; - let headerArray = this.parseHeader(header); - if (args[0]) { - header = this.replaceDecodeWord(header); + * Basic Email Parser that displays the header and mime sections as files. + * + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + if (!input) { + return; + } + let headerBody = this.splitHeaderFromBody(input); + let header = headerBody[0]; + let headerArray = this.parseHeader(header); + if (args[0]) { + header = this.replaceDecodeWord(header); + } + return JSON.stringify(headerArray); } - return JSON.stringify(headerArray); - } - /** - * Breaks the header from the body and returns [header, body] - * - * @param {string} input - * @returns {string[]} - */ - splitHeaderFromBody(input) { - const emlRegex = /^([\x20-\x7e\n\r\t]+?)(?:\r?\n){2}([\x20-\x7e\t\n\r]*)/; - let splitEmail = emlRegex.exec(input); - if (splitEmail) { - //TODO: Array splice vs shift? - splitEmail.shift(); - return splitEmail; - } - } + /** + * Breaks the header from the body and returns [header, body] + * + * @param {string} input + * @returns {string[]} + */ + splitHeaderFromBody(input) { + const emlRegex = /^([\x20-\x7e\n\r\t]+?)(?:\r?\n){2}([\x20-\x7e\t\n\r]*)/; + let splitEmail = emlRegex.exec(input); + if (splitEmail) { + //TODO: Array splice vs shift? + splitEmail.shift(); + return splitEmail; + } + } - /** - * Takes a string and decodes quoted words inside them - * These take the form of =?utf-8?Q?Hello?= - * - * @param {string} input - * @returns {string} - */ - replaceDecodeWord(input) { - return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) { - //TODO fix Q encoding as it isn't identical to quoted-printable. ie _=" " - contEnc = (contEnc === "B") ? "base64" : "quoted-printable"; - return this.decodeMimeData(input, charEnc, contEnc); - }); - } + /** + * Takes a string and decodes quoted words inside them + * These take the form of =?utf-8?Q?Hello?= + * + * @param {string} input + * @returns {string} + */ + replaceDecodeWord(input) { + return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) { + //TODO fix Q encoding as it isn't identical to quoted-printable. ie _=" " + contEnc = (contEnc === "B") ? "base64" : "quoted-printable"; + return this.decodeMimeData(input, charEnc, contEnc); + }); + } - /** - * Breaks a header into a object to be used by other functions. - * It removes any line feeds or carriage returns from the values and - * replaces it with a space. - * - * @param {string} input - * @returns {object} - */ - parseHeader(input) { - const sectionRegex = /([A-Z-]+):\s+([\x20-\x7e\r\n\t]+?)(?=$|\r?\n\S)/gi; - let header = {}, section; - while ((section = sectionRegex.exec(input))) { - let fieldName = section[1].toLowerCase(); - let fieldValue = section[2].replace(/\n|\r/g, " "); - if (header[fieldName]) { - header[fieldName].push(fieldValue); - } else { - header[fieldName] = [fieldValue]; - } - } - return header; - } + /** + * Breaks a header into a object to be used by other functions. + * It removes any line feeds or carriage returns from the values and + * replaces it with a space. + * + * @param {string} input + * @returns {object} + */ + parseHeader(input) { + const sectionRegex = /([A-Z-]+):\s+([\x20-\x7e\r\n\t]+?)(?=$|\r?\n\S)/gi; + let header = {}, section; + while ((section = sectionRegex.exec(input))) { + let fieldName = section[1].toLowerCase(); + let fieldValue = section[2].replace(/\n|\r/g, " "); + if (header[fieldName]) { + header[fieldName].push(fieldValue); + } else { + header[fieldName] = [fieldValue]; + } + } + return header; + } - /** - * Return decoded MIME data given the character encoding and content encoding. - * - * @param {string} input - * @param {string} charEnc - * @param {string} contEnc - * @returns {string} - */ - decodeMimeData(input, charEnc, contEnc) { + /** + * Return decoded MIME data given the character encoding and content encoding. + * + * @param {string} input + * @param {string} charEnc + * @param {string} contEnc + * @returns {string} + */ + decodeMimeData(input, charEnc, contEnc) { //TODO: make exceptions for unknown charEnc and contEnc? input = this.DECODER[contEnc](input); if (charEnc) { input = cptable.utils.decode(this.MIME_FORMAT[charEnc.toLowerCase()], input); } return input; - } + } - /** - * Returns a header item given a header object, itemName, and index number. - * - * @param {object} header - * @param {object} FIELD_ITEM - * @param {integer} fieldNum - * @returns {string} - */ - getHeaderItem(header, fieldItem, fieldNum = 0){ - if (fieldItem[1] in header && header[fieldItem[1]].length > fieldNum) { - let field = header[fieldItem[1]][fieldNum], item; - if ((item = fieldItem[0].exec(field))) { - return item[1]; - } - } - } + /** + * Returns a header item given a header object, itemName, and index number. + * + * @param {object} header + * @param {object} FIELD_ITEM + * @param {integer} fieldNum + * @returns {string} + */ + getHeaderItem(header, fieldItem, fieldNum = 0){ + if (fieldItem[1] in header && header[fieldItem[1]].length > fieldNum) { + let field = header[fieldItem[1]][fieldNum], item; + if ((item = fieldItem[0].exec(field))) { + return item[1]; + } + } + } } From 39fd0167179e0b69084f8115736b162462d5b432 Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Wed, 21 Nov 2018 15:26:09 -0500 Subject: [PATCH 04/30] working on email parsing --- src/core/operations/ParseIMF.mjs | 34 +++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 68932f97d6..ec66a04c23 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -79,15 +79,15 @@ class ParseIMF extends Operation { */ run(input, args) { if (!input) { - return; + return ""; } - let headerBody = this.splitHeaderFromBody(input); + let headerBody = splitHeaderFromBody(input); let header = headerBody[0]; - let headerArray = this.parseHeader(header); - if (args[0]) { - header = this.replaceDecodeWord(header); + let headerArray = parseHeader(header); + if (true) { + header = replaceDecodeWord(header); } - return JSON.stringify(headerArray); + return header; } /** @@ -96,8 +96,8 @@ class ParseIMF extends Operation { * @param {string} input * @returns {string[]} */ - splitHeaderFromBody(input) { - const emlRegex = /^([\x20-\x7e\n\r\t]+?)(?:\r?\n){2}([\x20-\x7e\t\n\r]*)/; + static splitHeaderFromBody(input) { + const emlRegex = /^([\x20-\xff\n\r\t]+?)(?:\r?\n){2}([\x20-\xff\t\n\r]*)/; let splitEmail = emlRegex.exec(input); if (splitEmail) { //TODO: Array splice vs shift? @@ -113,11 +113,13 @@ class ParseIMF extends Operation { * @param {string} input * @returns {string} */ - replaceDecodeWord(input) { + static replaceDecodeWord(input) { return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) { - //TODO fix Q encoding as it isn't identical to quoted-printable. ie _=" " contEnc = (contEnc === "B") ? "base64" : "quoted-printable"; - return this.decodeMimeData(input, charEnc, contEnc); + if (contEnc === "quoted-printable") { + input = input.replace("_", " "); + } + return decodeMimeData(input, charEnc, contEnc); }); } @@ -129,7 +131,7 @@ class ParseIMF extends Operation { * @param {string} input * @returns {object} */ - parseHeader(input) { + static parseHeader(input) { const sectionRegex = /([A-Z-]+):\s+([\x20-\x7e\r\n\t]+?)(?=$|\r?\n\S)/gi; let header = {}, section; while ((section = sectionRegex.exec(input))) { @@ -152,11 +154,11 @@ class ParseIMF extends Operation { * @param {string} contEnc * @returns {string} */ - decodeMimeData(input, charEnc, contEnc) { + static decodeMimeData(input, charEnc, contEnc) { //TODO: make exceptions for unknown charEnc and contEnc? - input = this.DECODER[contEnc](input); + input = DECODER[contEnc](input); if (charEnc) { - input = cptable.utils.decode(this.MIME_FORMAT[charEnc.toLowerCase()], input); + input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); } return input; } @@ -169,7 +171,7 @@ class ParseIMF extends Operation { * @param {integer} fieldNum * @returns {string} */ - getHeaderItem(header, fieldItem, fieldNum = 0){ + static getHeaderItem(header, fieldItem, fieldNum = 0){ if (fieldItem[1] in header && header[fieldItem[1]].length > fieldNum) { let field = header[fieldItem[1]][fieldNum], item; if ((item = fieldItem[0].exec(field))) { From 1e0bb72dfa10151885e5740240f66aaa5c27853f Mon Sep 17 00:00:00 2001 From: bwhitn Date: Wed, 21 Nov 2018 23:36:32 -0500 Subject: [PATCH 05/30] working on IMF --- src/core/operations/ParseIMF.mjs | 55 ++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index ec66a04c23..81478c2e4f 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -12,7 +12,6 @@ import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; import {MIME_FORMAT} from "../lib/ChrEnc"; import Utils from "../Utils"; - // TODO: fix function header /** * Return the conetent encoding for a mime section from a header object. @@ -22,7 +21,7 @@ import Utils from "../Utils"; * @constant * @default */ -const FIELD_ITEM = { +const IMF_FIELD_ITEM = { FILENAME: [/filename=".*?([^~#%&*\][\\:<>?/|]+)"/, "content-disposition"], CONTENT_TYPE: [/\s*([^;\s]+)/, "content-type"], BOUNDARY: [/boundary="(.+?)"/, "content-type"], @@ -35,7 +34,7 @@ const FIELD_ITEM = { * @default */ // TODO: should 8 bit and 7 bit be treated the same? -const DECODER = { +const IMF_DECODER = { "base64": function (input) { return fromBase64(input); }, @@ -50,7 +49,6 @@ const DECODER = { }, } - class ParseIMF extends Operation { /** @@ -66,28 +64,43 @@ class ParseIMF extends Operation { ].join("\n"); this.infoURL = "https://tools.ietf.org/html/rfc5322"; this.inputType = "string"; - this.outputType = "string"; - this.args = []; + this.outputType = "List"; + this.presentType = "html"; + this.args = [ + { + "name": "Decode Quoted Words", + "type": "boolean", + "value": false + } + ]; } /** * Basic Email Parser that displays the header and mime sections as files. + * Args 0 boolean decode quoted words * * @param {string} input * @param {Object[]} args - * @returns {string} + * @returns {File[]} */ run(input, args) { if (!input) { - return ""; + return []; } - let headerBody = splitHeaderFromBody(input); + let headerBody = ParseIMF.splitHeaderFromBody(input); let header = headerBody[0]; - let headerArray = parseHeader(header); - if (true) { - header = replaceDecodeWord(header); + let headerArray = ParseIMF.parseHeader(header); + if (args[0]) { + header = ParseIMF.replaceDecodeWord(header); } - return header; + let retval = []; + let i = 0; + headerBody.forEach(function(file){ + file = new File(Array.from(file), "test"+String(i), {type: "text/plain"}) + retval.push(file); + i++; + }); + return retval; } /** @@ -117,9 +130,9 @@ class ParseIMF extends Operation { return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) { contEnc = (contEnc === "B") ? "base64" : "quoted-printable"; if (contEnc === "quoted-printable") { - input = input.replace("_", " "); + input = input.replace(/_/g, " "); } - return decodeMimeData(input, charEnc, contEnc); + return ParseIMF.decodeMimeData(input, charEnc, contEnc); }); } @@ -155,12 +168,12 @@ class ParseIMF extends Operation { * @returns {string} */ static decodeMimeData(input, charEnc, contEnc) { - //TODO: make exceptions for unknown charEnc and contEnc? - input = DECODER[contEnc](input); - if (charEnc) { - input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); - } - return input; + //TODO: make exceptions for unknown charEnc and contEnc? + input = IMF_DECODER[contEnc](input); + if (charEnc) { + input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); + } + return input; } /** From d8667a67be7bc7ae303ae9f3b96ee6dbb0326056 Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Thu, 22 Nov 2018 14:51:53 -0500 Subject: [PATCH 06/30] got file output working. Adding Mime walking --- src/core/operations/ParseIMF.mjs | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 81478c2e4f..eea43f0ef7 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -90,19 +90,41 @@ class ParseIMF extends Operation { let headerBody = ParseIMF.splitHeaderFromBody(input); let header = headerBody[0]; let headerArray = ParseIMF.parseHeader(header); - if (args[0]) { - header = ParseIMF.replaceDecodeWord(header); + if (args[0] && headerBody.length > 0) { + headerBody[0] = ParseIMF.replaceDecodeWord(headerBody[0]); } let retval = []; let i = 0; headerBody.forEach(function(file){ - file = new File(Array.from(file), "test"+String(i), {type: "text/plain"}) + file = new File([file], "test"+String(i), {type: "text/plain"}); retval.push(file); i++; }); return retval; } + /** + * Displays the files in HTML for web apps. + * + * @param {File[]} files + * @returns {html} + */ + async present(files) { + return await Utils.displayFilesAsHTML(files); + } + + /** + * Walks a MIME document and returns an array of Mime data and header objects. + * + * @param {string} input + * @param {object} header + * @returns {object[]} + */ + static walkMime(input, header) { + let output = []; + if header[""] + } + /** * Breaks the header from the body and returns [header, body] * From 189cf337d6f38acf587788c9612ebb4ed76536ad Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Fri, 23 Nov 2018 15:00:34 -0500 Subject: [PATCH 07/30] Working on Mime walking --- src/core/operations/ParseIMF.mjs | 77 ++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index eea43f0ef7..5a49286aeb 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -29,26 +29,6 @@ const IMF_FIELD_ITEM = { TRANSER_ENCODING: [/\s*([A-Za-z0-9-]+)\s*/, "content-transfer-encoding"], } -/** - * @constant - * @default - */ -// TODO: should 8 bit and 7 bit be treated the same? -const IMF_DECODER = { - "base64": function (input) { - return fromBase64(input); - }, - "quoted-printable": function (input) { - return Utils.byteArrayToUtf8(decodeQuotedPrintable(input)); - }, - "7bit": function (input) { - return input; - }, - "8bit": function (input) { - return input; - }, -} - class ParseIMF extends Operation { /** @@ -93,10 +73,11 @@ class ParseIMF extends Operation { if (args[0] && headerBody.length > 0) { headerBody[0] = ParseIMF.replaceDecodeWord(headerBody[0]); } + let retfiles = ParseIMF.walkMime(headerBody[1], headerArray, input.indexOf("\r") >= 0); let retval = []; let i = 0; - headerBody.forEach(function(file){ - file = new File([file], "test"+String(i), {type: "text/plain"}); + retfiles.forEach(function(file){ + file = new File([file.data], "test"+String(i), {type: "text/plain"}); retval.push(file); i++; }); @@ -120,9 +101,39 @@ class ParseIMF extends Operation { * @param {object} header * @returns {object[]} */ - static walkMime(input, header) { - let output = []; - if header[""] + static walkMime(input, header, rn) { + let new_line_length = rn ? 2 : 1; + const content_type_reg = /([^;]+);\s+boundary\=(['"])(.+?)\2/g; + const inner_content_type_reg = /^([^;]+);\s+type\=(['"])(.+?)\2;\s+boundary\=(['"])(.+?)\4/g; + let output_sections = []; + if (header.hasOwnProperty("mime-version") || (header.hasOwnProperty("content-type") && header["content-type"][0].startsWith("multipart/"))) { + let content_boundary = null; + let idx = 3; + if (header["content-type"][0].indexOf("type=") > 0) { + content_boundary = inner_content_type_reg.exec(header["content-type"][0]); + idx = 5; + } else { + content_boundary = content_type_reg.exec(header["content-type"][0]); + } + const boundary_str = "--".concat(content_boundary[idx]); + let start = input.indexOf(boundary_str) + boundary_str.length + new_line_length; + let end = input.indexOf(boundary_str.concat("--")) - new_line_length; + let output = input.substring(start, end); + let headerBody = ParseIMF.splitHeaderFromBody(output); + let headerArray = ParseIMF.parseHeader(headerBody[0]); + let parts = ParseIMF.walkMime(headerBody[1], headerArray, rn); + parts.forEach(function(part){ + output_sections.push(part); + }); + } else if (header.hasOwnProperty("content-type") && header.hasOwnProperty("content-transfer-encoding")) { + const cont_type_data_reg = /^([^;]+);\s+charset\=(['"])(.+?)\2/g; + let cont_type = cont_type_data_reg.exec(header["content-type"][0]); + let val = ParseIMF.decodeMimeData(input, cont_type[3], header["content-transfer-encoding"][0]); + return [{type:cont_type[1], data: val}]; + } else { + throw new OperationError("Invalid Mime section"); + } + return output_sections; } /** @@ -190,9 +201,19 @@ class ParseIMF extends Operation { * @returns {string} */ static decodeMimeData(input, charEnc, contEnc) { - //TODO: make exceptions for unknown charEnc and contEnc? - input = IMF_DECODER[contEnc](input); - if (charEnc) { + switch (contEnc) { + case "base64": + input = fromBase64(input); + break; + case "quoted-printable": + input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input)); + break; + case "7bit": + case "8bit": + default: + break; + } + if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) { input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); } return input; From 9ef9dff83201bb4919b57fcbae77cdd5ced86b6f Mon Sep 17 00:00:00 2001 From: bwhitn Date: Fri, 23 Nov 2018 23:43:06 -0500 Subject: [PATCH 08/30] Have some initial functionallity --- src/core/operations/ParseIMF.mjs | 113 +++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 37 deletions(-) diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 5a49286aeb..3428ab4b7f 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -21,12 +21,10 @@ import Utils from "../Utils"; * @constant * @default */ -const IMF_FIELD_ITEM = { - FILENAME: [/filename=".*?([^~#%&*\][\\:<>?/|]+)"/, "content-disposition"], - CONTENT_TYPE: [/\s*([^;\s]+)/, "content-type"], - BOUNDARY: [/boundary="(.+?)"/, "content-type"], - CHARSET: [/charset=([a-z0-9-]+)/, "content-type"], - TRANSER_ENCODING: [/\s*([A-Za-z0-9-]+)\s*/, "content-transfer-encoding"], +const FILE_TYPE_SUFFIX = { + "text/plain": "txt", + "text/html": "htm", + "application/rtf": "rtf", } class ParseIMF extends Operation { @@ -64,22 +62,34 @@ class ParseIMF extends Operation { * @returns {File[]} */ run(input, args) { + // TODO: need to add Non-Mime emails + // TODO: need to add header info to output + // TODO: no uuencode function. see if we can fix this + // TODO: Need to parse multipart headers better as they are key value pairs separated by a ";\s+". if (!input) { return []; } let headerBody = ParseIMF.splitHeaderFromBody(input); - let header = headerBody[0]; - let headerArray = ParseIMF.parseHeader(header); + let headerArray = ParseIMF.parseHeader(headerBody[0]); if (args[0] && headerBody.length > 0) { headerBody[0] = ParseIMF.replaceDecodeWord(headerBody[0]); } let retfiles = ParseIMF.walkMime(headerBody[1], headerArray, input.indexOf("\r") >= 0); let retval = []; - let i = 0; - retfiles.forEach(function(file){ - file = new File([file.data], "test"+String(i), {type: "text/plain"}); + retfiles.forEach(function(fileObj){ + let file = null; + if (fileObj.name !== null) { + file = new File([fileObj.data], fileObj.name, {type: fileObj.type}); + } else { + let name = ParseIMF.replaceDecodeWord(headerArray["subject"][0]).concat("."); + if (fileObj.type in FILE_TYPE_SUFFIX) { + name = name.concat(FILE_TYPE_SUFFIX[fileObj.type]); + } else { + name = name.concat("bin"); + } + file = new File([fileObj.data], name, {type: fileObj.type}); + } retval.push(file); - i++; }); return retval; } @@ -115,21 +125,37 @@ class ParseIMF extends Operation { } else { content_boundary = content_type_reg.exec(header["content-type"][0]); } - const boundary_str = "--".concat(content_boundary[idx]); - let start = input.indexOf(boundary_str) + boundary_str.length + new_line_length; - let end = input.indexOf(boundary_str.concat("--")) - new_line_length; - let output = input.substring(start, end); - let headerBody = ParseIMF.splitHeaderFromBody(output); - let headerArray = ParseIMF.parseHeader(headerBody[0]); - let parts = ParseIMF.walkMime(headerBody[1], headerArray, rn); - parts.forEach(function(part){ - output_sections.push(part); + let mime_parts = ParseIMF.splitMultipart(input, content_boundary[idx], new_line_length); + mime_parts.forEach(function(mime_part){ + let headerBody = ParseIMF.splitHeaderFromBody(mime_part); + let headerArray = ParseIMF.parseHeader(headerBody[0]); + let parts = ParseIMF.walkMime(headerBody[1], headerArray, rn); + parts.forEach(function(part){ + output_sections.push(part); + }); }); } else if (header.hasOwnProperty("content-type") && header.hasOwnProperty("content-transfer-encoding")) { - const cont_type_data_reg = /^([^;]+);\s+charset\=(['"])(.+?)\2/g; - let cont_type = cont_type_data_reg.exec(header["content-type"][0]); - let val = ParseIMF.decodeMimeData(input, cont_type[3], header["content-transfer-encoding"][0]); - return [{type:cont_type[1], data: val}]; + let contType = null; + let dataValue = null; + let fileName = null; + let charEnc = null; + // TODO: if there is no content disposition filename try content type name. + if (header.hasOwnProperty("content-disposition")) { + const cont_disp = /^([^;]+);.*?filename\=(['"]?)(.+?)\2$/g; + let dispo = cont_disp.exec(header["content-disposition"][0]); + // TODO: Remove path if it contains it. + fileName = dispo[3]; + const cont_type_file = /^([^;]+);\s+name\=(["']?)(.+?)\2$/g; + let content = cont_type_file.exec(header["content-type"][0]); + let contType = content[1]; + } else { + const cont_type_data_reg = /^([^;]+);\s+charset\=(['"]?)(.+?)\2$/g; + let content = cont_type_data_reg.exec(header["content-type"][0]); + contType = content[1]; + charEnc = content[3] + } + dataValue = ParseIMF.decodeMimeData(input, charEnc, header["content-transfer-encoding"][0]); + return [{type: contType, data: dataValue, name: fileName}]; } else { throw new OperationError("Invalid Mime section"); } @@ -178,7 +204,7 @@ class ParseIMF extends Operation { * @returns {object} */ static parseHeader(input) { - const sectionRegex = /([A-Z-]+):\s+([\x20-\x7e\r\n\t]+?)(?=$|\r?\n\S)/gi; + const sectionRegex = /([A-Za-z-]+):\s+([\x20-\xff\r\n\t]+?)(?=$|\r?\n\S)/g; let header = {}, section; while ((section = sectionRegex.exec(input))) { let fieldName = section[1].toLowerCase(); @@ -220,22 +246,35 @@ class ParseIMF extends Operation { } /** - * Returns a header item given a header object, itemName, and index number. * - * @param {object} header - * @param {object} FIELD_ITEM - * @param {integer} fieldNum - * @returns {string} + * + * + * */ - static getHeaderItem(header, fieldItem, fieldNum = 0){ - if (fieldItem[1] in header && header[fieldItem[1]].length > fieldNum) { - let field = header[fieldItem[1]][fieldNum], item; - if ((item = fieldItem[0].exec(field))) { - return item[1]; + static splitMultipart(input, boundary, new_line_length) { + let output = []; + let newline = new_line_length === 2 ? "\r\n" : "\n"; + const boundary_str = "--".concat(boundary, newline); + const last = input.indexOf("--".concat(boundary, "--", newline)) - new_line_length; + let start = 0; + while(true) { + let start = input.indexOf(boundary_str, start); + if (start >= 0) { + start = start + boundary_str.length; + } else { + break; } + let end = input.indexOf(boundary_str, start) - new_line_length; + if (end > start) { + output.push(input.substring(start, end)); + } else { + output.push(input.substring(start, last)); + break; + } + start = end; } + return output; } - } export default ParseIMF From 2a2312dc7d2f2ab4b2c102202f1e1ce29839421c Mon Sep 17 00:00:00 2001 From: bwhitn Date: Sun, 25 Nov 2018 23:04:07 -0500 Subject: [PATCH 09/30] Fixing issues --- src/core/lib/Mime.mjs | 0 src/core/operations/ParseIMF.mjs | 115 +++++++++++++++++++++---------- 2 files changed, 77 insertions(+), 38 deletions(-) create mode 100644 src/core/lib/Mime.mjs diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 3428ab4b7f..cd2ed4f105 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -12,7 +12,6 @@ import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; import {MIME_FORMAT} from "../lib/ChrEnc"; import Utils from "../Utils"; -// TODO: fix function header /** * Return the conetent encoding for a mime section from a header object. * CONTENT_TYPE returns the content type of a mime header from a header object. @@ -61,11 +60,16 @@ class ParseIMF extends Operation { * @param {Object[]} args * @returns {File[]} */ + // NOTE: Liberties taken include: + // header normalization by lowercasing field names and certain header values + // No checks are made to verify quoted words are valid encodings e.g. underscore vs escape + // This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect) + // Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now and is a standardized encoding format. run(input, args) { - // TODO: need to add Non-Mime emails - // TODO: need to add header info to output - // TODO: no uuencode function. see if we can fix this - // TODO: Need to parse multipart headers better as they are key value pairs separated by a ";\s+". + // TODO: need to add Non-Mime email support + // TODO Later: no uuencode function. See if we can fix this. + // TODO: may want to do base64 decode of binary to bytearray. + // TODO Later: need to look at binhex decoder maybe. if (!input) { return []; } @@ -74,14 +78,14 @@ class ParseIMF extends Operation { if (args[0] && headerBody.length > 0) { headerBody[0] = ParseIMF.replaceDecodeWord(headerBody[0]); } + let retval = [new File([headerBody[0]], "Header", {type: "text/plain"})]; let retfiles = ParseIMF.walkMime(headerBody[1], headerArray, input.indexOf("\r") >= 0); - let retval = []; retfiles.forEach(function(fileObj){ let file = null; if (fileObj.name !== null) { file = new File([fileObj.data], fileObj.name, {type: fileObj.type}); } else { - let name = ParseIMF.replaceDecodeWord(headerArray["subject"][0]).concat("."); + let name = headerArray["subject"][0].concat("."); if (fileObj.type in FILE_TYPE_SUFFIX) { name = name.concat(FILE_TYPE_SUFFIX[fileObj.type]); } else { @@ -113,19 +117,14 @@ class ParseIMF extends Operation { */ static walkMime(input, header, rn) { let new_line_length = rn ? 2 : 1; - const content_type_reg = /([^;]+);\s+boundary\=(['"])(.+?)\2/g; - const inner_content_type_reg = /^([^;]+);\s+type\=(['"])(.+?)\2;\s+boundary\=(['"])(.+?)\4/g; let output_sections = []; - if (header.hasOwnProperty("mime-version") || (header.hasOwnProperty("content-type") && header["content-type"][0].startsWith("multipart/"))) { + if (header.hasOwnProperty("content-type") && header["content-type"][0].startsWith("multipart/")) { + let contType = ParseIMF.decodeComplexField(header["content-type"][0]); let content_boundary = null; - let idx = 3; - if (header["content-type"][0].indexOf("type=") > 0) { - content_boundary = inner_content_type_reg.exec(header["content-type"][0]); - idx = 5; - } else { - content_boundary = content_type_reg.exec(header["content-type"][0]); + if (contType.hasOwnProperty("boundary")) { + content_boundary = contType.boundary; } - let mime_parts = ParseIMF.splitMultipart(input, content_boundary[idx], new_line_length); + let mime_parts = ParseIMF.splitMultipart(input, content_boundary, new_line_length); mime_parts.forEach(function(mime_part){ let headerBody = ParseIMF.splitHeaderFromBody(mime_part); let headerArray = ParseIMF.parseHeader(headerBody[0]); @@ -135,27 +134,31 @@ class ParseIMF extends Operation { }); }); } else if (header.hasOwnProperty("content-type") && header.hasOwnProperty("content-transfer-encoding")) { - let contType = null; - let dataValue = null; - let fileName = null; - let charEnc = null; - // TODO: if there is no content disposition filename try content type name. - if (header.hasOwnProperty("content-disposition")) { - const cont_disp = /^([^;]+);.*?filename\=(['"]?)(.+?)\2$/g; - let dispo = cont_disp.exec(header["content-disposition"][0]); - // TODO: Remove path if it contains it. - fileName = dispo[3]; - const cont_type_file = /^([^;]+);\s+name\=(["']?)(.+?)\2$/g; - let content = cont_type_file.exec(header["content-type"][0]); - let contType = content[1]; - } else { - const cont_type_data_reg = /^([^;]+);\s+charset\=(['"]?)(.+?)\2$/g; - let content = cont_type_data_reg.exec(header["content-type"][0]); - contType = content[1]; - charEnc = content[3] + let contType = null, fileName = null, charEnc = null, contTran = null; + let contDispoObj = header.hasOwnProperty("content-disposition") ? ParseIMF.decodeComplexField(header["content-disposition"][0]) : null; + let contTypeObj = ParseIMF.decodeComplexField(header["content-type"][0]); + let contEncObj = ParseIMF.decodeComplexField(header["content-transfer-encoding"][0]); + if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { + fileName = contDispoObj.filename; + } + if (contTypeObj != null) { + if (contTypeObj.hasOwnProperty("value")) { + contType = contTypeObj.value[0]; + } + if (contTypeObj.hasOwnProperty("charset")) { + charEnc = contTypeObj.charset; + } + if (fileName == null && contTypeObj.hasOwnProperty("name")) { + fileName = contTypeObj.name; + } + } + if (contEncObj != null && contEncObj.hasOwnProperty("value")) { + contTran = contEncObj.value[0]; } - dataValue = ParseIMF.decodeMimeData(input, charEnc, header["content-transfer-encoding"][0]); - return [{type: contType, data: dataValue, name: fileName}]; + if (contTran != null) { + input = ParseIMF.decodeMimeData(input, charEnc, contTran); + } + return [{type: contType, data: input, name: fileName}]; } else { throw new OperationError("Invalid Mime section"); } @@ -208,7 +211,7 @@ class ParseIMF extends Operation { let header = {}, section; while ((section = sectionRegex.exec(input))) { let fieldName = section[1].toLowerCase(); - let fieldValue = section[2].replace(/\n|\r/g, " "); + let fieldValue = ParseIMF.replaceDecodeWord(section[2].replace(/\n|\r/g, " ")); if (header[fieldName]) { header[fieldName].push(fieldValue); } else { @@ -245,6 +248,42 @@ class ParseIMF extends Operation { return input; } + /** + * + * + * + * + * + */ + static decodeComplexField(field) { + let fieldSplit = field.split(/;\s+/g); + let retVal = {}; + fieldSplit.forEach(function(item){ + if (item.indexOf("=") >= 0) { + let eq = item.indexOf("="); + let kv = null; + if (item.length > eq) { + kv = [item.substring(0, eq), item.substring(eq + 1).trim()]; + } else { + throw OperationError("Not a valid header entry"); + } + if ((kv[1].startsWith("\'") && kv[1].endsWith("\'")) + || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { + kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2]; + } + retVal[kv[0].toLowerCase()] = kv[1]; + } else { + item = item.trim().toLowerCase(); + if (retVal.hasOwnProperty("value")) { + retVal.value.push(item); + } else { + retVal.value = [item]; + } + } + }); + return retVal; + } + /** * * From 97219d090e3c23676a3b0ff602d0bb418c156529 Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Mon, 26 Nov 2018 15:07:34 -0500 Subject: [PATCH 10/30] fixing bugs and reformating code --- src/core/operations/ParseIMF.mjs | 134 ++++++++++++++++++------------- 1 file changed, 76 insertions(+), 58 deletions(-) diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index cd2ed4f105..9bbf641772 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -73,19 +73,19 @@ class ParseIMF extends Operation { if (!input) { return []; } - let headerBody = ParseIMF.splitHeaderFromBody(input); - let headerArray = ParseIMF.parseHeader(headerBody[0]); - if (args[0] && headerBody.length > 0) { - headerBody[0] = ParseIMF.replaceDecodeWord(headerBody[0]); + let emlObj = ParseIMF.splitParse(input); + if (!emlObj.body) { throw new OperationError("No body was found");} + if (args[0]) { + emlObj.rawHeader = ParseIMF.replaceDecodeWord(emlObj.rawHeader); } - let retval = [new File([headerBody[0]], "Header", {type: "text/plain"})]; - let retfiles = ParseIMF.walkMime(headerBody[1], headerArray, input.indexOf("\r") >= 0); + let retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; + let retfiles = ParseIMF.walkMime(emlObj, input.indexOf("\r") >= 0); retfiles.forEach(function(fileObj){ let file = null; if (fileObj.name !== null) { file = new File([fileObj.data], fileObj.name, {type: fileObj.type}); } else { - let name = headerArray["subject"][0].concat("."); + let name = emlObj.header["subject"][0].concat("."); if (fileObj.type in FILE_TYPE_SUFFIX) { name = name.concat(FILE_TYPE_SUFFIX[fileObj.type]); } else { @@ -115,31 +115,16 @@ class ParseIMF extends Operation { * @param {object} header * @returns {object[]} */ - static walkMime(input, header, rn) { + static walkMime(parentObj, rn) { let new_line_length = rn ? 2 : 1; - let output_sections = []; - if (header.hasOwnProperty("content-type") && header["content-type"][0].startsWith("multipart/")) { - let contType = ParseIMF.decodeComplexField(header["content-type"][0]); - let content_boundary = null; - if (contType.hasOwnProperty("boundary")) { - content_boundary = contType.boundary; - } - let mime_parts = ParseIMF.splitMultipart(input, content_boundary, new_line_length); - mime_parts.forEach(function(mime_part){ - let headerBody = ParseIMF.splitHeaderFromBody(mime_part); - let headerArray = ParseIMF.parseHeader(headerBody[0]); - let parts = ParseIMF.walkMime(headerBody[1], headerArray, rn); - parts.forEach(function(part){ - output_sections.push(part); - }); - }); - } else if (header.hasOwnProperty("content-type") && header.hasOwnProperty("content-transfer-encoding")) { - let contType = null, fileName = null, charEnc = null, contTran = null; - let contDispoObj = header.hasOwnProperty("content-disposition") ? ParseIMF.decodeComplexField(header["content-disposition"][0]) : null; - let contTypeObj = ParseIMF.decodeComplexField(header["content-type"][0]); - let contEncObj = ParseIMF.decodeComplexField(header["content-transfer-encoding"][0]); - if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { - fileName = contDispoObj.filename; + let contType = null, fileName = null, charEnc = null, contDispoObj = null; + if (parentObj.header.hasOwnProperty("content-type")) { + let contTypeObj = ParseIMF.decodeComplexField(parentObj.header["content-type"][0]); + if (parentObj.header.hasOwnProperty("content-disposition")) { + contDispoObj = ParseIMF.decodeComplexField(parentObj.header["content-disposition"][0]) + if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { + fileName = contDispoObj.filename; + } } if (contTypeObj != null) { if (contTypeObj.hasOwnProperty("value")) { @@ -152,35 +137,40 @@ class ParseIMF extends Operation { fileName = contTypeObj.name; } } - if (contEncObj != null && contEncObj.hasOwnProperty("value")) { - contTran = contEncObj.value[0]; + if (contType.startsWith("multipart/")) { + let content_boundary = null; + let output_sections = []; + if (contTypeObj.hasOwnProperty("boundary")) { + content_boundary = contTypeObj.boundary; + } + let mime_parts = ParseIMF.splitMultipart(parentObj.body, content_boundary, new_line_length); + mime_parts.forEach(function(mime_part){ + let mimeObj = ParseIMF.splitParse(mime_part); + if (!mimeObj.body) { + return []; + } + let parts = ParseIMF.walkMime(mimeObj, rn); + parts.forEach(function(part){ + output_sections.push(part); + }); + }); + return output_sections; } - if (contTran != null) { - input = ParseIMF.decodeMimeData(input, charEnc, contTran); + if (parentObj.header.hasOwnProperty("content-transfer-encoding")) { + let contEncObj = ParseIMF.decodeComplexField(parentObj.header["content-transfer-encoding"][0]); + let contTran = null; + if (contEncObj != null && contEncObj.hasOwnProperty("value")) { + contTran = contEncObj.value[0]; + } + if (contTran != null) { + parentObj.body = ParseIMF.decodeMimeData(parentObj.body, charEnc, contTran); + } } - return [{type: contType, data: input, name: fileName}]; - } else { - throw new OperationError("Invalid Mime section"); + return [{type: contType, data: parentObj.body, name: fileName}]; } - return output_sections; + throw new OperationError("Invalid Mime section"); } - /** - * Breaks the header from the body and returns [header, body] - * - * @param {string} input - * @returns {string[]} - */ - static splitHeaderFromBody(input) { - const emlRegex = /^([\x20-\xff\n\r\t]+?)(?:\r?\n){2}([\x20-\xff\t\n\r]*)/; - let splitEmail = emlRegex.exec(input); - if (splitEmail) { - //TODO: Array splice vs shift? - splitEmail.shift(); - return splitEmail; - } - } - /** * Takes a string and decodes quoted words inside them * These take the form of =?utf-8?Q?Hello?= @@ -198,6 +188,34 @@ class ParseIMF extends Operation { }); } + + /** + * Breaks the header from the body and returns [header, body] + * + * @param {string} input + * @returns {string[]} + */ + static splitParse(input) { + const emlRegex = /(?:\r?\n){2}/g; + let matchobj = emlRegex.exec(input); + if (matchobj) { + let splitEmail = [input.substring(0,matchobj.index), input.substring(emlRegex.lastIndex)]; + const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g; + let headerObj = {}, section; + while ((section = sectionRegex.exec(splitEmail[0]))) { + let fieldName = section[1].toLowerCase(); + let fieldValue = ParseIMF.replaceDecodeWord(section[2].replace(/\n|\r/g, " ")); + if (fieldName in headerObj) { + headerObj[fieldName].push(fieldValue); + } else { + headerObj[fieldName] = [fieldValue]; + } + } + return {rawHeader:splitEmail[0], body: splitEmail[1], header: headerObj}; + } + return {rawHeader: null, body:null, header:null}; + } + /** * Breaks a header into a object to be used by other functions. * It removes any line feeds or carriage returns from the values and @@ -205,9 +223,9 @@ class ParseIMF extends Operation { * * @param {string} input * @returns {object} - */ + * static parseHeader(input) { - const sectionRegex = /([A-Za-z-]+):\s+([\x20-\xff\r\n\t]+?)(?=$|\r?\n\S)/g; + const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g; let header = {}, section; while ((section = sectionRegex.exec(input))) { let fieldName = section[1].toLowerCase(); @@ -219,7 +237,7 @@ class ParseIMF extends Operation { } } return header; - } + } */ /** * Return decoded MIME data given the character encoding and content encoding. From a23c94cd76b3e2dcb6ccfbc35aedd426d95d570d Mon Sep 17 00:00:00 2001 From: bwhitn Date: Mon, 26 Nov 2018 22:50:03 -0500 Subject: [PATCH 11/30] fixing bugs and working on making it more compatable --- src/core/operations/ParseIMF.mjs | 76 ++++++++++++++------------------ 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 9bbf641772..d45910b127 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -20,7 +20,7 @@ import Utils from "../Utils"; * @constant * @default */ -const FILE_TYPE_SUFFIX = { +const BODY_FILE_TYPE = { "text/plain": "txt", "text/html": "htm", "application/rtf": "rtf", @@ -61,15 +61,13 @@ class ParseIMF extends Operation { * @returns {File[]} */ // NOTE: Liberties taken include: - // header normalization by lowercasing field names and certain header values // No checks are made to verify quoted words are valid encodings e.g. underscore vs escape // This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect) - // Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now and is a standardized encoding format. + // Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now + // and is a standardized encoding format. run(input, args) { - // TODO: need to add Non-Mime email support // TODO Later: no uuencode function. See if we can fix this. - // TODO: may want to do base64 decode of binary to bytearray. - // TODO Later: need to look at binhex decoder maybe. + // TODO: content-type can be omitted and would mean us-ascii charset and text/plain. if (!input) { return []; } @@ -85,9 +83,14 @@ class ParseIMF extends Operation { if (fileObj.name !== null) { file = new File([fileObj.data], fileObj.name, {type: fileObj.type}); } else { - let name = emlObj.header["subject"][0].concat("."); - if (fileObj.type in FILE_TYPE_SUFFIX) { - name = name.concat(FILE_TYPE_SUFFIX[fileObj.type]); + let name = null; + if ("subject" in emlObj.header) { + name = emlObj.header["subject"][0].concat("."); + } else { + name = "Undefined."; + } + if (fileObj.type in BODY_FILE_TYPE) { + name = name.concat(BODY_FILE_TYPE[fileObj.type]); } else { name = name.concat("bin"); } @@ -160,7 +163,7 @@ class ParseIMF extends Operation { let contEncObj = ParseIMF.decodeComplexField(parentObj.header["content-transfer-encoding"][0]); let contTran = null; if (contEncObj != null && contEncObj.hasOwnProperty("value")) { - contTran = contEncObj.value[0]; + contTran = contEncObj.value[0]; } if (contTran != null) { parentObj.body = ParseIMF.decodeMimeData(parentObj.body, charEnc, contTran); @@ -190,10 +193,12 @@ class ParseIMF extends Operation { /** - * Breaks the header from the body and returns [header, body] + * Breaks the header from the body and parses the header. The returns an + * object or null. The object contains the raw header, decoded body, and + * parsed header object. * * @param {string} input - * @returns {string[]} + * @returns {object} */ static splitParse(input) { const emlRegex = /(?:\r?\n){2}/g; @@ -213,32 +218,9 @@ class ParseIMF extends Operation { } return {rawHeader:splitEmail[0], body: splitEmail[1], header: headerObj}; } - return {rawHeader: null, body:null, header:null}; + return null; } - /** - * Breaks a header into a object to be used by other functions. - * It removes any line feeds or carriage returns from the values and - * replaces it with a space. - * - * @param {string} input - * @returns {object} - * - static parseHeader(input) { - const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g; - let header = {}, section; - while ((section = sectionRegex.exec(input))) { - let fieldName = section[1].toLowerCase(); - let fieldValue = ParseIMF.replaceDecodeWord(section[2].replace(/\n|\r/g, " ")); - if (header[fieldName]) { - header[fieldName].push(fieldValue); - } else { - header[fieldName] = [fieldValue]; - } - } - return header; - } */ - /** * Return decoded MIME data given the character encoding and content encoding. * @@ -267,11 +249,11 @@ class ParseIMF extends Operation { } /** + * Parse a complex header field and return an object that contains normalized + * keys with corresponding values and single values under a value array. * - * - * - * - * + * @param {string} field + * @returns {object} */ static decodeComplexField(field) { let fieldSplit = field.split(/;\s+/g); @@ -303,16 +285,22 @@ class ParseIMF extends Operation { } /** + * Splits a Mime document by the current boundaries and try to account for + * the current new line size which can be either the standard \r\n or \n. * - * - * - * + * @param {string} input + * @param {string} boundary + * @param {string} new_line_length + * @return {string[]} */ static splitMultipart(input, boundary, new_line_length) { let output = []; let newline = new_line_length === 2 ? "\r\n" : "\n"; const boundary_str = "--".concat(boundary, newline); - const last = input.indexOf("--".concat(boundary, "--", newline)) - new_line_length; + let last = input.indexOf("--".concat(boundary, "--", newline)) - new_line_length; + if (last < 0) { + last = input.indexOf("--".concat(boundary, "--")) - new_line_length; + } let start = 0; while(true) { let start = input.indexOf(boundary_str, start); From fa5d2b130f53c15245ce0ecd9320561b86ff9b93 Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Tue, 27 Nov 2018 14:49:02 -0500 Subject: [PATCH 12/30] working on moving parsing to lib --- src/core/lib/Mime.mjs | 292 +++++++++++++++++++++++++++++++ src/core/operations/ParseIMF.mjs | 47 ++--- 2 files changed, 316 insertions(+), 23 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index e69de29bb2..e813155f77 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -0,0 +1,292 @@ +/** + * @author bwhitn [brian.m.whitney@outlook.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import OperationError from "../errors/OperationError"; +import cptable from "../vendor/js-codepage/cptable.js"; +import {fromBase64} from "../lib/Base64"; +import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; +import {MIME_FORMAT} from "../lib/ChrEnc"; +import Utils from "../Utils"; + +/** + * + * + * @constant + * @default + */ +const BODY_FILE_TYPE = { + "text/plain": "txt", + "text/html": "htm", + "application/rtf": "rtf", +} + +class Mime { + /** + * Internet MessageFormat constructor + */ + constructor(input) { + this.input = input; + this.rn = input.indexOf("\r") >= 0; + } + + /** + * Basic Email Parser that displays the header and mime sections as files. + * Args 0 boolean decode quoted words + * + * @param {string} input + * @param {boolean} decodeWords + * @returns {File[]} + */ + // NOTE: Liberties taken include: + // No checks are made to verify quoted words are valid encodings e.g. underscore vs escape + // This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect) + // Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now + // and is a standardized encoding format. + decodeMime(decodeWords) { + // TODO Later: no uuencode function. See if we can fix this. + // TODO: content-type can be omitted and would mean us-ascii charset and text/plain. + if (!this.input) { + return []; + } + let emlObj = Mime._splitParse(this.input); + if (!emlObj.body) { throw new OperationError("No body was found");} + if (decodeWords) { + emlObj.rawHeader = Mime.replaceEncodedWord(emlObj.rawHeader); + } + let retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; + let retfiles = this._walkMime(emlObj); + retfiles.forEach(function(fileObj){ + let file = null; + if (fileObj.name !== null) { + file = new File([fileObj.data], fileObj.name, {type: fileObj.type}); + } else { + let name = null; + if ("subject" in emlObj.header) { + name = emlObj.header["subject"][0].concat("."); + } else { + name = "Undefined."; + } + if (fileObj.type in BODY_FILE_TYPE) { + name = name.concat(BODY_FILE_TYPE[fileObj.type]); + } else { + name = name.concat("bin"); + } + file = new File([fileObj.data], name, {type: fileObj.type}); + } + retval.push(file); + }); + return retval; + } + + /** + * Walks a MIME document and returns an array of Mime data and header objects. + * + * @param {string} input + * @param {object} header + * @returns {object[]} + */ + _walkMime(parentObj) { + let new_line_length = this.rn ? 2 : 1; + let contType = null, fileName = null, charEnc = null, contDispoObj = null; + if (parentObj.header.hasOwnProperty("content-type")) { + let contTypeObj = Mime._decodeComplexField(parentObj.header["content-type"][0]); + if (parentObj.header.hasOwnProperty("content-disposition")) { + contDispoObj = Mime._decodeComplexField(parentObj.header["content-disposition"][0]) + if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { + fileName = contDispoObj.filename; + } + } + if (contTypeObj != null) { + if (contTypeObj.hasOwnProperty("value")) { + contType = contTypeObj.value[0]; + } + if (contTypeObj.hasOwnProperty("charset")) { + charEnc = contTypeObj.charset; + } + if (fileName == null && contTypeObj.hasOwnProperty("name")) { + fileName = contTypeObj.name; + } + } + if (contType.startsWith("multipart/")) { + let content_boundary = null; + let output_sections = []; + if (contTypeObj.hasOwnProperty("boundary")) { + content_boundary = contTypeObj.boundary; + } + let mime_parts = Mime._splitMultipart(parentObj.body, content_boundary, new_line_length); + mime_parts.forEach(function(mime_part){ + let mimeObj = Mime._splitParse(mime_part); + if (!mimeObj.body) { + return []; + } + let parts = this._walkMime(mimeObj); + parts.forEach(function(part){ + output_sections.push(part); + }, this); + }, this); + return output_sections; + } + if (parentObj.header.hasOwnProperty("content-transfer-encoding")) { + let contEncObj = Mime._decodeComplexField(parentObj.header["content-transfer-encoding"][0]); + let contTran = null; + if (contEncObj != null && contEncObj.hasOwnProperty("value")) { + contTran = contEncObj.value[0]; + } + if (contTran != null) { + parentObj.body = Mime._decodeMimeData(parentObj.body, charEnc, contTran); + } + } + return [{type: contType, data: parentObj.body, name: fileName}]; + } + throw new OperationError("Invalid Mime section"); + } + + /** + * Takes a string and decodes quoted words inside them + * These take the form of =?utf-8?Q?Hello?= + * + * @param {string} input + * @returns {string} + */ + static replaceEncodedWord(input) { + return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) { + contEnc = (contEnc === "B") ? "base64" : "quoted-printable"; + if (contEnc === "quoted-printable") { + input = input.replace(/_/g, " "); + } + return Mime._decodeMimeData(input, charEnc, contEnc); + }); + } + + + /** + * Breaks the header from the body and parses the header. The returns an + * object or null. The object contains the raw header, decoded body, and + * parsed header object. + * + * @param {string} input + * @returns {object} + */ + static _splitParse(input) { + const emlRegex = /(?:\r?\n){2}/g; + let matchobj = emlRegex.exec(input); + if (matchobj) { + let splitEmail = [input.substring(0,matchobj.index), input.substring(emlRegex.lastIndex)]; + const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g; + let headerObj = {}, section; + while ((section = sectionRegex.exec(splitEmail[0]))) { + let fieldName = section[1].toLowerCase(); + let fieldValue = Mime.replaceEncodedWord(section[2].replace(/\n|\r/g, " ")); + if (fieldName in headerObj) { + headerObj[fieldName].push(fieldValue); + } else { + headerObj[fieldName] = [fieldValue]; + } + } + return {rawHeader:splitEmail[0], body: splitEmail[1], header: headerObj}; + } + return null; + } + + /** + * Return decoded MIME data given the character encoding and content encoding. + * + * @param {string} input + * @param {string} charEnc + * @param {string} contEnc + * @returns {string} + */ + static _decodeMimeData(input, charEnc, contEnc) { + switch (contEnc) { + case "base64": + input = fromBase64(input); + break; + case "quoted-printable": + input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input)); + break; + } + if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) { + input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); + } + return input; + } + + /** + * Parses a complex header field and returns an object that contains + * normalized keys with corresponding values along with single values under + * a value array. + * + * @param {string} field + * @returns {object} + */ + static _decodeComplexField(field) { + let fieldSplit = field.split(/;\s+/g); + let retVal = {}; + fieldSplit.forEach(function(item){ + if (item.indexOf("=") >= 0) { + let eq = item.indexOf("="); + let kv = null; + if (item.length > eq) { + kv = [item.substring(0, eq), item.substring(eq + 1).trim()]; + } else { + throw OperationError("Not a valid header entry"); + } + if ((kv[1].startsWith("\'") && kv[1].endsWith("\'")) + || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { + kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2]; + } + retVal[kv[0].toLowerCase()] = kv[1]; + } else { + item = item.trim().toLowerCase(); + if (retVal.hasOwnProperty("value")) { + retVal.value.push(item); + } else { + retVal.value = [item]; + } + } + }); + return retVal; + } + + /** + * Splits a Mime document by the current boundaries and attempts to account + * for the current new line size which can be either the standard \r\n or \n. + * + * @param {string} input + * @param {string} boundary + * @param {string} new_line_length + * @return {string[]} + */ + static _splitMultipart(input, boundary, new_line_length) { + let output = []; + let newline = new_line_length === 2 ? "\r\n" : "\n"; + const boundary_str = "--".concat(boundary, newline); + let last = input.indexOf("--".concat(boundary, "--", newline)) - new_line_length; + if (last < 0) { + last = input.indexOf("--".concat(boundary, "--")) - new_line_length; + } + let start = 0; + while(true) { + let start = input.indexOf(boundary_str, start); + if (start >= 0) { + start = start + boundary_str.length; + } else { + break; + } + let end = input.indexOf(boundary_str, start) - new_line_length; + if (end > start) { + output.push(input.substring(start, end)); + } else { + output.push(input.substring(start, last)); + break; + } + start = end; + } + return output; + } +} + +export default Mime; diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index d45910b127..8bd9a87fdd 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -10,21 +10,20 @@ import cptable from "../vendor/js-codepage/cptable.js"; import {fromBase64} from "../lib/Base64"; import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; import {MIME_FORMAT} from "../lib/ChrEnc"; +import Mime from "../lib/Mime"; import Utils from "../Utils"; /** - * Return the conetent encoding for a mime section from a header object. - * CONTENT_TYPE returns the content type of a mime header from a header object. - * Returns the filename from a mime header object. - * Returns the boundary value for the mime section from a header object. + * + * * @constant * @default - */ + * const BODY_FILE_TYPE = { "text/plain": "txt", "text/html": "htm", "application/rtf": "rtf", -} +} */ class ParseIMF extends Operation { @@ -52,6 +51,11 @@ class ParseIMF extends Operation { ]; } + run(input, args) { + let mimeObj = new Mime(input); + return mimeObj.decodeMime(args[0]); + } + /** * Basic Email Parser that displays the header and mime sections as files. * Args 0 boolean decode quoted words @@ -59,7 +63,7 @@ class ParseIMF extends Operation { * @param {string} input * @param {Object[]} args * @returns {File[]} - */ + * // NOTE: Liberties taken include: // No checks are made to verify quoted words are valid encodings e.g. underscore vs escape // This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect) @@ -99,7 +103,7 @@ class ParseIMF extends Operation { retval.push(file); }); return retval; - } + } */ /** * Displays the files in HTML for web apps. @@ -117,7 +121,7 @@ class ParseIMF extends Operation { * @param {string} input * @param {object} header * @returns {object[]} - */ + * static walkMime(parentObj, rn) { let new_line_length = rn ? 2 : 1; let contType = null, fileName = null, charEnc = null, contDispoObj = null; @@ -180,7 +184,7 @@ class ParseIMF extends Operation { * * @param {string} input * @returns {string} - */ + * static replaceDecodeWord(input) { return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) { contEnc = (contEnc === "B") ? "base64" : "quoted-printable"; @@ -199,7 +203,7 @@ class ParseIMF extends Operation { * * @param {string} input * @returns {object} - */ + * static splitParse(input) { const emlRegex = /(?:\r?\n){2}/g; let matchobj = emlRegex.exec(input); @@ -228,7 +232,7 @@ class ParseIMF extends Operation { * @param {string} charEnc * @param {string} contEnc * @returns {string} - */ + * static decodeMimeData(input, charEnc, contEnc) { switch (contEnc) { case "base64": @@ -237,10 +241,6 @@ class ParseIMF extends Operation { case "quoted-printable": input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input)); break; - case "7bit": - case "8bit": - default: - break; } if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) { input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); @@ -249,12 +249,13 @@ class ParseIMF extends Operation { } /** - * Parse a complex header field and return an object that contains normalized - * keys with corresponding values and single values under a value array. + * Parses a complex header field and returns an object that contains + * normalized keys with corresponding values along with single values under + * a value array. * * @param {string} field * @returns {object} - */ + * static decodeComplexField(field) { let fieldSplit = field.split(/;\s+/g); let retVal = {}; @@ -285,14 +286,14 @@ class ParseIMF extends Operation { } /** - * Splits a Mime document by the current boundaries and try to account for - * the current new line size which can be either the standard \r\n or \n. + * Splits a Mime document by the current boundaries and attempts to account + * for the current new line size which can be either the standard \r\n or \n. * * @param {string} input * @param {string} boundary * @param {string} new_line_length * @return {string[]} - */ + * static splitMultipart(input, boundary, new_line_length) { let output = []; let newline = new_line_length === 2 ? "\r\n" : "\n"; @@ -319,7 +320,7 @@ class ParseIMF extends Operation { start = end; } return output; - } + } */ } export default ParseIMF From e2ee627d0986e7129f348dd89398a24b5c578150 Mon Sep 17 00:00:00 2001 From: bwhitn Date: Tue, 27 Nov 2018 22:46:08 -0500 Subject: [PATCH 13/30] adding decode mime encoded words and removing duplicate code --- src/core/config/Categories.json | 3 +- .../operations/DecodeMimeEncodedWords.mjs | 36 +++ src/core/operations/ParseIMF.mjs | 282 +----------------- 3 files changed, 43 insertions(+), 278 deletions(-) create mode 100644 src/core/operations/DecodeMimeEncodedWords.mjs diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index 32d64b0823..4fc4c578c3 100755 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -55,7 +55,8 @@ "To Braille", "From Braille", "Parse TLV", - "Parse Internet Message Format" + "Parse Internet Message Format", + "Decode Mime Encoded Words" ] }, { diff --git a/src/core/operations/DecodeMimeEncodedWords.mjs b/src/core/operations/DecodeMimeEncodedWords.mjs new file mode 100644 index 0000000000..988d3a4f24 --- /dev/null +++ b/src/core/operations/DecodeMimeEncodedWords.mjs @@ -0,0 +1,36 @@ +/** + * @author bwhitn [brian.m.whitney@outlook.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import OperationError from "../errors/OperationError"; +import Mime from "../lib/Mime"; +import Utils from "../Utils"; + +class DecodeMimeEncodedWords extends Operation { + + /** + * DecodeMimeEncodedWords constructor + */ + constructor() { + super(); + this.name = "Decode Mime Encoded Words"; + this.module = "Default"; + this.description = ["Parser an IMF formatted messages following RFC5322.", + "

", + "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the headers and mime parts.", + ].join("\n"); + this.infoURL = "https://tools.ietf.org/html/rfc2047"; + this.inputType = "string"; + this.outputType = "string"; + this.args = []; + } + + run(input, args) { + return Mime.replaceEncodedWord(input); + } +} + +export default DecodeMimeEncodedWords; diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 8bd9a87fdd..0bfb6c1ebd 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -6,37 +6,21 @@ import Operation from "../Operation"; import OperationError from "../errors/OperationError"; -import cptable from "../vendor/js-codepage/cptable.js"; -import {fromBase64} from "../lib/Base64"; -import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; -import {MIME_FORMAT} from "../lib/ChrEnc"; import Mime from "../lib/Mime"; import Utils from "../Utils"; -/** - * - * - * @constant - * @default - * -const BODY_FILE_TYPE = { - "text/plain": "txt", - "text/html": "htm", - "application/rtf": "rtf", -} */ - class ParseIMF extends Operation { /** - * Internet MessageFormat constructor + * Internet Message Format constructor */ constructor() { super(); this.name = "Parse Internet Message Format"; this.module = "Default"; - this.description = ["Parser an IMF formatted messages following RFC5322.", + this.description = ["Parse an IMF formatted messages following RFC5322.", "

", - "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the headers and mime parts.", + "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the root header and decoded mime parts.", ].join("\n"); this.infoURL = "https://tools.ietf.org/html/rfc5322"; this.inputType = "string"; @@ -44,7 +28,7 @@ class ParseIMF extends Operation { this.presentType = "html"; this.args = [ { - "name": "Decode Quoted Words", + "name": "Decode Encoded-Words", "type": "boolean", "value": false } @@ -56,55 +40,6 @@ class ParseIMF extends Operation { return mimeObj.decodeMime(args[0]); } - /** - * Basic Email Parser that displays the header and mime sections as files. - * Args 0 boolean decode quoted words - * - * @param {string} input - * @param {Object[]} args - * @returns {File[]} - * - // NOTE: Liberties taken include: - // No checks are made to verify quoted words are valid encodings e.g. underscore vs escape - // This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect) - // Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now - // and is a standardized encoding format. - run(input, args) { - // TODO Later: no uuencode function. See if we can fix this. - // TODO: content-type can be omitted and would mean us-ascii charset and text/plain. - if (!input) { - return []; - } - let emlObj = ParseIMF.splitParse(input); - if (!emlObj.body) { throw new OperationError("No body was found");} - if (args[0]) { - emlObj.rawHeader = ParseIMF.replaceDecodeWord(emlObj.rawHeader); - } - let retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; - let retfiles = ParseIMF.walkMime(emlObj, input.indexOf("\r") >= 0); - retfiles.forEach(function(fileObj){ - let file = null; - if (fileObj.name !== null) { - file = new File([fileObj.data], fileObj.name, {type: fileObj.type}); - } else { - let name = null; - if ("subject" in emlObj.header) { - name = emlObj.header["subject"][0].concat("."); - } else { - name = "Undefined."; - } - if (fileObj.type in BODY_FILE_TYPE) { - name = name.concat(BODY_FILE_TYPE[fileObj.type]); - } else { - name = name.concat("bin"); - } - file = new File([fileObj.data], name, {type: fileObj.type}); - } - retval.push(file); - }); - return retval; - } */ - /** * Displays the files in HTML for web apps. * @@ -114,213 +49,6 @@ class ParseIMF extends Operation { async present(files) { return await Utils.displayFilesAsHTML(files); } - - /** - * Walks a MIME document and returns an array of Mime data and header objects. - * - * @param {string} input - * @param {object} header - * @returns {object[]} - * - static walkMime(parentObj, rn) { - let new_line_length = rn ? 2 : 1; - let contType = null, fileName = null, charEnc = null, contDispoObj = null; - if (parentObj.header.hasOwnProperty("content-type")) { - let contTypeObj = ParseIMF.decodeComplexField(parentObj.header["content-type"][0]); - if (parentObj.header.hasOwnProperty("content-disposition")) { - contDispoObj = ParseIMF.decodeComplexField(parentObj.header["content-disposition"][0]) - if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { - fileName = contDispoObj.filename; - } - } - if (contTypeObj != null) { - if (contTypeObj.hasOwnProperty("value")) { - contType = contTypeObj.value[0]; - } - if (contTypeObj.hasOwnProperty("charset")) { - charEnc = contTypeObj.charset; - } - if (fileName == null && contTypeObj.hasOwnProperty("name")) { - fileName = contTypeObj.name; - } - } - if (contType.startsWith("multipart/")) { - let content_boundary = null; - let output_sections = []; - if (contTypeObj.hasOwnProperty("boundary")) { - content_boundary = contTypeObj.boundary; - } - let mime_parts = ParseIMF.splitMultipart(parentObj.body, content_boundary, new_line_length); - mime_parts.forEach(function(mime_part){ - let mimeObj = ParseIMF.splitParse(mime_part); - if (!mimeObj.body) { - return []; - } - let parts = ParseIMF.walkMime(mimeObj, rn); - parts.forEach(function(part){ - output_sections.push(part); - }); - }); - return output_sections; - } - if (parentObj.header.hasOwnProperty("content-transfer-encoding")) { - let contEncObj = ParseIMF.decodeComplexField(parentObj.header["content-transfer-encoding"][0]); - let contTran = null; - if (contEncObj != null && contEncObj.hasOwnProperty("value")) { - contTran = contEncObj.value[0]; - } - if (contTran != null) { - parentObj.body = ParseIMF.decodeMimeData(parentObj.body, charEnc, contTran); - } - } - return [{type: contType, data: parentObj.body, name: fileName}]; - } - throw new OperationError("Invalid Mime section"); - } - - /** - * Takes a string and decodes quoted words inside them - * These take the form of =?utf-8?Q?Hello?= - * - * @param {string} input - * @returns {string} - * - static replaceDecodeWord(input) { - return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) { - contEnc = (contEnc === "B") ? "base64" : "quoted-printable"; - if (contEnc === "quoted-printable") { - input = input.replace(/_/g, " "); - } - return ParseIMF.decodeMimeData(input, charEnc, contEnc); - }); - } - - - /** - * Breaks the header from the body and parses the header. The returns an - * object or null. The object contains the raw header, decoded body, and - * parsed header object. - * - * @param {string} input - * @returns {object} - * - static splitParse(input) { - const emlRegex = /(?:\r?\n){2}/g; - let matchobj = emlRegex.exec(input); - if (matchobj) { - let splitEmail = [input.substring(0,matchobj.index), input.substring(emlRegex.lastIndex)]; - const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g; - let headerObj = {}, section; - while ((section = sectionRegex.exec(splitEmail[0]))) { - let fieldName = section[1].toLowerCase(); - let fieldValue = ParseIMF.replaceDecodeWord(section[2].replace(/\n|\r/g, " ")); - if (fieldName in headerObj) { - headerObj[fieldName].push(fieldValue); - } else { - headerObj[fieldName] = [fieldValue]; - } - } - return {rawHeader:splitEmail[0], body: splitEmail[1], header: headerObj}; - } - return null; - } - - /** - * Return decoded MIME data given the character encoding and content encoding. - * - * @param {string} input - * @param {string} charEnc - * @param {string} contEnc - * @returns {string} - * - static decodeMimeData(input, charEnc, contEnc) { - switch (contEnc) { - case "base64": - input = fromBase64(input); - break; - case "quoted-printable": - input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input)); - break; - } - if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) { - input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); - } - return input; - } - - /** - * Parses a complex header field and returns an object that contains - * normalized keys with corresponding values along with single values under - * a value array. - * - * @param {string} field - * @returns {object} - * - static decodeComplexField(field) { - let fieldSplit = field.split(/;\s+/g); - let retVal = {}; - fieldSplit.forEach(function(item){ - if (item.indexOf("=") >= 0) { - let eq = item.indexOf("="); - let kv = null; - if (item.length > eq) { - kv = [item.substring(0, eq), item.substring(eq + 1).trim()]; - } else { - throw OperationError("Not a valid header entry"); - } - if ((kv[1].startsWith("\'") && kv[1].endsWith("\'")) - || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { - kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2]; - } - retVal[kv[0].toLowerCase()] = kv[1]; - } else { - item = item.trim().toLowerCase(); - if (retVal.hasOwnProperty("value")) { - retVal.value.push(item); - } else { - retVal.value = [item]; - } - } - }); - return retVal; - } - - /** - * Splits a Mime document by the current boundaries and attempts to account - * for the current new line size which can be either the standard \r\n or \n. - * - * @param {string} input - * @param {string} boundary - * @param {string} new_line_length - * @return {string[]} - * - static splitMultipart(input, boundary, new_line_length) { - let output = []; - let newline = new_line_length === 2 ? "\r\n" : "\n"; - const boundary_str = "--".concat(boundary, newline); - let last = input.indexOf("--".concat(boundary, "--", newline)) - new_line_length; - if (last < 0) { - last = input.indexOf("--".concat(boundary, "--")) - new_line_length; - } - let start = 0; - while(true) { - let start = input.indexOf(boundary_str, start); - if (start >= 0) { - start = start + boundary_str.length; - } else { - break; - } - let end = input.indexOf(boundary_str, start) - new_line_length; - if (end > start) { - output.push(input.substring(start, end)); - } else { - output.push(input.substring(start, last)); - break; - } - start = end; - } - return output; - } */ } -export default ParseIMF +export default ParseIMF; From 72111cd2bbeb3df7a4a756a052ac48d0122538b5 Mon Sep 17 00:00:00 2001 From: bwhitn Date: Wed, 28 Nov 2018 22:51:14 -0500 Subject: [PATCH 14/30] dressing/simplifying code, added uuencoding --- src/core/lib/Mime.mjs | 132 ++++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 69 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index e813155f77..17d19fc9a3 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -11,18 +11,14 @@ import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; import {MIME_FORMAT} from "../lib/ChrEnc"; import Utils from "../Utils"; + /** - * - * - * @constant - * @default + * NOTE: Liberties taken include: + * No checks are made to verify quoted words are valid encodings e.g. underscore vs escape + * This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect) + * Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now + * and is a standardized encoding format. */ -const BODY_FILE_TYPE = { - "text/plain": "txt", - "text/html": "htm", - "application/rtf": "rtf", -} - class Mime { /** * Internet MessageFormat constructor @@ -40,47 +36,44 @@ class Mime { * @param {boolean} decodeWords * @returns {File[]} */ - // NOTE: Liberties taken include: - // No checks are made to verify quoted words are valid encodings e.g. underscore vs escape - // This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect) - // Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now - // and is a standardized encoding format. decodeMime(decodeWords) { - // TODO Later: no uuencode function. See if we can fix this. // TODO: content-type can be omitted and would mean us-ascii charset and text/plain. if (!this.input) { return []; } - let emlObj = Mime._splitParse(this.input); + let emlObj = Mime._splitParseHead(this.input); if (!emlObj.body) { throw new OperationError("No body was found");} if (decodeWords) { emlObj.rawHeader = Mime.replaceEncodedWord(emlObj.rawHeader); } let retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; - let retfiles = this._walkMime(emlObj); - retfiles.forEach(function(fileObj){ - let file = null; - if (fileObj.name !== null) { - file = new File([fileObj.data], fileObj.name, {type: fileObj.type}); - } else { - let name = null; + this._walkMime(emlObj).forEach(function(fileObj){ + let name = fileObj.name; + if (fileObj.name === null) { if ("subject" in emlObj.header) { - name = emlObj.header["subject"][0].concat("."); - } else { - name = "Undefined."; - } - if (fileObj.type in BODY_FILE_TYPE) { - name = name.concat(BODY_FILE_TYPE[fileObj.type]); + name = emlObj.header["subject"][0]; } else { - name = name.concat("bin"); + name = "Undefined"; } - file = new File([fileObj.data], name, {type: fileObj.type}); + name = name.concat(Mime.getFileExt(fileObj.type)); } - retval.push(file); + retval.push(new File([fileObj.data], name, {type: fileObj.type})); }); return retval; } + static getFileExt(mimetype) { + switch (mimetype) { + case "text/plain": + return ".txt"; + case "text/html": + return ".htm"; + case "application/rtf": + return ".rtf"; + } + return ".bin"; + } + /** * Walks a MIME document and returns an array of Mime data and header objects. * @@ -111,19 +104,17 @@ class Mime { } } if (contType.startsWith("multipart/")) { - let content_boundary = null; let output_sections = []; - if (contTypeObj.hasOwnProperty("boundary")) { - content_boundary = contTypeObj.boundary; + if (!contTypeObj.hasOwnProperty("boundary")) { + throw new OperationError("Invalid mulitpart section no boundary"); } - let mime_parts = Mime._splitMultipart(parentObj.body, content_boundary, new_line_length); + let mime_parts = this._splitMultipart(parentObj.body, contTypeObj.boundary, new_line_length); mime_parts.forEach(function(mime_part){ - let mimeObj = Mime._splitParse(mime_part); + let mimeObj = Mime._splitParseHead(mime_part); if (!mimeObj.body) { return []; } - let parts = this._walkMime(mimeObj); - parts.forEach(function(part){ + this._walkMime(mimeObj).forEach(function(part){ output_sections.push(part); }, this); }, this); @@ -131,12 +122,8 @@ class Mime { } if (parentObj.header.hasOwnProperty("content-transfer-encoding")) { let contEncObj = Mime._decodeComplexField(parentObj.header["content-transfer-encoding"][0]); - let contTran = null; if (contEncObj != null && contEncObj.hasOwnProperty("value")) { - contTran = contEncObj.value[0]; - } - if (contTran != null) { - parentObj.body = Mime._decodeMimeData(parentObj.body, charEnc, contTran); + parentObj.body = Mime._decodeMimeData(parentObj.body, charEnc, contEncObj.value[0]); } } return [{type: contType, data: parentObj.body, name: fileName}]; @@ -170,7 +157,7 @@ class Mime { * @param {string} input * @returns {object} */ - static _splitParse(input) { + static _splitParseHead(input) { const emlRegex = /(?:\r?\n){2}/g; let matchobj = emlRegex.exec(input); if (matchobj) { @@ -207,6 +194,21 @@ class Mime { case "quoted-printable": input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input)); break; + case "x-uuencode": + //TODO: need to trim before and after; + let match = /^\s*begin[^\n]+\n(.*)\r?\n`\r?\nend\s*$/gs.exec(input); + let lineReg = /\r?\n?.(.*)$/gm; + let line = null; + let lines = []; + while ((line = lineReg.exec(match[1]))) { + lines.push(fromBase64(line[1], " -_")); + } + if (match) { + input = lines.join(""); + } else { + throw new OperationError("Invalid uuencoding"); + } + break; } if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) { input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); @@ -226,19 +228,18 @@ class Mime { let fieldSplit = field.split(/;\s+/g); let retVal = {}; fieldSplit.forEach(function(item){ - if (item.indexOf("=") >= 0) { - let eq = item.indexOf("="); - let kv = null; + let eq = item.indexOf("="); + if (eq >= 0) { if (item.length > eq) { - kv = [item.substring(0, eq), item.substring(eq + 1).trim()]; + let kv = [item.substring(0, eq), item.substring(eq + 1).trim()]; + if ((kv[1].startsWith("\'") && kv[1].endsWith("\'")) + || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { + kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2]; + } + retVal[kv[0].toLowerCase()] = kv[1]; } else { throw OperationError("Not a valid header entry"); } - if ((kv[1].startsWith("\'") && kv[1].endsWith("\'")) - || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { - kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2]; - } - retVal[kv[0].toLowerCase()] = kv[1]; } else { item = item.trim().toLowerCase(); if (retVal.hasOwnProperty("value")) { @@ -260,29 +261,22 @@ class Mime { * @param {string} new_line_length * @return {string[]} */ - static _splitMultipart(input, boundary, new_line_length) { + _splitMultipart(input, boundary, new_line_length) { let output = []; - let newline = new_line_length === 2 ? "\r\n" : "\n"; - const boundary_str = "--".concat(boundary, newline); - let last = input.indexOf("--".concat(boundary, "--", newline)) - new_line_length; - if (last < 0) { - last = input.indexOf("--".concat(boundary, "--")) - new_line_length; - } + const boundary_str = "--".concat(boundary, this.rn ? "\r\n" : "\n"); + let last = input.indexOf("--".concat(boundary, "--")) - new_line_length; let start = 0; while(true) { let start = input.indexOf(boundary_str, start); - if (start >= 0) { - start = start + boundary_str.length; - } else { + if (start < 0) { break; } + start = start + boundary_str.length; let end = input.indexOf(boundary_str, start) - new_line_length; - if (end > start) { - output.push(input.substring(start, end)); - } else { - output.push(input.substring(start, last)); + if (end <= start) { break; } + output.push(input.substring(start, end)); start = end; } return output; From 78ba4d4fd3e75a4be27cf3dbd463a4e183a2195b Mon Sep 17 00:00:00 2001 From: bwhitn Date: Sat, 1 Dec 2018 11:41:14 -0500 Subject: [PATCH 15/30] fixing content-type issue --- src/core/lib/Mime.mjs | 102 +++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 56 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index 17d19fc9a3..3cb8d6ca30 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -83,53 +83,59 @@ class Mime { */ _walkMime(parentObj) { let new_line_length = this.rn ? 2 : 1; - let contType = null, fileName = null, charEnc = null, contDispoObj = null; + let contType = null, + fileName = null, + charEnc = null, + contDispoObj = null, + contTypeObj = null; if (parentObj.header.hasOwnProperty("content-type")) { - let contTypeObj = Mime._decodeComplexField(parentObj.header["content-type"][0]); - if (parentObj.header.hasOwnProperty("content-disposition")) { - contDispoObj = Mime._decodeComplexField(parentObj.header["content-disposition"][0]) - if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { - fileName = contDispoObj.filename; - } + contTypeObj = Mime._decodeComplexField(parentObj.header["content-type"][0]); + } + if (parentObj.header.hasOwnProperty("content-disposition")) { + contDispoObj = Mime._decodeComplexField(parentObj.header["content-disposition"][0]) + if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { + fileName = contDispoObj.filename; } - if (contTypeObj != null) { - if (contTypeObj.hasOwnProperty("value")) { - contType = contTypeObj.value[0]; - } - if (contTypeObj.hasOwnProperty("charset")) { - charEnc = contTypeObj.charset; - } - if (fileName == null && contTypeObj.hasOwnProperty("name")) { - fileName = contTypeObj.name; - } + } + if (contTypeObj != null) { + if (contTypeObj.hasOwnProperty("value")) { + contType = contTypeObj.value[0]; } - if (contType.startsWith("multipart/")) { - let output_sections = []; - if (!contTypeObj.hasOwnProperty("boundary")) { - throw new OperationError("Invalid mulitpart section no boundary"); - } - let mime_parts = this._splitMultipart(parentObj.body, contTypeObj.boundary, new_line_length); - mime_parts.forEach(function(mime_part){ - let mimeObj = Mime._splitParseHead(mime_part); - if (!mimeObj.body) { - return []; - } - this._walkMime(mimeObj).forEach(function(part){ - output_sections.push(part); - }, this); - }, this); - return output_sections; + if (contTypeObj.hasOwnProperty("charset")) { + charEnc = contTypeObj.charset; + } + if (fileName == null && contTypeObj.hasOwnProperty("name")) { + fileName = contTypeObj.name; } - if (parentObj.header.hasOwnProperty("content-transfer-encoding")) { - let contEncObj = Mime._decodeComplexField(parentObj.header["content-transfer-encoding"][0]); - if (contEncObj != null && contEncObj.hasOwnProperty("value")) { - parentObj.body = Mime._decodeMimeData(parentObj.body, charEnc, contEncObj.value[0]); + } else { + contType = "text/plain"; + charEnc = "us-ascii"; + } + if (contType.startsWith("multipart/")) { + let output_sections = []; + if (!contTypeObj.hasOwnProperty("boundary")) { + throw new OperationError("Invalid mulitpart section no boundary"); + } + let mime_parts = this._splitMultipart(parentObj.body, contTypeObj.boundary, new_line_length); + mime_parts.forEach(function(mime_part){ + let mimeObj = Mime._splitParseHead(mime_part); + if (!mimeObj.body) { + return []; } + this._walkMime(mimeObj).forEach(function(part){ + output_sections.push(part); + }, this); + }, this); + return output_sections; + } + if (parentObj.header.hasOwnProperty("content-transfer-encoding")) { + let contEncObj = Mime._decodeComplexField(parentObj.header["content-transfer-encoding"][0]); + if (contEncObj != null && contEncObj.hasOwnProperty("value")) { + parentObj.body = Mime._decodeMimeData(parentObj.body, charEnc, contEncObj.value[0]); } - return [{type: contType, data: parentObj.body, name: fileName}]; } - throw new OperationError("Invalid Mime section"); - } + return [{type: contType, data: parentObj.body, name: fileName}]; + } /** * Takes a string and decodes quoted words inside them @@ -193,22 +199,6 @@ class Mime { break; case "quoted-printable": input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input)); - break; - case "x-uuencode": - //TODO: need to trim before and after; - let match = /^\s*begin[^\n]+\n(.*)\r?\n`\r?\nend\s*$/gs.exec(input); - let lineReg = /\r?\n?.(.*)$/gm; - let line = null; - let lines = []; - while ((line = lineReg.exec(match[1]))) { - lines.push(fromBase64(line[1], " -_")); - } - if (match) { - input = lines.join(""); - } else { - throw new OperationError("Invalid uuencoding"); - } - break; } if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) { input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); From a980462d71e40d4f0c1efc588c125750bb936056 Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Sat, 1 Dec 2018 23:09:50 -0500 Subject: [PATCH 16/30] fixed some lint errors --- src/core/lib/Mime.mjs | 114 +++++++++--------- src/core/lib/QuotedPrintable.mjs | 2 - .../operations/DecodeMimeEncodedWords.mjs | 15 ++- src/core/operations/FromQuotedPrintable.mjs | 2 +- src/core/operations/ParseIMF.mjs | 19 ++- 5 files changed, 84 insertions(+), 68 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index 3cb8d6ca30..7d96f2c17f 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -11,7 +11,7 @@ import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; import {MIME_FORMAT} from "../lib/ChrEnc"; import Utils from "../Utils"; - +// FIXME: files are a bit off on size. /** * NOTE: Liberties taken include: * No checks are made to verify quoted words are valid encodings e.g. underscore vs escape @@ -37,21 +37,22 @@ class Mime { * @returns {File[]} */ decodeMime(decodeWords) { - // TODO: content-type can be omitted and would mean us-ascii charset and text/plain. if (!this.input) { return []; } - let emlObj = Mime._splitParseHead(this.input); - if (!emlObj.body) { throw new OperationError("No body was found");} + const emlObj = Mime._splitParseHead(this.input); + if (!emlObj.body) { + throw new OperationError("No body was found"); + } if (decodeWords) { emlObj.rawHeader = Mime.replaceEncodedWord(emlObj.rawHeader); } - let retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; + const retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; this._walkMime(emlObj).forEach(function(fileObj){ let name = fileObj.name; if (fileObj.name === null) { - if ("subject" in emlObj.header) { - name = emlObj.header["subject"][0]; + if (emlObj.header.hasOwnProperty("subject")) { + name = emlObj.header.subject[0]; } else { name = "Undefined"; } @@ -62,6 +63,12 @@ class Mime { return retval; } + /** + * Simple function to add a common file extention based on mime type string. + * + * @param {string} mimetype + * @returns {string} + */ static getFileExt(mimetype) { switch (mimetype) { case "text/plain": @@ -75,24 +82,23 @@ class Mime { } /** - * Walks a MIME document and returns an array of Mime data and header objects. + * Walks a MIME document and returns an array of Mime data. * - * @param {string} input - * @param {object} header + * @param {object} parentObj * @returns {object[]} */ _walkMime(parentObj) { - let new_line_length = this.rn ? 2 : 1; - let contType = null, + const newLineLength = this.rn ? 2 : 1; + let contType = "text/plain", fileName = null, - charEnc = null, + charEnc = "us-ascii", contDispoObj = null, contTypeObj = null; if (parentObj.header.hasOwnProperty("content-type")) { contTypeObj = Mime._decodeComplexField(parentObj.header["content-type"][0]); } if (parentObj.header.hasOwnProperty("content-disposition")) { - contDispoObj = Mime._decodeComplexField(parentObj.header["content-disposition"][0]) + contDispoObj = Mime._decodeComplexField(parentObj.header["content-disposition"][0]); if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { fileName = contDispoObj.filename; } @@ -107,29 +113,23 @@ class Mime { if (fileName == null && contTypeObj.hasOwnProperty("name")) { fileName = contTypeObj.name; } - } else { - contType = "text/plain"; - charEnc = "us-ascii"; } if (contType.startsWith("multipart/")) { - let output_sections = []; + const sections = []; if (!contTypeObj.hasOwnProperty("boundary")) { throw new OperationError("Invalid mulitpart section no boundary"); } - let mime_parts = this._splitMultipart(parentObj.body, contTypeObj.boundary, new_line_length); - mime_parts.forEach(function(mime_part){ - let mimeObj = Mime._splitParseHead(mime_part); - if (!mimeObj.body) { - return []; + const mimeParts = this._splitMultipart(parentObj.body, contTypeObj.boundary, newLineLength); + mimeParts.forEach(function(mimePart){ + const mimeObj = Mime._splitParseHead(mimePart); + if (mimeObj) { + this._walkMime(mimeObj).forEach(part => sections.push(part)); } - this._walkMime(mimeObj).forEach(function(part){ - output_sections.push(part); - }, this); }, this); - return output_sections; + return sections; } if (parentObj.header.hasOwnProperty("content-transfer-encoding")) { - let contEncObj = Mime._decodeComplexField(parentObj.header["content-transfer-encoding"][0]); + const contEncObj = Mime._decodeComplexField(parentObj.header["content-transfer-encoding"][0]); if (contEncObj != null && contEncObj.hasOwnProperty("value")) { parentObj.body = Mime._decodeMimeData(parentObj.body, charEnc, contEncObj.value[0]); } @@ -165,21 +165,22 @@ class Mime { */ static _splitParseHead(input) { const emlRegex = /(?:\r?\n){2}/g; - let matchobj = emlRegex.exec(input); - if (matchobj) { - let splitEmail = [input.substring(0,matchobj.index), input.substring(emlRegex.lastIndex)]; + const matchObj = emlRegex.exec(input); + if (matchObj) { + const splitEmail = [input.substring(0, matchObj.index), input.substring(emlRegex.lastIndex)]; const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g; - let headerObj = {}, section; + const headerObj = {}; + let section; while ((section = sectionRegex.exec(splitEmail[0]))) { - let fieldName = section[1].toLowerCase(); - let fieldValue = Mime.replaceEncodedWord(section[2].replace(/\n|\r/g, " ")); + const fieldName = section[1].toLowerCase(); + const fieldValue = Mime.replaceEncodedWord(section[2].replace(/\n|\r/g, " ")); if (fieldName in headerObj) { headerObj[fieldName].push(fieldValue); } else { headerObj[fieldName] = [fieldValue]; } } - return {rawHeader:splitEmail[0], body: splitEmail[1], header: headerObj}; + return {rawHeader: splitEmail[0], body: splitEmail[1], header: headerObj}; } return null; } @@ -215,30 +216,29 @@ class Mime { * @returns {object} */ static _decodeComplexField(field) { - let fieldSplit = field.split(/;\s+/g); - let retVal = {}; + const fieldSplit = field.split(/;\s+/g); + const retVal = {}; fieldSplit.forEach(function(item){ - let eq = item.indexOf("="); + const eq = item.indexOf("="); if (eq >= 0) { if (item.length > eq) { - let kv = [item.substring(0, eq), item.substring(eq + 1).trim()]; - if ((kv[1].startsWith("\'") && kv[1].endsWith("\'")) - || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { + const kv = [item.substring(0, eq), item.substring(eq + 1).trim()]; + if ((kv[1].startsWith("'") && kv[1].endsWith("'")) || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2]; } - retVal[kv[0].toLowerCase()] = kv[1]; + this[kv[0].toLowerCase()] = kv[1]; } else { throw OperationError("Not a valid header entry"); } } else { item = item.trim().toLowerCase(); - if (retVal.hasOwnProperty("value")) { - retVal.value.push(item); + if (this.hasOwnProperty("value")) { + this.value.push(item); } else { - retVal.value = [item]; + this.value = [item]; } } - }); + }, retVal); return retVal; } @@ -248,25 +248,27 @@ class Mime { * * @param {string} input * @param {string} boundary - * @param {string} new_line_length + * @param {string} newLineLength * @return {string[]} */ - _splitMultipart(input, boundary, new_line_length) { - let output = []; - const boundary_str = "--".concat(boundary, this.rn ? "\r\n" : "\n"); - let last = input.indexOf("--".concat(boundary, "--")) - new_line_length; - let start = 0; - while(true) { - let start = input.indexOf(boundary_str, start); + _splitMultipart(input, boundary, newLineLength) { + const output = []; + const boundaryStr = "--".concat(boundary, this.rn ? "\r\n" : "\n"); + const last = input.indexOf("--".concat(boundary, "--")) - newLineLength; + for (;;) { + let start = input.indexOf(boundaryStr, start); if (start < 0) { break; } - start = start + boundary_str.length; - let end = input.indexOf(boundary_str, start) - new_line_length; + start += boundaryStr.length; + const end = input.indexOf(boundaryStr, start) - newLineLength; if (end <= start) { break; } output.push(input.substring(start, end)); + if (end === last) { + break; + } start = end; } return output; diff --git a/src/core/lib/QuotedPrintable.mjs b/src/core/lib/QuotedPrintable.mjs index 13e78df430..e7f7ece1ba 100644 --- a/src/core/lib/QuotedPrintable.mjs +++ b/src/core/lib/QuotedPrintable.mjs @@ -8,8 +8,6 @@ * @license Apache-2.0 */ -import Operation from "../Operation"; - /** * @param {string} input * @returns {byteArray} diff --git a/src/core/operations/DecodeMimeEncodedWords.mjs b/src/core/operations/DecodeMimeEncodedWords.mjs index 988d3a4f24..80f0c71179 100644 --- a/src/core/operations/DecodeMimeEncodedWords.mjs +++ b/src/core/operations/DecodeMimeEncodedWords.mjs @@ -5,10 +5,11 @@ */ import Operation from "../Operation"; -import OperationError from "../errors/OperationError"; import Mime from "../lib/Mime"; -import Utils from "../Utils"; +/** + * Operation for Finding and replacing Mime encoded words. + */ class DecodeMimeEncodedWords extends Operation { /** @@ -19,8 +20,7 @@ class DecodeMimeEncodedWords extends Operation { this.name = "Decode Mime Encoded Words"; this.module = "Default"; this.description = ["Parser an IMF formatted messages following RFC5322.", - "

", - "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the headers and mime parts.", + "

", "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the headers and mime parts.", ].join("\n"); this.infoURL = "https://tools.ietf.org/html/rfc2047"; this.inputType = "string"; @@ -28,6 +28,13 @@ class DecodeMimeEncodedWords extends Operation { this.args = []; } + /** + * + * + * + * + * + */ run(input, args) { return Mime.replaceEncodedWord(input); } diff --git a/src/core/operations/FromQuotedPrintable.mjs b/src/core/operations/FromQuotedPrintable.mjs index 10cedb3505..1a3b3bfda8 100644 --- a/src/core/operations/FromQuotedPrintable.mjs +++ b/src/core/operations/FromQuotedPrintable.mjs @@ -9,7 +9,7 @@ */ import Operation from "../Operation"; -import {decodeQuotedPrintable} from "../lib/QuotedPrintable" +import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; /** * From Quoted Printable operation diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 0bfb6c1ebd..848706f62f 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -5,10 +5,12 @@ */ import Operation from "../Operation"; -import OperationError from "../errors/OperationError"; import Mime from "../lib/Mime"; import Utils from "../Utils"; +/** + * + */ class ParseIMF extends Operation { /** @@ -19,8 +21,8 @@ class ParseIMF extends Operation { this.name = "Parse Internet Message Format"; this.module = "Default"; this.description = ["Parse an IMF formatted messages following RFC5322.", - "

", - "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the root header and decoded mime parts.", + "

", + "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the root header and decoded mime parts.", ].join("\n"); this.infoURL = "https://tools.ietf.org/html/rfc5322"; this.inputType = "string"; @@ -35,9 +37,16 @@ class ParseIMF extends Operation { ]; } + /** + * + * + * + * + * + */ run(input, args) { - let mimeObj = new Mime(input); - return mimeObj.decodeMime(args[0]); + //let mimeObj = new Mime(input); + return new Mime(input).decodeMime(args[0]); } /** From 94b2638365e8cb8e418f2d7a5407d78384a1cb40 Mon Sep 17 00:00:00 2001 From: bwhitn Date: Tue, 11 Dec 2018 01:22:23 -0500 Subject: [PATCH 17/30] fixing encoding/parsing issues --- src/core/lib/Mime.mjs | 21 ++++++++++++------- .../operations/DecodeMimeEncodedWords.mjs | 2 +- src/core/operations/ParseIMF.mjs | 16 +++++++------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index 7d96f2c17f..549a7f2060 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -6,7 +6,6 @@ import OperationError from "../errors/OperationError"; import cptable from "../vendor/js-codepage/cptable.js"; -import {fromBase64} from "../lib/Base64"; import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; import {MIME_FORMAT} from "../lib/ChrEnc"; import Utils from "../Utils"; @@ -58,7 +57,7 @@ class Mime { } name = name.concat(Mime.getFileExt(fileObj.type)); } - retval.push(new File([fileObj.data], name, {type: fileObj.type})); + retval.push(new File([Uint8Array.from(fileObj.data)], name, {type: fileObj.type})); }); return retval; } @@ -91,7 +90,7 @@ class Mime { const newLineLength = this.rn ? 2 : 1; let contType = "text/plain", fileName = null, - charEnc = "us-ascii", + charEnc = null, contDispoObj = null, contTypeObj = null; if (parentObj.header.hasOwnProperty("content-type")) { @@ -109,6 +108,10 @@ class Mime { } if (contTypeObj.hasOwnProperty("charset")) { charEnc = contTypeObj.charset; + } else { + if (contType.startsWith("text/")) { + charEnc = "us-ascii"; + } } if (fileName == null && contTypeObj.hasOwnProperty("name")) { fileName = contTypeObj.name; @@ -196,10 +199,11 @@ class Mime { static _decodeMimeData(input, charEnc, contEnc) { switch (contEnc) { case "base64": - input = fromBase64(input); + input = Utils.convertToByteArray(input, "base64"); + //input = fromBase64(input); break; case "quoted-printable": - input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input)); + input = decodeQuotedPrintable(input); } if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) { input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); @@ -253,15 +257,16 @@ class Mime { */ _splitMultipart(input, boundary, newLineLength) { const output = []; - const boundaryStr = "--".concat(boundary, this.rn ? "\r\n" : "\n"); - const last = input.indexOf("--".concat(boundary, "--")) - newLineLength; + const newline = this.rn ? "\r\n" : "\n"; + const boundaryStr = newline.concat("--", boundary); + const last = input.indexOf(newline.concat("--", boundary, "--")); for (;;) { let start = input.indexOf(boundaryStr, start); if (start < 0) { break; } start += boundaryStr.length; - const end = input.indexOf(boundaryStr, start) - newLineLength; + const end = input.indexOf(boundaryStr, start); if (end <= start) { break; } diff --git a/src/core/operations/DecodeMimeEncodedWords.mjs b/src/core/operations/DecodeMimeEncodedWords.mjs index 80f0c71179..9997d74043 100644 --- a/src/core/operations/DecodeMimeEncodedWords.mjs +++ b/src/core/operations/DecodeMimeEncodedWords.mjs @@ -20,7 +20,7 @@ class DecodeMimeEncodedWords extends Operation { this.name = "Decode Mime Encoded Words"; this.module = "Default"; this.description = ["Parser an IMF formatted messages following RFC5322.", - "

", "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the headers and mime parts.", + "

", "Decodes Mime encoded words that are found in IMF messages.", ].join("\n"); this.infoURL = "https://tools.ietf.org/html/rfc2047"; this.inputType = "string"; diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 848706f62f..72a49cba2d 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -9,7 +9,7 @@ import Mime from "../lib/Mime"; import Utils from "../Utils"; /** - * + * Operation for parsing IMF messages into file list. */ class ParseIMF extends Operation { @@ -18,11 +18,12 @@ class ParseIMF extends Operation { */ constructor() { super(); + this.name = "Parse Internet Message Format"; this.module = "Default"; this.description = ["Parse an IMF formatted messages following RFC5322.", "

", - "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the root header and decoded mime parts.", + "Parses an IMF formated message like those sent in SMTP. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the root header and decoded mime parts." ].join("\n"); this.infoURL = "https://tools.ietf.org/html/rfc5322"; this.inputType = "string"; @@ -30,7 +31,7 @@ class ParseIMF extends Operation { this.presentType = "html"; this.args = [ { - "name": "Decode Encoded-Words", + "name": "Decode Mime Encoded Words", "type": "boolean", "value": false } @@ -38,14 +39,11 @@ class ParseIMF extends Operation { } /** - * - * - * - * - * + * @param {string} + * @param {Object[]} + * @returns {File[]} */ run(input, args) { - //let mimeObj = new Mime(input); return new Mime(input).decodeMime(args[0]); } From 3e8934035a11dc6e7d87516a43661f741f26602a Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Tue, 11 Dec 2018 18:20:45 -0500 Subject: [PATCH 18/30] got parsing working correctly. Decoupling some of the functions. --- src/core/lib/Mime.mjs | 314 +++++++++++++++++++++++++++++++----------- 1 file changed, 235 insertions(+), 79 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index 549a7f2060..c6ca24e0ad 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -10,7 +10,6 @@ import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; import {MIME_FORMAT} from "../lib/ChrEnc"; import Utils from "../Utils"; -// FIXME: files are a bit off on size. /** * NOTE: Liberties taken include: * No checks are made to verify quoted words are valid encodings e.g. underscore vs escape @@ -47,7 +46,9 @@ class Mime { emlObj.rawHeader = Mime.replaceEncodedWord(emlObj.rawHeader); } const retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; - this._walkMime(emlObj).forEach(function(fileObj){ + let testval = Mime._walkMime(this.input); + console.log(JSON.stringify(testval)); + testval.forEach(function(fileObj){ let name = fileObj.name; if (fileObj.name === null) { if (emlObj.header.hasOwnProperty("subject")) { @@ -62,6 +63,153 @@ class Mime { return retval; } + /** + { + "rawHeader": "Message-ID: <39235FC5.276CCE00@example.com>\nDate: Wed, 17 May 2000 23:13:09 -0400\nFrom: Doug Sauder \nX-Mailer: Mozilla 4.7 [en] (WinNT; I)\nX-Accept-Language: en\nMIME-Version: 1.0\nTo: Heinz =?iso-8859-1?Q?M=FCller?= \nSubject: Die Hasen und die =?iso-8859-1?Q?Fr=F6sche?= (Netscape Messenger 4.7)\nContent-Type: multipart/mixed;\n boundary=\"------------A1E83A41894D3755390B838A\"", + "body": [ + { + "rawHeader": "\nContent-Type: multipart/alternative;\n boundary=\"------------F03F94BA73D3B9E8C1B94D92\"", + "body": [ + { + "rawHeader": "\nContent-Type: text/plain; charset=iso-8859-1\nContent-Transfer-Encoding: quoted-printable", + "body": "[blue ball]\n\nDie Hasen und die Fr=F6sche\n\nDie Hasen klagten einst =FCber ihre mi=DFliche Lage; \"wir leben\", sprach =\nein\nRedner, \"in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,\nder Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als de=\nr\nTod selbst. Auf, la=DFt uns ein f=FCr allemal sterben.\"\n\nIn einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;\nallein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt\nerschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie=\n aufs\nschnellste untertauchten.\n\n\"Halt\", rief nun eben dieser Sprecher, \"wir wollen das Ers=E4ufen noch ei=\nn\nwenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,\nwelche also wohl noch ungl=FCcklicher sein m=FCssen als wir.\"\n\n[Image]\n\n\n", + "header": { + "content-type": [ + "text/plain; charset=iso-8859-1" + ], + "content-transfer-encoding": [ + "quoted-printable" + ] + } + }, + { + "rawHeader": "\nContent-Type: multipart/related;\n boundary=\"------------C02FA3D0A04E95F295FB25EB\"", + "body": [ + { + "rawHeader": "\nContent-Type: text/html; charset=us-ascii\nContent-Transfer-Encoding: 7bit", + "body": "\n\n\"blue\n

Die Hasen und die Frösche\n

Die Hasen klagten einst über ihre mißliche Lage; \"wir leben\",\nsprach ein Redner, \"in steter Furcht vor Menschen und Tieren, eine Beute\nder Hunde, der Adler, ja fast aller Raubtiere! Unsere stete Angst ist ärger\nals der Tod selbst. Auf, laßt uns ein für allemal sterben.\"\n

In einem nahen Teich wollten sie sich nun ersäufen; sie eilten\nihm zu; allein das außerordentliche Getöse und ihre wunderbare\nGestalt erschreckte eine Menge Frösche, die am Ufer saßen, so\nsehr, daß sie aufs schnellste untertauchten.\n

\"Halt\", rief nun eben dieser Sprecher, \"wir wollen das Ersäufen\nnoch ein wenig aufschieben, denn auch uns fürchten, wie ihr seht,\neinige Tiere, welche also wohl noch unglücklicher sein müssen\nals wir.\"\n

\n
 \n
 \n", + "header": { + "content-type": [ + "text/html; charset=us-ascii" + ], + "content-transfer-encoding": [ + "7bit" + ] + } + }, + { + "rawHeader": "\nContent-Type: image/png\nContent-ID: \nContent-Transfer-Encoding: base64\nContent-Disposition: inline; filename=\"C:\\TEMP\\nsmailEG.png\"", + "body": "iVBORw0KGgoAAAANSUhEUgAAABsAAAAbCAMAAAC6CgRnAAADAFBMVEX///8AAAgAABAAABgA\nAAAACCkAEEIAEEoACDEAEFIIIXMIKXsIKYQIIWsAGFoACDkIIWMQOZwYQqUYQq0YQrUQOaUQ\nMZQAGFIQMYwpUrU5Y8Y5Y84pWs4YSs4YQs4YQr1Ca8Z7nNacvd6Mtd5jlOcxa94hUt4YStYY\nQsYQMaUAACHO5+/n7++cxu9ShO8pWucQOa1Ke86tzt6lzu9ajO8QMZxahNat1ufO7++Mve9K\ne+8YOaUYSsaMvee15++Uve8AAClajOdzpe9rnO8IKYwxY+8pWu8IIXsAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADB\nMg1VAAAAAXRSTlMAQObYZgAAABZ0RVh0U29mdHdhcmUAZ2lmMnBuZyAyLjAuMT1evmgAAAGI\nSURBVHicddJtV5swGAbgEk6AJhBSk4bMCUynBSLaqovbrG/bfPn/vyh70lbsscebL5xznTsh\n5BmNhgQoRChwo50EOIohUYLDj4zHhKYQkrEoQdvock4ne0IKMVUpKZLQDeqSTIsv+18PyqqW\nUw2IBsRM7307PPp+fDJrWtnpLDJvewYxnewfnvanZ+fzpmwXijC8KbqEa3Fx2ff91Y95U9XC\nUpaDeQwiMpHXP/v+1++bWVPWQoGFawtjury9vru/f/C1Vi7ezT0WWpQHf/7+u/G71aLThK/M\njRxmT6KdzZ9fGk9yatMsTgZLl3XVgFRAC6spj/13enssqJVtWVa3NdBSacL8+VZmYqKmdd1C\nSYoOiMOSGwtzlqqlFFIuOqv0a1ZEZrUkWICLLFW266y1KvWE1zV/iDAH1EopnVLCiygZCIom\nH3NCKX0lnI+B1iuuzCGTxwXjnDO4d7NpbX42YJJHkBwmAm2TxwAZg40J3+Xtbv1rgOAZwG0N\nxW62p+lT+Yi747sD/wEUVMzYmWkOvwAAACV0RVh0Q29tbWVudABjbGlwMmdpZiB2LjAuNiBi\neSBZdmVzIFBpZ3VldDZzO7wAAAAASUVORK5CYII=", + "header": { + "content-type": [ + "image/png" + ], + "content-id": [ + "" + ], + "content-transfer-encoding": [ + "base64" + ], + "content-disposition": [ + "inline; filename=\"C:\\TEMP\\nsmailEG.png\"" + ] + } + }, + { + "rawHeader": "\nContent-Type: image/png\nContent-ID: \nContent-Transfer-Encoding: base64\nContent-Disposition: inline; filename=\"C:\\TEMP\\nsmail39.png\"", + "body": "iVBORw0KGgoAAAANSUhEUgAAABsAAAAbCAMAAAC6CgRnAAADAFBMVEX///8AAAABAAALAAAV\nAAAaAAAXAAARAAAKAAADAAAcAAAyAABEAABNAABIAAA9AAAjAAAWAAAmAABhAAB7AACGAACH\nAAB9AAB0AABgAAA5AAAUAAAGAAAnAABLAABvAACQAAClAAC7AAC/AACrAAChAACMAABzAABb\nAAAuAAAIAABMAAB3AACZAAC0GRnKODjVPT3bKSndBQW4AACoAAB5AAAxAAAYAAAEAABFAACa\nAAC7JCTRYWHfhITmf3/mVlbqHx/SAAC5AACjAABdAABCAAAoAAAJAABnAAC6Dw/QVFTek5Pl\nrKzpmZntZWXvJSXXAADBAACxAACcAABtAABTAAA2AAAbAAAFAABKAACBAADLICDdZ2fonJzr\npqbtiorvUVHvFBTRAADDAAC2AAB4AABeAABAAAAiAABXAACSAADCAADaGxvoVVXseHjveHjv\nV1fvJibhAADOAAC3AACnAACVAABHAAArAAAPAACdAADFAADhBQXrKCjvPDzvNTXvGxvjAADQ\nAADJAAC1AACXAACEAABsAABPAAASAAACAABiAADpAADvAgLnAADYAADLAAC6AACwAABwAAAT\nAAAkAABYAADIAADTAADNAACzAACDAABuAAAeAAB+AADAAACkAACNAAB/AABpAABQAAAwAACR\nAACpAAC8AACqAACbAABlAABJAAAqAAAOAAA0AACsAACvAACtAACmAACJAAB6AABrAABaAAA+\nAAApAABqAACCAACfAACeAACWAACPAAB8AAAZAAAHAABVAACOAACKAAA4AAAQAAA/AAByAACA\nAABcAAA3AAAsAABmAABDAABWAAAgAAAzAAA8AAA6AAAfAAAMAAAdAAANAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD8\nLtlFAAAAAXRSTlMAQObYZgAAABZ0RVh0U29mdHdhcmUAZ2lmMnBuZyAyLjAuMT1evmgAAAII\nSURBVHicY2CAg/8QwIABmJhZWFnZ2Dk4MaU5uLh5eHn5+LkFBDlQJf8zC/EIi4iKiUtI8koJ\nScsgyf5nlpWTV1BUUlZRVVPX4NFk1UJIyghp6+jq6RsYGhmbKJgK85mZW8Dk/rNaSlhZ29ja\n2Ts4Ojkr6Li4urFDNf53N/Ow8vTy9vH18w8IDAoWDQkNC4+ASP5ni4wKio6JjYtPSExKTnFW\nSE1LF4A69n9GZlZ2Tm5efkFhUXFySWlZlEd5RSVY7j+TkGRVdU1tXX1DY1Ozcktpa1t7h2Yn\nOAj+d7l1tyo79vT29SdNSJ44SbFVdHIo9xSIHNPUaWqTpifNSJrZnK00S0U1a/acUG5piNz/\nuXLzVJ2qm6dXz584S2WB1cJFi5cshZr539xVftnyFKUVTi2TVjqvyhJLXb1m7TqoHPt6F/HW\n0g0bN63crGqVtWXrtu07BJihcsw71+zanRW8Z89eq337RQ/Ip60xO3gIElX/LbikDm8T36Kw\nbNmRo7O3zpHkPSZwHBqL//8flz1x2OOkyKJTi7aqbzutfUZI2gIuF8F2lr/D5dw2+fZdwpl8\nYVOlI+CJ4/9/joOyYed5QzMvhGqnm2V0WiClm///D0lfXHtJ6vLlK9w7rx7vQk5SQJbFtSms\n1y9evXid7QZacgOxmSxktNzdtSwwU+J/VICaCPFIYU3XAJhIOtjf5sfyAAAAJXRFWHRDb21t\nZW50AGNsaXAyZ2lmIHYuMC42IGJ5IFl2ZXMgUGlndWV0NnM7vAAAAABJRU5ErkJggg==", + "header": { + "content-type": [ + "image/png" + ], + "content-id": [ + "" + ], + "content-transfer-encoding": [ + "base64" + ], + "content-disposition": [ + "inline; filename=\"C:\\TEMP\\nsmail39.png\"" + ] + } + } + ], + "header": { + "content-type": [ + "multipart/related; boundary=\"------------C02FA3D0A04E95F295FB25EB\"" + ] + } + } + ], + "header": { + "content-type": [ + "multipart/alternative; boundary=\"------------F03F94BA73D3B9E8C1B94D92\"" + ] + } + }, + { + "rawHeader": "\nContent-Type: image/png;\n name=\"redball.png\"\nContent-Transfer-Encoding: base64\nContent-Disposition: inline;\n filename=\"redball.png\"", + "body": "iVBORw0KGgoAAAANSUhEUgAAABsAAAAbCAMAAAC6CgRnAAADAFBMVEX///8AAAABAAALAAAV\nAAAaAAAXAAARAAAKAAADAAAcAAAyAABEAABNAABIAAA9AAAjAAAWAAAmAABhAAB7AACGAACH\nAAB9AAB0AABgAAA5AAAUAAAGAAAnAABLAABvAACQAAClAAC7AAC/AACrAAChAACMAABzAABb\nAAAuAAAIAABMAAB3AACZAAC0GRnKODjVPT3bKSndBQW4AACoAAB5AAAxAAAYAAAEAABFAACa\nAAC7JCTRYWHfhITmf3/mVlbqHx/SAAC5AACjAABdAABCAAAoAAAJAABnAAC6Dw/QVFTek5Pl\nrKzpmZntZWXvJSXXAADBAACxAACcAABtAABTAAA2AAAbAAAFAABKAACBAADLICDdZ2fonJzr\npqbtiorvUVHvFBTRAADDAAC2AAB4AABeAABAAAAiAABXAACSAADCAADaGxvoVVXseHjveHjv\nV1fvJibhAADOAAC3AACnAACVAABHAAArAAAPAACdAADFAADhBQXrKCjvPDzvNTXvGxvjAADQ\nAADJAAC1AACXAACEAABsAABPAAASAAACAABiAADpAADvAgLnAADYAADLAAC6AACwAABwAAAT\nAAAkAABYAADIAADTAADNAACzAACDAABuAAAeAAB+AADAAACkAACNAAB/AABpAABQAAAwAACR\nAACpAAC8AACqAACbAABlAABJAAAqAAAOAAA0AACsAACvAACtAACmAACJAAB6AABrAABaAAA+\nAAApAABqAACCAACfAACeAACWAACPAAB8AAAZAAAHAABVAACOAACKAAA4AAAQAAA/AAByAACA\nAABcAAA3AAAsAABmAABDAABWAAAgAAAzAAA8AAA6AAAfAAAMAAAdAAANAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD8\nLtlFAAAAAXRSTlMAQObYZgAAABZ0RVh0U29mdHdhcmUAZ2lmMnBuZyAyLjAuMT1evmgAAAII\nSURBVHicY2CAg/8QwIABmJhZWFnZ2Dk4MaU5uLh5eHn5+LkFBDlQJf8zC/EIi4iKiUtI8koJ\nScsgyf5nlpWTV1BUUlZRVVPX4NFk1UJIyghp6+jq6RsYGhmbKJgK85mZW8Dk/rNaSlhZ29ja\n2Ts4Ojkr6Li4urFDNf53N/Ow8vTy9vH18w8IDAoWDQkNC4+ASP5ni4wKio6JjYtPSExKTnFW\nSE1LF4A69n9GZlZ2Tm5efkFhUXFySWlZlEd5RSVY7j+TkGRVdU1tXX1DY1Ozcktpa1t7h2Yn\nOAj+d7l1tyo79vT29SdNSJ44SbFVdHIo9xSIHNPUaWqTpifNSJrZnK00S0U1a/acUG5piNz/\nuXLzVJ2qm6dXz584S2WB1cJFi5cshZr539xVftnyFKUVTi2TVjqvyhJLXb1m7TqoHPt6F/HW\n0g0bN63crGqVtWXrtu07BJihcsw71+zanRW8Z89eq337RQ/Ip60xO3gIElX/LbikDm8T36Kw\nbNmRo7O3zpHkPSZwHBqL//8flz1x2OOkyKJTi7aqbzutfUZI2gIuF8F2lr/D5dw2+fZdwpl8\nYVOlI+CJ4/9/joOyYed5QzMvhGqnm2V0WiClm///D0lfXHtJ6vLlK9w7rx7vQk5SQJbFtSms\n1y9evXid7QZacgOxmSxktNzdtSwwU+J/VICaCPFIYU3XAJhIOtjf5sfyAAAAJXRFWHRDb21t\nZW50AGNsaXAyZ2lmIHYuMC42IGJ5IFl2ZXMgUGlndWV0NnM7vAAAAABJRU5ErkJggg==", + "header": { + "content-type": [ + "image/png; name=\"redball.png\"" + ], + "content-transfer-encoding": [ + "base64" + ], + "content-disposition": [ + "inline; filename=\"redball.png\"" + ] + } + }, + { + "rawHeader": "\nContent-Type: image/png;\n name=\"greenball.png\"\nContent-Transfer-Encoding: base64\nContent-Disposition: inline;\n filename=\"greenball.png\"", + "body": "iVBORw0KGgoAAAANSUhEUgAAABsAAAAbCAMAAAC6CgRnAAADAFBMVEX///8AAAAAEAAAGAAA\nIQAACAAAMQAAQgAAUgAAWgAASgAIYwAIcwAIewAQjAAIawAAOQAAYwAQlAAQnAAhpQAQpQAh\nrQBCvRhjxjFjxjlSxiEpzgAYvQAQrQAYrQAhvQCU1mOt1nuE1lJK3hgh1gAYxgAYtQAAKQBC\nzhDO55Te563G55SU52NS5yEh3gAYzgBS3iGc52vW75y974yE71JC7xCt73ul3nNa7ykh5wAY\n1gAx5wBS7yFr7zlK7xgp5wAp7wAx7wAIhAAQtQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAp\n1fnZAAAAAXRSTlMAQObYZgAAABZ0RVh0U29mdHdhcmUAZ2lmMnBuZyAyLjAuMT1evmgAAAFt\nSURBVHicddJtV8IgFAdwD2zIgMEE1+NcqdsoK+m5tCyz7/+ZiLmHsyzvq53zO/cy+N9ery1b\nVe9PWQA9z4MQ+H8Yoj7GASZ95IHfaBGmLOSchyIgyOu22mgQSjUcDuNYcoGjLiLK1cHh0fHJ\naTKKOcMItgYxT89OzsfjyTTLC8UF0c2ZNmKquJhczq6ub+YmSVUYRF59GeDastu7+9nD41Nm\nkiJ2jc2J3kAWZ9Pr55fH18XSmRuKUTXUaqHy7O19tfr4NFle/w3YDrWRUIlZrL/W86XJkyJV\nG9EaEjIx2XyZmZJGioeUaL+2AY8TY8omR6nkLKhu70zjUKVJXsp3quS2DVSJWNh3zzJKCyex\nI0ZxBP3afE0ElyqOlZJyw8r3BE2SFiJCyxA434SCkg65RhdeQBljQtCg39LWrA90RDDG1EWr\nYUO23hMANUKRRl61E529cR++D2G5LK002dr/qrcfu9u0V3bxn/XdhR/NYeeN0ggsLAAAACV0\nRVh0Q29tbWVudABjbGlwMmdpZiB2LjAuNiBieSBZdmVzIFBpZ3VldDZzO7wAAAAASUVORK5C\nYII=", + "header": { + "content-type": [ + "image/png; name=\"greenball.png\"" + ], + "content-transfer-encoding": [ + "base64" + ], + "content-disposition": [ + "inline; filename=\"greenball.png\"" + ] + } + } + ], + "header": { + "message-id": [ + "<39235FC5.276CCE00@example.com>" + ], + "date": [ + "Wed, 17 May 2000 23:13:09 -0400" + ], + "from": [ + "Doug Sauder " + ], + "x-mailer": [ + "Mozilla 4.7 [en] (WinNT; I)" + ], + "x-accept-language": [ + "en" + ], + "mime-version": [ + "1.0" + ], + "to": [ + "Heinz Müller " + ], + "subject": [ + "Die Hasen und die Frösche (Netscape Messenger 4.7)" + ], + "content-type": [ + "multipart/mixed; boundary=\"------------A1E83A41894D3755390B838A\"" + ] + } + } + */ + /** * Simple function to add a common file extention based on mime type string. * @@ -83,68 +231,79 @@ class Mime { /** * Walks a MIME document and returns an array of Mime data. * - * @param {object} parentObj + * @param {string} mimeObj * @returns {object[]} */ - _walkMime(parentObj) { - const newLineLength = this.rn ? 2 : 1; - let contType = "text/plain", - fileName = null, - charEnc = null, - contDispoObj = null, - contTypeObj = null; - if (parentObj.header.hasOwnProperty("content-type")) { - contTypeObj = Mime._decodeComplexField(parentObj.header["content-type"][0]); - } - if (parentObj.header.hasOwnProperty("content-disposition")) { - contDispoObj = Mime._decodeComplexField(parentObj.header["content-disposition"][0]); - if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { - fileName = contDispoObj.filename; + static _walkMime(mimeObj) { + mimeObj = Mime._splitParseHead(mimeObj); + const contType = Mime._decodeComplexField(mimeObj, "content-type"); + const boundary = Mime._decodeComplexField(mimeObj, "content-type", "boundary"); + if (contType && contType.startsWith("multipart/")) { + if (!boundary) { + throw new OperationError("Invalid mulitpart section no boundary"); } + const sections = []; + //const mimeParts = Mime._splitMultipart(mimeObj.body, boundary); + Mime._splitMultipart(mimeObj.body, boundary).forEach((mimePart) => { + sections.push(Mime._walkMime(mimePart)); + }, sections); + mimeObj.body = sections; } - if (contTypeObj != null) { - if (contTypeObj.hasOwnProperty("value")) { - contType = contTypeObj.value[0]; - } - if (contTypeObj.hasOwnProperty("charset")) { - charEnc = contTypeObj.charset; - } else { - if (contType.startsWith("text/")) { - charEnc = "us-ascii"; - } - } - if (fileName == null && contTypeObj.hasOwnProperty("name")) { - fileName = contTypeObj.name; - } + return mimeObj + } + +/** + static parsestuff() { + let contType = "text/plain", + fileName = null, + charEnc = null, + contDispoObj = null, + contTypeObj = null; + if (parentObj.header.hasOwnProperty("content-type")) { + contTypeObj = Mime._decodeComplexField(parentObj.header["content-type"][0]); + } + if (parentObj.header.hasOwnProperty("content-disposition")) { + contDispoObj = Mime._decodeComplexField(parentObj.header["content-disposition"][0]); + if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { + fileName = contDispoObj.filename; } - if (contType.startsWith("multipart/")) { - const sections = []; - if (!contTypeObj.hasOwnProperty("boundary")) { - throw new OperationError("Invalid mulitpart section no boundary"); - } - const mimeParts = this._splitMultipart(parentObj.body, contTypeObj.boundary, newLineLength); - mimeParts.forEach(function(mimePart){ - const mimeObj = Mime._splitParseHead(mimePart); - if (mimeObj) { - this._walkMime(mimeObj).forEach(part => sections.push(part)); - } - }, this); - return sections; + } + if (contTypeObj != null) { + if (contTypeObj.hasOwnProperty("value")) { + contType = contTypeObj.value[0]; } - if (parentObj.header.hasOwnProperty("content-transfer-encoding")) { - const contEncObj = Mime._decodeComplexField(parentObj.header["content-transfer-encoding"][0]); - if (contEncObj != null && contEncObj.hasOwnProperty("value")) { - parentObj.body = Mime._decodeMimeData(parentObj.body, charEnc, contEncObj.value[0]); + if (contTypeObj.hasOwnProperty("charset")) { + charEnc = contTypeObj.charset; + } else { + if (contType.startsWith("text/")) { + charEnc = "us-ascii"; } } - return [{type: contType, data: parentObj.body, name: fileName}]; + if (fileName == null && contTypeObj.hasOwnProperty("name")) { + fileName = contTypeObj.name; + } } + if (mimeObj) { + this._walkMime(mimeObj).forEach(part => sections.push(part)); + } + if (parentObj.header.hasOwnProperty("content-transfer-encoding")) { + const contEncObj = Mime._decodeComplexField(parentObj.header["content-transfer-encoding"][0]); + if (contEncObj != null && contEncObj.hasOwnProperty("value")) { + parentObj.body = Mime._decodeMimeData(parentObj.body, charEnc, contEncObj.value[0]); + } + } + return [{type: contType, data: parentObj.body, name: fileName}]; +} +*/ /** * Takes a string and decodes quoted words inside them - * These take the form of =?utf-8?Q?Hello?= + * These take the form of: + * input "=?utf-8?Q?Hello_World!?=" + * output "Hello World!" * * @param {string} input + * @param {string} type * @returns {string} */ static replaceEncodedWord(input) { @@ -153,7 +312,7 @@ class Mime { if (contEnc === "quoted-printable") { input = input.replace(/_/g, " "); } - return Mime._decodeMimeData(input, charEnc, contEnc); + return Utils.byteArrayToUtf8(Mime._decodeMimeData(input, charEnc, contEnc)); }); } @@ -200,13 +359,13 @@ class Mime { switch (contEnc) { case "base64": input = Utils.convertToByteArray(input, "base64"); - //input = fromBase64(input); break; case "quoted-printable": input = decodeQuotedPrintable(input); } if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) { - input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); + input = Utils.strToByteArray(cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input)); + } return input; } @@ -217,33 +376,31 @@ class Mime { * a value array. * * @param {string} field - * @returns {object} + * @returns {string} */ - static _decodeComplexField(field) { - const fieldSplit = field.split(/;\s+/g); - const retVal = {}; - fieldSplit.forEach(function(item){ - const eq = item.indexOf("="); - if (eq >= 0) { - if (item.length > eq) { - const kv = [item.substring(0, eq), item.substring(eq + 1).trim()]; - if ((kv[1].startsWith("'") && kv[1].endsWith("'")) || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { - kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2]; + static _decodeComplexField(mimeObj, field, subfield="value") { + if (mimeObj.header.hasOwnProperty(field)) { + const fieldSplit = mimeObj.header[field][0].split(/;\s+/g); + for (let i = 0; i < fieldSplit.length; i++) { + const eq = fieldSplit[i].indexOf("="); + if (eq >= 0) { + if (fieldSplit[i].length > eq) { + const kv = [fieldSplit[i].substring(0, eq), fieldSplit[i].substring(eq + 1).trim()]; + if ((kv[1].startsWith("'") && kv[1].endsWith("'")) || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { + kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2]; + } + if (subfield.toLowerCase() === kv[0].toLowerCase()) { + return kv[1]; + } + } else { + throw OperationError("Not a valid header entry"); } - this[kv[0].toLowerCase()] = kv[1]; - } else { - throw OperationError("Not a valid header entry"); - } - } else { - item = item.trim().toLowerCase(); - if (this.hasOwnProperty("value")) { - this.value.push(item); - } else { - this.value = [item]; + } else if (subfield == "value"){ + return fieldSplit[i].trim().toLowerCase(); } } - }, retVal); - return retVal; + } + return null; } /** @@ -252,12 +409,11 @@ class Mime { * * @param {string} input * @param {string} boundary - * @param {string} newLineLength * @return {string[]} */ - _splitMultipart(input, boundary, newLineLength) { + static _splitMultipart(input, boundary) { const output = []; - const newline = this.rn ? "\r\n" : "\n"; + const newline = input.indexOf("\r") >= 0 ? "\r\n" : "\n"; const boundaryStr = newline.concat("--", boundary); const last = input.indexOf(newline.concat("--", boundary, "--")); for (;;) { From 1c6eff0c6c8ccbcda4575ddf4b732934f41f045e Mon Sep 17 00:00:00 2001 From: bwhitn Date: Wed, 12 Dec 2018 05:59:46 -0500 Subject: [PATCH 19/30] working on walking a mime object --- src/core/lib/Mime.mjs | 65 +++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 42 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index c6ca24e0ad..a34a635f50 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -23,7 +23,6 @@ class Mime { */ constructor(input) { this.input = input; - this.rn = input.indexOf("\r") >= 0; } /** @@ -46,8 +45,7 @@ class Mime { emlObj.rawHeader = Mime.replaceEncodedWord(emlObj.rawHeader); } const retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; - let testval = Mime._walkMime(this.input); - console.log(JSON.stringify(testval)); + let testval = Mime._parseMime(this.input); testval.forEach(function(fileObj){ let name = fileObj.name; if (fileObj.name === null) { @@ -234,7 +232,7 @@ class Mime { * @param {string} mimeObj * @returns {object[]} */ - static _walkMime(mimeObj) { + static _parseMime(mimeObj) { mimeObj = Mime._splitParseHead(mimeObj); const contType = Mime._decodeComplexField(mimeObj, "content-type"); const boundary = Mime._decodeComplexField(mimeObj, "content-type", "boundary"); @@ -243,58 +241,41 @@ class Mime { throw new OperationError("Invalid mulitpart section no boundary"); } const sections = []; - //const mimeParts = Mime._splitMultipart(mimeObj.body, boundary); Mime._splitMultipart(mimeObj.body, boundary).forEach((mimePart) => { - sections.push(Mime._walkMime(mimePart)); + sections.push(Mime._parseMime(mimePart)); }, sections); mimeObj.body = sections; } return mimeObj } -/** - static parsestuff() { - let contType = "text/plain", - fileName = null, - charEnc = null, - contDispoObj = null, - contTypeObj = null; - if (parentObj.header.hasOwnProperty("content-type")) { - contTypeObj = Mime._decodeComplexField(parentObj.header["content-type"][0]); - } - if (parentObj.header.hasOwnProperty("content-disposition")) { - contDispoObj = Mime._decodeComplexField(parentObj.header["content-disposition"][0]); - if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { - fileName = contDispoObj.filename; + static walkMime(mimeObj, methods, recursive=true) { + let contType = Mime._decodeComplexField(mimeObj, "content-type"); + if (contType && contType.startsWith("mulipart/") && recursive) { + for (let i = 0; i < mimeObj.body.length; i++) { + mimeObj + } + } else { + methods.forEach(method => method(mimeObj)); + yield mimeObj; } } - if (contTypeObj != null) { - if (contTypeObj.hasOwnProperty("value")) { - contType = contTypeObj.value[0]; - } - if (contTypeObj.hasOwnProperty("charset")) { - charEnc = contTypeObj.charset; - } else { - if (contType.startsWith("text/")) { + + static decodeMimeMessage(mimeObj) { + let contType = Mime._decodeComplexField(mimeObj, "content-type"), + charEnc = Mime._decodeComplexField(mimeObj, "content-type", "charset"), + //name = Mime._decodeComplexField(mimeObj, "content-disposition", "filename"), + //nameAlt = Mime._decodeComplexField(mimeObj, "content-type", "name"), + contEnc = Mime._decodeComplexField(mimeObj, "content-transfer-encoding"); + if (contType != null) { + if (!charEnc && contType.startsWith("text/")) { charEnc = "us-ascii"; } } - if (fileName == null && contTypeObj.hasOwnProperty("name")) { - fileName = contTypeObj.name; + if (contEnc && typeof mimeObj.body === "string") { + mimeObj.body = Mime._decodeMimeData(mimeObj.body, charEnc, contEnc); } } - if (mimeObj) { - this._walkMime(mimeObj).forEach(part => sections.push(part)); - } - if (parentObj.header.hasOwnProperty("content-transfer-encoding")) { - const contEncObj = Mime._decodeComplexField(parentObj.header["content-transfer-encoding"][0]); - if (contEncObj != null && contEncObj.hasOwnProperty("value")) { - parentObj.body = Mime._decodeMimeData(parentObj.body, charEnc, contEncObj.value[0]); - } - } - return [{type: contType, data: parentObj.body, name: fileName}]; -} -*/ /** * Takes a string and decodes quoted words inside them From c0f4dc6c64eb86ecc9b2f81e757e3ff4b5ff5c21 Mon Sep 17 00:00:00 2001 From: bwhitn Date: Thu, 13 Dec 2018 05:46:38 -0500 Subject: [PATCH 20/30] working on functions --- src/core/lib/Mime.mjs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index a34a635f50..dbe0e94c9f 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -252,12 +252,9 @@ class Mime { static walkMime(mimeObj, methods, recursive=true) { let contType = Mime._decodeComplexField(mimeObj, "content-type"); if (contType && contType.startsWith("mulipart/") && recursive) { - for (let i = 0; i < mimeObj.body.length; i++) { - mimeObj - } + mimeObj.body.forEach(obj => Mime.walkMime(obj, methods, recursive)); } else { methods.forEach(method => method(mimeObj)); - yield mimeObj; } } From 1d0d539797a04984940a4e2d749cb3030bc0f57a Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Sat, 15 Dec 2018 21:14:46 -0500 Subject: [PATCH 21/30] making small changes --- src/core/lib/Mime.mjs | 92 +++++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 35 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index dbe0e94c9f..3264e09243 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -22,7 +22,7 @@ class Mime { * Internet MessageFormat constructor */ constructor(input) { - this.input = input; + this.mimeObj = Mime._parseMime(input); } /** @@ -34,18 +34,18 @@ class Mime { * @returns {File[]} */ decodeMime(decodeWords) { - if (!this.input) { + if (!this.mimeObj) { return []; } - const emlObj = Mime._splitParseHead(this.input); - if (!emlObj.body) { - throw new OperationError("No body was found"); - } + //const emlObj = Mime._splitParseHead(this.input); + //if (!emlObj.body) { + // throw new OperationError("No body was found"); + //} if (decodeWords) { emlObj.rawHeader = Mime.replaceEncodedWord(emlObj.rawHeader); } - const retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; - let testval = Mime._parseMime(this.input); + //const retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; + //let testval = Mime._parseMime(this.input); testval.forEach(function(fileObj){ let name = fileObj.name; if (fileObj.name === null) { @@ -227,15 +227,26 @@ class Mime { } /** - * Walks a MIME document and returns an array of Mime data. + * Helper function to return objects as an array. * - * @param {string} mimeObj * @returns {object[]} */ - static _parseMime(mimeObj) { - mimeObj = Mime._splitParseHead(mimeObj); - const contType = Mime._decodeComplexField(mimeObj, "content-type"); - const boundary = Mime._decodeComplexField(mimeObj, "content-type", "boundary"); + toObjArray() { + const out = []; + Mime.walkMime(this.mimeObj, mimePart => out.push(mimePart)); + return out; + } + + /** + * Walks a MIME document and returns an array of Mime data. + * + * @param {string} mimeData + * @returns {object} + */ + static _parseMime(mimeData) { + let mimeObj = Mime._splitParseHead(mimeData); + const contType = Mime.decodeComplexField(mimeObj, "content-type"); + const boundary = Mime.decodeComplexField(mimeObj, "content-type", "boundary"); if (contType && contType.startsWith("multipart/")) { if (!boundary) { throw new OperationError("Invalid mulitpart section no boundary"); @@ -249,28 +260,40 @@ class Mime { return mimeObj } + /** + * Executes methods on a mime object. These methods should modify the mimeObj. + * + * @param {Object} mimeObj + * @param {function[]} methods + * @param {boolean} recursive + * @returns {null} + */ static walkMime(mimeObj, methods, recursive=true) { - let contType = Mime._decodeComplexField(mimeObj, "content-type"); - if (contType && contType.startsWith("mulipart/") && recursive) { - mimeObj.body.forEach(obj => Mime.walkMime(obj, methods, recursive)); + let contType = Mime.decodeComplexField(mimeObj, "content-type"); + if (recursive && contType && contType.startsWith("mulitpart/")) { + mimeObj.body.forEach(obj => Mime.walkMime(obj, methods)); } else { methods.forEach(method => method(mimeObj)); } } + /** + * Attempts to decode a mimeObj's data by applying appropriate character and content decoders based on the header data. + * + * @param {Object} mimeObj + * @returns {null} + */ static decodeMimeMessage(mimeObj) { - let contType = Mime._decodeComplexField(mimeObj, "content-type"), - charEnc = Mime._decodeComplexField(mimeObj, "content-type", "charset"), - //name = Mime._decodeComplexField(mimeObj, "content-disposition", "filename"), - //nameAlt = Mime._decodeComplexField(mimeObj, "content-type", "name"), - contEnc = Mime._decodeComplexField(mimeObj, "content-transfer-encoding"); + let contType = Mime.decodeComplexField(mimeObj, "content-type"), + charEnc = Mime.decodeComplexField(mimeObj, "content-type", "charset"), + contEnc = Mime.decodeComplexField(mimeObj, "content-transfer-encoding"); if (contType != null) { if (!charEnc && contType.startsWith("text/")) { charEnc = "us-ascii"; } } if (contEnc && typeof mimeObj.body === "string") { - mimeObj.body = Mime._decodeMimeData(mimeObj.body, charEnc, contEnc); + mimeObj.body = Mime.decodeMimeData(mimeObj.body, charEnc, contEnc); } } @@ -295,6 +318,7 @@ class Mime { } + //TODO: Allow only a header as input /** * Breaks the header from the body and parses the header. The returns an * object or null. The object contains the raw header, decoded body, and @@ -356,7 +380,7 @@ class Mime { * @param {string} field * @returns {string} */ - static _decodeComplexField(mimeObj, field, subfield="value") { + static decodeComplexField(mimeObj, field, subfield="value") { if (mimeObj.header.hasOwnProperty(field)) { const fieldSplit = mimeObj.header[field][0].split(/;\s+/g); for (let i = 0; i < fieldSplit.length; i++) { @@ -381,6 +405,7 @@ class Mime { return null; } + //TODO: make this a yield instead of string array. /** * Splits a Mime document by the current boundaries and attempts to account * for the current new line size which can be either the standard \r\n or \n. @@ -394,21 +419,18 @@ class Mime { const newline = input.indexOf("\r") >= 0 ? "\r\n" : "\n"; const boundaryStr = newline.concat("--", boundary); const last = input.indexOf(newline.concat("--", boundary, "--")); - for (;;) { - let start = input.indexOf(boundaryStr, start); - if (start < 0) { - break; - } - start += boundaryStr.length; - const end = input.indexOf(boundaryStr, start); - if (end <= start) { + let begin = 0; + for (let end = 0; end !== last; begin = end) { + begin = input.indexOf(boundaryStr, begin); + if (begin < 0) { break; } - output.push(input.substring(start, end)); - if (end === last) { + begin += boundaryStr.length; + end = input.indexOf(boundaryStr, begin); + if (end <= begin) { break; } - start = end; + output.push(input.substring(begin, end)); } return output; } From 5f28349769e393f512471032e75eb643d39ccb3c Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Sun, 16 Dec 2018 20:35:47 -0500 Subject: [PATCH 22/30] Fixed the code just need to dress it up, update the comments, create the test cases --- src/core/lib/Mime.mjs | 346 ++++++++++--------------------- src/core/operations/ParseIMF.mjs | 69 +++++- 2 files changed, 171 insertions(+), 244 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index 3264e09243..d0ab56d9a0 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -14,266 +14,128 @@ import Utils from "../Utils"; * NOTE: Liberties taken include: * No checks are made to verify quoted words are valid encodings e.g. underscore vs escape * This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect) - * Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now - * and is a standardized encoding format. + * Both Base64 and QuotedPrintable is used for decode. */ class Mime { /** - * Internet MessageFormat constructor + * Mime Constructor */ constructor(input) { this.mimeObj = Mime._parseMime(input); } /** - * Basic Email Parser that displays the header and mime sections as files. - * Args 0 boolean decode quoted words + * Extract data from mimeObjects and return object array containing them. * - * @param {string} input - * @param {boolean} decodeWords - * @returns {File[]} + * @param {string[][]} headerObjects + * @param {boolean} header + * @param {boolean} body + * @param {boolean} recursive + * @returns {object[]} */ - decodeMime(decodeWords) { - if (!this.mimeObj) { - return []; - } - //const emlObj = Mime._splitParseHead(this.input); - //if (!emlObj.body) { - // throw new OperationError("No body was found"); - //} - if (decodeWords) { - emlObj.rawHeader = Mime.replaceEncodedWord(emlObj.rawHeader); - } - //const retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; - //let testval = Mime._parseMime(this.input); - testval.forEach(function(fileObj){ - let name = fileObj.name; - if (fileObj.name === null) { - if (emlObj.header.hasOwnProperty("subject")) { - name = emlObj.header.subject[0]; + extractData(headerObjects, header=true, body=true, recursive=true) { + const output = []; + Mime.walkMime(this.mimeObj, function(mimePart) { + const outObj = {}; + outObj.fields = {}; + if (body) { + const contType = Mime._extractField(mimePart, "content-type"); + if (contType && !contType.startsWith("multipart/")) { + outObj.body = mimePart.body; } else { - name = "Undefined"; + outObj.body = null; } - name = name.concat(Mime.getFileExt(fileObj.type)); } - retval.push(new File([Uint8Array.from(fileObj.data)], name, {type: fileObj.type})); - }); - return retval; - } - - /** - { - "rawHeader": "Message-ID: <39235FC5.276CCE00@example.com>\nDate: Wed, 17 May 2000 23:13:09 -0400\nFrom: Doug Sauder \nX-Mailer: Mozilla 4.7 [en] (WinNT; I)\nX-Accept-Language: en\nMIME-Version: 1.0\nTo: Heinz =?iso-8859-1?Q?M=FCller?= \nSubject: Die Hasen und die =?iso-8859-1?Q?Fr=F6sche?= (Netscape Messenger 4.7)\nContent-Type: multipart/mixed;\n boundary=\"------------A1E83A41894D3755390B838A\"", - "body": [ - { - "rawHeader": "\nContent-Type: multipart/alternative;\n boundary=\"------------F03F94BA73D3B9E8C1B94D92\"", - "body": [ - { - "rawHeader": "\nContent-Type: text/plain; charset=iso-8859-1\nContent-Transfer-Encoding: quoted-printable", - "body": "[blue ball]\n\nDie Hasen und die Fr=F6sche\n\nDie Hasen klagten einst =FCber ihre mi=DFliche Lage; \"wir leben\", sprach =\nein\nRedner, \"in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,\nder Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als de=\nr\nTod selbst. Auf, la=DFt uns ein f=FCr allemal sterben.\"\n\nIn einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;\nallein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt\nerschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie=\n aufs\nschnellste untertauchten.\n\n\"Halt\", rief nun eben dieser Sprecher, \"wir wollen das Ers=E4ufen noch ei=\nn\nwenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,\nwelche also wohl noch ungl=FCcklicher sein m=FCssen als wir.\"\n\n[Image]\n\n\n", - "header": { - "content-type": [ - "text/plain; charset=iso-8859-1" - ], - "content-transfer-encoding": [ - "quoted-printable" - ] - } - }, - { - "rawHeader": "\nContent-Type: multipart/related;\n boundary=\"------------C02FA3D0A04E95F295FB25EB\"", - "body": [ - { - "rawHeader": "\nContent-Type: text/html; charset=us-ascii\nContent-Transfer-Encoding: 7bit", - "body": "\n\n\"blue\n

Die Hasen und die Frösche\n

Die Hasen klagten einst über ihre mißliche Lage; \"wir leben\",\nsprach ein Redner, \"in steter Furcht vor Menschen und Tieren, eine Beute\nder Hunde, der Adler, ja fast aller Raubtiere! Unsere stete Angst ist ärger\nals der Tod selbst. Auf, laßt uns ein für allemal sterben.\"\n

In einem nahen Teich wollten sie sich nun ersäufen; sie eilten\nihm zu; allein das außerordentliche Getöse und ihre wunderbare\nGestalt erschreckte eine Menge Frösche, die am Ufer saßen, so\nsehr, daß sie aufs schnellste untertauchten.\n

\"Halt\", rief nun eben dieser Sprecher, \"wir wollen das Ersäufen\nnoch ein wenig aufschieben, denn auch uns fürchten, wie ihr seht,\neinige Tiere, welche also wohl noch unglücklicher sein müssen\nals wir.\"\n

\n
 \n
 \n", - "header": { - "content-type": [ - "text/html; charset=us-ascii" - ], - "content-transfer-encoding": [ - "7bit" - ] - } - }, - { - "rawHeader": "\nContent-Type: image/png\nContent-ID: \nContent-Transfer-Encoding: base64\nContent-Disposition: inline; filename=\"C:\\TEMP\\nsmailEG.png\"", - "body": "iVBORw0KGgoAAAANSUhEUgAAABsAAAAbCAMAAAC6CgRnAAADAFBMVEX///8AAAgAABAAABgA\nAAAACCkAEEIAEEoACDEAEFIIIXMIKXsIKYQIIWsAGFoACDkIIWMQOZwYQqUYQq0YQrUQOaUQ\nMZQAGFIQMYwpUrU5Y8Y5Y84pWs4YSs4YQs4YQr1Ca8Z7nNacvd6Mtd5jlOcxa94hUt4YStYY\nQsYQMaUAACHO5+/n7++cxu9ShO8pWucQOa1Ke86tzt6lzu9ajO8QMZxahNat1ufO7++Mve9K\ne+8YOaUYSsaMvee15++Uve8AAClajOdzpe9rnO8IKYwxY+8pWu8IIXsAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADB\nMg1VAAAAAXRSTlMAQObYZgAAABZ0RVh0U29mdHdhcmUAZ2lmMnBuZyAyLjAuMT1evmgAAAGI\nSURBVHicddJtV5swGAbgEk6AJhBSk4bMCUynBSLaqovbrG/bfPn/vyh70lbsscebL5xznTsh\n5BmNhgQoRChwo50EOIohUYLDj4zHhKYQkrEoQdvock4ne0IKMVUpKZLQDeqSTIsv+18PyqqW\nUw2IBsRM7307PPp+fDJrWtnpLDJvewYxnewfnvanZ+fzpmwXijC8KbqEa3Fx2ff91Y95U9XC\nUpaDeQwiMpHXP/v+1++bWVPWQoGFawtjury9vru/f/C1Vi7ezT0WWpQHf/7+u/G71aLThK/M\njRxmT6KdzZ9fGk9yatMsTgZLl3XVgFRAC6spj/13enssqJVtWVa3NdBSacL8+VZmYqKmdd1C\nSYoOiMOSGwtzlqqlFFIuOqv0a1ZEZrUkWICLLFW266y1KvWE1zV/iDAH1EopnVLCiygZCIom\nH3NCKX0lnI+B1iuuzCGTxwXjnDO4d7NpbX42YJJHkBwmAm2TxwAZg40J3+Xtbv1rgOAZwG0N\nxW62p+lT+Yi747sD/wEUVMzYmWkOvwAAACV0RVh0Q29tbWVudABjbGlwMmdpZiB2LjAuNiBi\neSBZdmVzIFBpZ3VldDZzO7wAAAAASUVORK5CYII=", - "header": { - "content-type": [ - "image/png" - ], - "content-id": [ - "" - ], - "content-transfer-encoding": [ - "base64" - ], - "content-disposition": [ - "inline; filename=\"C:\\TEMP\\nsmailEG.png\"" - ] - } - }, - { - "rawHeader": "\nContent-Type: image/png\nContent-ID: \nContent-Transfer-Encoding: base64\nContent-Disposition: inline; filename=\"C:\\TEMP\\nsmail39.png\"", - "body": "iVBORw0KGgoAAAANSUhEUgAAABsAAAAbCAMAAAC6CgRnAAADAFBMVEX///8AAAABAAALAAAV\nAAAaAAAXAAARAAAKAAADAAAcAAAyAABEAABNAABIAAA9AAAjAAAWAAAmAABhAAB7AACGAACH\nAAB9AAB0AABgAAA5AAAUAAAGAAAnAABLAABvAACQAAClAAC7AAC/AACrAAChAACMAABzAABb\nAAAuAAAIAABMAAB3AACZAAC0GRnKODjVPT3bKSndBQW4AACoAAB5AAAxAAAYAAAEAABFAACa\nAAC7JCTRYWHfhITmf3/mVlbqHx/SAAC5AACjAABdAABCAAAoAAAJAABnAAC6Dw/QVFTek5Pl\nrKzpmZntZWXvJSXXAADBAACxAACcAABtAABTAAA2AAAbAAAFAABKAACBAADLICDdZ2fonJzr\npqbtiorvUVHvFBTRAADDAAC2AAB4AABeAABAAAAiAABXAACSAADCAADaGxvoVVXseHjveHjv\nV1fvJibhAADOAAC3AACnAACVAABHAAArAAAPAACdAADFAADhBQXrKCjvPDzvNTXvGxvjAADQ\nAADJAAC1AACXAACEAABsAABPAAASAAACAABiAADpAADvAgLnAADYAADLAAC6AACwAABwAAAT\nAAAkAABYAADIAADTAADNAACzAACDAABuAAAeAAB+AADAAACkAACNAAB/AABpAABQAAAwAACR\nAACpAAC8AACqAACbAABlAABJAAAqAAAOAAA0AACsAACvAACtAACmAACJAAB6AABrAABaAAA+\nAAApAABqAACCAACfAACeAACWAACPAAB8AAAZAAAHAABVAACOAACKAAA4AAAQAAA/AAByAACA\nAABcAAA3AAAsAABmAABDAABWAAAgAAAzAAA8AAA6AAAfAAAMAAAdAAANAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD8\nLtlFAAAAAXRSTlMAQObYZgAAABZ0RVh0U29mdHdhcmUAZ2lmMnBuZyAyLjAuMT1evmgAAAII\nSURBVHicY2CAg/8QwIABmJhZWFnZ2Dk4MaU5uLh5eHn5+LkFBDlQJf8zC/EIi4iKiUtI8koJ\nScsgyf5nlpWTV1BUUlZRVVPX4NFk1UJIyghp6+jq6RsYGhmbKJgK85mZW8Dk/rNaSlhZ29ja\n2Ts4Ojkr6Li4urFDNf53N/Ow8vTy9vH18w8IDAoWDQkNC4+ASP5ni4wKio6JjYtPSExKTnFW\nSE1LF4A69n9GZlZ2Tm5efkFhUXFySWlZlEd5RSVY7j+TkGRVdU1tXX1DY1Ozcktpa1t7h2Yn\nOAj+d7l1tyo79vT29SdNSJ44SbFVdHIo9xSIHNPUaWqTpifNSJrZnK00S0U1a/acUG5piNz/\nuXLzVJ2qm6dXz584S2WB1cJFi5cshZr539xVftnyFKUVTi2TVjqvyhJLXb1m7TqoHPt6F/HW\n0g0bN63crGqVtWXrtu07BJihcsw71+zanRW8Z89eq337RQ/Ip60xO3gIElX/LbikDm8T36Kw\nbNmRo7O3zpHkPSZwHBqL//8flz1x2OOkyKJTi7aqbzutfUZI2gIuF8F2lr/D5dw2+fZdwpl8\nYVOlI+CJ4/9/joOyYed5QzMvhGqnm2V0WiClm///D0lfXHtJ6vLlK9w7rx7vQk5SQJbFtSms\n1y9evXid7QZacgOxmSxktNzdtSwwU+J/VICaCPFIYU3XAJhIOtjf5sfyAAAAJXRFWHRDb21t\nZW50AGNsaXAyZ2lmIHYuMC42IGJ5IFl2ZXMgUGlndWV0NnM7vAAAAABJRU5ErkJggg==", - "header": { - "content-type": [ - "image/png" - ], - "content-id": [ - "" - ], - "content-transfer-encoding": [ - "base64" - ], - "content-disposition": [ - "inline; filename=\"C:\\TEMP\\nsmail39.png\"" - ] - } - } - ], - "header": { - "content-type": [ - "multipart/related; boundary=\"------------C02FA3D0A04E95F295FB25EB\"" - ] - } - } - ], - "header": { - "content-type": [ - "multipart/alternative; boundary=\"------------F03F94BA73D3B9E8C1B94D92\"" - ] - } - }, - { - "rawHeader": "\nContent-Type: image/png;\n name=\"redball.png\"\nContent-Transfer-Encoding: base64\nContent-Disposition: inline;\n filename=\"redball.png\"", - "body": "iVBORw0KGgoAAAANSUhEUgAAABsAAAAbCAMAAAC6CgRnAAADAFBMVEX///8AAAABAAALAAAV\nAAAaAAAXAAARAAAKAAADAAAcAAAyAABEAABNAABIAAA9AAAjAAAWAAAmAABhAAB7AACGAACH\nAAB9AAB0AABgAAA5AAAUAAAGAAAnAABLAABvAACQAAClAAC7AAC/AACrAAChAACMAABzAABb\nAAAuAAAIAABMAAB3AACZAAC0GRnKODjVPT3bKSndBQW4AACoAAB5AAAxAAAYAAAEAABFAACa\nAAC7JCTRYWHfhITmf3/mVlbqHx/SAAC5AACjAABdAABCAAAoAAAJAABnAAC6Dw/QVFTek5Pl\nrKzpmZntZWXvJSXXAADBAACxAACcAABtAABTAAA2AAAbAAAFAABKAACBAADLICDdZ2fonJzr\npqbtiorvUVHvFBTRAADDAAC2AAB4AABeAABAAAAiAABXAACSAADCAADaGxvoVVXseHjveHjv\nV1fvJibhAADOAAC3AACnAACVAABHAAArAAAPAACdAADFAADhBQXrKCjvPDzvNTXvGxvjAADQ\nAADJAAC1AACXAACEAABsAABPAAASAAACAABiAADpAADvAgLnAADYAADLAAC6AACwAABwAAAT\nAAAkAABYAADIAADTAADNAACzAACDAABuAAAeAAB+AADAAACkAACNAAB/AABpAABQAAAwAACR\nAACpAAC8AACqAACbAABlAABJAAAqAAAOAAA0AACsAACvAACtAACmAACJAAB6AABrAABaAAA+\nAAApAABqAACCAACfAACeAACWAACPAAB8AAAZAAAHAABVAACOAACKAAA4AAAQAAA/AAByAACA\nAABcAAA3AAAsAABmAABDAABWAAAgAAAzAAA8AAA6AAAfAAAMAAAdAAANAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD8\nLtlFAAAAAXRSTlMAQObYZgAAABZ0RVh0U29mdHdhcmUAZ2lmMnBuZyAyLjAuMT1evmgAAAII\nSURBVHicY2CAg/8QwIABmJhZWFnZ2Dk4MaU5uLh5eHn5+LkFBDlQJf8zC/EIi4iKiUtI8koJ\nScsgyf5nlpWTV1BUUlZRVVPX4NFk1UJIyghp6+jq6RsYGhmbKJgK85mZW8Dk/rNaSlhZ29ja\n2Ts4Ojkr6Li4urFDNf53N/Ow8vTy9vH18w8IDAoWDQkNC4+ASP5ni4wKio6JjYtPSExKTnFW\nSE1LF4A69n9GZlZ2Tm5efkFhUXFySWlZlEd5RSVY7j+TkGRVdU1tXX1DY1Ozcktpa1t7h2Yn\nOAj+d7l1tyo79vT29SdNSJ44SbFVdHIo9xSIHNPUaWqTpifNSJrZnK00S0U1a/acUG5piNz/\nuXLzVJ2qm6dXz584S2WB1cJFi5cshZr539xVftnyFKUVTi2TVjqvyhJLXb1m7TqoHPt6F/HW\n0g0bN63crGqVtWXrtu07BJihcsw71+zanRW8Z89eq337RQ/Ip60xO3gIElX/LbikDm8T36Kw\nbNmRo7O3zpHkPSZwHBqL//8flz1x2OOkyKJTi7aqbzutfUZI2gIuF8F2lr/D5dw2+fZdwpl8\nYVOlI+CJ4/9/joOyYed5QzMvhGqnm2V0WiClm///D0lfXHtJ6vLlK9w7rx7vQk5SQJbFtSms\n1y9evXid7QZacgOxmSxktNzdtSwwU+J/VICaCPFIYU3XAJhIOtjf5sfyAAAAJXRFWHRDb21t\nZW50AGNsaXAyZ2lmIHYuMC42IGJ5IFl2ZXMgUGlndWV0NnM7vAAAAABJRU5ErkJggg==", - "header": { - "content-type": [ - "image/png; name=\"redball.png\"" - ], - "content-transfer-encoding": [ - "base64" - ], - "content-disposition": [ - "inline; filename=\"redball.png\"" - ] - } - }, - { - "rawHeader": "\nContent-Type: image/png;\n name=\"greenball.png\"\nContent-Transfer-Encoding: base64\nContent-Disposition: inline;\n filename=\"greenball.png\"", - "body": "iVBORw0KGgoAAAANSUhEUgAAABsAAAAbCAMAAAC6CgRnAAADAFBMVEX///8AAAAAEAAAGAAA\nIQAACAAAMQAAQgAAUgAAWgAASgAIYwAIcwAIewAQjAAIawAAOQAAYwAQlAAQnAAhpQAQpQAh\nrQBCvRhjxjFjxjlSxiEpzgAYvQAQrQAYrQAhvQCU1mOt1nuE1lJK3hgh1gAYxgAYtQAAKQBC\nzhDO55Te563G55SU52NS5yEh3gAYzgBS3iGc52vW75y974yE71JC7xCt73ul3nNa7ykh5wAY\n1gAx5wBS7yFr7zlK7xgp5wAp7wAx7wAIhAAQtQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAp\n1fnZAAAAAXRSTlMAQObYZgAAABZ0RVh0U29mdHdhcmUAZ2lmMnBuZyAyLjAuMT1evmgAAAFt\nSURBVHicddJtV8IgFAdwD2zIgMEE1+NcqdsoK+m5tCyz7/+ZiLmHsyzvq53zO/cy+N9ery1b\nVe9PWQA9z4MQ+H8Yoj7GASZ95IHfaBGmLOSchyIgyOu22mgQSjUcDuNYcoGjLiLK1cHh0fHJ\naTKKOcMItgYxT89OzsfjyTTLC8UF0c2ZNmKquJhczq6ub+YmSVUYRF59GeDastu7+9nD41Nm\nkiJ2jc2J3kAWZ9Pr55fH18XSmRuKUTXUaqHy7O19tfr4NFle/w3YDrWRUIlZrL/W86XJkyJV\nG9EaEjIx2XyZmZJGioeUaL+2AY8TY8omR6nkLKhu70zjUKVJXsp3quS2DVSJWNh3zzJKCyex\nI0ZxBP3afE0ElyqOlZJyw8r3BE2SFiJCyxA434SCkg65RhdeQBljQtCg39LWrA90RDDG1EWr\nYUO23hMANUKRRl61E529cR++D2G5LK002dr/qrcfu9u0V3bxn/XdhR/NYeeN0ggsLAAAACV0\nRVh0Q29tbWVudABjbGlwMmdpZiB2LjAuNiBieSBZdmVzIFBpZ3VldDZzO7wAAAAASUVORK5C\nYII=", - "header": { - "content-type": [ - "image/png; name=\"greenball.png\"" - ], - "content-transfer-encoding": [ - "base64" - ], - "content-disposition": [ - "inline; filename=\"greenball.png\"" - ] - } - } - ], - "header": { - "message-id": [ - "<39235FC5.276CCE00@example.com>" - ], - "date": [ - "Wed, 17 May 2000 23:13:09 -0400" - ], - "from": [ - "Doug Sauder " - ], - "x-mailer": [ - "Mozilla 4.7 [en] (WinNT; I)" - ], - "x-accept-language": [ - "en" - ], - "mime-version": [ - "1.0" - ], - "to": [ - "Heinz Müller " - ], - "subject": [ - "Die Hasen und die Frösche (Netscape Messenger 4.7)" - ], - "content-type": [ - "multipart/mixed; boundary=\"------------A1E83A41894D3755390B838A\"" - ] - } + if (header) { + outObj.header = mimePart.rawHeader; + } + if (!headerObjects) { + output.push(outObj); + return; + } + if (!Array.isArray(headerObjects)) { + throw new OperationError("Invalid extraction in headers. Not an Array."); + } + headerObjects.forEach(function(obj) { + if (!Array.isArray(obj)) { + throw new OperationError("Invalid extraction in headers Object. Not an Array."); + } + switch (obj.length) { + case 2: + outObj.fields[obj[0]] = Mime._extractField(mimePart, obj[1]); + break; + case 3: + outObj.fields[obj[0]] = Mime._extractField(mimePart, obj[1], obj[2]); + break; + default: + throw new OperationError("Invalid extraction in headers. Invalid Array size."); + } + }); + output.push(outObj); + }, recursive); + return output; } - */ /** - * Simple function to add a common file extention based on mime type string. + * Common helper function to decode Mime encoded words in headers. * - * @param {string} mimetype - * @returns {string} + * @param {boolean} recursive */ - static getFileExt(mimetype) { - switch (mimetype) { - case "text/plain": - return ".txt"; - case "text/html": - return ".htm"; - case "application/rtf": - return ".rtf"; - } - return ".bin"; + decodeHeaderWords(recursive=true) { + Mime.walkMime(this.mimeObj, mimePart => { + if (mimePart.rawHeader) { + mimePart.rawHeader = Mime.replaceEncodedWord(mimePart.rawHeader); + } + }, recursive); } /** - * Helper function to return objects as an array. + * Common helper function to decode Mime bodies. * - * @returns {object[]} + * @param {boolean} recursive */ - toObjArray() { - const out = []; - Mime.walkMime(this.mimeObj, mimePart => out.push(mimePart)); - return out; + decodeMimeObjects(recursive=true) { + Mime.walkMime(this.mimeObj, + mimePart => Mime.decodeMimeMessage(mimePart), + recursive); } /** - * Walks a MIME document and returns an array of Mime data. + * Walks a MIME document and returns a Mime Object. * * @param {string} mimeData * @returns {object} */ static _parseMime(mimeData) { - let mimeObj = Mime._splitParseHead(mimeData); - const contType = Mime.decodeComplexField(mimeObj, "content-type"); - const boundary = Mime.decodeComplexField(mimeObj, "content-type", "boundary"); - if (contType && contType.startsWith("multipart/")) { + const mimeObj = Mime._splitParseHead(mimeData); + const contType = Mime._extractField(mimeObj, "content-type"); + const boundary = Mime._extractField(mimeObj, "content-type", "boundary"); + if (mimeObj.body && contType && contType.startsWith("multipart/")) { if (!boundary) { throw new OperationError("Invalid mulitpart section no boundary"); } const sections = []; + Mime._splitMultipart(mimeObj.body, boundary).forEach((mimePart) => { sections.push(Mime._parseMime(mimePart)); }, sections); mimeObj.body = sections; } - return mimeObj + return mimeObj; } /** - * Executes methods on a mime object. These methods should modify the mimeObj. + * Executes a function on a mime object. These methods should modify the mimeObj. * * @param {Object} mimeObj - * @param {function[]} methods + * @param {function} methods * @param {boolean} recursive - * @returns {null} */ - static walkMime(mimeObj, methods, recursive=true) { - let contType = Mime.decodeComplexField(mimeObj, "content-type"); - if (recursive && contType && contType.startsWith("mulitpart/")) { - mimeObj.body.forEach(obj => Mime.walkMime(obj, methods)); - } else { - methods.forEach(method => method(mimeObj)); + static walkMime(mimeObj, method, recursive=true) { + const contType = Mime._extractField(mimeObj, "content-type"); + method(mimeObj); + if (recursive && mimeObj.body && contType && contType.startsWith("multipart/")) { + mimeObj.body.forEach(obj => Mime.walkMime(obj, method)); } } @@ -281,19 +143,18 @@ class Mime { * Attempts to decode a mimeObj's data by applying appropriate character and content decoders based on the header data. * * @param {Object} mimeObj - * @returns {null} */ static decodeMimeMessage(mimeObj) { - let contType = Mime.decodeComplexField(mimeObj, "content-type"), - charEnc = Mime.decodeComplexField(mimeObj, "content-type", "charset"), - contEnc = Mime.decodeComplexField(mimeObj, "content-transfer-encoding"); + const contType = Mime._extractField(mimeObj, "content-type"); + const contEnc = Mime._extractField(mimeObj, "content-transfer-encoding"); + let charEnc = Mime._extractField(mimeObj, "content-type", "charset"); if (contType != null) { if (!charEnc && contType.startsWith("text/")) { charEnc = "us-ascii"; } } - if (contEnc && typeof mimeObj.body === "string") { - mimeObj.body = Mime.decodeMimeData(mimeObj.body, charEnc, contEnc); + if (mimeObj.body && contEnc && typeof mimeObj.body === "string") { + mimeObj.body = Mime._decodeMimeData(mimeObj.body, charEnc, contEnc); } } @@ -332,21 +193,30 @@ class Mime { const matchObj = emlRegex.exec(input); if (matchObj) { const splitEmail = [input.substring(0, matchObj.index), input.substring(emlRegex.lastIndex)]; - const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g; - const headerObj = {}; - let section; - while ((section = sectionRegex.exec(splitEmail[0]))) { - const fieldName = section[1].toLowerCase(); - const fieldValue = Mime.replaceEncodedWord(section[2].replace(/\n|\r/g, " ")); - if (fieldName in headerObj) { - headerObj[fieldName].push(fieldValue); - } else { - headerObj[fieldName] = [fieldValue]; - } + return {rawHeader: splitEmail[0], body: splitEmail[1], header: Mime._parseHeader(splitEmail[0])}; + } + return {rawHeader: input, body: null, header: Mime._parseHeader(input)}; + } + + /** + * + * + * + */ + static _parseHeader(input) { + const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g; + const headerObj = {}; + let section; + while ((section = sectionRegex.exec(input))) { + const fieldName = section[1].toLowerCase(); + const fieldValue = Mime.replaceEncodedWord(section[2].replace(/\n|\r/g, " ")); + if (fieldName in headerObj) { + headerObj[fieldName].push(fieldValue); + } else { + headerObj[fieldName] = [fieldValue]; } - return {rawHeader: splitEmail[0], body: splitEmail[1], header: headerObj}; } - return null; + return headerObj; } /** @@ -373,14 +243,14 @@ class Mime { } /** - * Parses a complex header field and returns an object that contains + * Parses a header field and returns an object that contains * normalized keys with corresponding values along with single values under * a value array. * * @param {string} field * @returns {string} */ - static decodeComplexField(mimeObj, field, subfield="value") { + static _extractField(mimeObj, field, subfield="value") { if (mimeObj.header.hasOwnProperty(field)) { const fieldSplit = mimeObj.header[field][0].split(/;\s+/g); for (let i = 0; i < fieldSplit.length; i++) { @@ -389,7 +259,10 @@ class Mime { if (fieldSplit[i].length > eq) { const kv = [fieldSplit[i].substring(0, eq), fieldSplit[i].substring(eq + 1).trim()]; if ((kv[1].startsWith("'") && kv[1].endsWith("'")) || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { - kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2]; + const val = (/(['"])(.+)\1/.exec(kv[1])); + if (val && val.length === 3) { + kv[1] = val[2]; + } } if (subfield.toLowerCase() === kv[0].toLowerCase()) { return kv[1]; @@ -397,7 +270,7 @@ class Mime { } else { throw OperationError("Not a valid header entry"); } - } else if (subfield == "value"){ + } else if (subfield === "value"){ return fieldSplit[i].trim().toLowerCase(); } } @@ -405,7 +278,6 @@ class Mime { return null; } - //TODO: make this a yield instead of string array. /** * Splits a Mime document by the current boundaries and attempts to account * for the current new line size which can be either the standard \r\n or \n. @@ -418,7 +290,7 @@ class Mime { const output = []; const newline = input.indexOf("\r") >= 0 ? "\r\n" : "\n"; const boundaryStr = newline.concat("--", boundary); - const last = input.indexOf(newline.concat("--", boundary, "--")); + const last = input.indexOf(boundaryStr, "--"); let begin = 0; for (let end = 0; end !== last; begin = end) { begin = input.indexOf(boundaryStr, begin); diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 72a49cba2d..52d626374e 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -9,7 +9,7 @@ import Mime from "../lib/Mime"; import Utils from "../Utils"; /** - * Operation for parsing IMF messages into file list. + * */ class ParseIMF extends Operation { @@ -18,12 +18,11 @@ class ParseIMF extends Operation { */ constructor() { super(); - this.name = "Parse Internet Message Format"; this.module = "Default"; this.description = ["Parse an IMF formatted messages following RFC5322.", "

", - "Parses an IMF formated message like those sent in SMTP. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the root header and decoded mime parts." + "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the root header and decoded mime parts.", ].join("\n"); this.infoURL = "https://tools.ietf.org/html/rfc5322"; this.inputType = "string"; @@ -31,7 +30,7 @@ class ParseIMF extends Operation { this.presentType = "html"; this.args = [ { - "name": "Decode Mime Encoded Words", + "name": "Decode Encoded-Words", "type": "boolean", "value": false } @@ -39,12 +38,68 @@ class ParseIMF extends Operation { } /** - * @param {string} - * @param {Object[]} + * Basic Email Parser that displays the header and mime sections as files. + * Args 0 boolean decode quoted words + * + * @param {string} input + * @param {boolean} decodeWords * @returns {File[]} */ run(input, args) { - return new Mime(input).decodeMime(args[0]); + const eml = new Mime(input); + if (!eml.mimeObj) { + return []; + } + eml.decodeMimeObjects(); + if (args[0]) { + eml.decodeHeaderWords(false); + } + const fields = [["filename", "content-disposition", "filename"], + ["name", "content-type", "name"], + ["type", "content-type"], + ["subject", "subject"]]; + const dataObj = eml.extractData(fields); + let subject = null; + const retval = []; + if (dataObj.length >= 1) { + subject = dataObj[0].fields.subject; + if (dataObj[0].header) { + retval.push(new File([dataObj[0].header], "Header.txt", {type: "text/plain"})); + } + } + dataObj.forEach(function(obj) { + if (obj.body) { + let name = obj.fields.filename ? obj.fields.filename : obj.fields.name; + const type = obj.fields.type ? obj.fields.type : "text/plain"; + if (!name) { + name = (subject ? subject : "Undefined").concat(ParseIMF.getFileExt(type)); + } + if (Array.isArray(obj.body)) { + retval.push(new File([Uint8Array.from(obj.body)], name, {type: type})); + } else { + retval.push(new File([obj.body], name, {type: type})); + } + } + }); + return retval; + } + + /** + * Simple function to add a common file extention based on mime type string. + * + * @param {string} mimetype + * @returns {string} + */ + static getFileExt(mimetype) { + switch (mimetype) { + case "text/plain": + return ".txt"; + case "text/html": + return ".htm"; + case "application/rtf": + return ".rtf"; + } + return ".bin"; } /** From 7bbd471de37fafddf308181f0f80193c71c5b52d Mon Sep 17 00:00:00 2001 From: bwhitn Date: Sun, 16 Dec 2018 23:55:40 -0500 Subject: [PATCH 23/30] code cleaning --- src/core/lib/Mime.mjs | 45 +++++++++++++++++--------------- src/core/operations/ParseIMF.mjs | 4 +-- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index d0ab56d9a0..dfe5e5ded5 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -24,6 +24,7 @@ class Mime { this.mimeObj = Mime._parseMime(input); } + //TODO: Generator? /** * Extract data from mimeObjects and return object array containing them. * @@ -82,7 +83,7 @@ class Mime { * @param {boolean} recursive */ decodeHeaderWords(recursive=true) { - Mime.walkMime(this.mimeObj, mimePart => { + Mime.walkMime(this.mimeObj, function(mimePart) { if (mimePart.rawHeader) { mimePart.rawHeader = Mime.replaceEncodedWord(mimePart.rawHeader); } @@ -95,9 +96,9 @@ class Mime { * @param {boolean} recursive */ decodeMimeObjects(recursive=true) { - Mime.walkMime(this.mimeObj, - mimePart => Mime.decodeMimeMessage(mimePart), - recursive); + Mime.walkMime(this.mimeObj, function(mimePart) { + Mime.decodeMimeMessage(mimePart); + }, recursive); } /** @@ -116,7 +117,7 @@ class Mime { } const sections = []; - Mime._splitMultipart(mimeObj.body, boundary).forEach((mimePart) => { + Mime._splitMultipart(mimeObj.body, boundary).forEach(function(mimePart) { sections.push(Mime._parseMime(mimePart)); }, sections); mimeObj.body = sections; @@ -135,7 +136,9 @@ class Mime { const contType = Mime._extractField(mimeObj, "content-type"); method(mimeObj); if (recursive && mimeObj.body && contType && contType.startsWith("multipart/")) { - mimeObj.body.forEach(obj => Mime.walkMime(obj, method)); + mimeObj.body.forEach(function(obj) { + Mime.walkMime(obj, method); + }); } } @@ -250,27 +253,26 @@ class Mime { * @param {string} field * @returns {string} */ - static _extractField(mimeObj, field, subfield="value") { + static _extractField(mimeObj, field, subfield=null) { + if (subfield) { + subfield = subfield.toLowerCase(); + } if (mimeObj.header.hasOwnProperty(field)) { const fieldSplit = mimeObj.header[field][0].split(/;\s+/g); for (let i = 0; i < fieldSplit.length; i++) { const eq = fieldSplit[i].indexOf("="); - if (eq >= 0) { - if (fieldSplit[i].length > eq) { - const kv = [fieldSplit[i].substring(0, eq), fieldSplit[i].substring(eq + 1).trim()]; - if ((kv[1].startsWith("'") && kv[1].endsWith("'")) || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { - const val = (/(['"])(.+)\1/.exec(kv[1])); - if (val && val.length === 3) { - kv[1] = val[2]; - } - } - if (subfield.toLowerCase() === kv[0].toLowerCase()) { - return kv[1]; + if (eq >= 0 && fieldSplit[i].length > eq && subfield) { + const kv = [fieldSplit[i].substring(0, eq), fieldSplit[i].substring(eq + 1).trim()]; + if ((kv[1].startsWith("'") && kv[1].endsWith("'")) || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { + const val = (/(['"])(.+)\1/.exec(kv[1])); + if (val && val.length === 3) { + kv[1] = val[2]; } - } else { - throw OperationError("Not a valid header entry"); } - } else if (subfield === "value"){ + if (subfield === kv[0].toLowerCase()) { + return kv[1]; + } + } else if (!subfield){ return fieldSplit[i].trim().toLowerCase(); } } @@ -278,6 +280,7 @@ class Mime { return null; } + //TODO: turn into generator function /** * Splits a Mime document by the current boundaries and attempts to account * for the current new line size which can be either the standard \r\n or \n. diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 52d626374e..83fd81b836 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -30,7 +30,7 @@ class ParseIMF extends Operation { this.presentType = "html"; this.args = [ { - "name": "Decode Encoded-Words", + "name": "Decode Mime Encoded Words", "type": "boolean", "value": false } @@ -61,7 +61,7 @@ class ParseIMF extends Operation { const dataObj = eml.extractData(fields); let subject = null; const retval = []; - if (dataObj.length >= 1) { + if (dataObj.length) { subject = dataObj[0].fields.subject; if (dataObj[0].header) { retval.push(new File([dataObj[0].header], "Header.txt", {type: "text/plain"})); From 4fc37c4d02051242d9f93bed512bf3ac8d1728c6 Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Mon, 17 Dec 2018 17:10:34 -0500 Subject: [PATCH 24/30] touching up code and comments --- src/core/lib/Mime.mjs | 32 +++++++++++++++----------------- src/core/operations/ParseIMF.mjs | 10 +++++----- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index dfe5e5ded5..5bdd59ce49 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -24,9 +24,10 @@ class Mime { this.mimeObj = Mime._parseMime(input); } - //TODO: Generator? /** * Extract data from mimeObjects and return object array containing them. + * extractData([["testa", "header", "subheader"], ["testb", "header"]]) would + * returns an array of objects {fields: {testa: "somestringornull", testb: "somestringornull"}, header: "somestringornull", body: "somestringornull"} * * @param {string[][]} headerObjects * @param {boolean} header @@ -116,11 +117,12 @@ class Mime { throw new OperationError("Invalid mulitpart section no boundary"); } const sections = []; - - Mime._splitMultipart(mimeObj.body, boundary).forEach(function(mimePart) { - sections.push(Mime._parseMime(mimePart)); - }, sections); - mimeObj.body = sections; + for (const val of Mime._splitMultipart(mimeObj.body, boundary)) { + sections.push(Mime._parseMime(val)); + } + if (sections.length) { + mimeObj.body = sections; + } } return mimeObj; } @@ -135,7 +137,7 @@ class Mime { static walkMime(mimeObj, method, recursive=true) { const contType = Mime._extractField(mimeObj, "content-type"); method(mimeObj); - if (recursive && mimeObj.body && contType && contType.startsWith("multipart/")) { + if (recursive && mimeObj.body && Array.isArray(mimeObj.body) && contType && contType.startsWith("multipart/")) { mimeObj.body.forEach(function(obj) { Mime.walkMime(obj, method); }); @@ -181,8 +183,6 @@ class Mime { }); } - - //TODO: Allow only a header as input /** * Breaks the header from the body and parses the header. The returns an * object or null. The object contains the raw header, decoded body, and @@ -280,7 +280,6 @@ class Mime { return null; } - //TODO: turn into generator function /** * Splits a Mime document by the current boundaries and attempts to account * for the current new line size which can be either the standard \r\n or \n. @@ -289,11 +288,11 @@ class Mime { * @param {string} boundary * @return {string[]} */ - static _splitMultipart(input, boundary) { - const output = []; + static *_splitMultipart(input, boundary) { const newline = input.indexOf("\r") >= 0 ? "\r\n" : "\n"; - const boundaryStr = newline.concat("--", boundary); - const last = input.indexOf(boundaryStr, "--"); + const boundaryStr = "--".concat(boundary); + const boundaryStrEnd = newline.concat(boundaryStr); + const last = input.indexOf(boundaryStrEnd.concat("--")); let begin = 0; for (let end = 0; end !== last; begin = end) { begin = input.indexOf(boundaryStr, begin); @@ -301,13 +300,12 @@ class Mime { break; } begin += boundaryStr.length; - end = input.indexOf(boundaryStr, begin); + end = input.indexOf(boundaryStrEnd, begin); if (end <= begin) { break; } - output.push(input.substring(begin, end)); + yield input.substring(begin, end); } - return output; } } diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 83fd81b836..9df69d31b3 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -18,11 +18,11 @@ class ParseIMF extends Operation { */ constructor() { super(); - this.name = "Parse Internet Message Format"; + this.name = "Parse IMF"; this.module = "Default"; - this.description = ["Parse an IMF formatted messages following RFC5322.", + this.description = ["Parse an Internet Message Format (IMF) messages following RFC5322.", "

", - "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the root header and decoded mime parts.", + "Parses an IMF formated message. These often have the file extention ".eml" and contain the email headers and body. The output will be a file list of the root header and decoded mime parts.", ].join("\n"); this.infoURL = "https://tools.ietf.org/html/rfc5322"; this.inputType = "string"; @@ -30,7 +30,7 @@ class ParseIMF extends Operation { this.presentType = "html"; this.args = [ { - "name": "Decode Mime Encoded Words", + "name": "Decode Encoded-Words", "type": "boolean", "value": false } @@ -61,7 +61,7 @@ class ParseIMF extends Operation { const dataObj = eml.extractData(fields); let subject = null; const retval = []; - if (dataObj.length) { + if (dataObj.length >= 1) { subject = dataObj[0].fields.subject; if (dataObj[0].header) { retval.push(new File([dataObj[0].header], "Header.txt", {type: "text/plain"})); From f7b8e5176418b34c63804499431f6f0f0827766f Mon Sep 17 00:00:00 2001 From: bwhitn Date: Mon, 17 Dec 2018 22:48:15 -0500 Subject: [PATCH 25/30] working on test cases and dressing a bit of code --- src/core/operations/ParseIMF.mjs | 2 +- test/index.mjs | 1 + test/tests/operations/Mime.mjs | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 test/tests/operations/Mime.mjs diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 9df69d31b3..0799ef83c4 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -61,7 +61,7 @@ class ParseIMF extends Operation { const dataObj = eml.extractData(fields); let subject = null; const retval = []; - if (dataObj.length >= 1) { + if (dataObj.length) { subject = dataObj[0].fields.subject; if (dataObj[0].header) { retval.push(new File([dataObj[0].header], "Header.txt", {type: "text/plain"})); diff --git a/test/index.mjs b/test/index.mjs index e40ad9d0bd..6f2e112853 100644 --- a/test/index.mjs +++ b/test/index.mjs @@ -75,6 +75,7 @@ import "./tests/operations/ToGeohash.mjs"; import "./tests/operations/TranslateDateTimeFormat"; import "./tests/operations/Magic"; import "./tests/operations/ParseTLV"; +import "./tests/operations/Mime"; let allTestsPassing = true; const testStatusCounts = { diff --git a/test/tests/operations/Mime.mjs b/test/tests/operations/Mime.mjs new file mode 100644 index 0000000000..8fc8a885df --- /dev/null +++ b/test/tests/operations/Mime.mjs @@ -0,0 +1,23 @@ +/** + * Mime tests. + * + * @author bwhitn [brian.m.whitney@outlook.com] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + */ +import TestRegister from "../../TestRegister"; + +TestRegister.addTests([ + { + name: "Decode Mime Encoded Words", + input: "This is a GBK base64 encoded word: =?GBK?B?x/izx7ncvta52NPazfjC59Pfx+nQxc+i16g=?=.\nThis is a Cyrillic quoted word: =?utf-8?Q?=D0=A2=D0=B5=D1=81=D1=82_=D0=A2=D0=B5=D1=81=D1=82_=D0=A2=D0=B5=D1=81=D1=82?=.", + expectedOutput: ["This is a GBK base64 encoded word: 区城管局关于网络舆情信息专.", + "This is a Cyrillic quoted word: Тест Тест Тест."].join("\n"), + recipeConfig: [ + { + "op": "Decode Mime Encoded Words", + "args": [] + } + ] + }, +]); From fe6b6c8f472a2ac6bcebeeace546ecbd908c06a3 Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Tue, 18 Dec 2018 18:42:22 -0500 Subject: [PATCH 26/30] Modified Mime test --- test/tests/operations/Mime.mjs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/tests/operations/Mime.mjs b/test/tests/operations/Mime.mjs index 8fc8a885df..dcd52dc964 100644 --- a/test/tests/operations/Mime.mjs +++ b/test/tests/operations/Mime.mjs @@ -10,9 +10,8 @@ import TestRegister from "../../TestRegister"; TestRegister.addTests([ { name: "Decode Mime Encoded Words", - input: "This is a GBK base64 encoded word: =?GBK?B?x/izx7ncvta52NPazfjC59Pfx+nQxc+i16g=?=.\nThis is a Cyrillic quoted word: =?utf-8?Q?=D0=A2=D0=B5=D1=81=D1=82_=D0=A2=D0=B5=D1=81=D1=82_=D0=A2=D0=B5=D1=81=D1=82?=.", - expectedOutput: ["This is a GBK base64 encoded word: 区城管局关于网络舆情信息专.", - "This is a Cyrillic quoted word: Тест Тест Тест."].join("\n"), + input: "This is a GBK base64 encoded word: =?GBK?B?572R57uc5Y6o5biI?=\nThis is a Cyrillic quoted word: =?utf-8?Q?=D0=A2=D0=B5=D1=81=D1=82_=D0=A2=D0=B5=D1=81=D1=82_=D0=A2=D0=B5=D1=81=D1=82?=.", + expectedOutput: "This is a GBK base64 encoded word: 网络厨师.\nThis is a Cyrillic quoted word: Кибер Шеф.", recipeConfig: [ { "op": "Decode Mime Encoded Words", From 343541981f8b23a3816ba25ce0829391f1f143d5 Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Wed, 19 Dec 2018 11:22:11 -0500 Subject: [PATCH 27/30] throws an error when formating is invalid --- src/core/lib/Mime.mjs | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index 5bdd59ce49..0ac77fea50 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -231,18 +231,21 @@ class Mime { * @returns {string} */ static _decodeMimeData(input, charEnc, contEnc) { - switch (contEnc) { - case "base64": - input = Utils.convertToByteArray(input, "base64"); - break; - case "quoted-printable": - input = decodeQuotedPrintable(input); - } - if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) { - input = Utils.strToByteArray(cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input)); - + try { + switch (contEnc) { + case "base64": + input = Utils.convertToByteArray(input, "base64"); + break; + case "quoted-printable": + input = decodeQuotedPrintable(input); + } + if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) { + input = Utils.strToByteArray(cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input)); + } + return input; + } catch (err) { + throw new OperationError("Invalid Mime Format"); } - return input; } /** From 8041b789b2cad3ef3e5d33fe1d9816d2ebab557b Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Wed, 19 Dec 2018 11:39:05 -0500 Subject: [PATCH 28/30] removed comment --- src/core/lib/Mime.mjs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs index 0ac77fea50..56785b3732 100644 --- a/src/core/lib/Mime.mjs +++ b/src/core/lib/Mime.mjs @@ -1,9 +1,3 @@ -/** - * @author bwhitn [brian.m.whitney@outlook.com] - * @copyright Crown Copyright 2016 - * @license Apache-2.0 - */ - import OperationError from "../errors/OperationError"; import cptable from "../vendor/js-codepage/cptable.js"; import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; @@ -11,10 +5,11 @@ import {MIME_FORMAT} from "../lib/ChrEnc"; import Utils from "../Utils"; /** - * NOTE: Liberties taken include: - * No checks are made to verify quoted words are valid encodings e.g. underscore vs escape - * This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect) - * Both Base64 and QuotedPrintable is used for decode. + * Class to do general Mime format parsing + * + * @author bwhitn [brian.m.whitney@outlook.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 */ class Mime { /** From 34288bc25fc733dbd8a20fda32c19c58921aa02f Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Wed, 19 Dec 2018 12:33:14 -0500 Subject: [PATCH 29/30] Change to Parse Mime --- src/core/config/Categories.json | 2 +- .../{ParseIMF.mjs => ParseMime.mjs} | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) rename src/core/operations/{ParseIMF.mjs => ParseMime.mjs} (83%) diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index 4fc4c578c3..a03b6f4772 100755 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -55,7 +55,7 @@ "To Braille", "From Braille", "Parse TLV", - "Parse Internet Message Format", + "Parse Mime", "Decode Mime Encoded Words" ] }, diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseMime.mjs similarity index 83% rename from src/core/operations/ParseIMF.mjs rename to src/core/operations/ParseMime.mjs index 0799ef83c4..3d2a3c5c71 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseMime.mjs @@ -11,20 +11,21 @@ import Utils from "../Utils"; /** * */ -class ParseIMF extends Operation { +class ParseMime extends Operation { /** - * Internet Message Format constructor + * ParseMime constructor */ constructor() { super(); - this.name = "Parse IMF"; + this.name = "Parse Mime"; this.module = "Default"; - this.description = ["Parse an Internet Message Format (IMF) messages following RFC5322.", + this.description = ["Generic Mime Message parser that decodes Mime messages into files", "

", - "Parses an IMF formated message. These often have the file extention ".eml" and contain the email headers and body. The output will be a file list of the root header and decoded mime parts.", + "The output will be the root header and the associated mime parts.", + "This includes Internet Message Format which are found in SMTP traffic." ].join("\n"); - this.infoURL = "https://tools.ietf.org/html/rfc5322"; + this.infoURL = "https://tools.ietf.org/html/rfc2045"; this.inputType = "string"; this.outputType = "List"; this.presentType = "html"; @@ -61,7 +62,7 @@ class ParseIMF extends Operation { const dataObj = eml.extractData(fields); let subject = null; const retval = []; - if (dataObj.length) { + if (dataObj.length >= 1) { subject = dataObj[0].fields.subject; if (dataObj[0].header) { retval.push(new File([dataObj[0].header], "Header.txt", {type: "text/plain"})); @@ -72,7 +73,7 @@ class ParseIMF extends Operation { let name = obj.fields.filename ? obj.fields.filename : obj.fields.name; const type = obj.fields.type ? obj.fields.type : "text/plain"; if (!name) { - name = (subject ? subject : "Undefined").concat(ParseIMF.getFileExt(type)); + name = (subject ? subject : "Undefined").concat(ParseMime.getFileExt(type)); } if (Array.isArray(obj.body)) { retval.push(new File([Uint8Array.from(obj.body)], name, {type: type})); @@ -113,4 +114,4 @@ class ParseIMF extends Operation { } } -export default ParseIMF; +export default ParseMime; From 22f792363485f7123e0abb12e64feec2a08c16c0 Mon Sep 17 00:00:00 2001 From: Brian Whitney Date: Tue, 19 Feb 2019 07:17:25 -0500 Subject: [PATCH 30/30] Think I actually commited the test fixes this time --- tests/operations/index.mjs | 56 +-------------------------------- tests/operations/tests/Mime.mjs | 8 ++--- 2 files changed, 5 insertions(+), 59 deletions(-) diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs index 07e3ed750a..ffdd2c055b 100644 --- a/tests/operations/index.mjs +++ b/tests/operations/index.mjs @@ -24,7 +24,6 @@ global.ENVIRONMENT_IS_WEB = function() { }; import TestRegister from "./TestRegister"; -<<<<<<< HEAD:tests/operations/index.mjs import "./tests/BCD"; import "./tests/BSON"; import "./tests/Base58"; @@ -58,6 +57,7 @@ import "./tests/JWTSign"; import "./tests/JWTVerify"; import "./tests/MS"; import "./tests/Magic"; +import "./tests/Mime"; import "./tests/MorseCode"; import "./tests/NetBIOS"; import "./tests/OTP"; @@ -87,60 +87,6 @@ import "./tests/ConvertCoordinateFormat"; // Cannot test operations that use the File type yet //import "./tests/SplitColourChannels"; -======= -import "./tests/operations/BCD"; -import "./tests/operations/BSON"; -import "./tests/operations/Base58"; -import "./tests/operations/Base64"; -import "./tests/operations/BitwiseOp"; -import "./tests/operations/ByteRepr"; -import "./tests/operations/CartesianProduct"; -import "./tests/operations/CharEnc"; -import "./tests/operations/Checksum"; -import "./tests/operations/Ciphers"; -import "./tests/operations/Code"; -import "./tests/operations/Comment"; -import "./tests/operations/Compress"; -import "./tests/operations/ConditionalJump"; -import "./tests/operations/Crypt"; -import "./tests/operations/DateTime"; -import "./tests/operations/ExtractEmailAddresses"; -import "./tests/operations/Fork"; -import "./tests/operations/FromDecimal"; -import "./tests/operations/FromGeohash"; -import "./tests/operations/Hash"; -import "./tests/operations/HaversineDistance"; -import "./tests/operations/Hexdump"; -import "./tests/operations/Image"; -import "./tests/operations/Jump"; -import "./tests/operations/JWTDecode"; -import "./tests/operations/JWTSign"; -import "./tests/operations/JWTVerify"; -import "./tests/operations/MS"; -import "./tests/operations/Magic"; -import "./tests/operations/MorseCode"; -import "./tests/operations/NetBIOS"; -import "./tests/operations/OTP"; -import "./tests/operations/PGP"; -import "./tests/operations/PHP"; -import "./tests/operations/ParseIPRange"; -import "./tests/operations/PowerSet"; -import "./tests/operations/Regex"; -import "./tests/operations/Register"; -import "./tests/operations/RemoveDiacritics"; -import "./tests/operations/Rotate"; -import "./tests/operations/SeqUtils"; -import "./tests/operations/SetDifference"; -import "./tests/operations/SetIntersection"; -import "./tests/operations/SetUnion"; -import "./tests/operations/StrUtils"; -import "./tests/operations/SymmetricDifference"; -import "./tests/operations/ToGeohash.mjs"; -import "./tests/operations/TranslateDateTimeFormat"; -import "./tests/operations/Magic"; -import "./tests/operations/ParseTLV"; -import "./tests/operations/Mime"; ->>>>>>> 34288bc25fc733dbd8a20fda32c19c58921aa02f:test/index.mjs let allTestsPassing = true; const testStatusCounts = { diff --git a/tests/operations/tests/Mime.mjs b/tests/operations/tests/Mime.mjs index bb304ef1a7..05c2f5dac0 100644 --- a/tests/operations/tests/Mime.mjs +++ b/tests/operations/tests/Mime.mjs @@ -10,12 +10,12 @@ import TestRegister from "../TestRegister"; TestRegister.addTests([ { name: "Decode Mime Encoded Words", - input: "This is a GBK base64 encoded word: =?GBK?B?572R57uc5Y6o5biI?=\nThis is a Cyrillic quoted word: =?utf-8?Q?=D0=A2=D0=B5=D1=81=D1=82_=D0=A2=D0=B5=D1=81=D1=82_=D0=A2=D0=B5=D1=81=D1=82?=.", - expectedOutput: "This is a GBK base64 encoded word: 网络厨师.\nThis is a Cyrillic quoted word: Кибер Шеф.", + input: "This is a GBK base64 encoded word: =?GBK?B?zfjC57P4yqY=?=.\nThis is a Cyrillic UTF-8 quoted word: =?utf-8?Q?=d0=9a=d0=b8=d0=b1=d0=b5=d1=80_=d0=a8=d0=b5=d1=84?=.", + expectedOutput: "This is a GBK base64 encoded word: 网络厨师.\nThis is a Cyrillic UTF-8 quoted word: Кибер Шеф.", recipeConfig: [ { - "op": "Decode Mime Encoded Words", - "args": [] + op: "Decode Mime Encoded Words", + args: [] } ] },