Skip to content

Commit f1752de

Browse files
Fix a few bugs and start working on #452
1 parent efd2a7a commit f1752de

File tree

5 files changed

+21
-8
lines changed

5 files changed

+21
-8
lines changed

Diff for: CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
**v0.53.2**
2+
* [[TeamMsgExtractor #452](https://github.com/TeamMsgExtractor/msg-extractor/issues/452)]
3+
* Fixed a bug that caused an error when looking up the task object of a task request, because the attachments were iterated without `enumerate`.
4+
* Fix `TOCEntry` not initializing `DVTargetDevice` correctly.
5+
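* Add temporary `contentID` and `cid` properties to `SignedAttachment` that always return `None`. As far as I know a content ID can never be set on a signed attachment, but having the properties prevents errors in code that expects every attachment to provide them.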
6+
17
**v0.53.1**
28
* Expanded allowable range for `red-black-tree-mod`.
39
* Fix issue with `MessageBase.asEmailMessage()` that prevented embedded MSG files from being attached.

Diff for: extract_msg/attachments/signed_att.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def __init__(self, msg, data: bytes, name: str, mimetype: str, node: email.messa
4545
self.__node = node
4646
self.__treePath = msg.treePath + [makeWeakRef(self)]
4747

48-
self.__data = None
48+
self.__data = b''
4949
# To add support for embedded MSG files, we are going to completely
5050
# ignore the mimetype and just do a few simple checks to see if we can
5151
# use the bytes as an embedded file.
@@ -59,7 +59,7 @@ def __init__(self, msg, data: bytes, name: str, mimetype: str, node: email.messa
5959
except Exception:
6060
logger.exception('Signed message was an OLE file, but could not be read as an MSG file due to an exception.')
6161

62-
if self.__data is None:
62+
if not self.__data:
6363
self.__data = data
6464

6565
def _handleFnc(self, _zip, filename, customPath: pathlib.Path, kwargs) -> pathlib.Path:
@@ -205,6 +205,12 @@ def saveEmbededMessage(self, **kwargs) -> constants.SAVE_TYPE:
205205
def asBytes(self) -> bytes:
206206
return self.__asBytes
207207

208+
@property
209+
def contentID(self) -> None:
210+
return None
211+
212+
cid = contentID
213+
208214
@property
209215
def data(self) -> Union[bytes, MSGFile]:
210216
"""

Diff for: extract_msg/msg_classes/message_base.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def __init__(self, path, **kwargs):
9696
except Exception as e:
9797
# Prevent an error in the body from preventing opening.
9898
logger.exception('Critical error accessing the body. File opened but accessing the body will throw an exception.')
99+
self._htmlEncoding = None
99100
except:
100101
try:
101102
self.close()
@@ -380,7 +381,7 @@ def getSaveHtmlBody(self, preparedHtml: bool = False, charset: str = 'utf-8', **
380381

381382
# If we are preparing the HTML, then we should make sure it has a Content-Type meta tag.
382383
if preparedHtml and charset:
383-
bs = bs4.BeautifulSoup(data, features = 'html.parser')
384+
bs = bs4.BeautifulSoup(data, features = 'html.parser', from_encoding=self._htmlEncoding)
384385
if not bs.find('meta', {'http-equiv': 'Content-Type'}):
385386
# Setup the attributes for the tag.
386387
tagAttrs = {
@@ -405,7 +406,7 @@ def getSaveHtmlBody(self, preparedHtml: bool = False, charset: str = 'utf-8', **
405406

406407
return data
407408
else:
408-
return self.htmlBody
409+
return self.htmlBody or b''
409410

410411
def getSavePdfBody(self, wkPath = None, wkOptions = None, **kwargs) -> bytes:
411412
"""
@@ -511,7 +512,7 @@ def injectHtmlHeader(self, prepared: bool = False) -> bytes:
511512
# the <html> and <body> tag are missing, we determine where to put
512513
# the body tag (around everything if there is no <head> tag,
513514
# otherwise at the end) and then wrap it all in the <html> tag.
514-
parser = bs4.BeautifulSoup(body, features = 'html.parser')
515+
parser = bs4.BeautifulSoup(body, features = 'html.parser', from_encoding=self._htmlEncoding)
515516
if not parser.find('html') and not parser.find('body'):
516517
if parser.find('head') or parser.find('footer'):
517518
# Create the parser we will be using for the corrections.
@@ -1186,7 +1187,7 @@ def htmlBodyPrepared(self) -> Optional[bytes]:
11861187
return self.htmlBody
11871188

11881189
# Create the BeautifulSoup instance to use.
1189-
soup = bs4.BeautifulSoup(self.htmlBody, 'html.parser')
1190+
soup = bs4.BeautifulSoup(self.htmlBody, 'html.parser', from_encoding=self._htmlEncoding)
11901191

11911192
# Get a list of image tags to see if we can inject into. If the source
11921193
# of an image starts with "cid:" that means it is one of the attachments

Diff for: extract_msg/msg_classes/task_request.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def taskObject(self) -> Optional[Task]:
6363
# The task object MUST be the first attachment, but we will be
6464
# lenient and allow it to be in any position. It not existing,
6565
# however, will not be tolerated.
66-
task = next(((index, att) for index, att in self.attachments if isinstance(att.data, Task)), None)
66+
task = next(((index, att) for index, att in enumerate(self.attachments) if isinstance(att.data, Task)), None)
6767

6868
if task is None:
6969
if ErrorBehavior.STANDARDS_VIOLATION in self.errorBehavior:

Diff for: extract_msg/structures/toc_entry.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def __init__(self, reader: Optional[Union[bytes, BytesReader]] = None):
2020
self.__lindex = 0
2121
self.__tymed = 0
2222
self.__advf = 0
23-
self.__targetDevice = DVTargetDevice()
23+
self.__targetDevice = DVTargetDevice(None)
2424
return
2525

2626
if isinstance(reader, bytes):

0 commit comments

Comments
 (0)