Skip to content

Commit

Permalink
Add error message byte order mark (#63)
Browse files Browse the repository at this point in the history
* Fixed an issue with the BOM (byte order mark) test

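* Fixed an issue with the reported line number: an explicit line number passed to `_add_error` no longer overwrites the parser's current line number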

* Added an error for a file with a byte order mark

* Increased the version from 0.9.0 to 0.9.1

---------

Co-authored-by: SanderKools <s.w.a.kools@minezk.nl>
  • Loading branch information
SanderKools-Ordina and SanderKools authored Nov 7, 2023
1 parent 0140ab3 commit 27d8524
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 6 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ a dict with three keys:
| "no_csaf_file" | "All CSAF fields must point to a provider-metadata.json file." |
| "pgp_data_error" | "Signed message did not contain a correct ASCII-armored PGP block." |
| "pgp_error" | "Decoding or parsing of the pgp message failed." |
| "bom_in_file" | "The Byte-Order Mark was found in the UTF-8 File. Security.txt must be encoded using UTF-8 in Net-Unicode form, the BOM signature must not appear at the beginning." |


### Possible recommendations
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
requests==2.31.0
python-dateutil==2.8.2
langcodes==3.3.0
pytest==7.4.0
pytest==7.4.3
requests-mock==1.11.0
PGPy==0.6.0
14 changes: 11 additions & 3 deletions sectxt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import dateutil.parser
import requests

__version__ = "0.9.0"
__version__ = "0.9.1"

s = requests.Session()

Expand Down Expand Up @@ -103,8 +103,10 @@ def _add_error(
explicit_line_no=None
) -> None:
if explicit_line_no:
self._line_no = explicit_line_no
err_dict: ErrorDict = {"code": code, "message": message, "line": self._line_no}
error_line = explicit_line_no
else:
error_line = self._line_no
err_dict: ErrorDict = {"code": code, "message": message, "line": error_line}
self._errors.append(err_dict)

def _add_recommendation(
Expand Down Expand Up @@ -418,6 +420,12 @@ def _get_str(self, content: bytes) -> str:
try:
if content.startswith(codecs.BOM_UTF8):
content = content.replace(codecs.BOM_UTF8, b'')
self._add_error(
"bom_in_file",
"The Byte-Order Mark was found in the UTF-8 File. "
"Security.txt must be encoded using UTF-8 in Net-Unicode form, "
"the BOM signature must not appear at the beginning."
)
return content.decode('utf-8')
except UnicodeError:
self._add_error("utf8", "Content must be utf-8 encoded.")
Expand Down
7 changes: 5 additions & 2 deletions test/test_sectxt.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,15 @@ def test_invalid_uri_scheme(requests_mock: Mocker):

def test_byte_order_mark(requests_mock: Mocker):
with Mocker() as m:
expires = f"Expires: {(date.today() + timedelta(days=10)).isoformat()}T18:37:07z\n"
byte_content_with_bom = b'\xef\xbb\xbf\xef\xbb\xbfContact: mailto:me@example.com\n' \
b'Expires: 2023-08-11T18:37:07z\n'
+ bytes(expires, "utf-8")
m.get(
"https://example.com/.well-known/security.txt",
headers={"content-type": "text/plain"},
content=byte_content_with_bom,
)
s = SecurityTXT("example.com")
assert(s.is_valid())
assert(not s.is_valid())
if not any(d["code"] == "bom_in_file" for d in s.errors):
pytest.fail("bom_in_file error code should be given")

0 comments on commit 27d8524

Please sign in to comment.