Skip to content

Commit

Permalink
implement recommended UTF-8 encoding for reading and writing SPDX files
Browse files (browse the repository at this point in the history)
Signed-off-by: Christian Decker <christian.decker@homag.com>
  • Loading branch information
Christian Decker committed Sep 14, 2023
1 parent 3d3100a commit a1584b7
Show file tree
Hide file tree
Showing 11 changed files with 20 additions and 22 deletions.
3 changes: 1 addition & 2 deletions src/spdx_tools/spdx/parser/json/json_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
#
# SPDX-License-Identifier: Apache-2.0
import json
from typing import Optional

from beartype.typing import Dict

from spdx_tools.spdx.model import Document
from spdx_tools.spdx.parser.jsonlikedict.json_like_dict_parser import JsonLikeDictParser


def parse_from_file(file_name: str, encoding: Optional[str] = None) -> Document:
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
with open(file_name, encoding=encoding) as file:
input_doc_as_dict: Dict = json.load(file)

Expand Down
9 changes: 7 additions & 2 deletions src/spdx_tools/spdx/parser/parse_anything.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional
import logging

from spdx_tools.spdx.formats import FileFormat, file_name_to_format
from spdx_tools.spdx.parser.json import json_parser
Expand All @@ -19,7 +19,12 @@
from spdx_tools.spdx.parser.yaml import yaml_parser


def parse_file(file_name: str, encoding: Optional[str] = None):
def parse_file(file_name: str, encoding: str = "utf-8"):
if encoding != "utf-8":
logging.warning(
"It's recommended to use the UTF-8 encoding for any SPDX file. Consider changing the encoding of the file."
)

input_format = file_name_to_format(file_name)
if input_format == FileFormat.RDF_XML:
return rdf_parser.parse_from_file(file_name, encoding)
Expand Down
4 changes: 1 addition & 3 deletions src/spdx_tools/spdx/parser/rdf/rdf_parser.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import Optional

from beartype.typing import Any, Dict
from rdflib import RDF, Graph

Expand All @@ -24,7 +22,7 @@
from spdx_tools.spdx.rdfschema.namespace import SPDX_NAMESPACE


def parse_from_file(file_name: str, encoding: Optional[str] = None) -> Document:
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
graph = Graph()
with open(file_name, encoding=encoding) as file:
graph.parse(file, format="xml")
Expand Down
4 changes: 1 addition & 3 deletions src/spdx_tools/spdx/parser/tagvalue/tagvalue_parser.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import Optional

from spdx_tools.spdx.model import Document
from spdx_tools.spdx.parser.tagvalue.parser import Parser


def parse_from_file(file_name: str, encoding: Optional[str] = None) -> Document:
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
parser = Parser()
with open(file_name, encoding=encoding) as file:
data = file.read()
Expand Down
4 changes: 1 addition & 3 deletions src/spdx_tools/spdx/parser/xml/xml_parser.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import Optional

import xmltodict
from beartype.typing import Any, Dict

Expand Down Expand Up @@ -38,7 +36,7 @@
]


def parse_from_file(file_name: str, encoding: Optional[str] = None) -> Document:
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
with open(file_name, encoding=encoding) as file:
parsed_xml: Dict = xmltodict.parse(file.read(), encoding="utf-8")

Expand Down
4 changes: 1 addition & 3 deletions src/spdx_tools/spdx/parser/yaml/yaml_parser.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import Optional

import yaml
from beartype.typing import Dict

from spdx_tools.spdx.model import Document
from spdx_tools.spdx.parser.jsonlikedict.json_like_dict_parser import JsonLikeDictParser


def parse_from_file(file_name: str, encoding: Optional[str] = None) -> Document:
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
with open(file_name, encoding=encoding) as file:
input_doc_as_dict: Dict = yaml.safe_load(file)

Expand Down
2 changes: 1 addition & 1 deletion src/spdx_tools/spdx/writer/json/json_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ def write_document_to_file(
converter: DocumentConverter = None,
drop_duplicates: bool = True,
):
with open(file_name, "w") as out:
with open(file_name, "w", encoding="utf-8") as out:
write_document_to_stream(document, out, validate, converter, drop_duplicates)
2 changes: 1 addition & 1 deletion src/spdx_tools/spdx/writer/tagvalue/tagvalue_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def write_document_to_stream(document: Document, stream: TextIO, validate: bool


def write_document_to_file(document: Document, file_name: str, validate: bool = True, drop_duplicates: bool = True):
with open(file_name, "w") as out:
with open(file_name, "w", encoding="utf-8") as out:
write_document_to_stream(document, out, validate, drop_duplicates)


Expand Down
2 changes: 1 addition & 1 deletion src/spdx_tools/spdx/writer/xml/xml_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ def write_document_to_file(
converter: DocumentConverter = None,
drop_duplicates: bool = True,
):
with open(file_name, "w") as out:
with open(file_name, "w", encoding="utf-8") as out:
write_document_to_stream(document, out, validate, converter, drop_duplicates)
2 changes: 1 addition & 1 deletion src/spdx_tools/spdx/writer/yaml/yaml_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ def write_document_to_file(
converter: DocumentConverter = None,
drop_duplicates: bool = True,
):
with open(file_name, "w") as out:
with open(file_name, "w", encoding="utf-8") as out:
write_document_to_stream(document, out, validate, converter, drop_duplicates)
6 changes: 4 additions & 2 deletions tests/spdx/writer/json/test_json_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ def test_write_json(temporary_file_path: str):
document = document_fixture()
write_document_to_file(document, temporary_file_path, validate=True)

with open(temporary_file_path) as written_file:
with open(temporary_file_path, encoding="utf-8") as written_file:
written_json = json.load(written_file)

with open(os.path.join(os.path.dirname(__file__), "expected_results", "expected.json")) as expected_file:
with open(
os.path.join(os.path.dirname(__file__), "expected_results", "expected.json"), encoding="utf-8"
) as expected_file:
expected_json = json.load(expected_file)

assert written_json == expected_json
Expand Down

0 comments on commit a1584b7

Please sign in to comment.