huggingface · LysandreJik · Nov 12, 2020 · Nov 12, 2020
diff --git a/utils/check_copies.py b/utils/check_copies.py
@@ -42,7 +42,7 @@ def find_code_in_transformers(object_name):
             f"`object_name` should begin with the name of a module of transformers but got {object_name}."
         )
 
-    with open(os.path.join(TRANSFORMERS_PATH, f"{module}.py"), "r", encoding="utf-8") as f:
+    with open(os.path.join(TRANSFORMERS_PATH, f"{module}.py"), "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()
 
     # Now let's find the class / func in the code!
@@ -82,10 +82,10 @@ def blackify(code):
         code = f"class Bla:\n{code}"
     with tempfile.TemporaryDirectory() as d:
         fname = os.path.join(d, "tmp.py")
-        with open(fname, "w", encoding="utf-8") as f:
+        with open(fname, "w", encoding="utf-8", newline="\n") as f:
             f.write(code)
         os.system(f"black -q --line-length 119 --target-version py35 {fname}")
-        with open(fname, "r", encoding="utf-8") as f:
+        with open(fname, "r", encoding="utf-8", newline="\n") as f:
             result = f.read()
             return result[len("class Bla:\n") :] if has_indent else result
 
@@ -96,7 +96,7 @@ def is_copy_consistent(filename, overwrite=False):
 
     Return the differences or overwrites the content depending on `overwrite`.
     """
-    with open(filename, "r", encoding="utf-8") as f:
+    with open(filename, "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()
     diffs = []
     line_index = 0
@@ -150,7 +150,7 @@ def is_copy_consistent(filename, overwrite=False):
     if overwrite and len(diffs) > 0:
         # Warn the user a file has been modified.
         print(f"Detected changes, rewriting {filename}.")
-        with open(filename, "w", encoding="utf-8") as f:
+        with open(filename, "w", encoding="utf-8", newline="\n") as f:
             f.writelines(lines)
     return diffs
 
@@ -176,7 +176,7 @@ def get_model_list():
     # If the introduction or the conclusion of the list change, the prompts may need to be updated.
     _start_prompt = "🤗 Transformers currently provides the following architectures"
     _end_prompt = "1. Want to contribute a new model?"
-    with open(os.path.join(REPO_PATH, "README.md"), "r", encoding="utf-8") as f:
+    with open(os.path.join(REPO_PATH, "README.md"), "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()
     # Find the start of the list.
     start_index = 0
@@ -254,7 +254,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
     """ Check the model lists in the README and index.rst are consistent and maybe `overwrite`. """
     _start_prompt = "    This list is updated automatically from the README"
     _end_prompt = ".. toctree::"
-    with open(os.path.join(PATH_TO_DOCS, "index.rst"), "r", encoding="utf-8") as f:
+    with open(os.path.join(PATH_TO_DOCS, "index.rst"), "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()
     # Find the start of the list.
     start_index = 0
@@ -279,7 +279,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
 
     if converted_list != rst_list:
         if overwrite:
-            with open(os.path.join(PATH_TO_DOCS, "index.rst"), "w", encoding="utf-8") as f:
+            with open(os.path.join(PATH_TO_DOCS, "index.rst"), "w", encoding="utf-8", newline="\n") as f:
                 f.writelines(lines[:start_index] + [converted_list] + lines[end_index:])
         else:
             raise ValueError(

diff --git a/utils/check_dummies.py b/utils/check_dummies.py
@@ -166,7 +166,7 @@ def {0}(*args, **kwargs):
 
 def read_init():
     """ Read the init and extracts PyTorch, TensorFlow, SentencePiece and Tokenizers objects. """
-    with open(os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), "r", encoding="utf-8") as f:
+    with open(os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()
 
     line_index = 0
@@ -321,21 +321,21 @@ def check_dummies(overwrite=False):
     tf_file = os.path.join(path, "dummy_tf_objects.py")
     flax_file = os.path.join(path, "dummy_flax_objects.py")
 
-    with open(sentencepiece_file, "r", encoding="utf-8") as f:
+    with open(sentencepiece_file, "r", encoding="utf-8", newline="\n") as f:
         actual_sentencepiece_dummies = f.read()
-    with open(tokenizers_file, "r", encoding="utf-8") as f:
+    with open(tokenizers_file, "r", encoding="utf-8", newline="\n") as f:
         actual_tokenizers_dummies = f.read()
-    with open(pt_file, "r", encoding="utf-8") as f:
+    with open(pt_file, "r", encoding="utf-8", newline="\n") as f:
         actual_pt_dummies = f.read()
-    with open(tf_file, "r", encoding="utf-8") as f:
+    with open(tf_file, "r", encoding="utf-8", newline="\n") as f:
         actual_tf_dummies = f.read()
-    with open(flax_file, "r", encoding="utf-8") as f:
+    with open(flax_file, "r", encoding="utf-8", newline="\n") as f:
         actual_flax_dummies = f.read()
 
     if sentencepiece_dummies != actual_sentencepiece_dummies:
         if overwrite:
             print("Updating transformers.utils.dummy_sentencepiece_objects.py as the main __init__ has new objects.")
-            with open(sentencepiece_file, "w", encoding="utf-8") as f:
+            with open(sentencepiece_file, "w", encoding="utf-8", newline="\n") as f:
                 f.write(sentencepiece_dummies)
         else:
             raise ValueError(
@@ -346,7 +346,7 @@ def check_dummies(overwrite=False):
     if tokenizers_dummies != actual_tokenizers_dummies:
         if overwrite:
             print("Updating transformers.utils.dummy_tokenizers_objects.py as the main __init__ has new objects.")
-            with open(tokenizers_file, "w", encoding="utf-8") as f:
+            with open(tokenizers_file, "w", encoding="utf-8", newline="\n") as f:
                 f.write(tokenizers_dummies)
         else:
             raise ValueError(
@@ -357,7 +357,7 @@ def check_dummies(overwrite=False):
     if pt_dummies != actual_pt_dummies:
         if overwrite:
             print("Updating transformers.utils.dummy_pt_objects.py as the main __init__ has new objects.")
-            with open(pt_file, "w", encoding="utf-8") as f:
+            with open(pt_file, "w", encoding="utf-8", newline="\n") as f:
                 f.write(pt_dummies)
         else:
             raise ValueError(
@@ -368,7 +368,7 @@ def check_dummies(overwrite=False):
     if tf_dummies != actual_tf_dummies:
         if overwrite:
             print("Updating transformers.utils.dummy_tf_objects.py as the main __init__ has new objects.")
-            with open(tf_file, "w", encoding="utf-8") as f:
+            with open(tf_file, "w", encoding="utf-8", newline="\n") as f:
                 f.write(tf_dummies)
         else:
             raise ValueError(
@@ -379,7 +379,7 @@ def check_dummies(overwrite=False):
     if flax_dummies != actual_flax_dummies:
         if overwrite:
             print("Updating transformers.utils.dummy_flax_objects.py as the main __init__ has new objects.")
-            with open(flax_file, "w", encoding="utf-8") as f:
+            with open(flax_file, "w", encoding="utf-8", newline="\n") as f:
                 f.write(flax_dummies)
         else:
             raise ValueError(

diff --git a/utils/check_repo.py b/utils/check_repo.py
@@ -197,7 +197,7 @@ def get_model_doc_files():
 def find_tested_models(test_file):
     """ Parse the content of test_file to detect what's in all_model_classes"""
     # This is a bit hacky but I didn't find a way to import the test_file as a module and read inside the class
-    with open(os.path.join(PATH_TO_TESTS, test_file), "r", encoding="utf-8") as f:
+    with open(os.path.join(PATH_TO_TESTS, test_file), "r", encoding="utf-8", newline="\n") as f:
         content = f.read()
     all_models = re.findall(r"all_model_classes\s+=\s+\(\s*\(([^\)]*)\)", content)
     # Check with one less parenthesis
@@ -255,7 +255,7 @@ def check_all_models_are_tested():
 
 def find_documented_classes(doc_file):
     """ Parse the content of doc_file to detect which classes it documents"""
-    with open(os.path.join(PATH_TO_DOC, doc_file), "r", encoding="utf-8") as f:
+    with open(os.path.join(PATH_TO_DOC, doc_file), "r", encoding="utf-8", newline="\n") as f:
         content = f.read()
     return re.findall(r"autoclass:: transformers.(\S+)\s+", content)
 
@@ -360,7 +360,7 @@ def check_all_models_are_auto_configured():
 
 def check_decorator_order(filename):
     """ Check that in the test file `filename` the slow decorator is always last."""
-    with open(filename, "r", encoding="utf-8") as f:
+    with open(filename, "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()
     decorator_before = None
     errors = []

diff --git a/utils/style_doc.py b/utils/style_doc.py
@@ -357,14 +357,14 @@ def init_in_block(self, text):
 
 def style_rst_file(doc_file, max_len=119, check_only=False):
     """ Style one rst file `doc_file` to `max_len`."""
-    with open(doc_file, "r", encoding="utf-8") as f:
+    with open(doc_file, "r", encoding="utf-8", newline="\n") as f:
         doc = f.read()
     clean_doc = rst_styler.style(doc, max_len=max_len)
 
     diff = clean_doc != doc
     if not check_only and diff:
         print(f"Overwriting content of {doc_file}.")
-        with open(doc_file, "w", encoding="utf-8") as f:
+        with open(doc_file, "w", encoding="utf-8", newline="\n") as f:
             f.write(clean_doc)
 
     return diff
@@ -404,7 +404,7 @@ def style_docstring(docstring, max_len=119):
 
 def style_file_docstrings(code_file, max_len=119, check_only=False):
     """Style all docstrings in `code_file` to `max_len`."""
-    with open(code_file, "r", encoding="utf-8") as f:
+    with open(code_file, "r", encoding="utf-8", newline="\n") as f:
         code = f.read()
     splits = code.split('"""')
     splits = [
@@ -416,7 +416,7 @@ def style_file_docstrings(code_file, max_len=119, check_only=False):
     diff = clean_code != code
     if not check_only and diff:
         print(f"Overwriting content of {code_file}.")
-        with open(code_file, "w", encoding="utf-8") as f:
+        with open(code_file, "w", encoding="utf-8", newline="\n") as f:
             f.write(clean_code)
 
     return diff