From f4b93c8a5a5efc30421849eb065514bf9b9bffe1 Mon Sep 17 00:00:00 2001
From: Jacob Beck <jake@fishtownanalytics.com>
Date: Wed, 17 Jun 2020 12:03:01 -0600
Subject: [PATCH 1/4] handle long paths in windows, add a test

---
 CHANGELOG.md                                  |  1 +
 core/dbt/clients/system.py                    | 47 +++++++++++++-
 .../test_docs_generate.py                     | 64 +++++++++++++++++++
 test/integration/base.py                      |  5 +-
 4 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b6e7ddcaec9..ba8aeff50b2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
 - `dbt deps` now respects the `--project-dir` flag, so using `dbt deps --project-dir=/some/path` and then `dbt run --project-dir=/some/path` will properly find dependencies ([#2519](https://github.com/fishtown-analytics/dbt/issues/2519), [#2534](https://github.com/fishtown-analytics/dbt/pull/2534))
 - `packages.yml` revision/version fields can be float-like again (`revision: '1.0'` is valid). ([#2518](https://github.com/fishtown-analytics/dbt/issues/2518), [#2535](https://github.com/fishtown-analytics/dbt/pull/2535))
 - Parallel RPC requests no longer step on each others' arguments ([[#2484](https://github.com/fishtown-analytics/dbt/issues/2484), [#2554](https://github.com/fishtown-analytics/dbt/pull/2554)])
+- On windows (depending upon OS support), dbt no longer fails with errors when writing artifacts ([#2558](https://github.com/fishtown-analytics/dbt/issues/2558), [#2566](https://github.com/fishtown-analytics/dbt/pull/2566))
 
 ## dbt 0.17.0 (June 08, 2020)
 
diff --git a/core/dbt/clients/system.py b/core/dbt/clients/system.py
index b167d192e4e..eebe6fe8945 100644
--- a/core/dbt/clients/system.py
+++ b/core/dbt/clients/system.py
@@ -66,6 +66,7 @@ def find_matching(
 
 
 def load_file_contents(path: str, strip: bool = True) -> str:
+    path = convert_path(path)
     with open(path, 'rb') as handle:
         to_return = handle.read().decode('utf-8')
 
@@ -81,6 +82,7 @@ def make_directory(path: str) -> None:
     exist. This function handles the case where two threads try to create
     a directory at once.
     """
+    path = convert_path(path)
     if not os.path.exists(path):
         # concurrent writes that try to create the same dir can fail
         try:
@@ -99,6 +101,7 @@ def make_file(path: str, contents: str = '', overwrite: bool = False) -> bool:
     exists. The file is saved with contents `contents`
     """
     if overwrite or not os.path.exists(path):
+        path = convert_path(path)
         with open(path, 'w') as fh:
             fh.write(contents)
         return True
@@ -121,6 +124,7 @@ def supports_symlinks() -> bool:
 
 
 def write_file(path: str, contents: str = '') -> bool:
+    path = convert_path(path)
     make_directory(os.path.dirname(path))
     with open(path, 'w', encoding='utf-8') as f:
         f.write(str(contents))
@@ -163,7 +167,7 @@ def rmdir(path: str) -> None:
     different permissions on Windows. Otherwise, removing directories (eg.
     cloned via git) can cause rmtree to throw a PermissionError exception
     """
-    logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform))
+    path = convert_path(path)
     if sys.platform == 'win32':
         onerror = _windows_rmdir_readonly
     else:
@@ -172,15 +176,49 @@ def rmdir(path: str) -> None:
     shutil.rmtree(path, onerror=onerror)
 
 
+def convert_path(path: str) -> str:
+    """Convert a path that dbt has, which might be >260 characters long, to one
+    that will be writable/readable on Windows.
+
+    On other platforms, this is a no-op.
+    """
+    if sys.platform != 'win32':
+        return path
+    prefix = '\\\\?\\'
+    # Nothing to do
+    if path.startswith(prefix):
+        return path
+    path = os.path.normpath(path)
+    if path.startswith('\\'):
+        # if a path starts with '\\', splitdrive() on it will return '' for the
+        # drive, but the prefix requires a drive letter. So let's add the drive
+        # letter back in.
+        curdrive = os.path.splitdrive(os.getcwd())[0]
+        path = curdrive + path
+
+    # now our path is either an absolute UNC path or relative to the current
+    # directory. If it's relative, we need to make it absolute or the prefix
+    # won't work.
+    if not os.path.splitdrive(path)[0]:
+        path = os.path.join(os.getcwd(), path)
+
+    if not path.startswith(prefix):
+        path = prefix + path
+    return path
+
+
 def remove_file(path: str) -> None:
+    path = convert_path(path)
     os.remove(path)
 
 
 def path_exists(path: str) -> bool:
+    path = convert_path(path)
     return os.path.lexists(path)
 
 
 def path_is_symlink(path: str) -> bool:
+    path = convert_path(path)
     return os.path.islink(path)
 
 
@@ -326,6 +364,7 @@ def run_cmd(
 
 
 def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None:
+    path = convert_path(path)
     connection_timeout = timeout or float(os.getenv('DBT_HTTP_TIMEOUT', 10))
     response = requests.get(url, timeout=connection_timeout)
     with open(path, 'wb') as handle:
@@ -334,6 +373,8 @@ def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None:
 
 
 def rename(from_path: str, to_path: str, force: bool = False) -> None:
+    from_path = convert_path(from_path)
+    to_path = convert_path(to_path)
     is_symlink = path_is_symlink(to_path)
 
     if os.path.exists(to_path) and force:
@@ -348,6 +389,7 @@ def rename(from_path: str, to_path: str, force: bool = False) -> None:
 def untar_package(
     tar_path: str, dest_dir: str, rename_to: Optional[str] = None
 ) -> None:
+    tar_path = convert_path(tar_path)
     tar_dir_name = None
     with tarfile.open(tar_path, 'r') as tarball:
         tarball.extractall(dest_dir)
@@ -384,6 +426,8 @@ def move(src, dst):
     This is almost identical to the real shutil.move, except it uses our rmtree
     and skips handling non-windows OSes since the existing one works ok there.
     """
+    src = convert_path(src)
+    dst = convert_path(dst)
     if os.name != 'nt':
         return shutil.move(src, dst)
 
@@ -418,4 +462,5 @@ def rmtree(path):
     """Recursively remove path. On permissions errors on windows, try to remove
     the read-only flag and try again.
     """
+    path = convert_path(path)
     return shutil.rmtree(path, onerror=chmod_and_retry)
diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py
index 213ace98b67..de0ba561eaa 100644
--- a/test/integration/029_docs_generate_tests/test_docs_generate.py
+++ b/test/integration/029_docs_generate_tests/test_docs_generate.py
@@ -2,15 +2,19 @@
 import json
 import os
 import random
+import shutil
+import tempfile
 import time
 from datetime import datetime
 from unittest.mock import ANY, patch
 
+from pytest import mark
 from test.integration.base import DBTIntegrationTest, use_profile, AnyFloat, \
     AnyString, AnyStringWith, normalize, Normalized
 
 from dbt.exceptions import CompilationException
 
+
 def _read_file(path):
     with open(path, 'r') as fp:
         return fp.read().replace('\r', '').replace('\\r', '')
@@ -3180,3 +3184,63 @@ def test_postgres_override_used(self):
             self.run_dbt(['docs', 'generate'])
 
         self.assertIn('rejected: no catalogs for you', str(exc.exception))
+
+
+@mark.skipif(os.name != 'nt', reason='This is only relevant on windows')
+class TestDocsGenerateLongWindowsPaths(DBTIntegrationTest):
+    def _generate_test_root_dir(self):
+        assert os.name == 'nt'
+        magic_prefix = '\\\\?\\'
+
+        # tempfile.mkdtemp doesn't use `\\?\` by default so we have to
+        # get a tiny bit creative.
+        temp_dir = tempfile.gettempdir()
+        if not temp_dir.startswith(magic_prefix):
+            temp_dir = magic_prefix + temp_dir
+        outer = tempfile.mkdtemp(prefix='dbt-int-test-', dir=temp_dir)
+        # then inside _that_ directory make a new one that gets us to just
+        # barely 260 total. I picked 250 to account for the '\' and anything
+        # else. The key is that len(inner) + len('target\\compiled\\...') will
+        # be >260 chars
+        new_length = 250 - len(outer)
+        inner = os.path.join(outer, 'a'*new_length)
+        os.mkdir(inner)
+        return normalize(inner)
+
+    def _symlink_test_folders(self):
+        # dbt's normal symlink behavior breaks this test, so special-case it
+        for entry in os.listdir(self.test_original_source_path):
+            src = os.path.join(self.test_original_source_path, entry)
+            tst = os.path.join(self.test_root_dir, entry)
+            if entry == 'trivial_models':
+                shutil.copytree(src, tst)
+            elif entry == 'local_dependency':
+                continue
+            elif os.path.isdir(entry) or entry.endswith('.sql'):
+                os.symlink(src, tst)
+
+    @property
+    def schema(self):
+        return 'docs_generate_029'
+
+    @staticmethod
+    def dir(path):
+        return normalize(path)
+
+    @property
+    def models(self):
+        return self.dir("trivial_models")
+
+    def run_and_generate(self):
+        self.assertEqual(len(self.run_dbt(['run'])), 1)
+        os.remove(normalize('target/manifest.json'))
+        os.remove(normalize('target/run_results.json'))
+        self.run_dbt(['docs', 'generate'])
+
+    @use_profile('postgres')
+    def test_postgres_long_paths(self):
+        self.run_and_generate()
+        # this doesn't use abspath, so all should be well here
+        manifest = _read_json('./target/manifest.json')
+        self.assertIn('nodes', manifest)
+        assert os.path.exists('./target/run/test/trivial_models/model.sql')
diff --git a/test/integration/base.py b/test/integration/base.py
index 7122d1a6c86..0ca24bf8b94 100644
--- a/test/integration/base.py
+++ b/test/integration/base.py
@@ -348,6 +348,9 @@ def test_root_realpath(self):
         else:
             return self.test_root_dir
 
+    def _generate_test_root_dir(self):
+        return normalize(tempfile.mkdtemp(prefix='dbt-int-test-'))
+
     def setUp(self):
         self.dbt_core_install_root = os.path.dirname(dbt.__file__)
         log_manager.reset_handlers()
@@ -357,7 +360,7 @@ def setUp(self):
         self._logs_dir = os.path.join(self.initial_dir, 'logs', self.prefix)
         _really_makedirs(self._logs_dir)
         self.test_original_source_path = _pytest_get_test_root()
-        self.test_root_dir = normalize(tempfile.mkdtemp(prefix='dbt-int-test-'))
+        self.test_root_dir = self._generate_test_root_dir()
 
         os.chdir(self.test_root_dir)
         try:

From ff272c797a5314116caedc30ad97cb58069c0940 Mon Sep 17 00:00:00 2001
From: Jacob Beck <jake@fishtownanalytics.com>
Date: Thu, 18 Jun 2020 15:17:27 -0600
Subject: [PATCH 2/4] make sure dbt clean still works...

---
 test/integration/029_docs_generate_tests/test_docs_generate.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py
index de0ba561eaa..18066105620 100644
--- a/test/integration/029_docs_generate_tests/test_docs_generate.py
+++ b/test/integration/029_docs_generate_tests/test_docs_generate.py
@@ -3244,3 +3244,5 @@ def test_postgres_long_paths(self):
         manifest = _read_json('./target/manifest.json')
         self.assertIn('nodes', manifest)
         assert os.path.exists('./target/run/test/trivial_models/model.sql')
+        self.run_dbt(['clean'])
+        assert not os.path.exists('./target/run')

From 8e63da2f04e59cc8203099fd11a4cd62141de5b0 Mon Sep 17 00:00:00 2001
From: Jacob Beck <jake@fishtownanalytics.com>
Date: Thu, 18 Jun 2020 17:11:15 -0600
Subject: [PATCH 3/4] Be pickier about when to mangle paths, handle the case
 where we still hit path length limits

---
 core/dbt/clients/system.py | 95 +++++++++++++++++++++++++++++++-------
 1 file changed, 78 insertions(+), 17 deletions(-)

diff --git a/core/dbt/clients/system.py b/core/dbt/clients/system.py
index eebe6fe8945..2193e6e7bc9 100644
--- a/core/dbt/clients/system.py
+++ b/core/dbt/clients/system.py
@@ -19,6 +19,12 @@
 
 from dbt.logger import GLOBAL_LOGGER as logger
 
+if sys.platform == 'win32':
+    from ctypes import WinDLL, c_bool
+else:
+    WinDLL = None
+    c_bool = None
+
 
 def find_matching(
     root_path: str,
@@ -125,10 +131,24 @@ def supports_symlinks() -> bool:
 
 def write_file(path: str, contents: str = '') -> bool:
     path = convert_path(path)
-    make_directory(os.path.dirname(path))
-    with open(path, 'w', encoding='utf-8') as f:
-        f.write(str(contents))
-
+    try:
+        make_directory(os.path.dirname(path))
+        with open(path, 'w', encoding='utf-8') as f:
+            f.write(str(contents))
+    except FileNotFoundError as exc:
+        if (
+            os.name == 'nt' and
+            getattr(exc, 'winerror', 0) == 3 and
+            len(path) >= 260
+        ):
+            # all our hard work and the path was still too long. Log and
+            # continue.
+            logger.debug(
+                f'Could not write to path {path}: Path was too long '
+                f'({len(path)} characters)'
+            )
+        else:
+            raise
     return True
 
 
@@ -176,32 +196,73 @@ def rmdir(path: str) -> None:
     shutil.rmtree(path, onerror=onerror)
 
 
+def _win_prepare_path(path: str) -> str:
+    """Given a windows path, prepare it for use by making sure it is absolute
+    and normalized.
+    """
+    path = os.path.normpath(path)
+
+    # if a path starts with '\', splitdrive() on it will return '' for the
+    # drive, but the prefix requires a drive letter. So let's add the drive
+    # letter back in.
+    # Unless it starts with '\\'. In that case, the path is a UNC mount point
+    # and splitdrive will be fine.
+    if not path.startswith('\\\\') and path.startswith('\\'):
+        curdrive = os.path.splitdrive(os.getcwd())[0]
+        path = curdrive + path
+
+    # now our path is either an absolute UNC path or relative to the current
+    # directory. If it's relative, we need to make it absolute or the prefix
+    # won't work. `ntpath.abspath` allegedly doesn't always play nice with long
+    # paths, so do this instead.
+    if not os.path.splitdrive(path)[0]:
+        path = os.path.join(os.getcwd(), path)
+
+    return path
+
+
+def _supports_long_paths() -> bool:
+    if sys.platform != 'win32':
+        return True
+    # Eryk Sun says to use `WinDLL('ntdll')` instead of `windll.ntdll` because
+    # of pointer caching in a comment here:
+    # https://stackoverflow.com/a/35097999/11262881
+    # I don't know exaclty what he means, but I am inclined to believe him as
+    # he's pretty active on Python windows bugs!
+    try:
+        dll = WinDLL('ntdll')
+    except OSError:  # I don't think this happens? you need ntdll to run python
+        return False
+    # not all windows versions have it at all
+    if not hasattr(dll, 'RtlAreLongPathsEnabled'):
+        return False
+    # tell windows we want to get back a single unsigned byte (a bool).
+    dll.RtlAreLongPathsEnabled.restype = c_bool
+    return dll.RtlAreLongPathsEnabled()
+
+
 def convert_path(path: str) -> str:
     """Convert a path that dbt has, which might be >260 characters long, to one
     that will be writable/readable on Windows.
 
     On other platforms, this is a no-op.
     """
-    if sys.platform != 'win32':
+    # some parts of python seem to append '\*.*' to strings, better safe than
+    # sorry.
+    if len(path) < 250:
+        return path
+    if _supports_long_paths():
         return path
+
     prefix = '\\\\?\\'
     # Nothing to do
     if path.startswith(prefix):
         return path
-    path = os.path.normpath(path)
-    if path.startswith('\\'):
-        # if a path starts with '\\', splitdrive() on it will return '' for the
-        # drive, but the prefix requires a drive letter. So let's add the drive
-        # letter back in.
-        curdrive = os.path.splitdrive(os.getcwd())[0]
-        path = curdrive + path
 
-    # now our path is either an absolute UNC path or relative to the current
-    # directory. If it's relative, we need to make it absolute or the prefix
-    # won't work.
-    if not os.path.splitdrive(path)[0]:
-        path = os.path.join(os.getcwd(), path)
+    path = _win_prepare_path(path)
 
+    # add the prefix. The check is just in case os.getcwd() does something
+    # unexpected - I believe this if-state should always be True though!
     if not path.startswith(prefix):
         path = prefix + path
     return path

From db50880399b743792e3a60a2085243b55de68372 Mon Sep 17 00:00:00 2001
From: Jacob Beck <jake@fishtownanalytics.com>
Date: Fri, 19 Jun 2020 07:26:40 -0600
Subject: [PATCH 4/4] fix this test for spawn() vs fork()

---
 test/integration/048_rpc_test/test_rpc.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/test/integration/048_rpc_test/test_rpc.py b/test/integration/048_rpc_test/test_rpc.py
index 70320681750..7c29dc006b0 100644
--- a/test/integration/048_rpc_test/test_rpc.py
+++ b/test/integration/048_rpc_test/test_rpc.py
@@ -768,11 +768,9 @@ def test_timeout_postgres(self):
         self.assertIn('message', error_data)
         self.assertEqual(error_data['message'], 'RPC timed out after 1.0s')
         self.assertIn('logs', error_data)
-        if sys.platform == 'darwin':
-            # because fork() without exec() is broken on macos, we use 'spawn'
-            # so on macos we don't get logs back because we didn't fork()
-            return
-        self.assertTrue(len(error_data['logs']) > 0)
+        # because fork() without exec() is broken, we use 'spawn' so we don't
+        # get logs back because we didn't fork()
+        return
 
 
 @mark.flaky(rerun_filter=addr_in_use)