Skip to content

Commit

Permalink
Merge pull request #2566 from fishtown-analytics/fix/windows-long-paths
Browse files Browse the repository at this point in the history
Fix windows long paths
  • Loading branch information
beckjake authored Jun 19, 2020
2 parents 64e9d70 + db50880 commit 0cddd9c
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- `dbt deps` now respects the `--project-dir` flag, so using `dbt deps --project-dir=/some/path` and then `dbt run --project-dir=/some/path` will properly find dependencies ([#2519](https://github.com/fishtown-analytics/dbt/issues/2519), [#2534](https://github.com/fishtown-analytics/dbt/pull/2534))
- `packages.yml` revision/version fields can be float-like again (`revision: '1.0'` is valid). ([#2518](https://github.com/fishtown-analytics/dbt/issues/2518), [#2535](https://github.com/fishtown-analytics/dbt/pull/2535))
- Parallel RPC requests no longer step on each others' arguments ([[#2484](https://github.com/fishtown-analytics/dbt/issues/2484), [#2554](https://github.com/fishtown-analytics/dbt/pull/2554)])
- On windows (depending upon OS support), dbt no longer fails with errors when writing artifacts ([#2558](https://github.com/fishtown-analytics/dbt/issues/2558), [#2566](https://github.com/fishtown-analytics/dbt/pull/2566))

## dbt 0.17.0 (June 08, 2020)

Expand Down
116 changes: 111 additions & 5 deletions core/dbt/clients/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@

from dbt.logger import GLOBAL_LOGGER as logger

if sys.platform == 'win32':
from ctypes import WinDLL, c_bool
else:
WinDLL = None
c_bool = None


def find_matching(
root_path: str,
Expand Down Expand Up @@ -66,6 +72,7 @@ def find_matching(


def load_file_contents(path: str, strip: bool = True) -> str:
path = convert_path(path)
with open(path, 'rb') as handle:
to_return = handle.read().decode('utf-8')

Expand All @@ -81,6 +88,7 @@ def make_directory(path: str) -> None:
exist. This function handles the case where two threads try to create
a directory at once.
"""
path = convert_path(path)
if not os.path.exists(path):
# concurrent writes that try to create the same dir can fail
try:
Expand All @@ -99,6 +107,7 @@ def make_file(path: str, contents: str = '', overwrite: bool = False) -> bool:
exists. The file is saved with contents `contents`
"""
if overwrite or not os.path.exists(path):
path = convert_path(path)
with open(path, 'w') as fh:
fh.write(contents)
return True
Expand All @@ -121,10 +130,25 @@ def supports_symlinks() -> bool:


def write_file(path: str, contents: str = '') -> bool:
make_directory(os.path.dirname(path))
with open(path, 'w', encoding='utf-8') as f:
f.write(str(contents))

path = convert_path(path)
try:
make_directory(os.path.dirname(path))
with open(path, 'w', encoding='utf-8') as f:
f.write(str(contents))
except FileNotFoundError as exc:
if (
os.name == 'nt' and
getattr(exc, 'winerror', 0) == 3 and
len(path) >= 260
):
# all our hard work and the path was still too long. Log and
# continue.
logger.debug(
f'Could not write to path {path}: Path was too long '
f'({len(path)} characters)'
)
else:
raise
return True


Expand Down Expand Up @@ -163,7 +187,7 @@ def rmdir(path: str) -> None:
different permissions on Windows. Otherwise, removing directories (eg.
cloned via git) can cause rmtree to throw a PermissionError exception
"""
logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform))
path = convert_path(path)
if sys.platform == 'win32':
onerror = _windows_rmdir_readonly
else:
Expand All @@ -172,15 +196,90 @@ def rmdir(path: str) -> None:
shutil.rmtree(path, onerror=onerror)


def _win_prepare_path(path: str) -> str:
"""Given a windows path, prepare it for use by making sure it is absolute
and normalized.
"""
path = os.path.normpath(path)

# if a path starts with '\', splitdrive() on it will return '' for the
# drive, but the prefix requires a drive letter. So let's add the drive
# letter back in.
# Unless it starts with '\\'. In that case, the path is a UNC mount point
# and splitdrive will be fine.
if not path.startswith('\\\\') and path.startswith('\\'):
curdrive = os.path.splitdrive(os.getcwd())[0]
path = curdrive + path

# now our path is either an absolute UNC path or relative to the current
# directory. If it's relative, we need to make it absolute or the prefix
# won't work. `ntpath.abspath` allegedly doesn't always play nice with long
# paths, so do this instead.
if not os.path.splitdrive(path)[0]:
path = os.path.join(os.getcwd(), path)

return path


def _supports_long_paths() -> bool:
if sys.platform != 'win32':
return True
# Eryk Sun says to use `WinDLL('ntdll')` instead of `windll.ntdll` because
# of pointer caching in a comment here:
# https://stackoverflow.com/a/35097999/11262881
# I don't know exaclty what he means, but I am inclined to believe him as
# he's pretty active on Python windows bugs!
try:
dll = WinDLL('ntdll')
except OSError: # I don't think this happens? you need ntdll to run python
return False
# not all windows versions have it at all
if not hasattr(dll, 'RtlAreLongPathsEnabled'):
return False
# tell windows we want to get back a single unsigned byte (a bool).
dll.RtlAreLongPathsEnabled.restype = c_bool
return dll.RtlAreLongPathsEnabled()


def convert_path(path: str) -> str:
"""Convert a path that dbt has, which might be >260 characters long, to one
that will be writable/readable on Windows.
On other platforms, this is a no-op.
"""
# some parts of python seem to append '\*.*' to strings, better safe than
# sorry.
if len(path) < 250:
return path
if _supports_long_paths():
return path

prefix = '\\\\?\\'
# Nothing to do
if path.startswith(prefix):
return path

path = _win_prepare_path(path)

# add the prefix. The check is just in case os.getcwd() does something
# unexpected - I believe this if-state should always be True though!
if not path.startswith(prefix):
path = prefix + path
return path


def remove_file(path: str) -> None:
path = convert_path(path)
os.remove(path)


def path_exists(path: str) -> bool:
path = convert_path(path)
return os.path.lexists(path)


def path_is_symlink(path: str) -> bool:
path = convert_path(path)
return os.path.islink(path)


Expand Down Expand Up @@ -326,6 +425,7 @@ def run_cmd(


def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None:
path = convert_path(path)
connection_timeout = timeout or float(os.getenv('DBT_HTTP_TIMEOUT', 10))
response = requests.get(url, timeout=connection_timeout)
with open(path, 'wb') as handle:
Expand All @@ -334,6 +434,8 @@ def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None:


def rename(from_path: str, to_path: str, force: bool = False) -> None:
from_path = convert_path(from_path)
to_path = convert_path(to_path)
is_symlink = path_is_symlink(to_path)

if os.path.exists(to_path) and force:
Expand All @@ -348,6 +450,7 @@ def rename(from_path: str, to_path: str, force: bool = False) -> None:
def untar_package(
tar_path: str, dest_dir: str, rename_to: Optional[str] = None
) -> None:
tar_path = convert_path(tar_path)
tar_dir_name = None
with tarfile.open(tar_path, 'r') as tarball:
tarball.extractall(dest_dir)
Expand Down Expand Up @@ -384,6 +487,8 @@ def move(src, dst):
This is almost identical to the real shutil.move, except it uses our rmtree
and skips handling non-windows OSes since the existing one works ok there.
"""
src = convert_path(src)
dst = convert_path(dst)
if os.name != 'nt':
return shutil.move(src, dst)

Expand Down Expand Up @@ -418,4 +523,5 @@ def rmtree(path):
"""Recursively remove path. On permissions errors on windows, try to remove
the read-only flag and try again.
"""
path = convert_path(path)
return shutil.rmtree(path, onerror=chmod_and_retry)
66 changes: 66 additions & 0 deletions test/integration/029_docs_generate_tests/test_docs_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
import json
import os
import random
import shutil
import tempfile
import time
from datetime import datetime
from unittest.mock import ANY, patch

from pytest import mark
from test.integration.base import DBTIntegrationTest, use_profile, AnyFloat, \
AnyString, AnyStringWith, normalize, Normalized

from dbt.exceptions import CompilationException


def _read_file(path):
with open(path, 'r') as fp:
return fp.read().replace('\r', '').replace('\\r', '')
Expand Down Expand Up @@ -3180,3 +3184,65 @@ def test_postgres_override_used(self):
self.run_dbt(['docs', 'generate'])

self.assertIn('rejected: no catalogs for you', str(exc.exception))


@mark.skipif(os.name != 'nt', reason='This is only relevant on windows')
class TestDocsGenerateLongWindowsPaths(DBTIntegrationTest):
def _generate_test_root_dir(self):
assert os.name == 'nt'
magic_prefix = '\\\\?\\'

# tempfile.mkdtemp doesn't use `\\?\` by default so we have to
# get a tiny bit creative.
temp_dir = tempfile.gettempdir()
if not temp_dir.startswith(magic_prefix):
temp_dir = magic_prefix + temp_dir
outer = tempfile.mkdtemp(prefix='dbt-int-test-', dir=temp_dir)
# then inside _that_ directory make a new one that gets us to just
# barely 260 total. I picked 250 to account for the '\' and anything
# else. The key is that len(inner) + len('target\\compiled\\...') will
# be >260 chars
new_length = 250 - len(outer)
inner = os.path.join(outer, 'a'*new_length)
os.mkdir(inner)
return normalize(inner)

def _symlink_test_folders(self):
# dbt's normal symlink behavior breaks this test, so special-case it
for entry in os.listdir(self.test_original_source_path):
src = os.path.join(self.test_original_source_path, entry)
tst = os.path.join(self.test_root_dir, entry)
if entry == 'trivial_models':
shutil.copytree(src, tst)
elif entry == 'local_dependency':
continue
elif os.path.isdir(entry) or entry.endswith('.sql'):
os.symlink(src, tst)

@property
def schema(self):
return 'docs_generate_029'

@staticmethod
def dir(path):
return normalize(path)

@property
def models(self):
return self.dir("trivial_models")

def run_and_generate(self):
self.assertEqual(len(self.run_dbt(['run'])), 1)
os.remove(normalize('target/manifest.json'))
os.remove(normalize('target/run_results.json'))
self.run_dbt(['docs', 'generate'])

@use_profile('postgres')
def test_postgres_long_paths(self):
self.run_and_generate()
# this doesn't use abspath, so all should be well here
manifest = _read_json('./target/manifest.json')
self.assertIn('nodes', manifest)
assert os.path.exists('./target/run/test/trivial_models/model.sql')
self.run_dbt(['clean'])
assert not os.path.exists('./target/run')
8 changes: 3 additions & 5 deletions test/integration/048_rpc_test/test_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,11 +768,9 @@ def test_timeout_postgres(self):
self.assertIn('message', error_data)
self.assertEqual(error_data['message'], 'RPC timed out after 1.0s')
self.assertIn('logs', error_data)
if sys.platform == 'darwin':
# because fork() without exec() is broken on macos, we use 'spawn'
# so on macos we don't get logs back because we didn't fork()
return
self.assertTrue(len(error_data['logs']) > 0)
# because fork() without exec() is broken, we use 'spawn' so we don't
# get logs back because we didn't fork()
return


@mark.flaky(rerun_filter=addr_in_use)
Expand Down
5 changes: 4 additions & 1 deletion test/integration/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,9 @@ def test_root_realpath(self):
else:
return self.test_root_dir

def _generate_test_root_dir(self):
return normalize(tempfile.mkdtemp(prefix='dbt-int-test-'))

def setUp(self):
self.dbt_core_install_root = os.path.dirname(dbt.__file__)
log_manager.reset_handlers()
Expand All @@ -357,7 +360,7 @@ def setUp(self):
self._logs_dir = os.path.join(self.initial_dir, 'logs', self.prefix)
_really_makedirs(self._logs_dir)
self.test_original_source_path = _pytest_get_test_root()
self.test_root_dir = normalize(tempfile.mkdtemp(prefix='dbt-int-test-'))
self.test_root_dir = self._generate_test_root_dir()

os.chdir(self.test_root_dir)
try:
Expand Down

0 comments on commit 0cddd9c

Please sign in to comment.