Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix windows long paths #2566

Merged
merged 4 commits into from
Jun 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- `dbt deps` now respects the `--project-dir` flag, so using `dbt deps --project-dir=/some/path` and then `dbt run --project-dir=/some/path` will properly find dependencies ([#2519](https://github.com/fishtown-analytics/dbt/issues/2519), [#2534](https://github.com/fishtown-analytics/dbt/pull/2534))
- `packages.yml` revision/version fields can be float-like again (`revision: '1.0'` is valid). ([#2518](https://github.com/fishtown-analytics/dbt/issues/2518), [#2535](https://github.com/fishtown-analytics/dbt/pull/2535))
- Parallel RPC requests no longer step on each others' arguments ([[#2484](https://github.com/fishtown-analytics/dbt/issues/2484), [#2554](https://github.com/fishtown-analytics/dbt/pull/2554)])
- On windows (depending upon OS support), dbt no longer fails with errors when writing artifacts ([#2558](https://github.com/fishtown-analytics/dbt/issues/2558), [#2566](https://github.com/fishtown-analytics/dbt/pull/2566))

## dbt 0.17.0 (June 08, 2020)

Expand Down
116 changes: 111 additions & 5 deletions core/dbt/clients/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@

from dbt.logger import GLOBAL_LOGGER as logger

if sys.platform == 'win32':
from ctypes import WinDLL, c_bool
else:
WinDLL = None
c_bool = None


def find_matching(
root_path: str,
Expand Down Expand Up @@ -66,6 +72,7 @@ def find_matching(


def load_file_contents(path: str, strip: bool = True) -> str:
path = convert_path(path)
with open(path, 'rb') as handle:
to_return = handle.read().decode('utf-8')

Expand All @@ -81,6 +88,7 @@ def make_directory(path: str) -> None:
exist. This function handles the case where two threads try to create
a directory at once.
"""
path = convert_path(path)
if not os.path.exists(path):
# concurrent writes that try to create the same dir can fail
try:
Expand All @@ -99,6 +107,7 @@ def make_file(path: str, contents: str = '', overwrite: bool = False) -> bool:
exists. The file is saved with contents `contents`
"""
if overwrite or not os.path.exists(path):
path = convert_path(path)
with open(path, 'w') as fh:
fh.write(contents)
return True
Expand All @@ -121,10 +130,25 @@ def supports_symlinks() -> bool:


def write_file(path: str, contents: str = '') -> bool:
make_directory(os.path.dirname(path))
with open(path, 'w', encoding='utf-8') as f:
f.write(str(contents))

path = convert_path(path)
try:
make_directory(os.path.dirname(path))
with open(path, 'w', encoding='utf-8') as f:
f.write(str(contents))
except FileNotFoundError as exc:
if (
os.name == 'nt' and
getattr(exc, 'winerror', 0) == 3 and
len(path) >= 260
):
# all our hard work and the path was still too long. Log and
# continue.
logger.debug(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

f'Could not write to path {path}: Path was too long '
f'({len(path)} characters)'
)
else:
raise
return True


Expand Down Expand Up @@ -163,7 +187,7 @@ def rmdir(path: str) -> None:
different permissions on Windows. Otherwise, removing directories (eg.
cloned via git) can cause rmtree to throw a PermissionError exception
"""
logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform))
path = convert_path(path)
if sys.platform == 'win32':
onerror = _windows_rmdir_readonly
else:
Expand All @@ -172,15 +196,90 @@ def rmdir(path: str) -> None:
shutil.rmtree(path, onerror=onerror)


def _win_prepare_path(path: str) -> str:
"""Given a windows path, prepare it for use by making sure it is absolute
and normalized.
"""
path = os.path.normpath(path)

# if a path starts with '\', splitdrive() on it will return '' for the
# drive, but the prefix requires a drive letter. So let's add the drive
# letter back in.
# Unless it starts with '\\'. In that case, the path is a UNC mount point
# and splitdrive will be fine.
if not path.startswith('\\\\') and path.startswith('\\'):
curdrive = os.path.splitdrive(os.getcwd())[0]
path = curdrive + path

# now our path is either an absolute UNC path or relative to the current
# directory. If it's relative, we need to make it absolute or the prefix
# won't work. `ntpath.abspath` allegedly doesn't always play nice with long
# paths, so do this instead.
if not os.path.splitdrive(path)[0]:
path = os.path.join(os.getcwd(), path)

return path


def _supports_long_paths() -> bool:
if sys.platform != 'win32':
return True
# Eryk Sun says to use `WinDLL('ntdll')` instead of `windll.ntdll` because
# of pointer caching in a comment here:
# https://stackoverflow.com/a/35097999/11262881
# I don't know exaclty what he means, but I am inclined to believe him as
# he's pretty active on Python windows bugs!
try:
dll = WinDLL('ntdll')
except OSError: # I don't think this happens? you need ntdll to run python
return False
# not all windows versions have it at all
if not hasattr(dll, 'RtlAreLongPathsEnabled'):
return False
# tell windows we want to get back a single unsigned byte (a bool).
dll.RtlAreLongPathsEnabled.restype = c_bool
return dll.RtlAreLongPathsEnabled()


def convert_path(path: str) -> str:
"""Convert a path that dbt has, which might be >260 characters long, to one
that will be writable/readable on Windows.

On other platforms, this is a no-op.
"""
# some parts of python seem to append '\*.*' to strings, better safe than
# sorry.
if len(path) < 250:
return path
if _supports_long_paths():
return path

prefix = '\\\\?\\'
# Nothing to do
if path.startswith(prefix):
return path

path = _win_prepare_path(path)

# add the prefix. The check is just in case os.getcwd() does something
# unexpected - I believe this if-state should always be True though!
if not path.startswith(prefix):
path = prefix + path
return path


def remove_file(path: str) -> None:
path = convert_path(path)
os.remove(path)


def path_exists(path: str) -> bool:
path = convert_path(path)
return os.path.lexists(path)


def path_is_symlink(path: str) -> bool:
path = convert_path(path)
return os.path.islink(path)


Expand Down Expand Up @@ -326,6 +425,7 @@ def run_cmd(


def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None:
path = convert_path(path)
connection_timeout = timeout or float(os.getenv('DBT_HTTP_TIMEOUT', 10))
response = requests.get(url, timeout=connection_timeout)
with open(path, 'wb') as handle:
Expand All @@ -334,6 +434,8 @@ def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None:


def rename(from_path: str, to_path: str, force: bool = False) -> None:
from_path = convert_path(from_path)
to_path = convert_path(to_path)
is_symlink = path_is_symlink(to_path)

if os.path.exists(to_path) and force:
Expand All @@ -348,6 +450,7 @@ def rename(from_path: str, to_path: str, force: bool = False) -> None:
def untar_package(
tar_path: str, dest_dir: str, rename_to: Optional[str] = None
) -> None:
tar_path = convert_path(tar_path)
tar_dir_name = None
with tarfile.open(tar_path, 'r') as tarball:
tarball.extractall(dest_dir)
Expand Down Expand Up @@ -384,6 +487,8 @@ def move(src, dst):
This is almost identical to the real shutil.move, except it uses our rmtree
and skips handling non-windows OSes since the existing one works ok there.
"""
src = convert_path(src)
dst = convert_path(dst)
if os.name != 'nt':
return shutil.move(src, dst)

Expand Down Expand Up @@ -418,4 +523,5 @@ def rmtree(path):
"""Recursively remove path. On permissions errors on windows, try to remove
the read-only flag and try again.
"""
path = convert_path(path)
return shutil.rmtree(path, onerror=chmod_and_retry)
66 changes: 66 additions & 0 deletions test/integration/029_docs_generate_tests/test_docs_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
import json
import os
import random
import shutil
import tempfile
import time
from datetime import datetime
from unittest.mock import ANY, patch

from pytest import mark
from test.integration.base import DBTIntegrationTest, use_profile, AnyFloat, \
AnyString, AnyStringWith, normalize, Normalized

from dbt.exceptions import CompilationException


def _read_file(path):
with open(path, 'r') as fp:
return fp.read().replace('\r', '').replace('\\r', '')
Expand Down Expand Up @@ -3180,3 +3184,65 @@ def test_postgres_override_used(self):
self.run_dbt(['docs', 'generate'])

self.assertIn('rejected: no catalogs for you', str(exc.exception))


@mark.skipif(os.name != 'nt', reason='This is only relevant on windows')
class TestDocsGenerateLongWindowsPaths(DBTIntegrationTest):
def _generate_test_root_dir(self):
assert os.name == 'nt'
magic_prefix = '\\\\?\\'

# tempfile.mkdtemp doesn't use `\\?\` by default so we have to
# get a tiny bit creative.
temp_dir = tempfile.gettempdir()
if not temp_dir.startswith(magic_prefix):
temp_dir = magic_prefix + temp_dir
outer = tempfile.mkdtemp(prefix='dbt-int-test-', dir=temp_dir)
# then inside _that_ directory make a new one that gets us to just
# barely 260 total. I picked 250 to account for the '\' and anything
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a reason we want to just barely eclipse 260 chars instead of going for, say, 300 chars? I just want to make sure I understand the logic behind this test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah! We have to os.chdir into the directory we build here, and the \\?\ doesn't work for os.chdir because the underlying windows APIs for that function don't handle it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks, i hate it

# else. The key is that len(inner) + len('target\\compiled\\...') will
# be >260 chars
new_length = 250 - len(outer)
inner = os.path.join(outer, 'a'*new_length)
os.mkdir(inner)
return normalize(inner)

def _symlink_test_folders(self):
# dbt's normal symlink behavior breaks this test, so special-case it
for entry in os.listdir(self.test_original_source_path):
src = os.path.join(self.test_original_source_path, entry)
tst = os.path.join(self.test_root_dir, entry)
if entry == 'trivial_models':
shutil.copytree(src, tst)
elif entry == 'local_dependency':
continue
elif os.path.isdir(entry) or entry.endswith('.sql'):
os.symlink(src, tst)

@property
def schema(self):
return 'docs_generate_029'

@staticmethod
def dir(path):
return normalize(path)

@property
def models(self):
return self.dir("trivial_models")

def run_and_generate(self):
self.assertEqual(len(self.run_dbt(['run'])), 1)
os.remove(normalize('target/manifest.json'))
os.remove(normalize('target/run_results.json'))
self.run_dbt(['docs', 'generate'])

@use_profile('postgres')
def test_postgres_long_paths(self):
self.run_and_generate()
# this doesn't use abspath, so all should be well here
manifest = _read_json('./target/manifest.json')
self.assertIn('nodes', manifest)
assert os.path.exists('./target/run/test/trivial_models/model.sql')
self.run_dbt(['clean'])
assert not os.path.exists('./target/run')
8 changes: 3 additions & 5 deletions test/integration/048_rpc_test/test_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,11 +768,9 @@ def test_timeout_postgres(self):
self.assertIn('message', error_data)
self.assertEqual(error_data['message'], 'RPC timed out after 1.0s')
self.assertIn('logs', error_data)
if sys.platform == 'darwin':
# because fork() without exec() is broken on macos, we use 'spawn'
# so on macos we don't get logs back because we didn't fork()
return
self.assertTrue(len(error_data['logs']) > 0)
# because fork() without exec() is broken, we use 'spawn' so we don't
# get logs back because we didn't fork()
return


@mark.flaky(rerun_filter=addr_in_use)
Expand Down
5 changes: 4 additions & 1 deletion test/integration/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,9 @@ def test_root_realpath(self):
else:
return self.test_root_dir

def _generate_test_root_dir(self):
return normalize(tempfile.mkdtemp(prefix='dbt-int-test-'))

def setUp(self):
self.dbt_core_install_root = os.path.dirname(dbt.__file__)
log_manager.reset_handlers()
Expand All @@ -357,7 +360,7 @@ def setUp(self):
self._logs_dir = os.path.join(self.initial_dir, 'logs', self.prefix)
_really_makedirs(self._logs_dir)
self.test_original_source_path = _pytest_get_test_root()
self.test_root_dir = normalize(tempfile.mkdtemp(prefix='dbt-int-test-'))
self.test_root_dir = self._generate_test_root_dir()

os.chdir(self.test_root_dir)
try:
Expand Down