Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gto show --repo accepts remote repositories #269

Merged
merged 26 commits into from
Sep 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d984781
git_utils module that allows to get a git.Repo object from 1. a Repo …
Sep 13, 2022
ddfe434
delete function convert_to_git_repo in favor of decorator git_clone_i…
Sep 15, 2022
e533d2d
improve detection of remote git url and enable cloning via ssh in tests
Sep 16, 2022
feea54e
modify show to handle remote repos
Sep 16, 2022
2d79391
reformat sample remote repo expected registry json for better readibi…
Sep 16, 2022
b5e9810
refactor test structure
Sep 16, 2022
b188057
blackify code
Sep 16, 2022
2297ac5
update git show command --repo help text
Sep 16, 2022
4dd89ed
Merge pull request #1 from francesco086/show-on-remote-repo
francesco086 Sep 16, 2022
76aebe2
add empty line at the end of json file
Sep 16, 2022
4686711
remove tests requiring ssh key
Sep 16, 2022
74e0493
introduce and use @skip_for_windows_py_lt_3_9
Sep 19, 2022
7af727a
add info for windows users in case gto show fails
Sep 19, 2022
3d2d970
add info for windows users in case gto show fails
Sep 19, 2022
fcb55d0
support url without .git at the end
Sep 19, 2022
6415782
fix win test
Sep 19, 2022
8dfa922
fix win test
Sep 19, 2022
c1dbcaa
move remote git regex in the constants module
Sep 20, 2022
ec0c7eb
remove todo
Sep 20, 2022
0293e67
add more path examples to test in test_if_local_url_then_true
Sep 20, 2022
3f9cd51
prefer constants over get functions in data module in tests
Sep 20, 2022
fe29fa0
reduce complexity of _turn_args_into_kwargs
Sep 20, 2022
b2475e7
default os env reading of GITHUB_MATRIX_PYTHON to "" instead of "2"
Sep 20, 2022
f548a3c
introduce cloned_git_repo
Sep 20, 2022
8464ec0
refactored tests
Sep 20, 2022
35eeb93
Apply suggestions from code review
francesco086 Sep 21, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/check-test-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ jobs:
git config --global user.email "olivaw@iterative.ai"
git config --global user.name "Olivaw"
pytest
env:
GITHUB_MATRIX_OS: ${{ matrix.os }}
GITHUB_MATRIX_PYTHON: ${{ matrix.python }}
- name: "Upload coverage to Codecov"
uses: codecov/codecov-action@v3
with:
Expand Down
2 changes: 2 additions & 0 deletions gto/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
)
from gto.exceptions import NoRepo, NotImplementedInGTO, WrongArgs
from gto.ext import EnrichmentInfo
from gto.git_utils import git_clone_remote_repo
from gto.index import (
EnrichmentManager,
FileIndexManager,
Expand Down Expand Up @@ -267,6 +268,7 @@ def check_ref(repo: Union[str, Repo], ref: str):
return reg.check_ref(ref)


@git_clone_remote_repo
def show(
repo: Union[str, Repo],
name: Optional[str] = None,
Expand Down
8 changes: 7 additions & 1 deletion gto/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,13 @@ def GTOGroupSection(section):

# Typer options to control git-related operations
option_rev = Option(None, "--rev", help="Repo revision to use", show_default=True)
option_repo = Option(".", "-r", "--repo", help="Repository to use", show_default=True)
option_repo = Option(
".",
"-r",
"--repo",
help="Repository to use (remote repos accepted)",
show_default=True,
)
option_all_branches = Option(
False,
"-a",
Expand Down
5 changes: 5 additions & 0 deletions gto/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ class Action(Enum):
f"^(?P<artifact>{name})(((#(?P<stage>{name})|@(?P<latest>latest)|@(?P<greatest>greatest))))$"
)

# taken from https://stackoverflow.com/a/22312124/19782654, modified to include url without .git at the end
remote_git_repo_regex = re.compile(
r"((git|ssh|http(s)?)|(git@[\w\.]+))(:(//)?)([\w\.@\:/\-~]+)(/)?"
)


def mark_artifact_unregistered(artifact_name):
return f"*{artifact_name}"
Expand Down
69 changes: 69 additions & 0 deletions gto/git_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import inspect
import logging
from contextlib import contextmanager
from functools import wraps
from tempfile import TemporaryDirectory
from typing import Callable, Dict

from git import Repo

from gto.constants import remote_git_repo_regex


def git_clone_remote_repo(f: Callable):
@wraps(f)
def wrapped_f(*args, **kwargs):
kwargs = _turn_args_into_kwargs(args, kwargs)

if isinstance(kwargs["repo"], str) and is_url_of_remote_repo(
repo=kwargs["repo"]
):
try:
with cloned_git_repo(repo=kwargs["repo"]) as tmp_dir:
kwargs["repo"] = tmp_dir
return f(**kwargs)
except (NotADirectoryError, PermissionError) as e:
raise e.__class__(
"Are you using windows with python < 3.9? "
"This may be the reason of this error: https://bugs.python.org/issue42796. "
"Consider upgrading python."
) from e

return f(**kwargs)

def _turn_args_into_kwargs(
args: tuple, kwargs: Dict[str, object]
) -> Dict[str, object]:
kwargs_complement = {
k: args[i]
for i, k in enumerate(inspect.getfullargspec(f).args)
if i < len(args)
}
kwargs.update(kwargs_complement)
return kwargs

return wrapped_f


def is_url_of_remote_repo(repo: str) -> bool:
if remote_git_repo_regex.fullmatch(repo) is not None:
logging.debug("%s recognized as remote git repo", repo)
return True

logging.debug("%s NOT recognized as remote git repo", repo)
return False


@contextmanager
def cloned_git_repo(repo: str):
tmp_dir = TemporaryDirectory()
logging.debug("create temporary directory %s", tmp_dir)
git_clone(repo=repo, dir=tmp_dir.name)
yield tmp_dir.name
logging.debug("delete temporary directory %s", tmp_dir)
tmp_dir.cleanup()


def git_clone(repo: str, dir: str) -> None:
logging.debug("clone %s in directory %s", repo, dir)
Repo.clone_from(url=repo, to_path=dir)
Comment on lines +57 to +69
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's merge these functions and test a single one only? Now having git_clone as a separate seems redundant. You can easily test cloned_git_repo without generating a temporary directory additionally in tests.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can gladly do it for the tests (actually it connects with something I wanted to discuss about public/private functions -> will add it below), but for the functions I would suggest to leave them as they are, again to leave the door open for a different engine. It doesn't do any harm and makes the separation clear: this is the only thing you need to change if you want to use a different git engine.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I checked out the second PR you created.

My opinion:

  1. We should keep tests. We can remove them anytime later. Keeping the functions public is also OK for me, this is in the fashion of the rest of GTO codebase I think.
  2. We can keep these functions separate, doesn't matter for now I guess.

I think the PR is ready, no need to overthink this. We can get fix all of that when that would be needed. Let's merge this and move along. WDYT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fine with me! :) let's do that

17 changes: 17 additions & 0 deletions tests/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import json
from pathlib import Path

SAMPLE_HTTP_REMOTE_REPO = "https://github.com/iterative/example-gto.git"
SAMPLE_HTTP_REMOTE_REPO_WITHOUT_DOT_GIT_SUFFIX = (
"https://github.com/iterative/example-gto"
)
SAMPLE_REMOTE_REPO_URL = r"https://github.com/iterative/example-gto-frozen.git"


def get_sample_remote_repo_expected_registry() -> dict:
with open(
Path(__file__).parent / "sample_remote_repo_expected_registry.json",
"r",
encoding="utf-8",
) as f:
return json.load(f)
47 changes: 47 additions & 0 deletions tests/resources/sample_remote_repo_expected_registry.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"deprecated-model": {
"version": "v0.0.1",
"stage": {
"dev": null,
"prod": null,
"staging": null
},
"registered": true
},
"churn": {
"version": "v3.1.0",
"stage": {
"dev": null,
"prod": "v3.0.0",
"staging": "v3.1.0"
},
"registered": true
},
"segment": {
"version": "v0.4.1",
"stage": {
"dev": "v0.4.1",
"prod": null,
"staging": null
},
"registered": true
},
"cv-class": {
"version": "v0.1.13",
"stage": {
"dev": null,
"prod": null,
"staging": null
},
"registered": true
},
"model-a": {
"version": "v0.0.2",
"stage": {
"dev": "v0.0.2",
"prod": null,
"staging": null
},
"registered": true
}
}
16 changes: 16 additions & 0 deletions tests/skip_presets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import pytest

from tests.utils import (
is_os_windows_and_py_lt_3_8,
is_os_windows_and_py_lt_3_9,
)

skip_for_windows_py_lt_3_9 = pytest.mark.skipif(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same q as above, does it make sense to keep these in conftest.py? Except for introducing one more file. @mike0sv ?

is_os_windows_and_py_lt_3_9(),
reason="functionality requires python features not working on windows with python<3.9",
)

only_for_windows_py_lt_3_8 = pytest.mark.skipif(
not is_os_windows_and_py_lt_3_8(),
reason="test makes sense only for windows with python<3.9",
)
9 changes: 9 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@
import pytest

import gto
import tests.resources
from gto.api import show
from gto.exceptions import PathIsUsed, WrongArgs
from gto.tag import find
from gto.versions import SemVer
from tests.skip_presets import skip_for_windows_py_lt_3_9
from tests.utils import check_obj


Expand Down Expand Up @@ -289,3 +292,9 @@ def test_is_gto_repo_because_of_artifacts_yaml(empty_git_repo):
repo, write_file = empty_git_repo
write_file("artifacts.yaml", "{}")
assert gto.api._is_gto_repo(repo)


@skip_for_windows_py_lt_3_9
def test_if_show_on_remote_git_repo_then_return_expected_registry():
result = show(repo=tests.resources.SAMPLE_REMOTE_REPO_URL)
assert result == tests.resources.get_sample_remote_repo_expected_registry()
120 changes: 120 additions & 0 deletions tests/test_git_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Union
from unittest.mock import patch

import pytest
from git import Repo

import tests.resources
from gto.git_utils import (
git_clone,
git_clone_remote_repo,
is_url_of_remote_repo,
)
from tests.skip_presets import (
only_for_windows_py_lt_3_8,
skip_for_windows_py_lt_3_9,
)


def test_git_clone_remote_repo_if_repo_is_a_meaningless_string_then_leave_it_unchanged():
assert_f_called_with_repo_return_repo_itself(repo="meaningless_string")


def test_git_clone_remote_repo_if_repo_is_a_local_git_repo_then_leave_it_unchanged(
tmp_local_git_repo,
):
assert_f_called_with_repo_return_repo_itself(repo=tmp_local_git_repo)


def test_git_clone_remote_repo_if_repo_gitpython_object_then_leave_it_unchanged(
tmp_local_git_repo,
):
assert_f_called_with_repo_return_repo_itself(repo=Repo(path=tmp_local_git_repo))


@skip_for_windows_py_lt_3_9
def test_git_clone_remote_repo_if_repo_is_remote_url_then_clone_and_set_repo_to_its_local_path():
with patch("gto.git_utils.git_clone") as mocked_git_clone:
mocked_git_clone.side_effect = git_clone
local_repo = decorated_func(
repo=tests.resources.SAMPLE_HTTP_REMOTE_REPO, spam=0, jam=3
)
mocked_git_clone.assert_called_once_with(
repo=tests.resources.SAMPLE_HTTP_REMOTE_REPO, dir=local_repo
)


@only_for_windows_py_lt_3_8
def test_if_repo_is_remote_url_and_windows_os_error_then_hint_win_with_py_lt_3_9_may_be_the_cause():
with pytest.raises(OSError) as e:
decorated_func(repo=tests.resources.SAMPLE_HTTP_REMOTE_REPO, spam=0, jam=3)
assert e.type in (NotADirectoryError, PermissionError)
assert "windows" in str(e)
assert "python" in str(e)
assert "< 3.9" in str(e)


@pytest.mark.parametrize(
"repo",
(
"/local/path",
"/local/path",
".",
),
)
def test_is_url_of_remote_repo_if_local_url_then_true(repo: str):
assert is_url_of_remote_repo(repo=repo) is False


@pytest.mark.parametrize(
"repo",
(
tests.resources.SAMPLE_HTTP_REMOTE_REPO,
tests.resources.SAMPLE_HTTP_REMOTE_REPO_WITHOUT_DOT_GIT_SUFFIX,
),
)
def test_is_url_of_remote_repo_if_remote_url_then_true(repo: str):
assert is_url_of_remote_repo(repo=repo) is True


@skip_for_windows_py_lt_3_9
@pytest.mark.parametrize(
"repo",
(
tests.resources.SAMPLE_HTTP_REMOTE_REPO,
tests.resources.SAMPLE_HTTP_REMOTE_REPO_WITHOUT_DOT_GIT_SUFFIX,
),
)
def test_git_clone_if_valid_remote_then_clone(repo: str):
with TemporaryDirectory() as tmp_repo_dir:
git_clone(repo=repo, dir=tmp_repo_dir)
assert_dir_contain_git_repo(dir=tmp_repo_dir)


@git_clone_remote_repo
def decorated_func(
spam: int, repo: Union[Repo, str], jam: int
): # pylint: disable=unused-argument
return repo


def assert_f_called_with_repo_return_repo_itself(repo: Union[str, Repo]) -> None:
assert decorated_func(0, repo, 3) is repo
assert decorated_func(0, repo, jam=3) is repo
assert decorated_func(0, jam=3, repo=repo) is repo
assert decorated_func(spam=0, jam=3, repo=repo) is repo


@pytest.fixture
def tmp_local_git_repo() -> str:
tmp_repo_dir = TemporaryDirectory() # pylint: disable=consider-using-with
Repo.init(path=tmp_repo_dir.name)
yield tmp_repo_dir.name
tmp_repo_dir.cleanup()


def assert_dir_contain_git_repo(dir: str) -> None:
assert (Path(dir) / ".git").is_dir()
assert (Path(dir) / ".git/HEAD").is_file()
15 changes: 15 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from copy import deepcopy
from typing import Any, Dict, Sequence, Set, Union

Expand Down Expand Up @@ -32,3 +33,17 @@ def check_obj(
obj_values = omit(obj, skip_keys)
values = omit(values, skip_keys)
assert_equals(obj_values, values)


def is_os_windows_and_py_lt_3_9() -> bool:
return (
os.environ.get("GITHUB_MATRIX_OS") == "windows-latest"
and os.environ.get("GITHUB_MATRIX_PYTHON", "") < "3.9"
)


def is_os_windows_and_py_lt_3_8() -> bool:
return (
os.environ.get("GITHUB_MATRIX_OS") == "windows-latest"
and os.environ.get("GITHUB_MATRIX_PYTHON", "") < "3.8"
)