Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prompt before overwriting files, some error handling, and refactoring #34

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
225 changes: 170 additions & 55 deletions gitdir/gitdir.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,30 @@
import json
import sys
from colorama import Fore, Style, init
from pathlib import Path

init()

# this ANSI code lets us erase the current line
ERASE_LINE = "\x1b[2K"

COLOR_NAME_TO_CODE = {"default": "", "red": Fore.RED, "green": Style.BRIGHT + Fore.GREEN}
COLOR_NAME_TO_CODE = {
"default": "",
"red": Fore.RED,
"green": Style.BRIGHT + Fore.GREEN,
"yellow": Style.BRIGHT + Fore.YELLOW,
}


def print_text(text, color="default", in_place=False, **kwargs): # type: (str, str, bool, any) -> None
class FileToDownload:
    """
    Describe a single remote file and the local location it is saved to.

    :param name: file name of the remote entry
    :param url: URL the file contents are fetched from
    :param path: path of the entry inside the repository
    :param dest_path: local destination; a string or path-like is accepted
        and stored as a ``pathlib.Path``
    """

    def __init__(self, name, url, path, dest_path):
        self.name = name
        self.url = url
        self.path = path
        # download_file() calls dest_path.resolve(), which only exists on Path
        # objects, so normalise plain strings here instead of failing later.
        self.dest_path: Path = Path(dest_path)

    def __repr__(self):
        return "{}(name={!r}, url={!r}, path={!r}, dest_path={!r})".format(
            type(self).__name__, self.name, self.url, self.path, self.dest_path
        )


def print_text(text, color="default", in_place=False, **kwargs) -> None:
"""
print text to console, a wrapper to built-in print

Expand All @@ -30,31 +44,107 @@ def print_text(text, color="default", in_place=False, **kwargs): # type: (str,
print(COLOR_NAME_TO_CODE[color] + text + Style.RESET_ALL, **kwargs)


def prompt_yes_no(question: str, default: bool = True) -> bool:
    """
    Prompt user for a yes/no question.

    The question is repeated until a recognisable answer is given. Answers
    are case-insensitive and surrounding whitespace is ignored.

    :param question: question to ask
    :param default: default answer if user just presses enter, or if no
        answer can be read because standard input is closed
    :return: True if user answers yes, False if user answers no
    """
    # The capitalised letter shows which answer an empty reply selects.
    yes_no = "Y/n" if default else "y/N"

    while True:
        print_text("{} [{}] ".format(question, yes_no), end="")
        try:
            choice = input().strip().lower()
        except EOFError:
            # Non-interactive or closed stdin: nobody can answer, so fall
            # back to the default instead of crashing with a traceback.
            return default

        if choice in {"y", "yes"}:
            return True
        if choice in {"n", "no"}:
            return False
        if choice == "":
            return default
        print_text("Please respond with 'yes' or 'no' (or 'y' or 'n').")


def create_url(url):
"""
From the given url, produce a URL that is compatible with Github's REST API. Can handle blob or tree paths.
"""
repo_only_url = re.compile(r"https:\/\/github\.com\/[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}\/[a-zA-Z0-9]+$")
repo_only_url = re.compile(
r"https:\/\/github\.com\/[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}\/[a-zA-Z0-9]+$"
)
re_branch = re.compile("/(tree|blob)/(.+?)/")

# Check if the given url is a url to a GitHub repo. If it is, tell the
# user to use 'git clone' to download it
if re.match(repo_only_url,url):
print_text("✘ The given url is a complete repository. Use 'git clone' to download the repository",
"red", in_place=True)
if re.match(repo_only_url, url):
print_text(
"✘ The given url is a complete repository. Use 'git clone' to download the repository",
"red",
in_place=True,
)
sys.exit()

# extract the branch name from the given url (e.g master)
branch = re_branch.search(url)
download_dirs = url[branch.end():]
api_url = (url[:branch.start()].replace("github.com", "api.github.com/repos", 1) +
"/contents/" + download_dirs + "?ref=" + branch.group(2))
if branch is None:
print_text(
"✘ Could not find branch name in the given url", "red", in_place=True
Copy link
Author

@alichtman alichtman Jan 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optimally, I think this would try common branch names like main and master, but that's out of scope for this PR

)
sys.exit()
download_dirs = url[branch.end() :]
api_url = (
url[: branch.start()].replace("github.com", "api.github.com/repos", 1)
+ "/contents/"
+ download_dirs
+ "?ref="
+ branch.group(2)
)
return api_url, download_dirs


def download(repo_url, flatten=False, output_dir="./"):
""" Downloads the files and directories in repo_url. If flatten is specified, the contents of any and all
sub-directories will be pulled upwards into the root folder. """
def download_file(file_to_download: FileToDownload, force: bool) -> None:
    """
    Fetch one file to its destination, asking before replacing an existing file.

    :param file_to_download: description of the remote file and its destination
    :param force: when True, an existing destination is overwritten without asking
    """
    target = file_to_download.dest_path
    label = file_to_download.name
    must_ask = os.path.exists(target) and not force

    if not must_ask:
        # Nothing to clobber (or the user passed --force): just download.
        urllib.request.urlretrieve(file_to_download.url, target)
        # bring the cursor to the beginning, erase the current line, and dont make a new line
        print_text(
            "Downloaded: " + Fore.WHITE + "{} to {}".format(label, target),
            "green",
            in_place=True,
        )
        return

    question = "✘ File {} already exists. Overwrite?".format(target)
    if not prompt_yes_no(question, default=False):
        # The user declined, so the existing file is left untouched.
        print_text(
            "Skipped: " + Fore.WHITE + "{}".format(label),
            "yellow",
            in_place=True,
        )
        return

    urllib.request.urlretrieve(file_to_download.url, target)
    # bring the cursor to the beginning, erase the current line, and dont make a new line
    print_text(
        "Downloading (overwriting): "
        + Fore.WHITE
        + "{} to {}".format(label, target.resolve()),
        "green",
        in_place=True,
    )


def download(repo_url, flatten=False, force=False, output_dir="./"):
"""Downloads the files and directories in repo_url. If flatten is specified, the contents of any and all
sub-directories will be pulled upwards into the root folder."""

# generate the url which returns the JSON data
api_url, download_dirs = create_url(repo_url)
Expand All @@ -68,10 +158,9 @@ def download(repo_url, flatten=False, output_dir="./"):
else:
dir_out = output_dir

dir_out = Path(dir_out)

try:
opener = urllib.request.build_opener()
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These only need to be set once, globally.

opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)
response = urllib.request.urlretrieve(api_url)
except KeyboardInterrupt:
# when CTRL+C is pressed during the execution of this script,
Expand All @@ -80,8 +169,7 @@ def download(repo_url, flatten=False, output_dir="./"):
sys.exit()

if not flatten:
# make a directory with the name which is taken from
# the actual repo
# make a directory with the name which is taken from the actual repo
os.makedirs(dir_out, exist_ok=True)

# total files count
Expand All @@ -95,81 +183,108 @@ def download(repo_url, flatten=False, output_dir="./"):

# If the data is a file, download it as one.
if isinstance(data, dict) and data["type"] == "file":
print("Single file download")
try:
# download the file
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)
urllib.request.urlretrieve(data["download_url"], os.path.join(dir_out, data["name"]))
# bring the cursor to the beginning, erase the current line, and dont make a new line
print_text("Downloaded: " + Fore.WHITE + "{}".format(data["name"]), "green", in_place=True)
dest_path = dir_out / Path(data["name"])
file_to_download = FileToDownload(
name=data["name"],
url=data["download_url"],
dest_path=dest_path,
path=data["path"],
)
download_file(file_to_download, force)

return total_files
except KeyboardInterrupt:
# when CTRL+C is pressed during the execution of this script,
# bring the cursor to the beginning, erase the current line, and dont make a new line
print_text("✘ Got interrupted", 'red', in_place=False)
print_text("✘ Got interrupted", "red", in_place=False)
sys.exit()

for file in data:
file_url = file["download_url"]
file_name = file["name"]
file_path = file["path"]

if flatten:
path = os.path.basename(file_path)
path = Path(os.path.basename(file_path))
else:
path = file_path
dirname = os.path.dirname(path)
path = Path(file_path)

file_to_download = FileToDownload(
name=file["name"],
url=file["download_url"],
dest_path=path,
path=file["path"],
)

if dirname != '':
os.makedirs(os.path.dirname(path), exist_ok=True)
if path.parent != "":
os.makedirs(path.parent, exist_ok=True)
else:
pass

if file_url is not None:
if file_to_download.url is not None:
try:
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)
# download the file
urllib.request.urlretrieve(file_url, path)

# bring the cursor to the beginning, erase the current line, and dont make a new line
print_text("Downloaded: " + Fore.WHITE + "{}".format(file_name), "green", in_place=False, end="\n",
flush=True)

download_file(file_to_download, force)
except KeyboardInterrupt:
# when CTRL+C is pressed during the execution of this script,
# bring the cursor to the beginning, erase the current line, and dont make a new line
print_text("✘ Got interrupted", 'red', in_place=False)
print_text("✘ Got interrupted", "red", in_place=False)
sys.exit()
else:
download(file["html_url"], flatten, download_dirs)
download(file["html_url"], flatten, force, download_dirs)

return total_files


def set_up_url_opener(user_agent="Mozilla/5.0"):
    """
    Set up the URL opener to mimic a browser.

    Installs a process-wide urllib opener, so every later
    ``urllib.request`` call sends the given User-agent header.

    :param user_agent: value sent as the User-agent header; the default
        keeps the browser-like value used previously
    """
    opener = urllib.request.build_opener()
    # Assigning addheaders replaces the opener's default header list.
    opener.addheaders = [("User-agent", user_agent)]
    urllib.request.install_opener(opener)


def main():
if sys.platform != 'win32':
if sys.platform != "win32":
# disable CTRL+Z
signal.signal(signal.SIGTSTP, signal.SIG_IGN)

parser = argparse.ArgumentParser(description="Download directories/folders from GitHub")
parser.add_argument('urls', nargs="+",
help="List of Github directories to download.")
parser.add_argument('--output_dir', "-d", dest="output_dir", default="./",
help="All directories will be downloaded to the specified directory.")

parser.add_argument('--flatten', '-f', action="store_true",
help='Flatten directory structures. Do not create extra directory and download found files to'
' output directory. (default to current directory if not specified)')
parser = argparse.ArgumentParser(
description="Download directories/folders from GitHub"
)
parser.add_argument(
"urls", nargs="+", help="List of Github directories to download."
)
parser.add_argument(
"--output_dir",
"-d",
dest="output_dir",
default="./",
help="All directories will be downloaded to the specified directory.",
)

parser.add_argument(
"--flatten",
"-f",
action="store_true",
help="Flatten directory structures. Do not create extra directory and download found files to"
" output directory. (default to current directory if not specified)",
)

parser.add_argument(
"--force",
action="store_true",
help="Force overwriting existing files.",
)

args = parser.parse_args()

set_up_url_opener()

flatten = args.flatten
for url in args.urls:
total_files = download(url, flatten, args.output_dir)
download(url, flatten, args.force, args.output_dir)

print_text("✔ Download complete", "green", in_place=True)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name='gitdir',
version='1.2.7',
version='1.2.8',
author='Siddharth Dushantha',
author_email='siddharth.dushantha@gmail.com',
description='Download a single directory/folder from a GitHub repo',
Expand Down