## Translators: ".tar.gz" is a file extension, and files with that extension are called "gzipped tar files": these terms should not be translated + ## Translators: ".zip" is a file extension, and files with that extension are called "zipped files": these terms should not be translated %if library:

${Text(_("Be sure you want to import a library before continuing. The contents of the imported library will replace the contents of the existing library. {em_start}You cannot undo a library import{em_end}. Before you proceed, we recommend that you export the current library, so that you have a backup copy of it.")).format(em_start=HTML(''), em_end=HTML(""))}

${_("The library that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip). This .tar.gz file must contain a library.xml file. It may also contain other files.")}

${_("The import process has five stages. During the first two stages, you must stay on this page. You can leave this page after the Unpacking stage has completed. We recommend, however, that you don't make important changes to your library until the import operation has completed.")}

%else:

${Text(_("Be sure you want to import a course before continuing. The contents of the imported course will replace the contents of the existing course. {em_start}You cannot undo a course import{em_end}. Before you proceed, we recommend that you export the current course, so that you have a backup copy of it.")).format(em_start=HTML(''), em_end=HTML(""))}

-

${_("The course that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip). This .tar.gz file must contain a course.xml file. It may also contain other files.")}

+

${_("The course that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip) or a .zip file (that is, a compressed archive file). This .tar.gz or .zip file must contain a course.xml file. It may also contain other files.")}

${_("The import process has five stages. During the first two stages, you must stay on this page. You can leave this page after the Unpacking stage has completed. We recommend, however, that you don't make important changes to your course until the import operation has completed.")}

%endif @@ -60,11 +61,12 @@

## Translators: ".tar.gz" is a file extension, and files with that extension are called "gzipped tar files": these terms should not be translated + ## Translators: ".zip" is a file extension, and files with that extension are called "zipped files": these terms should not be translated

%if library: ${_("Select a .tar.gz File to Replace Your Library Content")} %else: - ${_("Select a .tar.gz File to Replace Your Course Content")} + ${_("Select a .tar.gz or .zip File to Replace Your Course Content")} %endif

diff --git a/openedx/core/lib/extract_archive.py b/openedx/core/lib/extract_archive.py new file mode 100644 index 000000000000..4706e0caffb7 --- /dev/null +++ b/openedx/core/lib/extract_archive.py @@ -0,0 +1,109 @@ +""" +Safe version of extractall which does not extract any files that would +be, or symlink to a file that is, outside of the directory extracted in. + +Adapted from: +http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python +""" + +import logging +from os.path import abspath, dirname +from os.path import join as joinpath +from os.path import realpath +from typing import List, Union +from zipfile import ZipFile, ZipInfo +from tarfile import TarFile, TarInfo + +from django.conf import settings +from django.core.exceptions import SuspiciousOperation + +log = logging.getLogger(__name__) + + +def resolved(rpath): + """ + Returns the canonical absolute path of `rpath`. + """ + return realpath(abspath(rpath)) + + +def _is_bad_path(path, base): + """ + Is (the canonical absolute path of) `path` outside `base`? + """ + return not resolved(joinpath(base, path)).startswith(base) + + +def _is_bad_link(info, base): + """ + Does the file sym- or hard-link to files outside `base`? + """ + # Links are interpreted relative to the directory containing the link + tip = resolved(joinpath(base, dirname(info.name))) + return _is_bad_path(info.linkname, base=tip) + + +def _check_tarinfo(finfo: TarInfo, base: str): + """ + Checks a file in a tar archive (TarInfo object) for safety. + + It ensures that the file isn't a hard link or symlink to a file pointing to + a path outside the archive and checks that the file isn't a device file. + + Raises: + SuspiciousOperation: If the TarInfo object is found to be a + hard link, symlink, or a special device file. 
+ """ + if finfo.issym() and _is_bad_link(finfo, base): + log.debug("File %r is blocked: Hard link to %r", finfo.name, finfo.linkname) + raise SuspiciousOperation("Hard link") + if finfo.islnk() and _is_bad_link(finfo, base): + log.debug("File %r is blocked: Symlink to %r", finfo.name, finfo.linkname) + raise SuspiciousOperation("Symlink") + if finfo.isdev(): + log.debug("File %r is blocked: FIFO, device or character file", finfo.name) + raise SuspiciousOperation("Dev file") + + +def _checkmembers(members: Union[List[ZipInfo], List[TarInfo]], base: str): + """ + Check that all elements of the archive file are safe. + """ + base = resolved(base) + + # check that we're not trying to import outside of the github_repo_root + if not base.startswith(resolved(settings.GITHUB_REPO_ROOT)): + raise SuspiciousOperation("Attempted to import course outside of data dir") + + for finfo in members: + if isinstance(finfo, ZipInfo): + filename = finfo.filename + elif isinstance(finfo, TarInfo): + filename = finfo.name + _check_tarinfo(finfo, base) + if _is_bad_path(filename, base): + log.debug("File %r is blocked (illegal path)", filename) + raise SuspiciousOperation("Illegal path") + + +def safe_extractall(file_name, output_path): + """ + Extract Zip or Tar files + """ + archive = None + if not output_path.endswith("/"): + output_path += "/" + try: + if file_name.endswith(".zip"): + archive = ZipFile(file_name, "r") + members = archive.infolist() + elif file_name.endswith(".tar.gz"): + archive = TarFile.open(file_name) + members = archive.getmembers() + else: + raise ValueError("Unsupported archive format") + _checkmembers(members, output_path) + archive.extractall(output_path) + finally: + if archive: + archive.close() diff --git a/openedx/core/lib/extract_tar.py b/openedx/core/lib/extract_tar.py deleted file mode 100644 index e8780efb51b3..000000000000 --- a/openedx/core/lib/extract_tar.py +++ /dev/null @@ -1,77 +0,0 @@ -""" -Safe version of tarfile.extractall which does not 
extract any files that would -be, or symlink to a file that is, outside of the directory extracted in. - -Adapted from: -http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python -""" - -import logging -from os.path import join as joinpath -from os.path import abspath, dirname, realpath - -from django.conf import settings -from django.core.exceptions import SuspiciousOperation - -log = logging.getLogger(__name__) - - -def resolved(rpath): - """ - Returns the canonical absolute path of `rpath`. - """ - return realpath(abspath(rpath)) - - -def _is_bad_path(path, base): - """ - Is (the canonical absolute path of) `path` outside `base`? - """ - return not resolved(joinpath(base, path)).startswith(base) - - -def _is_bad_link(info, base): - """ - Does the file sym- or hard-link to files outside `base`? - """ - # Links are interpreted relative to the directory containing the link - tip = resolved(joinpath(base, dirname(info.name))) - return _is_bad_path(info.linkname, base=tip) - - -def safemembers(members, base): - """ - Check that all elements of a tar file are safe. 
- """ - - base = resolved(base) - - # check that we're not trying to import outside of the github_repo_root - if not base.startswith(resolved(settings.GITHUB_REPO_ROOT)): - raise SuspiciousOperation("Attempted to import course outside of data dir") - - for finfo in members: - if _is_bad_path(finfo.name, base): # lint-amnesty, pylint: disable=no-else-raise - log.debug("File %r is blocked (illegal path)", finfo.name) - raise SuspiciousOperation("Illegal path") - elif finfo.issym() and _is_bad_link(finfo, base): - log.debug("File %r is blocked: Hard link to %r", finfo.name, finfo.linkname) - raise SuspiciousOperation("Hard link") - elif finfo.islnk() and _is_bad_link(finfo, base): - log.debug("File %r is blocked: Symlink to %r", finfo.name, - finfo.linkname) - raise SuspiciousOperation("Symlink") - elif finfo.isdev(): - log.debug("File %r is blocked: FIFO, device or character file", - finfo.name) - raise SuspiciousOperation("Dev file") - - return members - - -def safetar_extractall(tar_file, path=".", members=None): # pylint: disable=unused-argument - """ - Safe version of `tar_file.extractall()`. - """ - path = str(path) - return tar_file.extractall(path, safemembers(tar_file, path))