Hierarchical upload API optimized for folders & collections. #5220

Merged Mar 9, 2018 (19 commits)

Commits:
ebac0fd  Upload simplification - just base this check on link_only. (jmchilton, Dec 13, 2017)
94cc4df  Refactor shed's CompressedFile abstraction into galaxy.util.compression_utils. (jmchilton, Feb 26, 2018)
306dded  Improved doc for library API. (jmchilton, Dec 13, 2017)
a45cbfe  Stronger assertion about link_data_only option in upload.py. (jmchilton, Dec 20, 2017)
1c6cc02  Hierarchical upload API optimized for folders & collections. (jmchilton, Mar 8, 2018)
ce2170a  Test case for link_to_files during upload. (jmchilton, Feb 26, 2018)
b6f4bff  Do not allow workflows to run tools that are not workflow-compatible. (jmchilton, Jan 6, 2018)
347f1e1  Don't purge library path pastes and such in upload.py during testing. (jmchilton, Feb 26, 2018)
3bf5990  Allow uploading individual HDAs via fetch API. (jmchilton, Jan 11, 2018)
1720354  More upload testing, some input is getting deleted that shouldn't. (jmchilton, Jan 14, 2018)
5c5dbd2  Handle compressed datatypes appropriately in data fetch API. (jmchilton, Jan 22, 2018)
54ee573  Simplify and reduce duplication of upload actions. (jmchilton, Feb 26, 2018)
60f632b  Precreate certain outputs for upload 2.0 API. (jmchilton, Feb 26, 2018)
d783fc3  Avoid symlinks in upload FTP tests. (jmchilton, Mar 5, 2018)
3314183  Cleanup hierarchical upload commit based on PR comments from @bgruening. (jmchilton, Mar 5, 2018)
ca28000  Fixes for pre-creating HDAs using data fetch API. (jmchilton, Mar 6, 2018)
495d125  Consistent sniffing regardless of in_place. (jmchilton, Mar 6, 2018)
d651348  More upload tests and fixes. (jmchilton, Mar 6, 2018)
bca2c3c  Fix for data_fetch sniffing. (jmchilton, Mar 7, 2018)
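
As background for the commits above, here is a minimal hedged sketch of the kind of request the new hierarchical upload ("fetch") API is built around. The endpoint, header, and payload keys (targets, destination, elements, src) reflect my reading of this PR and may differ in your Galaxy version; the server URL, ids, and file URLs are hypothetical:

    import requests

    galaxy_url = "https://galaxy.example.org"    # hypothetical server
    payload = {
        "history_id": "f2db41e1fa331b3e",        # hypothetical encoded history id
        "targets": [{
            # Pre-create a dataset collection and fill it hierarchically.
            "destination": {"type": "hdca"},
            "collection_type": "list",
            "name": "uploaded-list",
            "elements": [
                {"src": "url", "url": "https://example.org/a.fastq", "name": "sample_a"},
                {"src": "pasted", "paste_content": "ACGT\n", "name": "sample_b", "ext": "txt"},
            ],
        }],
    }
    response = requests.post(
        "%s/api/tools/fetch" % galaxy_url,
        json=payload,
        headers={"x-api-key": "MY_GALAXY_API_KEY"},  # hypothetical key
    )
    response.raise_for_status()
    print(response.json())

Each target pairs a destination (history datasets, a collection, or a library folder) with a nested list of elements, which is what lets a single request populate folders and collections hierarchically rather than one dataset at a time.
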
6 changes: 2 additions & 4 deletions lib/galaxy/actions/library.py
@@ -249,16 +249,14 @@ def _make_library_uploaded_dataset(self, trans, params, name, path, type, librar
         uploaded_dataset.to_posix_lines = params.get('to_posix_lines', None)
         uploaded_dataset.space_to_tab = params.get('space_to_tab', None)
         uploaded_dataset.tag_using_filenames = params.get('tag_using_filenames', True)
+        uploaded_dataset.purge_source = getattr(trans.app.config, 'ftp_upload_purge', True)
         if in_folder:
             uploaded_dataset.in_folder = in_folder
         uploaded_dataset.data = upload_common.new_upload(trans, 'api', uploaded_dataset, library_bunch)
         uploaded_dataset.link_data_only = link_data_only
         uploaded_dataset.uuid = uuid_str
         if link_data_only == 'link_to_files':
-            uploaded_dataset.data.file_name = os.path.abspath(path)
-            # Since we are not copying the file into Galaxy's managed
-            # default file location, the dataset should never be purgable.
-            uploaded_dataset.data.dataset.purgable = False
+            uploaded_dataset.data.link_to(path)
         trans.sa_session.add_all((uploaded_dataset.data, uploaded_dataset.data.dataset))
         trans.sa_session.flush()
         return uploaded_dataset
4 changes: 4 additions & 0 deletions lib/galaxy/app.py
@@ -12,7 +12,9 @@
 from galaxy import config, jobs
 from galaxy.jobs import metrics as job_metrics
 from galaxy.managers.collections import DatasetCollectionManager
+from galaxy.managers.folders import FolderManager
 from galaxy.managers.histories import HistoryManager
+from galaxy.managers.libraries import LibraryManager
 from galaxy.managers.tags import GalaxyTagManager
 from galaxy.openid.providers import OpenIDProviders
 from galaxy.queue_worker import GalaxyQueueWorker
@@ -96,6 +98,8 @@ def __init__(self, **kwargs):
         self.history_manager = HistoryManager(self)
         self.dependency_resolvers_view = DependencyResolversView(self)
         self.test_data_resolver = test_data.TestDataResolver(file_dirs=self.config.tool_test_data_directories)
+        self.library_folder_manager = FolderManager()
+        self.library_manager = LibraryManager()
 
         # Tool Data Tables
         self._configure_tool_data_tables(from_shed_config=False)
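
As a hedged illustration of what the two new app attributes enable, controllers can now reach library and folder management through the application object; the `get` calls below are my best recollection of these managers' API, not verbatim from this diff:

    # e.g. inside an API controller, with ids already decoded:
    folder = trans.app.library_folder_manager.get(trans, decoded_folder_id)
    library = trans.app.library_manager.get(trans, decoded_library_id)
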
22 changes: 17 additions & 5 deletions lib/galaxy/datatypes/sniff.py
@@ -14,6 +14,7 @@
 import zipfile
 
 from six import text_type
+from six.moves.urllib.request import urlopen
 
 from galaxy import util
 from galaxy.util import compression_utils
@@ -39,6 +40,12 @@ def get_test_fname(fname):
     return full_path
 
 
+def stream_url_to_file(path):
+    page = urlopen(path)  # page will be .close()ed in stream_to_file
+    temp_name = stream_to_file(page, prefix='url_paste', source_encoding=util.get_charset_from_http_headers(page.headers))
+    return temp_name
+
+
 def stream_to_open_named_file(stream, fd, filename, source_encoding=None, source_error='strict', target_encoding=None, target_error='strict'):
     """Writes a stream to the provided file descriptor, returns the file name. Closes file descriptor"""
     # signature and behavor is somewhat odd, due to backwards compatibility, but this can/should be done better
@@ -131,7 +138,7 @@ def convert_newlines(fname, in_place=True, tmp_dir=None, tmp_prefix="gxupload"):
     return (i, temp_name)
 
 
-def sep2tabs(fname, in_place=True, patt="\\s+"):
+def sep2tabs(fname, in_place=True, patt="\\s+", tmp_dir=None, tmp_prefix="gxupload"):
     """
     Transforms in place a 'sep' separated file to a tab separated one
 
@@ -143,13 +150,18 @@ def sep2tabs(fname, in_place=True, patt="\\s+"):
     '1\\t2\\n3\\t4\\n'
     """
     regexp = re.compile(patt)
-    fd, temp_name = tempfile.mkstemp()
+    fd, temp_name = tempfile.mkstemp(prefix=tmp_prefix, dir=tmp_dir)
     with os.fdopen(fd, "wt") as fp:
         i = None
         for i, line in enumerate(open(fname)):
-            line = line.rstrip('\r\n')
-            elems = regexp.split(line)
-            fp.write("%s\n" % '\t'.join(elems))
+            if line.endswith("\r"):
+                line = line.rstrip('\r')
+                elems = regexp.split(line)
+                fp.write("%s\r" % '\t'.join(elems))
+            else:
+                line = line.rstrip('\n')
+                elems = regexp.split(line)
+                fp.write("%s\n" % '\t'.join(elems))
     if i is None:
         i = 0
     else:
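
A hedged usage sketch for the updated sep2tabs signature: as I read the function, it returns a (line_count, temp_name) tuple like convert_newlines above, with no temp name surviving an in-place conversion, and the new arguments steer where the scratch file lives. The paths below are hypothetical:

    from galaxy.datatypes.sniff import sep2tabs

    # Rewrite whitespace-separated columns as tabs, in place, keeping the
    # intermediate temp file under an upload-specific scratch directory.
    line_count, _ = sep2tabs(
        '/tmp/upload_scratch/dataset_1.dat',  # hypothetical staged upload
        in_place=True,
        tmp_dir='/tmp/upload_scratch',
        tmp_prefix='gxupload',
    )
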
47 changes: 47 additions & 0 deletions lib/galaxy/datatypes/upload_util.py
@@ -0,0 +1,47 @@
+from galaxy.datatypes import sniff
+from galaxy.datatypes.binary import Binary
+
+
+class UploadProblemException(Exception):
+
+    def __init__(self, message):
+        self.message = message
+
+
+def handle_unsniffable_binary_check(data_type, ext, path, name, is_binary, requested_ext, check_content, registry):
+    """Return modified values of data_type and ext if unsniffable binary encountered.
+
+    Throw UploadProblemException if content problems or extension mismatches occur.
+
+    Precondition: check_binary called returned True.
+    """
+    if is_binary or registry.is_extension_unsniffable_binary(requested_ext):
+        # We have a binary dataset, but it is not Bam, Sff or Pdf
+        data_type = 'binary'
+        parts = name.split(".")
+        if len(parts) > 1:
+            ext = parts[-1].strip().lower()
+            is_ext_unsniffable_binary = registry.is_extension_unsniffable_binary(ext)
+            if check_content and not is_ext_unsniffable_binary:
+                raise UploadProblemException('The uploaded binary file contains inappropriate content')
+
+            elif is_ext_unsniffable_binary and requested_ext != ext:
+                err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % (ext, ext)
+                raise UploadProblemException(err_msg)
+    return data_type, ext
+
+
+def handle_sniffable_binary_check(data_type, ext, path, registry):
+    """Return modified values of data_type and ext if sniffable binary encountered.
+
+    Precondition: check_binary called returned True.
+    """
+    # Sniff the data type
+    guessed_ext = sniff.guess_ext(path, registry.sniff_order)
+    # Set data_type only if guessed_ext is a binary datatype
+    datatype = registry.get_datatype_by_extension(guessed_ext)
+    if isinstance(datatype, Binary):
+        data_type = guessed_ext
+        ext = guessed_ext
+
+    return data_type, ext
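
A hedged sketch of how these two helpers might be combined during upload type detection; the wrapper function and variable names are illustrative, not the PR's exact wiring, and `registry` is Galaxy's datatypes registry:

    from galaxy.datatypes.upload_util import (
        UploadProblemException,
        handle_sniffable_binary_check,
        handle_unsniffable_binary_check,
    )

    def resolve_binary_type(path, name, requested_ext, is_binary, check_content, registry):
        # Precondition for both helpers: check_binary() returned True for path.
        data_type, ext = None, requested_ext
        try:
            # Let registered sniffers claim the binary first (e.g. bam, sff).
            data_type, ext = handle_sniffable_binary_check(data_type, ext, path, registry)
            if data_type is None:
                # Fall back to extension-based rules for unsniffable binaries;
                # this raises UploadProblemException on bad content or an
                # extension mismatch.
                data_type, ext = handle_unsniffable_binary_check(
                    data_type, ext, path, name, is_binary, requested_ext,
                    check_content, registry,
                )
        except UploadProblemException as e:
            return None, None, e.message
        return data_type, ext, None
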
1 change: 1 addition & 0 deletions lib/galaxy/dependencies/pinned-requirements.txt
@@ -15,6 +15,7 @@ uWSGI==2.0.15
 pysam==0.14
 
 # pure Python packages
+bdbag==1.1.1
 bleach==2.1.3
 bz2file==0.98; python_version < '3.3'
 ipaddress==1.0.18; python_version < '3.3'
2 changes: 1 addition & 1 deletion lib/galaxy/jobs/__init__.py
@@ -1380,7 +1380,7 @@ def path_rewriter(path):
         collected_datasets = {
             'primary': self.tool.collect_primary_datasets(out_data, self.get_tool_provided_job_metadata(), tool_working_directory, input_ext, input_dbkey)
         }
-        self.tool.collect_dynamic_collections(
+        self.tool.collect_dynamic_outputs(
             out_collections,
             self.get_tool_provided_job_metadata(),
             job_working_directory=tool_working_directory,
19 changes: 12 additions & 7 deletions lib/galaxy/managers/collections.py
@@ -46,17 +46,22 @@ def __init__(self, app):
         self.tag_manager = tags.GalaxyTagManager(app.model.context)
         self.ldda_manager = lddas.LDDAManager(app)
 
-    def precreate_dataset_collection_instance(self, trans, parent, name, implicit_inputs, implicit_output_name, structure):
+    def precreate_dataset_collection_instance(self, trans, parent, name, structure, implicit_inputs=None, implicit_output_name=None):
         # TODO: prebuild all required HIDs and send them in so no need to flush in between.
-        dataset_collection = self.precreate_dataset_collection(structure)
+        dataset_collection = self.precreate_dataset_collection(structure, allow_unitialized_element=implicit_output_name is not None)
         instance = self._create_instance_for_collection(
             trans, parent, name, dataset_collection, implicit_inputs=implicit_inputs, implicit_output_name=implicit_output_name, flush=False
         )
         return instance
 
-    def precreate_dataset_collection(self, structure):
-        if structure.is_leaf or not structure.children_known:
-            return model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
+    def precreate_dataset_collection(self, structure, allow_unitialized_element=True):
+        has_structure = not structure.is_leaf and structure.children_known
+        if not has_structure and allow_unitialized_element:
+            dataset_collection = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
+        elif not has_structure:
+            collection_type_description = structure.collection_type_description
+            dataset_collection = model.DatasetCollection(populated=False)
+            dataset_collection.collection_type = collection_type_description.collection_type
         else:
             collection_type_description = structure.collection_type_description
             dataset_collection = model.DatasetCollection(populated=False)
@@ -67,7 +72,7 @@ def precreate_dataset_collection(self, structure):
                 if substructure.is_leaf:
                     element = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
                 else:
-                    element = self.precreate_dataset_collection(substructure)
+                    element = self.precreate_dataset_collection(substructure, allow_unitialized_element=allow_unitialized_element)
 
                 element = model.DatasetCollectionElement(
                     element=element,
@@ -78,7 +83,7 @@ def precreate_dataset_collection(self, structure):
             dataset_collection.elements = elements
             dataset_collection.element_count = len(elements)
 
-            return dataset_collection
+        return dataset_collection
 
     def create(self, trans, parent, name, collection_type, element_identifiers=None,
                elements=None, implicit_collection_info=None, trusted_identifiers=None,
6 changes: 6 additions & 0 deletions lib/galaxy/model/__init__.py
@@ -2035,6 +2035,12 @@ def set_file_name(self, filename):
         return self.dataset.set_file_name(filename)
     file_name = property(get_file_name, set_file_name)
 
+    def link_to(self, path):
+        self.file_name = os.path.abspath(path)
+        # Since we are not copying the file into Galaxy's managed
+        # default file location, the dataset should never be purgable.
+        self.dataset.purgable = False
+
     @property
     def extra_files_path(self):
         return self.dataset.extra_files_path
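
A short hedged example of the new model helper in action, `hda` standing in for any dataset instance with an associated Dataset; the path is hypothetical:

    # Point the dataset at an existing file instead of copying it into
    # Galaxy's managed object store.
    hda.link_to('/data/shared/sample1.fastq')
    assert hda.dataset.purgable is False  # linked files must never be purged
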
12 changes: 8 additions & 4 deletions lib/galaxy/tools/__init__.py
@@ -103,6 +103,7 @@
 # Tools that require Galaxy's Python environment to be preserved.
 GALAXY_LIB_TOOLS_UNVERSIONED = [
     "upload1",
+    "__DATA_FETCH__",
     # Legacy tools bundled with Galaxy.
     "vcf_to_maf_customtrack1",
     "laj_1",
@@ -1107,7 +1108,10 @@ def parse_input_elem(self, page_source, enctypes, context=None):
                 group.file_type_name = elem.get('file_type_name', group.file_type_name)
                 group.default_file_type = elem.get('default_file_type', group.default_file_type)
                 group.metadata_ref = elem.get('metadata_ref', group.metadata_ref)
-                rval[group.file_type_name].refresh_on_change = True
+                try:
+                    rval[group.file_type_name].refresh_on_change = True
+                except KeyError:
+                    pass
                 group_page_source = XmlPageSource(elem)
                 group.inputs = self.parse_input_elem(group_page_source, enctypes, context)
                 rval[group.name] = group
@@ -1658,10 +1662,10 @@ def collect_primary_datasets(self, output, tool_provided_metadata, job_working_d
         """
         return output_collect.collect_primary_datasets(self, output, tool_provided_metadata, job_working_directory, input_ext, input_dbkey=input_dbkey)
 
-    def collect_dynamic_collections(self, output, tool_provided_metadata, **kwds):
-        """ Find files corresponding to dynamically structured collections.
+    def collect_dynamic_outputs(self, output, tool_provided_metadata, **kwds):
+        """Collect dynamic outputs associated with a job from this tool.
         """
-        return output_collect.collect_dynamic_collections(self, output, tool_provided_metadata, **kwds)
+        return output_collect.collect_dynamic_outputs(self, output, tool_provided_metadata, **kwds)
 
     def to_archive(self):
         tool = self