galaxyproject · jmchilton · Jun 26, 2017 · Jun 25, 2017 · Jun 26, 2017 · Jun 26, 2017
diff --git a/.ci/flake8_lint_include_list.txt b/.ci/flake8_lint_include_list.txt
@@ -62,6 +62,7 @@ lib/galaxy/datatypes/metadata.py
 lib/galaxy/datatypes/msa.py
 lib/galaxy/datatypes/ngsindex.py
 lib/galaxy/datatypes/proteomics.py
+lib/galaxy/datatypes/registry.py
 lib/galaxy/datatypes/sequence.py
 lib/galaxy/datatypes/tabular.py
 lib/galaxy/datatypes/text.py

diff --git a/.ci/py3_sources.txt b/.ci/py3_sources.txt
@@ -20,6 +20,7 @@ lib/galaxy/datatypes/interval.py
 lib/galaxy/datatypes/msa.py
 lib/galaxy/datatypes/ngsindex.py
 lib/galaxy/datatypes/proteomics.py
+lib/galaxy/datatypes/registry.py
 lib/galaxy/datatypes/sequence.py
 lib/galaxy/datatypes/sniff.py
 lib/galaxy/datatypes/tabular.py

diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py
@@ -74,6 +74,7 @@ class Data( object ):
     """
     edam_data = "data_0006"
     edam_format = "format_1915"
+    file_ext = 'data'
     # Data is not chunkable by default.
     CHUNKABLE = False
 
@@ -682,7 +683,7 @@ def matches_any( self, target_datatypes ):
         Check if this datatype is of any of the target_datatypes or is
         a subtype thereof.
         """
-        datatype_classes = tuple( [ datatype if isclass( datatype ) else datatype.__class__ for datatype in target_datatypes ] )
+        datatype_classes = tuple( datatype if isclass( datatype ) else datatype.__class__ for datatype in target_datatypes )
         return isinstance( self, datatype_classes )
 
     def merge( split_files, output_file):

diff --git a/lib/galaxy/datatypes/registry.py b/lib/galaxy/datatypes/registry.py
@@ -3,28 +3,30 @@
 """
 from __future__ import absolute_import
 
-import os
 import imp
 import logging
+import os
 import tempfile
-
-import yaml
 from collections import OrderedDict as odict
 from xml.etree.ElementTree import Element
 
+import yaml
+
 import galaxy.util
 
-from . import data
-from . import tabular
-from . import interval
-from . import images
-from . import sequence
-from . import qualityscore
-from . import xml
-from . import coverage
-from . import tracks
-from . import binary
-from . import text
+from . import (
+    binary,
+    coverage,
+    data,
+    images,
+    interval,
+    qualityscore,
+    sequence,
+    tabular,
+    text,
+    tracks,
+    xml
+)
 from .display_applications.application import DisplayApplication
 
 
@@ -493,11 +495,11 @@ def get_mimetype_by_extension( self, ext, default='application/octet-stream' ):
         return mimetype
 
     def get_datatype_by_extension( self, ext ):
-        """Returns a datatype based on an extension"""
+        """Returns the datatype object registered for an extension, or None if the extension is unknown"""
         try:
             builder = self.datatypes_by_extension[ ext ]
         except KeyError:
-            builder = data.Text()
+            builder = None
         return builder
 
     def change_datatype( self, data, ext ):
@@ -635,7 +637,7 @@ def load_display_applications( self, app, installed_repository_dict=None, deacti
                     else:
                         self.log.exception( "Error loading display application (%s)" % config_path )
         # Handle display_application subclass inheritance.
-        for extension, d_type1 in self.datatypes_by_extension.iteritems():
+        for extension, d_type1 in self.datatypes_by_extension.items():
             for d_type2, display_app in self.inherit_display_application_by_class:
                 current_app = d_type1.get_display_application( display_app.id, None )
                 if current_app is None and isinstance( d_type1, type( d_type2 ) ):
@@ -794,10 +796,10 @@ def get_converters_by_datatype( self, ext ):
         """Returns available converters by source type"""
         converters = odict()
         source_datatype = type( self.get_datatype_by_extension( ext ) )
-        for ext2, dict in self.datatype_converters.items():
+        for ext2, converters_dict in self.datatype_converters.items():
             converter_datatype = type( self.get_datatype_by_extension( ext2 ) )
             if issubclass( source_datatype, converter_datatype ):
-                converters.update( dict )
+                converters.update( converters_dict )
         # Ensure ext-level converters are present
         if ext in self.datatype_converters.keys():
             converters.update( self.datatype_converters[ ext ] )
@@ -813,7 +815,10 @@ def get_converter_by_target_type( self, source_ext, target_ext ):
     def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe=True ):
         """Returns ( target_ext, existing converted dataset )"""
         for convert_ext in self.get_converters_by_datatype( dataset.ext ):
-            if self.get_datatype_by_extension( convert_ext ).matches_any( accepted_formats ):
+            convert_ext_datatype = self.get_datatype_by_extension( convert_ext )
+            if convert_ext_datatype is None:
+                self.log.warning("Datatype class not found for extension '%s', which is used as target for conversion from datatype '%s'" % (convert_ext, dataset.ext))
+            elif convert_ext_datatype.matches_any( accepted_formats ):
                 converted_dataset = dataset.get_converted_files_by_type( convert_ext )
                 if converted_dataset:
                     ret_data = converted_dataset
@@ -825,14 +830,14 @@ def find_conversion_destination_for_dataset_by_extensions( self, dataset, accept
         return ( None, None )
 
     def get_composite_extensions( self ):
-        return [ ext for ( ext, d_type ) in self.datatypes_by_extension.iteritems() if d_type.composite_type is not None ]
+        return [ ext for ( ext, d_type ) in self.datatypes_by_extension.items() if d_type.composite_type is not None ]  # extensions whose datatype has a composite_type set
 
     def get_upload_metadata_params( self, context, group, tool ):
         """Returns dict of case value:inputs for metadata conditional for upload tool"""
         rval = {}
-        for ext, d_type in self.datatypes_by_extension.iteritems():
+        for ext, d_type in self.datatypes_by_extension.items():
             inputs = []
-            for meta_name, meta_spec in d_type.metadata_spec.iteritems():
+            for meta_name, meta_spec in d_type.metadata_spec.items():
                 if meta_spec.set_in_upload:
                     help_txt = meta_spec.desc
                     if not help_txt or help_txt == meta_name:

diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py
@@ -1947,7 +1947,14 @@ def extra_files_path( self ):
 
     @property
     def datatype( self ):
-        return _get_datatypes_registry().get_datatype_by_extension( self.extension )
+        extension = self.extension
+        if not extension or extension == 'auto' or extension == '_sniff_':
+            extension = 'data'  # empty, 'auto' and '_sniff_' extensions are looked up as 'data'
+        ret = _get_datatypes_registry().get_datatype_by_extension( extension )
+        if ret is None:
+            log.warning("Datatype class not found for extension '%s'" % extension)
+            return _get_datatypes_registry().get_datatype_by_extension( 'data' )  # unknown extension: fall back to the 'data' datatype
+        return ret
 
     def get_metadata( self ):
         # using weakref to store parent (to prevent circ ref),

diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py
@@ -75,7 +75,7 @@ def __init__( self, tool, input_source, context=None ):
         self.tool = tool
         self.refresh_on_change_values = []
         self.argument = input_source.get( "argument" )
-        self.name = ToolParameter.parse_name( input_source )
+        self.name = self.__class__.parse_name( input_source )
         self.type = input_source.get( "type" )
         self.hidden = input_source.get( "hidden", False )
         self.refresh_on_change = input_source.get_bool( "refresh_on_change", False )
@@ -227,8 +227,8 @@ def build( cls, tool, param ):
         else:
             return parameter_types[ param_type ]( tool, param )
 
-    @classmethod
-    def parse_name( cls, input_source ):
+    @staticmethod
+    def parse_name( input_source ):
         name = input_source.get( 'name' )
         if name is None:
             argument = input_source.get( 'argument' )
@@ -1428,12 +1428,16 @@ def _datatypes_registery( self, trans, tool ):
     def _parse_formats( self, trans, tool, input_source ):
         datatypes_registry = self._datatypes_registery( trans, tool )
 
-        # Build tuple of classes for supported data formats
-        formats = []
+        # Build list of datatypes for supported data formats; extensions unknown to the registry are skipped with a warning
         self.extensions = input_source.get( 'format', 'data' ).split( "," )
         normalized_extensions = [extension.strip().lower() for extension in self.extensions]
+        formats = []
         for extension in normalized_extensions:
-            formats.append( datatypes_registry.get_datatype_by_extension( extension ) )
+            datatype = datatypes_registry.get_datatype_by_extension(extension)
+            if datatype is not None:
+                formats.append(datatype)
+            else:
+                log.warning("Datatype class not found for extension '%s', which is used in the 'format' attribute of parameter '%s'" % (extension, self.name))
         self.formats = formats
 
     def _parse_options( self, input_source ):
@@ -1566,10 +1570,12 @@ def __init__( self, tool, input_source, trans=None):
         self._parse_options( input_source )
         # Load conversions required for the dataset input
         self.conversions = []
-        for name, conv_extensions in input_source.parse_conversion_tuples():
-            assert None not in [ name, conv_extensions ], 'A name (%s) and type (%s) are required for explicit conversion' % ( name, conv_extensions )
-            conv_types = [ tool.app.datatypes_registry.get_datatype_by_extension( conv_extensions.lower() ) ]
-            self.conversions.append( ( name, conv_extensions, conv_types ) )
+        for name, conv_extension in input_source.parse_conversion_tuples():
+            assert None not in [ name, conv_extension ], 'A name (%s) and type (%s) are required for explicit conversion' % ( name, conv_extension )
+            conv_type = tool.app.datatypes_registry.get_datatype_by_extension( conv_extension.lower() )
+            if conv_type is None:  # the registry returns None for an extension it does not know
+                raise ValueError("Datatype class not found for extension '%s', which is used as 'type' attribute in conversion of data parameter '%s'" % (conv_extension, self.name))
+            self.conversions.append( ( name, conv_extension, [conv_type] ) )
 
     def match_collections( self, history, dataset_matcher, reduction=True ):
         dataset_collection_matcher = DatasetCollectionMatcher( dataset_matcher )

diff --git a/lib/galaxy/tools/wrappers.py b/lib/galaxy/tools/wrappers.py
@@ -246,7 +246,13 @@ def is_collection( self ):
         return False
 
     def is_of_type( self, *exts ):
-        datatypes = [ self.datatypes_registry.get_datatype_by_extension( e ) for e in exts ]
+        datatypes = []
+        for e in exts:
+            datatype = self.datatypes_registry.get_datatype_by_extension(e)
+            if datatype is not None:
+                datatypes.append(datatype)
+            else:  # no datatype was returned for this extension: warn and leave it out of the match
+                log.warning("Datatype class not found for extension '%s', which is used as parameter of 'is_of_type()' method" % (e))
         return self.dataset.datatype.matches_any( datatypes )
 
     def __str__( self ):

diff --git a/test/functional/tools/sample_datatypes_conf.xml b/test/functional/tools/sample_datatypes_conf.xml
@@ -20,5 +20,6 @@
     <datatype extension="biom1" type="galaxy.datatypes.text:Biom1" display_in_upload="True" subclass="True" mimetype="application/json"/>
 	<datatype extension="bed" type="galaxy.datatypes.interval:Bed" display_in_upload="true" description="BED format provides a flexible way to define the data lines that are displayed in an annotation track. BED lines have three required columns and nine additional optional columns. The three required columns are chrom, chromStart and chromEnd." description_url="https://galaxyproject.org/learn/datatypes/#bed">
     </datatype>
+    <datatype extension="xml" type="galaxy.datatypes.xml:GenericXml" mimetype="application/xml" display_in_upload="true"/>
   </registration>
 </datatypes>