Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Registry.get_datatype_by_extension() to return None if ext is unknown #4224

Merged
merged 3 commits into from
Jun 26, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ci/flake8_lint_include_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ lib/galaxy/datatypes/metadata.py
lib/galaxy/datatypes/msa.py
lib/galaxy/datatypes/ngsindex.py
lib/galaxy/datatypes/proteomics.py
lib/galaxy/datatypes/registry.py
lib/galaxy/datatypes/sequence.py
lib/galaxy/datatypes/tabular.py
lib/galaxy/datatypes/text.py
Expand Down
1 change: 1 addition & 0 deletions .ci/py3_sources.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ lib/galaxy/datatypes/interval.py
lib/galaxy/datatypes/msa.py
lib/galaxy/datatypes/ngsindex.py
lib/galaxy/datatypes/proteomics.py
lib/galaxy/datatypes/registry.py
lib/galaxy/datatypes/sequence.py
lib/galaxy/datatypes/sniff.py
lib/galaxy/datatypes/tabular.py
Expand Down
3 changes: 2 additions & 1 deletion lib/galaxy/datatypes/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class Data( object ):
"""
edam_data = "data_0006"
edam_format = "format_1915"
file_ext = 'data'
# Data is not chunkable by default.
CHUNKABLE = False

Expand Down Expand Up @@ -682,7 +683,7 @@ def matches_any( self, target_datatypes ):
Check if this datatype is of any of the target_datatypes or is
a subtype thereof.
"""
datatype_classes = tuple( [ datatype if isclass( datatype ) else datatype.__class__ for datatype in target_datatypes ] )
datatype_classes = tuple( datatype if isclass( datatype ) else datatype.__class__ for datatype in target_datatypes )
return isinstance( self, datatype_classes )

def merge( split_files, output_file):
Expand Down
51 changes: 28 additions & 23 deletions lib/galaxy/datatypes/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,30 @@
"""
from __future__ import absolute_import

import os
import imp
import logging
import os
import tempfile

import yaml
from collections import OrderedDict as odict
from xml.etree.ElementTree import Element

import yaml

import galaxy.util

from . import data
from . import tabular
from . import interval
from . import images
from . import sequence
from . import qualityscore
from . import xml
from . import coverage
from . import tracks
from . import binary
from . import text
from . import (
binary,
coverage,
data,
images,
interval,
qualityscore,
sequence,
tabular,
text,
tracks,
xml
)
from .display_applications.application import DisplayApplication


Expand Down Expand Up @@ -493,11 +495,11 @@ def get_mimetype_by_extension( self, ext, default='application/octet-stream' ):
return mimetype

def get_datatype_by_extension( self, ext ):
"""Returns a datatype based on an extension"""
"""Returns a datatype object based on an extension"""
try:
builder = self.datatypes_by_extension[ ext ]
except KeyError:
builder = data.Text()
builder = None
return builder

def change_datatype( self, data, ext ):
Expand Down Expand Up @@ -635,7 +637,7 @@ def load_display_applications( self, app, installed_repository_dict=None, deacti
else:
self.log.exception( "Error loading display application (%s)" % config_path )
# Handle display_application subclass inheritance.
for extension, d_type1 in self.datatypes_by_extension.iteritems():
for extension, d_type1 in self.datatypes_by_extension.items():
for d_type2, display_app in self.inherit_display_application_by_class:
current_app = d_type1.get_display_application( display_app.id, None )
if current_app is None and isinstance( d_type1, type( d_type2 ) ):
Expand Down Expand Up @@ -794,10 +796,10 @@ def get_converters_by_datatype( self, ext ):
"""Returns available converters by source type"""
converters = odict()
source_datatype = type( self.get_datatype_by_extension( ext ) )
for ext2, dict in self.datatype_converters.items():
for ext2, converters_dict in self.datatype_converters.items():
converter_datatype = type( self.get_datatype_by_extension( ext2 ) )
if issubclass( source_datatype, converter_datatype ):
converters.update( dict )
converters.update( converters_dict )
# Ensure ext-level converters are present
if ext in self.datatype_converters.keys():
converters.update( self.datatype_converters[ ext ] )
Expand All @@ -813,7 +815,10 @@ def get_converter_by_target_type( self, source_ext, target_ext ):
def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe=True ):
"""Returns ( target_ext, existing converted dataset )"""
for convert_ext in self.get_converters_by_datatype( dataset.ext ):
if self.get_datatype_by_extension( convert_ext ).matches_any( accepted_formats ):
convert_ext_datatype = self.get_datatype_by_extension( convert_ext )
if convert_ext_datatype is None:
self.log.warning("Datatype class not found for extension '%s', which is used as target for conversion from datatype '%s'" % (convert_ext, dataset.ext))
elif convert_ext_datatype.matches_any( accepted_formats ):
converted_dataset = dataset.get_converted_files_by_type( convert_ext )
if converted_dataset:
ret_data = converted_dataset
Expand All @@ -825,14 +830,14 @@ def find_conversion_destination_for_dataset_by_extensions( self, dataset, accept
return ( None, None )

def get_composite_extensions( self ):
return [ ext for ( ext, d_type ) in self.datatypes_by_extension.iteritems() if d_type.composite_type is not None ]
return [ ext for ( ext, d_type ) in self.datatypes_by_extension.items() if d_type.composite_type is not None ]

def get_upload_metadata_params( self, context, group, tool ):
"""Returns dict of case value:inputs for metadata conditional for upload tool"""
rval = {}
for ext, d_type in self.datatypes_by_extension.iteritems():
for ext, d_type in self.datatypes_by_extension.items():
inputs = []
for meta_name, meta_spec in d_type.metadata_spec.iteritems():
for meta_name, meta_spec in d_type.metadata_spec.items():
if meta_spec.set_in_upload:
help_txt = meta_spec.desc
if not help_txt or help_txt == meta_name:
Expand Down
9 changes: 8 additions & 1 deletion lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1947,7 +1947,14 @@ def extra_files_path( self ):

@property
def datatype( self ):
return _get_datatypes_registry().get_datatype_by_extension( self.extension )
extension = self.extension
if not extension or extension == 'auto' or extension == '_sniff_':
extension = 'data'
ret = _get_datatypes_registry().get_datatype_by_extension( extension )
if ret is None:
log.warning("Datatype class not found for extension '%s'" % extension)
return _get_datatypes_registry().get_datatype_by_extension( 'data' )
return ret

def get_metadata( self ):
# using weakref to store parent (to prevent circ ref),
Expand Down
26 changes: 16 additions & 10 deletions lib/galaxy/tools/parameters/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __init__( self, tool, input_source, context=None ):
self.tool = tool
self.refresh_on_change_values = []
self.argument = input_source.get( "argument" )
self.name = ToolParameter.parse_name( input_source )
self.name = self.__class__.parse_name( input_source )
self.type = input_source.get( "type" )
self.hidden = input_source.get( "hidden", False )
self.refresh_on_change = input_source.get_bool( "refresh_on_change", False )
Expand Down Expand Up @@ -227,8 +227,8 @@ def build( cls, tool, param ):
else:
return parameter_types[ param_type ]( tool, param )

@classmethod
def parse_name( cls, input_source ):
@staticmethod
def parse_name( input_source ):
name = input_source.get( 'name' )
if name is None:
argument = input_source.get( 'argument' )
Expand Down Expand Up @@ -1428,12 +1428,16 @@ def _datatypes_registery( self, trans, tool ):
def _parse_formats( self, trans, tool, input_source ):
datatypes_registry = self._datatypes_registery( trans, tool )

# Build tuple of classes for supported data formats
formats = []
# Build list of classes for supported data formats
self.extensions = input_source.get( 'format', 'data' ).split( "," )
normalized_extensions = [extension.strip().lower() for extension in self.extensions]
formats = []
for extension in normalized_extensions:
formats.append( datatypes_registry.get_datatype_by_extension( extension ) )
datatype = datatypes_registry.get_datatype_by_extension(extension)
if datatype is not None:
formats.append(datatype)
else:
log.warning("Datatype class not found for extension '%s', which is used in the 'format' attribute of parameter '%s'" % (extension, self.name))
self.formats = formats

def _parse_options( self, input_source ):
Expand Down Expand Up @@ -1566,10 +1570,12 @@ def __init__( self, tool, input_source, trans=None):
self._parse_options( input_source )
# Load conversions required for the dataset input
self.conversions = []
for name, conv_extensions in input_source.parse_conversion_tuples():
assert None not in [ name, conv_extensions ], 'A name (%s) and type (%s) are required for explicit conversion' % ( name, conv_extensions )
conv_types = [ tool.app.datatypes_registry.get_datatype_by_extension( conv_extensions.lower() ) ]
self.conversions.append( ( name, conv_extensions, conv_types ) )
for name, conv_extension in input_source.parse_conversion_tuples():
assert None not in [ name, conv_extension ], 'A name (%s) and type (%s) are required for explicit conversion' % ( name, conv_extension )
conv_type = tool.app.datatypes_registry.get_datatype_by_extension( conv_extension.lower() )
if conv_type is None:
raise ValueError("Datatype class not found for extension '%s', which is used as 'type' attribute in conversion of data parameter '%s'" % (conv_extension, self.name))
self.conversions.append( ( name, conv_extension, [conv_type] ) )

def match_collections( self, history, dataset_matcher, reduction=True ):
dataset_collection_matcher = DatasetCollectionMatcher( dataset_matcher )
Expand Down
8 changes: 7 additions & 1 deletion lib/galaxy/tools/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,13 @@ def is_collection( self ):
return False

def is_of_type( self, *exts ):
datatypes = [ self.datatypes_registry.get_datatype_by_extension( e ) for e in exts ]
datatypes = []
for e in exts:
datatype = self.datatypes_registry.get_datatype_by_extension(e)
if datatype is not None:
datatypes.append(datatype)
else:
log.warning("Datatype class not found for extension '%s', which is used as parameter of 'is_of_type()' method" % (e))
return self.dataset.datatype.matches_any( datatypes )

def __str__( self ):
Expand Down
1 change: 1 addition & 0 deletions test/functional/tools/sample_datatypes_conf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@
<datatype extension="biom1" type="galaxy.datatypes.text:Biom1" display_in_upload="True" subclass="True" mimetype="application/json"/>
<datatype extension="bed" type="galaxy.datatypes.interval:Bed" display_in_upload="true" description="BED format provides a flexible way to define the data lines that are displayed in an annotation track. BED lines have three required columns and nine additional optional columns. The three required columns are chrom, chromStart and chromEnd." description_url="https://galaxyproject.org/learn/datatypes/#bed">
</datatype>
<datatype extension="xml" type="galaxy.datatypes.xml:GenericXml" mimetype="application/xml" display_in_upload="true"/>
</registration>
</datatypes>