From 2288a88ab793198147fdb5a1050d0325a831d9f8 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Mon, 17 Apr 2017 13:12:57 -0400 Subject: [PATCH] Fix relabel_from_file collection operation error handling if duplicate identifiers. --- lib/galaxy/tools/__init__.py | 20 ++++++++++++-------- lib/galaxy/tools/relabel_from_file.xml | 11 +++++++++++ test-data/new_labels_bad_2.txt | 2 ++ 3 files changed, 25 insertions(+), 8 deletions(-) create mode 100644 test-data/new_labels_bad_2.txt diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 1d4861face62..e442812a3e94 100755 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2515,6 +2515,16 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history new_labels_dataset_assoc = incoming["how"]["labels"] strict = string_as_bool(incoming["how"]["strict"]) new_elements = odict() + + def add_copied_value_to_new_elements(new_label, dce_object): + new_label = new_label.strip() + if new_label in new_elements: + raise Exception("New identifier [%s] appears twice in resulting collection, these values must be unique." % new_label) + copied_value = dce_object.copy() + if getattr(copied_value, "history_content_type", None) == "dataset": + history.add_dataset(copied_value, set_hid=False) + new_elements[new_label] = copied_value + new_labels_path = new_labels_dataset_assoc.file_name new_labels = open(new_labels_path, "r").readlines(1024 * 1000000) if strict and len(hdca.collection.elements) != len(new_labels): @@ -2531,18 +2541,12 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history new_label = new_labels_dict.get(element_identifier, default) if not new_label: raise Exception("Failed to find new label for identifier [%s]" % element_identifier) - copied_value = dce_object.copy() - if getattr(copied_value, "history_content_type", None) == "dataset": - history.add_dataset(copied_value, set_hid=False) - new_elements[new_label] = copied_value + add_copied_value_to_new_elements(new_label, dce_object) else: # If new_labels_dataset_assoc is not a two-column tabular dataset we label with the current line of the dataset for i, dce in enumerate(hdca.collection.elements): dce_object = dce.element_object - copied_value = dce_object.copy() - if getattr(copied_value, "history_content_type", None) == "dataset": - history.add_dataset(copied_value, set_hid=False) - new_elements[new_labels[i].strip()] = copied_value + add_copied_value_to_new_elements(new_labels[i], dce_object) for key in new_elements.keys(): if not re.match("^[\w\-_]+$", key): raise Exception("Invalid new colleciton identifier [%s]" % key) diff --git a/lib/galaxy/tools/relabel_from_file.xml b/lib/galaxy/tools/relabel_from_file.xml index 8d37e671c7c4..ca806b3b5ec5 100644 --- a/lib/galaxy/tools/relabel_from_file.xml +++ b/lib/galaxy/tools/relabel_from_file.xml @@ -119,6 +119,17 @@ + + + + + + + + + + + This tool will take an input list and a text file with new identifiers diff --git a/test-data/new_labels_bad_2.txt b/test-data/new_labels_bad_2.txt new file mode 100644 index 000000000000..591022662d72 --- /dev/null +++ b/test-data/new_labels_bad_2.txt @@ -0,0 +1,2 @@ +newi1 +newi1 \ No newline at end of file