Skip to content

Commit

Permalink
Merge pull request #19022 from wm75/enhance-collection-tools
Browse files Browse the repository at this point in the history
Enhance relabel_from_file to work with any column pair in mapping file
  • Loading branch information
jmchilton authored Nov 12, 2024
2 parents 7b0cf65 + abb8897 commit 6da90b3
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 21 deletions.
37 changes: 26 additions & 11 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ class safe_update(NamedTuple):
"__BUILD_LIST__": safe_update(parse_version("1.0.0"), parse_version("1.1.0")),
"__APPLY_RULES__": safe_update(parse_version("1.0.0"), parse_version("1.1.0")),
"__EXTRACT_DATASET__": safe_update(parse_version("1.0.0"), parse_version("1.0.1")),
"__RELABEL_FROM_FILE__": safe_update(parse_version("1.0.0"), parse_version("1.1.0")),
"Grep1": safe_update(parse_version("1.0.1"), parse_version("1.0.4")),
"Show beginning1": safe_update(parse_version("1.0.0"), parse_version("1.0.2")),
"Show tail1": safe_update(parse_version("1.0.0"), parse_version("1.0.1")),
Expand Down Expand Up @@ -3983,24 +3984,38 @@ def add_copied_value_to_new_elements(new_label, dce_object):
new_labels = fh.readlines(1024 * 1000000)
if strict and len(hdca.collection.elements) != len(new_labels):
raise exceptions.MessageException("Relabel mapping file contains incorrect number of identifiers")
if how_type == "tabular":
# We have a tabular file, where the first column is an existing element identifier,
# and the second column is the new element identifier.
if how_type in ["tabular", "tabular_extended"]:
# We have a tabular file, where one column lists existing element identifiers,
# another one the corresponding new element identifiers.
# In tabular_extended mode the two columns ("from" and "to") are user-specified,
# while in simple tabular mode they default to the first and second column and
# these must be the only two columns in the input.
from_index = int(incoming["how"].get("from", 1)) - 1
to_index = int(incoming["how"].get("to", 2)) - 1
if from_index < 0 or to_index < 0:
raise exceptions.MessageException(
"Column < 1 specified for relabel mapping file. Column count starts at 1."
)
new_labels_dict = {}
source_new_label = (line.strip().split("\t") for line in new_labels)
for i, label_pair in enumerate(source_new_label):
if not len(label_pair) == 2:
raise exceptions.MessageException(
f"Relabel mapping file line {i + 1} contains {len(label_pair)} columns, but 2 are required"
)
new_labels_dict[label_pair[0]] = label_pair[1]
try:
for i, line in enumerate(new_labels, 1):
cols = line.strip().split("\t")
if how_type == "tabular" and len(cols) != 2:
raise exceptions.MessageException(
f"Relabel mapping file contains {len(cols)} columns on line {i}, but 2 are required"
)
new_labels_dict[cols[from_index]] = cols[to_index]
except IndexError:
raise exceptions.MessageException(
f"Specified column number > number of columns [{len(cols)}] on line {i} of relabel mapping file."
)
for dce in hdca.collection.elements:
dce_object = dce.element_object
element_identifier = dce.element_identifier
default = None if strict else element_identifier
new_label = new_labels_dict.get(element_identifier, default)
if not new_label:
raise exceptions.MessageException(f"Failed to find new label for identifier [{element_identifier}]")
raise exceptions.MessageException(f"Failed to find original identifier [{element_identifier}]")
add_copied_value_to_new_elements(new_label, dce_object)
else:
# If new_labels_dataset_assoc is not a two-column tabular dataset we label with the current line of the dataset
Expand Down
65 changes: 55 additions & 10 deletions lib/galaxy/tools/relabel_from_file.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<tool id="__RELABEL_FROM_FILE__"
name="Relabel identifiers"
version="1.0.0"
version="1.1.0"
tool_type="relabel_from_file">
<description></description>
<type class="RelabelFromFileTool" module="galaxy.tools" />
Expand All @@ -13,16 +13,23 @@
<param type="data_collection" name="input" label="Input Collection" help="collection to change identifiers in"/>
<conditional name="how">
<param type="select" name="how_select" label="How should the new labels be specified?">
<option value="txt">Using lines in a simple text file.</option>
<option value="tabular">Map original identifiers to new ones using a two column table.</option>
<option value="txt">Use lines in a simple text file as new identifiers.</option>
<option value="tabular">Map original identifiers to new ones using a two-column table.</option>
<option value="tabular_extended">Use any two columns of a table for identifier mapping.</option>
</param>
<when value="txt">
<param type="data" name="labels" format="txt" label="New Identifiers" />
<param name="strict" type="boolean" label="Ensure strict mapping" help="If selected, the target file must contain exactly the correct number of lines." truevalue="true" falsevalue="false" />
<param name="strict" type="boolean" label="Ensure strict mapping" help="If selected, the text file must contain exactly the correct number of lines." truevalue="true" falsevalue="false" />
</when>
<when value="tabular">
<param type="data" name="labels" format="tabular" label="New Identifiers" />
<param name="strict" type="boolean" label="Ensure strict mapping" help="If checked, the target file must contain exactly the correct number of lines and each input identifier must match exactly one element of the input collection." truevalue="true" falsevalue="false" />
<param type="data" name="labels" format="tabular" label="Identifier mapping" />
<param name="strict" type="boolean" label="Ensure strict mapping" help="If checked, the table must contain exactly the correct number of lines and each original identifier must match exactly one element of the input collection." truevalue="true" falsevalue="false" />
</when>
<when value="tabular_extended">
<param type="data" name="labels" format="tabular" label="Identifier mapping" />
<param name="from" type="data_column" data_ref="labels" value="1" label="Column with current identifiers"/>
<param name="to" type="data_column" data_ref="labels" value="2" label="Column with new identifiers"/>
<param name="strict" type="boolean" label="Ensure strict mapping" help="If checked, the table must contain exactly the correct number of lines and each original identifier must match exactly one element of the input collection." truevalue="true" falsevalue="false" />
</when>
</conditional>
</inputs>
Expand Down Expand Up @@ -87,6 +94,37 @@
</element>
</output_collection>
</test>
<!-- test non-default columns -->
<test>
<param name="input">
<collection type="list:paired">
<element name="i1">
<collection type="paired">
<element name="forward" value="simple_line.txt" />
<element name="reverse" value="simple_line_alternative.txt" />
</collection>
</element>
</collection>
</param>
<param name="how_select" value="tabular_extended" />
<param name="labels" value="new_labels_3.txt" ftype="tabular" />
<param name="from" value="3" />
<param name="to" value="1" />
<output_collection name="output" type="list:paired">
<element name="new_i1">
<element name="forward">
<assert_contents>
<has_text_matching expression="^This is a line of text.\n$" />
</assert_contents>
</element>
<element name="reverse">
<assert_contents>
<has_text_matching expression="^This is a different line of text.\n$" />
</assert_contents>
</element>
</element>
</output_collection>
</test>
<!-- test strict -->
<test expect_failure="true">
<param name="input">
Expand Down Expand Up @@ -147,9 +185,9 @@ Changes identifiers of datasets within a collection using identifiers from a sup
Description
===========
New identifiers can be supplied as either a simple list or a tab-delimited file mapping old identifier to the new ones. This is controlled using **How should the new identifiers be specified?** drop-down:
New identifiers can be supplied as either a simple list or a tab-delimited file mapping old identifiers to new ones. This is controlled using the **How should the new labels be specified?** drop-down:
**Using lines in a simple text file**
**Use lines in a simple text file as new identifiers**
Given a collection::
Expand All @@ -175,15 +213,15 @@ the tool will return::
-------
**Map original identifiers to new ones using a two column table**
**Map original identifiers to new ones using a two-column table**
Given a collection::
Collection: [Dataset A]
[Dataset B]
[Dataset X]
and a simple text file (you can see that entries do not have to be in order here)::
and a simple tabular file (you can see that entries do not have to be in order here)::
B Beta
X Gamma
Expand All @@ -195,6 +233,13 @@ the tool will return::
[Dataset Beta]
[Dataset Gamma]
-------
**Use any two columns of a table for identifier mapping**
This mode works exactly like the two-column table mode, except that the tabular mapping file may have more than two columns, and you specify which columns hold the original and the new element identifiers, respectively.
-------
.. class:: warningmark
Expand Down
1 change: 1 addition & 0 deletions test-data/new_labels_3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
new_i1 to_ignore i1 to_ignore

0 comments on commit 6da90b3

Please sign in to comment.