Skip to content

Commit

Permalink
fast5 to pod5 conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
TomHarrop committed Jun 17, 2024
1 parent 604d9d9 commit ce00a56
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 5 deletions.
23 changes: 23 additions & 0 deletions tools/dorado/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
# Tool Shed metadata for the dorado tools.
# NOTE(review): the nesting below was reconstructed from a flattened copy in
# which `name`, `description` and `long_description` appeared twice at the
# same level; confirm against the repository, in particular that `type`
# belongs at the top level rather than under `suite`.
auto_tool_repositories:
  name_template: "{{ tool_id }}"
  description_template: "{{ tool_name }} from the dorado suite"
categories:
  - Sequence Analysis
description: Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads.
exclude:
  # Test reports are excluded from the uploaded repository contents.
  - tool_test_output.html
  - tool_test_output.json
homepage_url: https://github.com/nanoporetech/dorado
long_description: >
  Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads.
name: dorado
owner: galaxy-australia
remote_repository_url: https://github.com/usegalaxy-au/tools-au/tree/main/tools/dorado
suite:
  name: suite_dorado
  description: >
    Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads.
  long_description: >
    Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads.
type: unrestricted
62 changes: 58 additions & 4 deletions tools/dorado/dorado.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ calls.bam
]]></command>
<inputs>
<param name="pod5_file" type="data" format="fast5" label="Raw fast5 file"/>
<param name="model" type="select" label="Basecalling model. See the Help section for info on model names." >
<param name="pod5_file" type="data" format="fast5" label="Raw pod5 file" help="Only pod5 is supported. You can convert fast5 to pod5 with the fast5_to_pod5 tool."/>
<param name="model" type="select" label="Basecalling model. See the Help section for info on model names.">
<options from_data_table="dorado_models">
<!-- only allow models that shipped in this container -->
<filter type="static_value" column="1" value="@CONTAINER_HASH@"/>
Expand All @@ -40,6 +40,43 @@ calls.bam
</param>
<!-- Optional barcoding kit selector, passed to dorado as the kit-name argument.
     Selecting a kit enables barcode classification during basecalling. -->
<param type="select"
       argument="--kit-name"
       optional="true"
       label="Enable barcoding with the selected kit name."
       help="Reads are classified into their barcode groups during basecalling. The classification will be reflected in the read group name as well as in the BC tag of the output record.">
    <!-- Expansion kits -->
    <option value="EXP-NBD103">EXP-NBD103</option>
    <option value="EXP-NBD104">EXP-NBD104</option>
    <option value="EXP-NBD114">EXP-NBD114</option>
    <option value="EXP-NBD196">EXP-NBD196</option>
    <option value="EXP-PBC001">EXP-PBC001</option>
    <option value="EXP-PBC096">EXP-PBC096</option>
    <!-- SQK kits -->
    <option value="SQK-16S024">SQK-16S024</option>
    <option value="SQK-16S114-24">SQK-16S114-24</option>
    <option value="SQK-LWB001">SQK-LWB001</option>
    <option value="SQK-MLK111-96-XL">SQK-MLK111-96-XL</option>
    <option value="SQK-MLK114-96-XL">SQK-MLK114-96-XL</option>
    <option value="SQK-NBD111-24">SQK-NBD111-24</option>
    <option value="SQK-NBD111-96">SQK-NBD111-96</option>
    <option value="SQK-NBD114-24">SQK-NBD114-24</option>
    <option value="SQK-NBD114-96">SQK-NBD114-96</option>
    <option value="SQK-PBK004">SQK-PBK004</option>
    <option value="SQK-PCB109">SQK-PCB109</option>
    <option value="SQK-PCB110">SQK-PCB110</option>
    <option value="SQK-PCB111-24">SQK-PCB111-24</option>
    <option value="SQK-PCB114-24">SQK-PCB114-24</option>
    <option value="SQK-RAB201">SQK-RAB201</option>
    <option value="SQK-RAB204">SQK-RAB204</option>
    <option value="SQK-RBK001">SQK-RBK001</option>
    <option value="SQK-RBK004">SQK-RBK004</option>
    <option value="SQK-RBK110-96">SQK-RBK110-96</option>
    <option value="SQK-RBK111-24">SQK-RBK111-24</option>
    <option value="SQK-RBK111-96">SQK-RBK111-96</option>
    <option value="SQK-RBK114-24">SQK-RBK114-24</option>
    <option value="SQK-RBK114-96">SQK-RBK114-96</option>
    <option value="SQK-RLB001">SQK-RLB001</option>
    <option value="SQK-RPB004">SQK-RPB004</option>
    <option value="SQK-RPB114-24">SQK-RPB114-24</option>
    <!-- TWIST kits -->
    <option value="TWIST-16-UDI">TWIST-16-UDI</option>
    <option value="TWIST-96A-UDI">TWIST-96A-UDI</option>
    <!-- VSK kits -->
    <option value="VSK-PTC001">VSK-PTC001</option>
    <option value="VSK-VMK001">VSK-VMK001</option>
    <option value="VSK-VMK004">VSK-VMK004</option>
    <option value="VSK-VPS001">VSK-VPS001</option>
</param>
</inputs>
<outputs>
Expand Down Expand Up @@ -77,13 +114,30 @@ calls.bam
</assert_contents>
</output>
</test>
<!-- TODO: need a test for barcoded data. -->
<!-- Barcoding run: basecall the ten-read pod5 fixture with a kit name selected
     and adapter trimming, then check both outputs by approximate size. -->
<test expect_num_outputs="2">
    <param name="pod5_file" value="FAL00375_473bf0ed_0.ten_reads.pod5"/>
    <param name="model" value="dna_r9.4.1_e8_fast@v3.4"/>
    <param name="trim" value="adapters"/>
    <param name="kit_name" value="EXP-NBD103"/>
    <!-- Sizes are matched within a tolerance (delta) rather than exactly. -->
    <output name="out_bam">
        <assert_contents>
            <has_size value="60692" delta="6000"/>
        </assert_contents>
    </output>
    <output name="out_tsv">
        <assert_contents>
            <has_size value="1651" delta="200"/>
        </assert_contents>
    </output>
</test>
</tests>
<help><![CDATA[
Basecall raw Nanopore data using Oxford Nanopore’s open source
`dorado <https://github.com/nanoporetech/dorado/>`__ basecaller.
The input is ``pod5`` format. If you have older data in fast5 format,
you can convert them using the pod5 convert tool.
The input is pod5 format. If you have older data in fast5 format, you
can convert it using the ``fast5 to pod5`` converter tool.
Basecalling models
------------------
Expand Down
29 changes: 29 additions & 0 deletions tools/dorado/dorado_pod5_convert.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<tool id="dorado_pod5_convert" name="fast5 to pod5" description="converter for raw Oxford Nanopore data" version="@VERSION@+galaxy0" python_template_version="3.5" profile="21.05">
    <!-- Converts one fast5 dataset to pod5 by running `pod5 convert fast5`. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <!-- The input dataset is symlinked to ./input.fast5 before conversion
         (presumably so the file carries a .fast5 extension; TODO confirm).
         The converter writes output.pod5 in the working directory, which the
         output below collects via from_work_dir. -->
    <command><![CDATA[
ln -s '$fast5_in' ./input.fast5 &&
pod5 convert fast5 input.fast5 --output output.pod5
    ]]>
    </command>
    <inputs>
        <param name="fast5_in" type="data" format="fast5" label="Oxford Nanopore raw data in fast5 format"/>
    </inputs>
    <outputs>
        <!-- NOTE(review): the pod5 output is declared with format="fast5". The
             dorado basecalling tool declares its pod5 input the same way, so
             this looks deliberate (presumably no pod5 datatype is available);
             confirm before changing. -->
        <data format="fast5" name="pod5_out" label="Oxford Nanopore raw data in pod5 format" from_work_dir="output.pod5"/>
    </outputs>
    <tests>
        <!-- Converts the ten-read fast5 fixture and compares the result with
             the stored pod5 file. -->
        <test expect_num_outputs="1">
            <param name="fast5_in" value="FAL00375_473bf0ed_0.ten_reads.0_0.fast5"/>
            <output name="pod5_out" file="convert.test1.pod5"/>
        </test>
    </tests>
    <help><![CDATA[
Convert fast5 to `pod5 <https://github.com/nanoporetech/pod5-file-format>`__ for basecalling with Dorado.
    ]]></help>
</tool>
2 changes: 1 addition & 1 deletion tools/dorado/macros.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<macros>
<!-- UPDATING: pull the latest container and check the version. Update both tokens. You MUST also update the model list. See README.md for more. -->
<token name="@VERSION@">0.7.1</token>
<token name="@VERSION@">0.7.1+80da5f5</token>
<token name="@CONTAINER_HASH@">1c65eb070a9fc1d88710c4dc09b06541f96fdd28</token>
<xml name="requirements">
<requirements>
Expand Down
Binary file not shown.

0 comments on commit ce00a56

Please sign in to comment.