Skip to content

Commit

Permalink
Merge pull request #136 from TomHarrop/plot_ragtag_paf
Browse files Browse the repository at this point in the history
Plot ragtag paf
  • Loading branch information
TomHarrop authored Sep 25, 2024
2 parents 6c1d2ac + e8829cf commit 26f6433
Show file tree
Hide file tree
Showing 6 changed files with 505 additions and 0 deletions.
12 changes: 12 additions & 0 deletions tools/plot_ragtag_paf/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
categories:
- Assembly
- Visualization
description: Plot RagTag output to compare query contigs against the reference
exclude:
- tool_test_output.html
- tool_test_output.json
homepage_url: http://github.com/usegalaxy-au/tools-au
long_description: Plot RagTag output to compare query contigs against the reference
name: plot_ragtag_paf
owner: galaxy-australia
remote_repository_url: https://github.com/usegalaxy-au/tools-au
257 changes: 257 additions & 0 deletions tools/plot_ragtag_paf/plot_ragtag_paf.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
#!/usr/bin/env Rscript

options(
show.error.messages = FALSE,
error = function() {
cat(geterrmessage(), file = stderr())
q("no", 1, FALSE)
}
)

library(data.table)
library(ggplot2)
library(pafr)
library(viridisLite)
library(yaml)

args <- commandArgs(trailingOnly = TRUE)

#############
# FUNCTIONS #
#############


# Adjust the PAF coordinates to the continuous x-axis
adjust_coords <- function(qname, qstart, qend, tname, tstart, tend) {
my_qstart <- lookup_qstart(qname)
my_tstart <- lookup_tstart(tname)
return(
data.table(
adj_qstart = qstart + my_qstart,
adj_qend = qend + my_qstart,
adj_tstart = tstart + my_tstart,
adj_tend = tend + my_tstart
)
)
}

# calculate spacing between contigs so the ref and query take up the same x-axis
# space
get_padding <- function(paf) {
tlen_sum <- unique(paf, by = "tname")[, sum(tlen)]
tgaps_total <- paf[, length(unique(tname))]

qlen_sum <- unique(paf, by = "qname")[, sum(qlen)]
qgaps_total <- paf[, length(unique(qname))]

# the minumum gap is going to be one tenth of the x-axis space
if (tlen_sum > qlen_sum) {
min_gap <- (tlen_sum * gap_size) / tgaps_total
full_tlen <- tlen_sum + (tgaps_total * min_gap)

total_qpadding <- full_tlen - qlen_sum
return(
c(
"t_padding" = min_gap,
"q_padding" = total_qpadding / qgaps_total
)
)
} else if (tlen_sum < qlen_sum) {
min_gap <- (qlen_sum * gap_size) / qgaps_total
full_qlen <- qlen_sum + (qgaps_total * min_gap)

total_tpadding <- full_qlen - tlen_sum

return(
c(
"t_padding" = total_tpadding / tgaps_total,
"q_padding" = min_gap
)
)
} else {
min_gap <- (tlen_sum * gap_size) / tgaps_total
return(
c(
"t_padding" = min_gap,
"q_padding" = min_gap
)
)
}
}

# offsets for the adjusted coordinates
lookup_tstart <- function(x) {
return(unique(tstarts[tname == x, shift_tstart]))
}


lookup_qstart <- function(x) {
return(unique(qstarts[qname == x, shift_qstart]))
}


###########
# GLOBALS #
###########

# PAF column spec
sort_columns <- c("tname", "tstart", "tend", "qname", "qstart", "qend")

config_file <- args[1]
config <- yaml.load_file(config_file)
typed_config <- lapply(config, type.convert, as.is = TRUE)
list2env(typed_config, envir = .GlobalEnv)

########
# MAIN #
########

# read the data
agp <- fread(agp_file, fill = TRUE, skip = 2)[!V5 %in% c("N", "U")]
raw_paf <- read_paf(paf_file)
paf_dt <- data.table(raw_paf)

# calculate spacing
padding <- get_padding(paf_dt[tp == "P" & nmatch >= min_nmatch])

# order the reference contigs
paf_dt[, tname := factor(tname, levels = gtools::mixedsort(unique(tname)))]
setkeyv(paf_dt, cols = sort_columns)

# generate continuous reference coordinates
tpaf <- unique(paf_dt, by = "tname")
tpaf[, pad_tstart := shift(cumsum(tlen + padding[["t_padding"]]), 1, 0)]
tpaf[, shift_tstart := pad_tstart + (padding[["t_padding"]] / 2)]
tpaf[, pad_tend := shift_tstart + tlen]

# order the query contigs by their position in the AGP file
query_order <- agp[, V6]

query_paf <- paf_dt[qname %in% query_order & tp == "P" & nmatch >= min_nmatch]
subset_query_order <- query_order[query_order %in% query_paf[, qname]]

# Map query contigs onto a universal x-scale
query_paf[, qname := factor(qname, levels = subset_query_order)]
setkeyv(query_paf, cols = sort_columns)

qpaf <- unique(query_paf, by = "qname")
qpaf[, pad_qstart := shift(cumsum(qlen + padding[["q_padding"]]), 1, 0)]
qpaf[, shift_qstart := pad_qstart + (padding[["q_padding"]] / 2)]
qpaf[, pad_qend := shift_qstart + qlen]

# generate offsets for the alignment records
tstarts <- unique(tpaf[, .(tname, shift_tstart)])
qstarts <- unique(qpaf[, .(qname, shift_qstart)])

# adjust the alignment coordinates
paf_dt[,
c(
"adj_qstart",
"adj_qend",
"adj_tstart",
"adj_tend"
) := adjust_coords(qname, qstart, qend, tname, tstart, tend),
by = .(qname, qstart, qend, tname, tstart, tend)
]

# generate polygons. P is for primary alignments only
polygon_y_bump <- 0.017 # account for contig thickness
paf_polygons <- paf_dt[
tp == "P" & nmatch >= min_nmatch,
.(
x = c(adj_tstart, adj_qstart, adj_qend, adj_tend),
y = c(
t_y + polygon_y_bump,
q_y - polygon_y_bump,
q_y - polygon_y_bump,
t_y + polygon_y_bump
),
id = paste0("polygon", .GRP)
),
by = .(qname, qstart, qend, tname, tstart, tend)
]

# set up plot
total_height <- (q_y - t_y) * 1.618
y_axis_space <- (total_height - (q_y - t_y)) / 2
middle_x <- tpaf[1, shift_tstart] + tpaf[.N, pad_tend] / 2



all_contig_names <- c(tpaf[, unique(tname)])
all_colours <- viridis(
length(all_contig_names) + palette_space + 1
)
names(all_colours) <- c(
"query",
rep("blank", palette_space),
all_contig_names
)

# Plot the ideogram with ribbons connecting the two sets of contigs
gp <- ggplot() +
theme_void(base_family = "Lato", base_size = 12) +
scale_fill_manual(
values = all_colours, guide = "none"
) +
scale_colour_manual(
values = all_colours, guide = "none"
) +
geom_polygon(
data = paf_polygons,
aes(
x = x, y = y, group = id, fill = tname
), alpha = 0.5
) +
geom_segment(
data = tpaf,
aes(
x = shift_tstart,
xend = pad_tend,
colour = tname
),
y = t_y,
linewidth = 5,
lineend = "butt"
) +
geom_segment(
data = qpaf,
aes(
x = shift_qstart,
xend = pad_qend
),
colour = all_colours[["query"]],
y = q_y,
linewidth = 5,
lineend = "butt"
) +
ylim(
t_y - y_axis_space,
q_y + y_axis_space
) +
annotate(
geom = "text",
label = "Query contigs",
x = middle_x,
y = q_y + (y_axis_space / 3),
hjust = 0.5,
vjust = 0.5
) +
annotate(
geom = "text",
label = "Reference contigs",
x = middle_x,
y = t_y - (y_axis_space / 3),
hjust = 0.5,
vjust = 0.5
)

ggsave(plot_file,
gp,
width = plot_width,
height = plot_height,
units = "mm",
device = cairo_pdf
)

sessionInfo()
78 changes: 78 additions & 0 deletions tools/plot_ragtag_paf/plot_ragtag_paf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
<tool id="plot_ragtag_paf" name="Plot RagTag output" version="0.0.1">
<description>to compare query contigs to the reference</description>
<requirements>
<requirement type="package" version="0.0.2">r-pafr</requirement>
<requirement type="package" version="0.4.2">r-viridislite</requirement>
<requirement type="package" version="1.15.4">r-data.table</requirement>
<requirement type="package" version="2.3.10">r-yaml</requirement>
<requirement type="package" version="3.5.1">r-ggplot2</requirement>
<requirement type="package" version="3.9.5">r-gtools</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
Rscript
'${__tool_directory__}/plot_ragtag_paf.R'
'${plot_config}'
]]></command>
<configfiles>
<configfile name="plot_config">
agp_file: '${input_agp}'
paf_file: '${input_paf}'
plot_file: 'ragtag.pdf'
fontsize: '${plot_params.fontsize}'
gap_size: '${plot_params.gap_size}'
min_nmatch: '${min_nmatch}'
palette_space: '${plot_params.palette_space}'
plot_height: '${plot_params.plot_height}'
plot_width: '${plot_params.plot_width}'
q_y: '${plot_params.q_y}'
t_y: '${plot_params.t_y}'
</configfile>
</configfiles>
<inputs>
<param format="paf" name="input_paf" label="PAF output from RagTag" type="data"/>
<param format="agp" name="input_agp" label="AGP output from RagTag" type="data"/>
<param type="integer" name="min_nmatch" label="Minimum alignment length to include in plot" value="20000"/>
<section name="plot_params" title="Plot parameters">
<param type="integer" name="plot_width" label="Plot width (mm)" value="254"/>
<param type="integer" name="plot_height" label="Plot height (mm)" value="191"/>
<param type="integer" name="fontsize" label="Fontsize (pt)" value="12"/>
<param type="integer" name="palette_space" label="Number of unused colours in the colour palette between the query contig colour and the colour of the first reference contig." help="Try a higher value if the query contigs look too similar to the reference contigs." value="4"/>
<param type="float" min="0" max="1" name="gap_size" label="Total length of gaps between contigs relative to the total assembly length." help="Try a higher value if the contigs look too close together." value="0.1"/>
<param type="integer" name="t_y" label="Position of reference contigs on the y-axis" value="1"/>
<param type="integer" name="q_y" label="Position of query contigs on the y-axis" value="2"/>
</section>
</inputs>
<outputs>
<data name="plot" format="pdf" label="${tool.name} on ${on_string}" from_work_dir="ragtag.pdf"/>
</outputs>
<tests>
<test expect_num_outputs="1">
<param name="input_paf" ftype="paf.gz" value="ragtag.paf.gz"/>
<param name="input_agp" ftype="agp" value="ragtag.agp"/>
<output name="plot">
<assert_contents>
<has_size size="8758" delta="1000" />
</assert_contents>
</output>
</test>
</tests>
<help>
Accepts the PAF and AGP produced by the RagTag Scaffold and draws a plot
showing the alignment of the query contigs to the reference contigs.

.. figure:: $PATH_TO_IMAGES/ragtag.png
:alt: RagTag plot

</help>
<citations>
<citation type="bibtex">
@misc{plot_ragtag_paf,
title = {plot_ragtag_paf},
author = {Harrop, Tom},
year = {2024},
organization = {Galaxy Australia},
url = {https://github.com/usegalaxy-au/tools-au},
}
</citation>
</citations>
</tool>
Binary file added tools/plot_ragtag_paf/static/images/ragtag.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 26f6433

Please sign in to comment.