Skip to content

Commit

Permalink
Merge pull request #27 from thackl/read_docs
Browse files Browse the repository at this point in the history
improve read docs and clean-up function reference
  • Loading branch information
thackl authored Feb 1, 2021
2 parents 8f21c04 + ed441ad commit 1bfbdfe
Show file tree
Hide file tree
Showing 49 changed files with 625 additions and 177 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@ docs/
org/
R-deprecated/
data-raw/
exec/
tests/
vignettes-manual-export/
6 changes: 0 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Generated by roxygen2: do not edit by hand

S3method(activate,tbl_genome_layout)
S3method(add_clusters,gggenomes)
S3method(add_clusters,gggenomes_layout)
S3method(add_feats,gggenomes)
Expand Down Expand Up @@ -81,8 +80,6 @@ export(PositionPile)
export(PositionSixframe)
export(PositionStrand)
export(PositionStrandpile)
export(activate)
export(active)
export(add_clusters)
export(add_feats)
export(add_links)
Expand Down Expand Up @@ -120,7 +117,6 @@ export(geom_feat_note)
export(geom_feat_tag)
export(geom_feat_text)
export(geom_gene)
export(geom_gene_fast)
export(geom_gene_label)
export(geom_gene_note)
export(geom_gene_tag)
Expand Down Expand Up @@ -191,7 +187,5 @@ importFrom(jsonlite,fromJSON)
importFrom(magrittr,"%<>%")
importFrom(magrittr,"%>%")
importFrom(readr,read_tsv)
importFrom(rlang,enquo)
importFrom(rlang,quo_text)
importFrom(tidyr,unnest)
importFrom(tidyr,unnest_wider)
1 change: 1 addition & 0 deletions R/aaa.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ swap_if <- function(x, condition, ...){
#'
#' @param x vector (coerced to character)
#' @param collapse character string to separate elements.
#' @keywords internal
comma <- function(x, collapse = ","){
  # Join all elements of x into one string, separated by `collapse`.
  # With a single vector argument paste0() and paste() are equivalent.
  joined <- paste0(x, collapse = collapse)
  joined
}
Expand Down
24 changes: 0 additions & 24 deletions R/activate.R

This file was deleted.

4 changes: 3 additions & 1 deletion R/coords.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' The virtual x-start of the full length seq in the plot
#'
#' Together with the seq strand, this is sufficient to project feats
#' @keywords internal
anchor <- function(x, start, strand){
  # distance from the first base to `start`, scaled by strand_int(strand)
  offset <- (start - 1) * strand_int(strand)
  x - offset
}
Expand All @@ -9,13 +10,14 @@ anchor <- function(x, start, strand){
#'
#' @param seq_anchor the virtual x-start of the full length sequence in plot
#' space
#' @keywords internal
x <- function(start, end, strand, seq_x, seq_start, seq_strand){
  # virtual x-start of the full-length sequence
  seq_anchor <- anchor(seq_x, seq_start, seq_strand)
  # if_reverse() picks between `end` and `start - 1` depending on strand
  offset <- if_reverse(strand, end, start - 1)
  seq_anchor + offset * strand_int(seq_strand)
}

#' @rdname x
#' @keywords internal
xend <- function(start, end, strand, seq_x, seq_start, seq_strand){
a <- anchor(seq_x, seq_start, seq_strand)
b <- if_reverse(strand, start-1, end)#, end + width(start,end))
Expand Down
2 changes: 2 additions & 0 deletions R/feats.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#' @param ... passed on to `layout_seqs()`
#' @return a tbl_df with plot coordinates
#' @export
#' @keywords internal
as_feats <- function(x, seqs, ..., everything=TRUE){
# S3 generic: dispatches on the class of `x` (e.g. to as_feats.tbl_df); the
# remaining arguments are handed on to the selected method unchanged.
UseMethod("as_feats")
}
Expand Down Expand Up @@ -66,6 +67,7 @@ as_feats.tbl_df <- function(x, seqs, ..., everything=TRUE){
#' example after focusing in on a subregion. Choices are to "drop" them, "keep"
#' them or "trim" them to the subregion boundaries.
#' @param ... not used
#' @keywords internal
layout_feats <- function(x, seqs, keep="strand",
marginal=c("trim", "drop", "keep"), ...){
marginal <- match.arg(marginal)
Expand Down
10 changes: 0 additions & 10 deletions R/geom.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,6 @@ geom_seq <- function(mapping = NULL, data = seqs(),
geom_segment(mapping = mapping, data = data, arrow = arrow, ...)
}

#' Draw genes fast but not so pretty
#'
#' @param data feat_layout
#' @export
geom_gene_fast <- function(
    mapping = NULL,
    data = genes(),
    arrow = TRUE,
    nudge_by_strand = NULL,
    size = 4,
    color = "cornflowerblue",
    ...){
  # Thin wrapper: forwards everything to geom_feat(), differing only in the
  # defaults declared in the signature above.
  geom_feat(
    mapping = mapping,
    data = data,
    arrow = arrow,
    nudge_by_strand = nudge_by_strand,
    size = size,
    color = color,
    ...
  )
}

#' draw feat labels
#'
#' @export
Expand Down
2 changes: 1 addition & 1 deletion R/geom_gene_text.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Text
#' Add text to genes, features, etc.
#'
#'
#' @inheritParams ggplot2::geom_text
Expand Down
8 changes: 5 additions & 3 deletions R/gggenomes.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ gggenomes <- function(seqs=NULL, genes=NULL, feats=NULL, links=NULL, ...,
#' ggplot.default tries to `fortify(data)` and we don't want that here
#'
#' @export
#' @keywords internal
ggplot.gggenomes_layout <- function(data, mapping = aes(), ...,
environment = parent.frame()) {
if (!missing(mapping) && !inherits(mapping, "uneval")) {
Expand Down Expand Up @@ -134,15 +135,15 @@ layout_genomes <- function(seqs=NULL, genes=NULL, feats=NULL, links=NULL,
seqs <- mutate(seqs, bin_id = {{ infer_bin_id }})
}else{
if(is.null(feats) & is.null(links))
stop("Need at least one of: seqs, genes, feats or links")
abort("Need at least one of: seqs, genes, feats or links")

# infer dummy seqs
if(!is.null(feats)){
write("No seqs provided, inferring seqs from feats", stderr())
inform("No seqs provided, inferring seqs from feats")
seqs <- infer_seqs_from_feats(feats[[1]], {{infer_bin_id}}, {{infer_start}},
{{infer_end}}, {{infer_length}})
}else if(!is.null(links)){
write("No seqs or feats provided, inferring seqs from links", stderr())
inform("No seqs or feats provided, inferring seqs from links")
seqs <- infer_seqs_from_links(links[[1]], {{infer_bin_id}}, {{infer_start}},
{{infer_end}}, {{infer_length}})
}
Expand All @@ -165,6 +166,7 @@ layout_genomes <- function(seqs=NULL, genes=NULL, feats=NULL, links=NULL,
#' and error because dim(gggenome_layout) is undefined. Return dim of primary
#' table instead
#' @export
#' @keywords internal
dim.gggenomes_layout <- function(x){
  # report the dimensions of the table returned by get_seqs()
  seqs <- get_seqs(x)
  dim(seqs)
}

#' @export
Expand Down
2 changes: 2 additions & 0 deletions R/links.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#' @param x link data convertible to a link layout
#' @inheritParams as_feats
#' @export
#' @keywords internal
as_links <- function(x, seqs, ..., everything=TRUE){
# S3 generic: dispatches on the class of `x` (e.g. to as_links.tbl_df); the
# remaining arguments are handed on to the selected method unchanged.
UseMethod("as_links")
}
Expand Down Expand Up @@ -64,6 +65,7 @@ as_links.tbl_df <- function(x, seqs, ..., everything=TRUE){
#'
#' @inheritParams as_links
#' @param ... not used
#' @keywords internal
layout_links <- function(x, seqs, keep="strand", adjacent_only = TRUE,
marginal=c("trim", "drop", "keep"), ...){
marginal <- match.arg(marginal)
Expand Down
4 changes: 3 additions & 1 deletion R/pull_tracks.R
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ pull_track.gggenomes_layout <- function(.x, .track_id=1, ..., .track_type=NULL,
#' giving the position from the left/right.
#' @param track_type restrict to these types of tracks - affects position-based
#' selection
#' @param ignore_pos names of tracks to ignore when selecting by position.
#' @return The selected track_id as an unnamed string
vars_track <- function(x, track_id, track_type = c("seqs", "feats", "links"),
ignore_pos = NULL){
Expand Down Expand Up @@ -246,7 +247,8 @@ vars_track <- function(x, track_id, track_type = c("seqs", "feats", "links"),
})
}

#' Error messages for `vars_track``
#' Error messages for vars_track
#' @keywords internal
vars_track_error <- function(bad_value, track_ids, ignore_pos){
if(is_function(bad_value)) bad_value <- "<function>"
if(is.numeric(bad_value)) bad_value <- as.character(bad_value)
Expand Down
32 changes: 27 additions & 5 deletions R/read.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Swap query and subject
#' Swap query and subject in blast-like feature tables
#'
#' Swap query and subject columns in a table read with [read_feats()] or
#' [read_links()], for example, from blast searches. Swaps columns with
Expand Down Expand Up @@ -30,9 +30,18 @@ swap_query <- function(x){
x
}

#' Default column types for defined formats
#' Default column names and types for defined formats
#'
#' Intended to be used in [readr::read_tsv()]-like functions that accept a
#' `col_names` and a `col_types` argument.
#'
#' @export
#' @return a vector with default column names for the given format
#' @eval def_names_rd()
#' @describeIn def_names default column names for defined formats
#' @examples
#' # read a blast-tabular file with read_tsv
#' read_tsv(ex("emales/emales-prot-ava.o6"), col_names=def_names("blast"))
def_names <- function(format){
ff <- gggenomes_global$def_names
if(!format %in% names(ff)){
Expand All @@ -44,7 +53,8 @@ def_names <- function(format){
ff[[format]]
}

#' Default column types for defined formats

#' @describeIn def_names default column types for defined formats
#' @export
#' @return a vector with default column types for the given format
def_types <- function(format){
Expand Down Expand Up @@ -186,9 +196,9 @@ file_formats_rd <- function(){
ff <- mutate(ff, context = ifelse(duplicated(context), "", context))

ff <- str_c(sep = "\n",
"@section Defined Contexts, Formats and Extensions:",
"@section Defined contexts, formats and extensions:",
"\\preformatted{",
#sprintf("%-8s %-7s %s", "Context", "Format", "Extensions"),
#sprintf("%-9s %-12s %s", "Context", "Format", "Extensions"),
str_c(collapse = "\n",
str_glue_data(ff, '{sprintf("%-8s", context)} ',
'{sprintf("%-7s", format)} [{extension}]')),
Expand All @@ -197,6 +207,18 @@ file_formats_rd <- function(){
ff
}

def_names_rd <- function(){
  # Build a roxygen @section listing every defined format together with its
  # default column types and comma-separated default column names.
  col_names <- gggenomes_global$def_names
  col_types <- gggenomes_global$def_types

  # one preformatted row per format
  rows <- lapply(names(col_names), function(fmt){
    sprintf(" %-10s %-15s %s", fmt, col_types[[fmt]], comma(col_names[[fmt]]))
  })

  str_c(sep = "\n",
    "@section Defined formats, column types and names:",
    "\\preformatted{",
    paste0(rows, collapse="\n"),
    "}"
  )
}

is_connection <- function(x){
  # TRUE if x carries the "connection" class (file(), pipe(), stdin(), ...)
  inherits(x, what = "connection")
}

#' Read AliTV .json file
Expand Down
36 changes: 19 additions & 17 deletions R/read_feats.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,32 @@
#' different genome.
#'
#' @param files files to read. Should all be of the same format.
#' @param format If NULL, will be guessed from file extension. Else, any format
#' known to gggenomes ("gff3", "gbk", ... see [file_formats()] for full list)
#' or any suffix that maps to a known `read_<suffix>` function to be called,
#' such as [readr::read_tsv()], for example.
#' @param .id When binding output from several files, how to name the column
#' with the name of the file each record came from. Defaults to "file_id". Set
#' to "bin_id" if every file represents a different bin.
#' @param ... additional arguments passed on to the specific read function for
#' the given format.
#' @param format If NULL, guess from file extension. Else, any format known to
#' gggenomes (gff3, gbk, ... see [file_formats()] for full list) or any suffix
#' of a known `read_<suffix>` function, e.g. tsv for `readr::read_tsv()`.
#' @param .id the name of the column storing the file name each record came
#' from. Defaults to "file_id". Set to "bin_id" if every file represents a
#' different bin.
#' @param ... additional arguments passed on to the format-specific read
#' function called down the line.
#'
#' @return a gggenomes compatible feature or link tibble
#' @return A gggenomes-compatible feature or link tibble
#' @export
#' @examples
#' # read a file
#' read_feats(ex("eden-utr.gff"))
#' \dontrun{
#'
#' # read all gffs from a directory
#' read_feats(list.files("path/to/directory", "*.gff$", full.names=TRUE))
#' read_feats(list.files(ex("emales/"), "*.gff$", full.names=TRUE))
#'
#' \dontrun{
#' # read remote files
#' gbk_phages <- c(
#' PSSP7 = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/858/745/GCF_000858745.1_ViralProj15134/GCF_000858745.1_ViralProj15134_genomic.gff.gz",
#' PSSP3 = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/904/555/GCF_000904555.1_ViralProj195517/GCF_000904555.1_ViralProj195517_genomic.gff.gz")
#' PSSP7 = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/858/745/GCF_000858745.1_ViralProj15134/GCF_000858745.1_ViralProj15134_genomic.gff.gz",
#' PSSP3 = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/904/555/GCF_000904555.1_ViralProj195517/GCF_000904555.1_ViralProj195517_genomic.gff.gz")
#' read_feats(gbk_phages)
#' }
#' @describeIn read_feats read files as features mapping onto sequences
read_feats <- function(files, format=NULL, .id="file_id", ...){
if(is_connection(files))
files <- list(files) # weird things happen to pipes in vectors
Expand All @@ -54,22 +56,22 @@ read_feats <- function(files, format=NULL, .id="file_id", ...){
feats
}

#' @rdname read_feats
#' @export
#' @describeIn read_feats read files as subfeatures mapping onto other features
read_subfeats <- function(files, format=NULL, .id="file_id", ...){
  # `.id` is a named formal of this wrapper, so it is never part of `...`;
  # it has to be forwarded explicitly or a caller-supplied value is ignored
  # and read_feats() always falls back to its own default.
  feats <- read_feats(files=files, format=format, .id=.id, ...)
  # subfeatures map onto other features: relabel the seq_id columns as feat_id
  rename(feats, feat_id=seq_id, feat_id2=seq_id2)
}

#' @rdname read_feats
#' @export
#' @describeIn read_feats read files as links connecting sequences
read_links <- function(files, format=NULL, .id="file_id", ...){
  # `.id` is a named formal of this wrapper, so it is never part of `...`;
  # it has to be forwarded explicitly or a caller-supplied value is ignored
  # and read_feats() always falls back to its own default.
  feats <- read_feats(files=files, format=format, .id=.id, ...)
  # Identity rename, kept as-is: column names are unchanged.
  # NOTE(review): presumably here to fail early if seq_id/start/end are
  # missing - confirm before removing.
  rename(feats, seq_id=seq_id, start=start, end=end)
}

#' @rdname read_feats
#' @export
#' @describeIn read_feats read files as sublinks connecting features
read_sublinks <- function(files, format=NULL, .id="file_id", ...){
feats <- read_feats(files=files, format=format, ...)
rename(feats, feat_id=seq_id, start=start, end=end, feat_id2=seq_id2)
Expand Down
9 changes: 5 additions & 4 deletions R/read_seqs.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
#'
#' Read ID, description and length for each sequence from common formats
#' including FASTA, samtools/seqkit FASTA index files, and GFF3. Default columns
#' are "seq_id", "seq_desc" and "length".
#' are seq_id, seq_desc and length.
#'
#' @importFrom readr read_tsv
#' @param file fasta or .fai/.seqkit.fai fasta index
#' @export
#' @return gggenomes-compatible seqs tibble
#' @return A gggenomes-compatible sequence tibble
#' @describeIn read_seqs read seqs from files with automatic format detection
#' @examples
#' # from a fasta file
#' read_seqs(ex("emales/emales.fna"))
Expand Down Expand Up @@ -40,7 +41,7 @@ read_seqs <- function(files, format=NULL, .id="file_id", ...){
seqs
}

#' @rdname read_seqs
#' @describeIn read_seqs read seqs from a single file in fasta, gbk or gff3 format.
#' @export
read_seq_len <- function(file, col_names = def_names("seq_len"),
col_types = def_types("seq_len"), ...){
Expand All @@ -52,7 +53,7 @@ read_seq_len <- function(file, col_names = def_names("seq_len"),

}

#' @rdname read_seqs
#' @describeIn read_seqs read seqs from a single file in seqkit/samtools fai format.
#' @export
read_fai <- function(file, col_names=def_names("fai"),
col_types=def_types("fai"), ...){
Expand Down
3 changes: 3 additions & 0 deletions R/seqs.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#'
#' as_seqs(chr)
#' @export
#' @keywords internal
as_seqs <- function(x, ...){
# S3 generic: dispatches on the class of `x`; `...` is handed on to the
# selected method unchanged.
UseMethod("as_seqs")
}
Expand Down Expand Up @@ -164,6 +165,8 @@ get_seqs.gggenomes <- function(x){
get_seqs.gggenomes_layout <- function(x){
  # the layout keeps its seq tracks in the `seqs` element; return the track
  # that is itself named "seqs"
  seq_tracks <- x$seqs
  seq_tracks[["seqs"]]
}

#' @rdname get_seqs
#' @export
set_seqs <- function(x, value){
UseMethod("set_seqs")
Expand Down
Loading

0 comments on commit 1bfbdfe

Please sign in to comment.