Skip to content

Commit

Permalink
Add remove_rare_or_common_geno() to build_gene_genotype_from_snps() s…
Browse files Browse the repository at this point in the history
…o that ancestral reconstruction doesn't fail and we only test genotypes for which convergence is remotely possible. Update unit tests to reflect this change.
  • Loading branch information
Katie Saund committed Jul 11, 2020
1 parent 56afa57 commit eb96f06
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 18 deletions.
17 changes: 11 additions & 6 deletions R/group_genotypes.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
#' Build grouped genotype
#'
#' @description Build presence/absence of the grouped genotypes (e.g. gene) from
#' the presence/absence of the ungrouped genotypes (e.g. snps).
#' the presence/absence of the ungrouped genotypes (e.g. snps). Remove columns
#' (genes) where the genotype is too rare or common (presence = 0, 1, N, N-1).
#'
#' @param geno Matrix. Columns are genotypes. Rows are isolates.
#' @param gene_to_snp_lookup_table Matrix. 2 columns. 1st column are the
#' ungrouped genotypes 2nd column are the grouped genotypes. The table's 1st
#' column must contain only genotypes that are found in the row.names(geno).
#' @param tr Phylogenetic tree.
#'
#' @return samples_by_genes. Matrix.
#'
#' @noRd
build_gene_genotype_from_snps <- function(geno, gene_to_snp_lookup_table){
build_gene_genotype_from_snps <- function(geno, gene_to_snp_lookup_table, tr){
# Check input ----------------------------------------------------------------
check_class(geno, "matrix")
check_class(gene_to_snp_lookup_table, "matrix")
Expand All @@ -38,8 +40,10 @@ build_gene_genotype_from_snps <- function(geno, gene_to_snp_lookup_table){

samples_by_genes <- samples_by_genes > 0
class(samples_by_genes) <- "numeric"

samples_by_genes <- remove_rare_or_common_geno(samples_by_genes, tr)
# Return output --------------------------------------------------------------
return(samples_by_genes)
return(samples_by_genes$mat)
}

#' Remove rare and common genotypes from grouped genotypes
Expand All @@ -51,6 +55,7 @@ build_gene_genotype_from_snps <- function(geno, gene_to_snp_lookup_table){
#' @param geno Matrix. Binary. Nrow = Ntip(tr). Ncol = number of original
#' genotypes.
#' @param lookup Matrix. Ncol = 2. Nrow = genotypes with group assignments.
#' @param tr Tree.
#'
#' @return List of four objects:
#' \describe{
Expand All @@ -62,7 +67,7 @@ build_gene_genotype_from_snps <- function(geno, gene_to_snp_lookup_table){
#' \item{genotype.}{Matrix.}
#' }
#' @noRd
prepare_grouped_genotype <- function(geno, lookup){
prepare_grouped_genotype <- function(geno, lookup, tr){
# Check input ----------------------------------------------------------------
check_dimensions(lookup, min_rows = 1, exact_cols = 2, min_cols = 2)
check_dimensions(geno, min_cols = 1, min_rows = 1)
Expand Down Expand Up @@ -99,7 +104,7 @@ prepare_grouped_genotype <- function(geno, lookup){
}

# Do the grouping step ----
grouped_genotype <- build_gene_genotype_from_snps(genotype, gene_snp_lookup)
grouped_genotype <- build_gene_genotype_from_snps(genotype, gene_snp_lookup, tr)

# Return output ----
results <- list("snps_per_gene" = snps_per_gene,
Expand Down Expand Up @@ -186,7 +191,7 @@ prepare_genotype <- function(group_logical, geno, tr, lookup){
#
# Function -------------------------------------------------------------------
if (group_logical) {
prepped_geno <- prepare_grouped_genotype(geno, lookup)
prepped_geno <- prepare_grouped_genotype(geno, lookup, tr)
} else {
prepped_geno <- prepare_ungrouped_genotype(geno, tr)
}
Expand Down
31 changes: 19 additions & 12 deletions tests/testthat/test_group_genotypes.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ test_that("prepare_genotype gives expected genotype for a grouped input", {

# Test
# Expected output matrix
expected_geno <- temp_geno[, c(1, 2, 5, 6, 7)]
expected_geno[, 5] <- rowSums(temp_geno[,7:8])
colnames(expected_geno) <- paste0("GENE", c(1, 2, 5, 6, 7))
expected_geno <- temp_geno[, c(1, 5, 6, 7)]
expected_geno[, 4] <- rowSums(temp_geno[, 7:8])
colnames(expected_geno) <- paste0("GENE", c(1, 5, 6, 7))
expect_equal(temp_result$genotype, expected_geno)
expect_equal(temp_result$gene_snp_lookup, temp_lookup[c(1, 2, 5, 6, 7, 8), ])
expect_equal(as.numeric(unname(temp_result$snps_per_gene)), c(1, 1, 1, 1, 2))
Expand Down Expand Up @@ -211,12 +211,12 @@ test_that("prepare_grouped_genotype works for valid inputs", {
temp_lookup[, 2] <-
c("GENE1", "GENE2", "GENE3", "GENE4", "GENE5", "GENE6", "GENE7", "GENE7")

temp_result <- prepare_grouped_genotype(temp_geno, temp_lookup)
temp_result <- prepare_grouped_genotype(temp_geno, temp_lookup, temp_tree)

# Test
expected_geno <- temp_geno[, c(1, 2, 5, 6, 7)]
expected_geno[, 5] <- rowSums(temp_geno[,7:8])
colnames(expected_geno) <- paste0("GENE", c(1, 2, 5, 6, 7))
expected_geno <- temp_geno[, c(1, 5, 6, 7)]
expected_geno[, 4] <- rowSums(temp_geno[,7:8])
colnames(expected_geno) <- paste0("GENE", c(1, 5, 6, 7))
expect_equal(temp_result$genotype, expected_geno)
expect_equal(temp_result$gene_snp_lookup, temp_lookup[c(1, 2, 5, 6, 7, 8), ])
expect_equal(as.numeric(unname(temp_result$snps_per_gene)), c(1, 1, 1, 1, 2))
Expand All @@ -227,22 +227,29 @@ test_that("prepare_grouped_genotype works for valid inputs", {
# test build_gene_genotype_from_snps
test_that("build_gene_genotype_from_snps works for valid inputs", {
# Set up
ntip <- 4
temp_geno <- matrix(c(0, 1),
ncol = 4,
nrow = 3)
nrow = ntip)
temp_geno[2, 1] <- temp_geno[2, 3] <- temp_geno[4, 2] <- 0
temp_geno[1, 3] <- 1

colnames(temp_geno) <- c("a", "b", "d", "h")
row.names(temp_geno) <- c("sample1", "sample2", "sample3")
row.names(temp_geno) <- c("sample1", "sample2", "sample3", "sample4")
temp_key <- matrix(c(letters[1:8],
c(rep("One", 3), rep("Two", 3), rep("Three", 2))),
ncol = 2,
nrow = 8)
colnames(temp_key) <- c("snp", "gene")
temp_key <- temp_key[temp_key[, 1] %in% colnames(temp_geno),, drop = FALSE]
results <- build_gene_genotype_from_snps(temp_geno, temp_key)
temp_tree <- ape::rcoal(ntip)
temp_tree$node.label <- c(100, 100, 100)
temp_tree$tip.label <- row.names(temp_geno)
results <- build_gene_genotype_from_snps(temp_geno, temp_key, temp_tree)

expected_results <- matrix(c(1, 1, 1, 0, 1, 0, 1, 0, 1), nrow = 3, ncol = 3)
expected_results <- temp_geno[ , c(1, 3:4)]
expected_results[2, 1] <- 1
colnames(expected_results) <- c("One", "Two", "Three")
row.names(expected_results) <- c("sample1", "sample2", "sample3")

# Test
expect_identical(results, expected_results)
Expand Down

0 comments on commit eb96f06

Please sign in to comment.