Skip to content

Commit

Permalink
stricter rules for gene names, fixes #149
Browse files Browse the repository at this point in the history
  • Loading branch information
assaron committed Jun 11, 2024
1 parent 6d2787e commit 41467e3
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 18 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: fgsea
Title: Fast Gene Set Enrichment Analysis
Version: 1.31.0
Version: 1.31.1
Authors@R: c(person("Gennady", "Korotkevich", role = "aut"),
person("Vladimir", "Sukhov", role = "aut"),
person("Nikolay", "Budin", role = "ctb"),
Expand Down
15 changes: 10 additions & 5 deletions R/fgsea.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ preparePathwaysAndStats <- function(pathways, stats, minSize, maxSize, gseaParam
stop("stats should be named")
}

# Error if stats names are NA
if (any(is.na(names(stats)))) {
stop("NAs in names(stats) are not allowed")
}

# Error for duplicate gene names
if (any(duplicated(names(stats)))) {
stop("Duplicate names(stats) are not allowed")
}

# Error if stats are non-finite
if (any(!is.finite(stats))){
stop("Not all stats values are finite numbers")
Expand All @@ -63,11 +73,6 @@ preparePathwaysAndStats <- function(pathways, stats, minSize, maxSize, gseaParam
"The order of those tied genes will be arbitrary, which may produce unexpected results.")
}

# Warning message for duplicate gene names
if (any(duplicated(names(stats)))) {
warning("There are duplicate gene names, fgsea may produce unexpected results.")
}

if (all(stats > 0) & scoreType == "std"){
warning("All values in the stats vector are greater than zero and scoreType is \"std\", ",
"maybe you should switch to scoreType = \"pos\".")
Expand Down
15 changes: 10 additions & 5 deletions R/geseca-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,19 @@ checkGesecaArgs <- function(E, pathways){
stop("E rows should be named")
}

# Error if E has non-finite values
if (any(!is.finite(E))){
stop("Not all E values are finite numbers")
# Error if any rownames(E) are NA
if (any(is.na(rownames(E)))) {
stop("NAs in rownames(E) are not allowed")
}

# Warning message for duplicate gene names
# Error for duplicate gene names
if (any(duplicated(rownames(E)))) {
warning("There are duplicate gene names, geseca may produce unexpected results.")
stop("Duplicate rownames(E) are not allowed")
}

# Error if E has non-finite values
if (any(!is.finite(E))){
stop("Not all E values are finite numbers")
}
}

Expand Down
11 changes: 9 additions & 2 deletions tests/testthat/test_geseca.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,20 @@ test_that("GESECA: works with pathways of one gene", {

})

test_that("GESECA: throws a warning when there are duplicate gene names", {
test_that("GESECA checks gene names", {
data("exampleExpressionMatrix")
data("examplePathways")
E <- exampleExpressionMatrix
rownames(E)[1] <- rownames(E)[2]

expect_warning(geseca(E=E, pathways=examplePathways, minSize=15))
expect_error(geseca(E=E, pathways=examplePathways, minSize=15))

E <- exampleExpressionMatrix
rownames(E)[1] <- NA
expect_error(geseca(E=E, pathways=examplePathways, minSize=15))

E <- unname(exampleExpressionMatrix)
expect_error(geseca(E=E, pathways=examplePathways, minSize=15))
})


Expand Down
13 changes: 10 additions & 3 deletions tests/testthat/test_gsea_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,20 @@ test_that("Ties detection in ranking works", {
minSize=10, maxSize=50, BPPARAM=SerialParam()))
})

test_that("fgseaSimple throws a warning when there are duplicate gene names", {
test_that("fgseaSimple correctly checks gene names", {
data(examplePathways)
data(exampleRanks)
exampleRanks.dupNames <- exampleRanks
names(exampleRanks.dupNames)[41] <- names(exampleRanks.dupNames)[42]

expect_warning(fgseaSimple(examplePathways, exampleRanks.dupNames, nperm=100, minSize=10, maxSize=50, nproc=1))
expect_error(fgseaSimple(examplePathways, exampleRanks.dupNames, nperm=100, minSize=10, maxSize=50, nproc=1))

ranks <- exampleRanks
names(ranks)[41] <- NA
expect_error(fgseaSimple(examplePathways, ranks, nperm=100, minSize=10, maxSize=50, nproc=1))

ranks <- unname(exampleRanks)
expect_error(fgseaSimple(examplePathways, ranks, nperm=100, minSize=10, maxSize=50, nproc=1))

})

Expand Down Expand Up @@ -167,7 +174,7 @@ test_that("fgseaSimple throws a warning when there are unbalanced gene-level sta
expect_warning(fgseaSimple(pathway, ranks, nperm = 200, minSize = 15, maxSize = 500))
})

test_that("fgseaSimple and fgseaMultilevel properly handle duplicated in gene sets", {
test_that("fgseaSimple and fgseaMultilevel properly handle duplicated genes in gene sets", {
data(exampleRanks)
data(examplePathways)

Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test_gsea_multilevel.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,13 @@ test_that("fgseaMultilevel works with zero pathways", {
})


test_that("fgseaMultilevel throws a warning when there are duplicate gene names", {
test_that("fgseaMultilevel throws an error when there are duplicate gene names", {
data(examplePathways)
data(exampleRanks)
exampleRanks.dupNames <- exampleRanks
names(exampleRanks.dupNames)[41] <- names(exampleRanks.dupNames)[42]

expect_warning(fgseaMultilevel(examplePathways, exampleRanks.dupNames, sampleSize=100, minSize=10, maxSize=50, nproc=1))
expect_error(fgseaMultilevel(examplePathways, exampleRanks.dupNames, sampleSize=100, minSize=10, maxSize=50, nproc=1))

})

Expand Down

0 comments on commit 41467e3

Please sign in to comment.