Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented tests for extract features part #42

Merged
merged 30 commits into from
Jun 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8d26b68
Added languageserver to dev environment
hechth Feb 18, 2022
083674d
Added additional dependencies
hechth Feb 18, 2022
15fe9df
Moved extract_features function to separate file
hechth Feb 18, 2022
576cf57
refactored proc.cdf and pulled out plotting function
hechth Feb 18, 2022
e97570a
reformat
hechth Feb 18, 2022
f663e9a
added comment for possible refactoring
hechth Feb 18, 2022
7bed94a
refactored adaptive.bin
hechth Feb 18, 2022
646b55b
Added additional dependencies
hechth Feb 18, 2022
ae9ddf2
Moved extract_features function to separate file
hechth Feb 18, 2022
4513f9a
refactored proc.cdf and pulled out plotting function
hechth Feb 18, 2022
05f9b24
reformat
hechth Feb 18, 2022
32138a5
added comment for possible refactoring
hechth Feb 18, 2022
4273900
refactored adaptive.bin
hechth Feb 18, 2022
d310dd1
Extracted index variables
hechth Feb 22, 2022
34c81a6
merge
Jun 10, 2022
5983703
Implemented test for proc.cdf
Jun 10, 2022
30926f8
Added user name to devcontainer
Jun 10, 2022
9f2707c
moved plotting to separate file
Jun 10, 2022
3b9e0b1
updated devcontainer with git init
Jun 10, 2022
7d8f1b1
updated devcontainer
Jun 10, 2022
59cfb93
Updated r editor support & devcontainer settings
hechth Jun 10, 2022
aab9a19
reformat
Jun 10, 2022
174a6f4
added roxygen stub
hechth Jun 13, 2022
f511ba9
Added test for extract features
hechth Jun 13, 2022
72ed4c2
added parquet viewer to devcontainer extensions
hechth Jun 13, 2022
ac818c5
added dependency to datacomparer
Jun 13, 2022
d9c221a
reformatting
Jun 13, 2022
3eadd55
updated process.cdf
hechth Jun 13, 2022
ffa99b1
Implemented test for "extract features" function
hechth Jun 13, 2022
998a1c2
added test for prof to features
Jun 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,29 @@
"features": {
"git": {
"version": "latest",
"ppa": false
"ppa": false,
},
"common": {
"installZsh": "false",
"username": "false",

}
},

// Add the IDs of extensions you want installed when the container is created.
"extensions": ["ikuyadeu.r"],
"extensions": [
"reditorsupport.r",
"rdebugger.r-debugger",
"eamodio.gitlens",
"mutantdino.resourcemonitor",
"meakbiyik.vscode-r-test-adapter",
"dvirtz.parquet-viewer"
],
"settings": {
"r.rterm.linux": "/bin/local/miniconda/envs/recetox-aplcms/bin/radian",
"r.rpath.linux": "/bin/local/miniconda/envs/recetox-aplcms/bin/R"
},

"onCreateCommand": "apt update && apt install -y locales && locale-gen en_US.UTF-8",
"onCreateCommand": "apt update && apt install -y locales && locale-gen en_US.UTF-8 && git config --global --add safe.directory /workspaces/recetox-aplcms",

"postAttachCommand": "/bin/bash"
}
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ NeedsCompilation: no
Suggests:
arrow, dataCompareR, testthat (>= 3.0.0)
Config/testthat/edition: 3
RoxygenNote: 7.2.0
347 changes: 188 additions & 159 deletions R/adaptive.bin.R
Original file line number Diff line number Diff line change
@@ -1,162 +1,191 @@
adaptive.bin <-
function(x, min.run, min.pres, tol, baseline.correct, weighted=FALSE)
{
masses<-x$masses
labels<-x$labels
intensi<-x$intensi
times<-x$times

rm(x)

base.curve<-unique(times)
base.curve<-base.curve[order(base.curve)]
base.curve<-cbind(base.curve, base.curve*0)

curr.order<-order(masses)
intensi<-intensi[curr.order]
labels<-labels[curr.order]
masses<-masses[curr.order]

rm(curr.order)

cat(c("m/z tolerance is: ", tol,"\n"))

l<-length(masses)

curr.bw<-0.5*tol*max(masses)
if(weighted)
{
all.mass.den<-density(masses, weights=intensi/sum(intensi), bw=curr.bw, n=2^min(15, floor(log2(l))-2))
}else{
all.mass.den<-density(masses, bw=curr.bw, n=2^min(15, floor(log2(l))-2))
}
all.mass.turns<-find.turn.point(all.mass.den$y)
all.mass.vlys<-all.mass.den$x[all.mass.turns$vlys]
breaks<-c(0, unique(round(approx(masses,1:l,xout=all.mass.vlys,rule=2,ties='ordered')$y))[-1])

grps<-masses*0 # this is which peak group the signal belongs to, not time group

min.lab<-min(labels)
max.lab<-max(labels)
times<-unique(labels)
times<-times[order(times)]
curr.label<-1
min.count.run<-min.run*length(times)/(max(times)-min(times))
time.range<-diff(range(times))
aver.time.range<-(max(labels)-min(labels))/length(times)

newprof<-matrix(0, nrow=length(masses),ncol=4)
height.rec<-matrix(0, nrow=length(masses),ncol=3)
prof.pointer<-1
height.pointer<-1

for(i in 1:(length(breaks)-1))
{
this.labels<-labels[(breaks[i]+1):breaks[i+1]]
if(length(unique(this.labels)) >= min.count.run*min.pres)
{
this.masses<-masses[(breaks[i]+1):breaks[i+1]]
this.intensi<-intensi[(breaks[i]+1):breaks[i+1]]

curr.order<-order(this.labels)
this.masses<-this.masses[curr.order]
this.intensi<-this.intensi[curr.order]
this.labels<-this.labels[curr.order]

this.bw=0.5*tol*median(this.masses)
if(weighted)
{
mass.den<-density(this.masses, weights=this.intensi/sum(this.intensi), bw=this.bw)
}else{
mass.den<-density(this.masses, bw=this.bw)
}
mass.den$y[mass.den$y < min(this.intensi)/10]<-0
mass.turns<-find.turn.point(mass.den$y)
mass.pks<-mass.den$x[mass.turns$pks]
mass.vlys<-c(-Inf, mass.den$x[mass.turns$vlys], Inf)


for(j in 1:length(mass.pks))
{
mass.lower<-max(mass.vlys[mass.vlys < mass.pks[j]])
mass.upper<-min(mass.vlys[mass.vlys > mass.pks[j]])

if(length(mass.pks) == 1) mass.lower<-mass.lower-1
mass.sel<-which(this.masses > mass.lower & this.masses <= mass.upper)

if(length(mass.sel) > 0)
{
that.labels<-this.labels[mass.sel]
that.masses<-this.masses[mass.sel]
that.intensi<-this.intensi[mass.sel]

that.merged<-combine.seq.3(that.labels, that.masses, that.intensi)
if(nrow(that.merged)==1)
{
new.merged<-that.merged
}else{
new.merged<-that.merged[order(that.merged[,1]),]
}

that.labels<-new.merged[,2]
that.masses<-new.merged[,1]
that.intensi<-new.merged[,3]
that.range<-diff(range(that.labels))

if(that.range > 0.5*time.range & length(that.labels) > that.range*min.pres & length(that.labels)/(diff(range(that.labels))/aver.time.range) > min.pres)
{
that.intensi<-rm.ridge(that.labels, that.intensi, bw=max(10*min.run, that.range/2))

that.sel<-which(that.intensi != 0)
that.labels<-that.labels[that.sel]
that.masses<-that.masses[that.sel]
that.intensi<-that.intensi[that.sel]
}
that.n<-length(that.masses)
newprof[prof.pointer:(prof.pointer+that.n-1),]<-cbind(that.masses, that.labels, that.intensi, rep(curr.label, that.n))
prof.pointer<-prof.pointer+that.n
height.rec[height.pointer,]<-c(curr.label,that.n, max(that.intensi))
height.pointer<-height.pointer+1
curr.label<-curr.label+1
}
}
}else{
if(runif(1)<0.05)
{
this.masses<-masses[(breaks[i]+1):breaks[i+1]]
this.intensi<-intensi[(breaks[i]+1):breaks[i+1]]
curr.order<-order(this.labels)
this.masses<-this.masses[curr.order]
this.intensi<-this.intensi[curr.order]
this.labels<-this.labels[curr.order]


that.merged<-combine.seq.3(this.labels, this.masses, this.intensi)
that.n<-nrow(that.merged)
newprof[prof.pointer:(prof.pointer+that.n-1),]<-cbind(that.merged, rep(curr.label, that.n))
prof.pointer<-prof.pointer+that.n

height.rec[height.pointer,]<-c(curr.label,that.n, max(that.merged[,3]))
height.pointer<-height.pointer+1
curr.label<-curr.label+1
}
#' Kernel density estimate of a set of m/z values.
#'
#' @param masses Numeric vector of m/z values.
#' @param tol m/z tolerance; the density bandwidth is 0.5 * tol * bw_func(masses).
#' @param weighted If TRUE, each observation is weighted by its relative intensity.
#' @param intensities Numeric vector of intensities, same length as masses.
#' @param bw_func Summary function (e.g. max or median) applied to masses to
#'   scale the bandwidth.
#' @param n Number of equally spaced points at which to estimate the density.
#' @return The density object returned by stats::density.
compute_densities <- function(masses, tol, weighted, intensities, bw_func, n = 512) {
  bw <- 0.5 * tol * bw_func(masses)
  if (weighted) {
    den <- density(masses, weights = intensities / sum(intensities), bw = bw, n = n)
  } else {
    den <- density(masses, bw = bw, n = n)
  }
  den
}

#' Locate the valleys of the overall m/z density.
#'
#' The grid size grows with the number of observations, capped at 2^15 points.
#'
#' @param tol m/z tolerance used to derive the density bandwidth.
#' @param masses Numeric vector of m/z values.
#' @param intensi Numeric vector of intensities, same length as masses.
#' @param weighted If TRUE, the density is intensity-weighted.
#' @return Numeric vector of m/z positions at the density valleys.
compute_mass_values <- function(tol, masses, intensi, weighted) {
  grid.size <- 2^min(15, floor(log2(length(masses))) - 2)
  den <- compute_densities(masses, tol, weighted, intensi, max, grid.size)
  turns <- find.turn.point(den$y)
  den$x[turns$vlys]
}

#' Convert m/z density valleys into break indices over the sorted masses.
#'
#' @param tol m/z tolerance used to derive the density bandwidth.
#' @param masses Numeric vector of m/z values (assumed sorted ascending).
#' @param intensi Numeric vector of intensities, same length as masses.
#' @param weighted If TRUE, the density is intensity-weighted.
#' @return Numeric vector of break positions into masses, starting with 0.
compute_breaks <- function(tol, masses, intensi, weighted) {
  vlys <- compute_mass_values(tol, masses, intensi, weighted)
  idx <- approx(masses, seq_along(masses), xout = vlys, rule = 2, ties = "ordered")$y
  c(0, unique(round(idx))[-1])
}

#' Adaptive binning of raw profile data into m/z groups.
#'
#' Sorts all signals by m/z, splits them at the valleys of a kernel density
#' estimate of the m/z values (compute_breaks), then splits each bin again at
#' the valleys of a per-bin density. Dense bins are merged via combine.seq.3
#' and, for long-running features, filtered via rm.ridge; sparse bins are
#' retained with a 5% random chance (note: output therefore depends on the
#' RNG state).
#'
#' @param x A list with components masses, labels, intensi and times.
#'   NOTE(review): x$times is immediately overwritten by unique(labels)
#'   below, so the input times component is effectively unused — confirm.
#' @param min.run Run-length parameter; min.count.run is derived from it as
#'   min.run * length(times) / diff(range(times)).
#' @param min.pres Minimum presence proportion a bin must satisfy
#'   (compared against min.count.run * min.pres unique scans).
#' @param baseline.correct Not referenced in this function body; kept for
#'   interface compatibility.
#' @param tol m/z tolerance used for the density bandwidths.
#' @param weighted If TRUE, densities are intensity-weighted.
#' @return A list with components: height.rec (matrix of group label, signal
#'   count, max intensity per group), masses, labels, intensi, grps (the
#'   binned profile columns), times (sorted unique scan labels), tol, and
#'   min.count.run.
adaptive.bin <- function(x,
                         min.run,
                         min.pres,
                         tol,
                         baseline.correct,
                         weighted = FALSE) {
  # unpack input, then drop it to free memory
  masses <- x$masses
  labels <- x$labels
  intensi <- x$intensi
  times <- x$times

  rm(x)

  # order all signals by m/z
  curr.order <- order(masses)
  intensi <- intensi[curr.order]
  labels <- labels[curr.order]
  masses <- masses[curr.order]

  rm(curr.order)

  cat(c("m/z tolerance is: ", tol, "\n"))

  # sorted unique scan labels; replaces the input times (see note above)
  times <- unique(labels)
  times <- times[order(times)]

  # calculate function parameters
  min.count.run <- min.run * length(times) / (max(times) - min(times))
  time.range <- diff(range(times))
  aver.time.range <- (max(labels) - min(labels)) / length(times)

  # preallocate output (trimmed to the filled rows at the end)
  newprof <- matrix(0, nrow = length(masses), ncol = 4)
  height.rec <- matrix(0, nrow = length(masses), ncol = 3)

  # init counters
  curr.label <- 1
  prof.pointer <- 1
  height.pointer <- 1

  breaks <- compute_breaks(tol, masses, intensi, weighted)

  # seq_len is safe when breaks has a single element (1:0 would iterate)
  for (i in seq_len(length(breaks) - 1)) {
    start <- breaks[i] + 1
    end <- breaks[i + 1]

    # scan labels of the signals falling into this m/z bin
    this.labels <- labels[start:end]

    if (length(unique(this.labels)) >= min.count.run * min.pres) {
      # extract m/z and intensity values for the bin
      this.masses <- masses[start:end]
      this.intensi <- intensi[start:end]

      # reorder by scan number
      curr.order <- order(this.labels)
      this.masses <- this.masses[curr.order]
      this.intensi <- this.intensi[curr.order]
      this.labels <- this.labels[curr.order]

      mass.den <- compute_densities(this.masses, tol, weighted, this.intensi, median)

      # suppress density below a tenth of the smallest intensity
      mass.den$y[mass.den$y < min(this.intensi) / 10] <- 0
      mass.turns <- find.turn.point(mass.den$y)
      mass.pks <- mass.den$x[mass.turns$pks]
      mass.vlys <- c(-Inf, mass.den$x[mass.turns$vlys], Inf)

      for (j in seq_along(mass.pks)) {
        # valley boundaries enclosing the j-th peak
        mass.lower <- max(mass.vlys[mass.vlys < mass.pks[j]])
        mass.upper <- min(mass.vlys[mass.vlys > mass.pks[j]])

        # a single peak keeps everything below the upper valley
        if (length(mass.pks) == 1) mass.lower <- mass.lower - 1

        # signals within (mass.lower, mass.upper]
        mass.sel <- which(this.masses > mass.lower & this.masses <= mass.upper)

        if (length(mass.sel) > 0) {
          that.labels <- this.labels[mass.sel]
          that.masses <- this.masses[mass.sel]
          that.intensi <- this.intensi[mass.sel]

          # merge signals sharing a scan, then order by m/z
          that.merged <- combine.seq.3(that.labels, that.masses, that.intensi)
          if (nrow(that.merged) == 1) {
            new.merged <- that.merged
          } else {
            new.merged <- that.merged[order(that.merged[, 1]), ]
          }

          that.labels <- new.merged[, 2]
          that.masses <- new.merged[, 1]
          that.intensi <- new.merged[, 3]
          that.range <- diff(range(that.labels))

          # long-running, sufficiently present features get ridge removal
          if (that.range > 0.5 * time.range &&
            length(that.labels) > that.range * min.pres &&
            length(that.labels) / (diff(range(that.labels)) / aver.time.range) > min.pres) {
            that.intensi <- rm.ridge(that.labels, that.intensi, bw = max(10 * min.run, that.range / 2))

            # drop entries zeroed by rm.ridge
            that.sel <- which(that.intensi != 0)
            that.labels <- that.labels[that.sel]
            that.masses <- that.masses[that.sel]
            that.intensi <- that.intensi[that.sel]
          }

          that.n <- length(that.masses)

          newprof[prof.pointer:(prof.pointer + that.n - 1), ] <- cbind(that.masses, that.labels, that.intensi, rep(curr.label, that.n))
          height.rec[height.pointer, ] <- c(curr.label, that.n, max(that.intensi))

          # increment counters
          prof.pointer <- prof.pointer + that.n
          height.pointer <- height.pointer + 1
          curr.label <- curr.label + 1
        }
      }
    } else {
      # sparse bin: keep it with 5% probability
      if (runif(1) < 0.05) {
        this.masses <- masses[start:end]
        this.intensi <- intensi[start:end]

        # reorder by scan number
        curr.order <- order(this.labels)
        this.masses <- this.masses[curr.order]
        this.intensi <- this.intensi[curr.order]
        this.labels <- this.labels[curr.order]

        that.merged <- combine.seq.3(this.labels, this.masses, this.intensi)
        that.n <- nrow(that.merged)

        newprof[prof.pointer:(prof.pointer + that.n - 1), ] <- cbind(that.merged, rep(curr.label, that.n))
        height.rec[height.pointer, ] <- c(curr.label, that.n, max(that.merged[, 3]))

        # increment counters
        prof.pointer <- prof.pointer + that.n
        height.pointer <- height.pointer + 1
        curr.label <- curr.label + 1
      }
    }
  }

  # trim the preallocated matrices to the rows actually filled
  newprof <- newprof[1:(prof.pointer - 1), ]
  height.rec <- height.rec[1:(height.pointer - 1), ]

  # sort by m/z, then by scan label
  newprof <- newprof[order(newprof[, 1], newprof[, 2]), ]

  raw.prof <- new("list")
  raw.prof$height.rec <- height.rec
  raw.prof$masses <- newprof[, 1]
  raw.prof$labels <- newprof[, 2]
  raw.prof$intensi <- newprof[, 3]
  raw.prof$grps <- newprof[, 4]
  raw.prof$times <- times
  raw.prof$tol <- tol
  raw.prof$min.count.run <- min.count.run

  return(raw.prof)
}
Loading