diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 55069af9..3a49acbf 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -12,19 +12,29 @@ "features": { "git": { "version": "latest", - "ppa": false + "ppa": false, }, "common": { "installZsh": "false", "username": "false", - } }, // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ikuyadeu.r"], + "extensions": [ + "reditorsupport.r", + "rdebugger.r-debugger", + "eamodio.gitlens", + "mutantdino.resourcemonitor", + "meakbiyik.vscode-r-test-adapter", + "dvirtz.parquet-viewer" + ], + "settings": { + "r.rterm.linux": "/bin/local/miniconda/envs/recetox-aplcms/bin/radian", + "r.rpath.linux": "/bin/local/miniconda/envs/recetox-aplcms/bin/R" + }, - "onCreateCommand": "apt update && apt install -y locales && locale-gen en_US.UTF-8", + "onCreateCommand": "apt update && apt install -y locales && locale-gen en_US.UTF-8 && git config --global --add safe.directory /workspaces/recetox-aplcms", "postAttachCommand": "/bin/bash" } diff --git a/DESCRIPTION b/DESCRIPTION index 0e9a580b..2c3a32a3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,3 +17,4 @@ NeedsCompilation: no Suggests: arrow, dataCompareR, testthat (>= 3.0.0) Config/testthat/edition: 3 +RoxygenNote: 7.2.0 diff --git a/R/adaptive.bin.R b/R/adaptive.bin.R index c87e6755..f70eefd4 100644 --- a/R/adaptive.bin.R +++ b/R/adaptive.bin.R @@ -1,162 +1,191 @@ -adaptive.bin <- -function(x, min.run, min.pres, tol, baseline.correct, weighted=FALSE) -{ - masses<-x$masses - labels<-x$labels - intensi<-x$intensi - times<-x$times - - rm(x) - - base.curve<-unique(times) - base.curve<-base.curve[order(base.curve)] - base.curve<-cbind(base.curve, base.curve*0) - - curr.order<-order(masses) - intensi<-intensi[curr.order] - labels<-labels[curr.order] - masses<-masses[curr.order] - - rm(curr.order) - - cat(c("m/z tolerance is: ", tol,"\n")) - - l<-length(masses) - - curr.bw<-0.5*tol*max(masses) - if(weighted) - { - all.mass.den<-density(masses, weights=intensi/sum(intensi), bw=curr.bw, n=2^min(15, floor(log2(l))-2)) - }else{ - all.mass.den<-density(masses, bw=curr.bw, n=2^min(15, floor(log2(l))-2)) - } - all.mass.turns<-find.turn.point(all.mass.den$y) - all.mass.vlys<-all.mass.den$x[all.mass.turns$vlys] - breaks<-c(0, unique(round(approx(masses,1:l,xout=all.mass.vlys,rule=2,ties='ordered')$y))[-1]) - - grps<-masses*0 # this is which peak group the signal belongs to, not time group - - min.lab<-min(labels) - max.lab<-max(labels) - times<-unique(labels) - times<-times[order(times)] - curr.label<-1 - min.count.run<-min.run*length(times)/(max(times)-min(times)) - time.range<-diff(range(times)) - aver.time.range<-(max(labels)-min(labels))/length(times) - - newprof<-matrix(0, nrow=length(masses),ncol=4) - height.rec<-matrix(0, nrow=length(masses),ncol=3) - prof.pointer<-1 - height.pointer<-1 - - for(i in 1:(length(breaks)-1)) - { - this.labels<-labels[(breaks[i]+1):breaks[i+1]] - if(length(unique(this.labels)) >= min.count.run*min.pres) - { - this.masses<-masses[(breaks[i]+1):breaks[i+1]] - this.intensi<-intensi[(breaks[i]+1):breaks[i+1]] - - curr.order<-order(this.labels) - this.masses<-this.masses[curr.order] - this.intensi<-this.intensi[curr.order] - this.labels<-this.labels[curr.order] - - this.bw=0.5*tol*median(this.masses) - if(weighted) - { - mass.den<-density(this.masses, weights=this.intensi/sum(this.intensi), bw=this.bw) - }else{ - mass.den<-density(this.masses, bw=this.bw) - } - mass.den$y[mass.den$y 
< min(this.intensi)/10]<-0 - mass.turns<-find.turn.point(mass.den$y) - mass.pks<-mass.den$x[mass.turns$pks] - mass.vlys<-c(-Inf, mass.den$x[mass.turns$vlys], Inf) - - - for(j in 1:length(mass.pks)) - { - mass.lower<-max(mass.vlys[mass.vlys < mass.pks[j]]) - mass.upper<-min(mass.vlys[mass.vlys > mass.pks[j]]) - - if(length(mass.pks) == 1) mass.lower<-mass.lower-1 - mass.sel<-which(this.masses > mass.lower & this.masses <= mass.upper) - - if(length(mass.sel) > 0) - { - that.labels<-this.labels[mass.sel] - that.masses<-this.masses[mass.sel] - that.intensi<-this.intensi[mass.sel] - - that.merged<-combine.seq.3(that.labels, that.masses, that.intensi) - if(nrow(that.merged)==1) - { - new.merged<-that.merged - }else{ - new.merged<-that.merged[order(that.merged[,1]),] - } - - that.labels<-new.merged[,2] - that.masses<-new.merged[,1] - that.intensi<-new.merged[,3] - that.range<-diff(range(that.labels)) - - if(that.range > 0.5*time.range & length(that.labels) > that.range*min.pres & length(that.labels)/(diff(range(that.labels))/aver.time.range) > min.pres) - { - that.intensi<-rm.ridge(that.labels, that.intensi, bw=max(10*min.run, that.range/2)) - - that.sel<-which(that.intensi != 0) - that.labels<-that.labels[that.sel] - that.masses<-that.masses[that.sel] - that.intensi<-that.intensi[that.sel] - } - that.n<-length(that.masses) - newprof[prof.pointer:(prof.pointer+that.n-1),]<-cbind(that.masses, that.labels, that.intensi, rep(curr.label, that.n)) - prof.pointer<-prof.pointer+that.n - height.rec[height.pointer,]<-c(curr.label,that.n, max(that.intensi)) - height.pointer<-height.pointer+1 - curr.label<-curr.label+1 - } - } - }else{ - if(runif(1)<0.05) - { - this.masses<-masses[(breaks[i]+1):breaks[i+1]] - this.intensi<-intensi[(breaks[i]+1):breaks[i+1]] - curr.order<-order(this.labels) - this.masses<-this.masses[curr.order] - this.intensi<-this.intensi[curr.order] - this.labels<-this.labels[curr.order] - - - that.merged<-combine.seq.3(this.labels, this.masses, this.intensi) - that.n<-nrow(that.merged) - newprof[prof.pointer:(prof.pointer+that.n-1),]<-cbind(that.merged, rep(curr.label, that.n)) - prof.pointer<-prof.pointer+that.n - - height.rec[height.pointer,]<-c(curr.label,that.n, max(that.merged[,3])) - height.pointer<-height.pointer+1 - curr.label<-curr.label+1 - } +compute_densities <- function(masses, tol, weighted, intensities, bw_func, n = 512) { + bandwidth <- 0.5 * tol * bw_func(masses) + if (weighted) { + weights <- intensities / sum(intensities) + all.mass.den <- density(masses, weights = weights, bw = bandwidth, n = n) + } else { + all.mass.den <- density(masses, bw = bandwidth, n = n) + } + return(all.mass.den) +} + +compute_mass_values <- function(tol, masses, intensi, weighted) { + n <- 2^min(15, floor(log2(length(masses))) - 2) + + all.mass.den <- compute_densities(masses, tol, weighted, intensi, max, n) + + all.mass.turns <- find.turn.point(all.mass.den$y) + all.mass.vlys <- all.mass.den$x[all.mass.turns$vlys] + return(all.mass.vlys) +} + +compute_breaks <- function(tol, masses, intensi, weighted) { + all.mass.vlys <- compute_mass_values(tol, masses, intensi, weighted) + breaks <- c(0, unique(round(approx(masses, 1:length(masses), xout = all.mass.vlys, rule = 2, ties = "ordered")$y))[-1]) + return(breaks) +} + +adaptive.bin <- function(x, + min.run, + min.pres, + tol, + baseline.correct, + weighted = FALSE) { + # order inputs after mz values + masses <- x$masses + labels <- x$labels + intensi <- x$intensi + times <- x$times + + rm(x) + + curr.order <- order(masses) + intensi <- 
intensi[curr.order] + labels <- labels[curr.order] + masses <- masses[curr.order] + + rm(curr.order) + + cat(c("m/z tolerance is: ", tol, "\n")) + + times <- unique(labels) + times <- times[order(times)] + + # calculate function parameters + min.count.run <- min.run * length(times) / (max(times) - min(times)) + time.range <- diff(range(times)) + aver.time.range <- (max(labels) - min(labels)) / length(times) + + # init data + newprof <- matrix(0, nrow = length(masses), ncol = 4) + height.rec <- matrix(0, nrow = length(masses), ncol = 3) + + # init counters + curr.label <- 1 + prof.pointer <- 1 + height.pointer <- 1 + + breaks <- compute_breaks(tol, masses, intensi, weighted) + + for (i in 1:(length(breaks) - 1)) + { + start <- breaks[i] + 1 + end <- breaks[i + 1] + # get number of scans in bin + this.labels <- labels[start: end] + + if (length(unique(this.labels)) >= min.count.run * min.pres) { + # extract mz and intensity values for bin + this.masses <- masses[start:end] + this.intensi <- intensi[start:end] + + # reorder in order of labels (scan number) + curr.order <- order(this.labels) + this.masses <- this.masses[curr.order] + this.intensi <- this.intensi[curr.order] + this.labels <- this.labels[curr.order] + + mass.den <- compute_densities(this.masses, tol, weighted, this.intensi, median) + + mass.den$y[mass.den$y < min(this.intensi) / 10] <- 0 + mass.turns <- find.turn.point(mass.den$y) + mass.pks <- mass.den$x[mass.turns$pks] + mass.vlys <- c(-Inf, mass.den$x[mass.turns$vlys], Inf) + + + for (j in 1:length(mass.pks)) + { + # compute boundaries + mass.lower <- max(mass.vlys[mass.vlys < mass.pks[j]]) + mass.upper <- min(mass.vlys[mass.vlys > mass.pks[j]]) + + if (length(mass.pks) == 1) mass.lower <- mass.lower - 1 + + # compute if we are in mass range from mass.lower to mass.upper + mass.sel <- which(this.masses > mass.lower & this.masses <= mass.upper) + + if (length(mass.sel) > 0) { + + # get rows which fulfill condition + that.labels <- this.labels[mass.sel] + that.masses <- this.masses[mass.sel] + that.intensi <- this.intensi[mass.sel] + + # rearrange in order of labels + that.merged <- combine.seq.3(that.labels, that.masses, that.intensi) + if (nrow(that.merged) == 1) { + new.merged <- that.merged + } else { + new.merged <- that.merged[order(that.merged[, 1]), ] + } + + that.labels <- new.merged[, 2] + that.masses <- new.merged[, 1] + that.intensi <- new.merged[, 3] + that.range <- diff(range(that.labels)) + + if (that.range > 0.5 * time.range & length(that.labels) > that.range * min.pres & length(that.labels) / (diff(range(that.labels)) / aver.time.range) > min.pres) { + that.intensi <- rm.ridge(that.labels, that.intensi, bw = max(10 * min.run, that.range / 2)) + + # filter out 0 entries + that.sel <- which(that.intensi != 0) + that.labels <- that.labels[that.sel] + that.masses <- that.masses[that.sel] + that.intensi <- that.intensi[that.sel] + } + + that.n <- length(that.masses) + + newprof[prof.pointer:(prof.pointer + that.n - 1), ] <- cbind(that.masses, that.labels, that.intensi, rep(curr.label, that.n)) + height.rec[height.pointer, ] <- c(curr.label, that.n, max(that.intensi)) + + # increment counters + prof.pointer <- prof.pointer + that.n + height.pointer <- height.pointer + 1 + curr.label <- curr.label + 1 } + } + } else { + if (runif(1) < 0.05) { + + # reassignment + this.masses <- masses[start:end] + this.intensi <- intensi[start:end] + + # reordering + curr.order <- order(this.labels) + this.masses <- this.masses[curr.order] + this.intensi <- this.intensi[curr.order] + 
this.labels <- this.labels[curr.order] + + that.merged <- combine.seq.3(this.labels, this.masses, this.intensi) + that.n <- nrow(that.merged) + + newprof[prof.pointer:(prof.pointer + that.n - 1), ] <- cbind(that.merged, rep(curr.label, that.n)) + height.rec[height.pointer, ] <- c(curr.label, that.n, max(that.merged[, 3])) + + # increment counters + prof.pointer <- prof.pointer + that.n + height.pointer <- height.pointer + 1 + curr.label <- curr.label + 1 + } } - - newprof<-newprof[1:(prof.pointer-1),] - height.rec<-height.rec[1:(height.pointer-1),] - - newprof<-newprof[order(newprof[,1],newprof[,2]),] - - raw.prof<-new("list") - raw.prof$height.rec<-height.rec - raw.prof$masses<-newprof[,1] - raw.prof$labels<-newprof[,2] - raw.prof$intensi<-newprof[,3] - raw.prof$grps<-newprof[,4] - raw.prof$times<-times - raw.prof$tol<-tol - raw.prof$min.count.run<-min.count.run - - return(raw.prof) + } + + newprof <- newprof[1:(prof.pointer - 1), ] + height.rec <- height.rec[1:(height.pointer - 1), ] + + newprof <- newprof[order(newprof[, 1], newprof[, 2]), ] + + raw.prof <- new("list") + raw.prof$height.rec <- height.rec + raw.prof$masses <- newprof[, 1] + raw.prof$labels <- newprof[, 2] + raw.prof$intensi <- newprof[, 3] + raw.prof$grps <- newprof[, 4] + raw.prof$times <- times + raw.prof$tol <- tol + raw.prof$min.count.run <- min.count.run + + return(raw.prof) } diff --git a/R/extract_features.R b/R/extract_features.R new file mode 100644 index 00000000..eb8ea1e2 --- /dev/null +++ b/R/extract_features.R @@ -0,0 +1,63 @@ +extract_features <- function( + cluster, + filenames, + min_pres, + min_run, + mz_tol, + baseline_correct, + baseline_correct_noise_percentile, + intensity_weighted, + min_bandwidth, + max_bandwidth, + sd_cut, + sigma_ratio_lim, + shape_model, + peak_estim_method, + component_eliminate, + moment_power, + BIC_factor +) { + clusterExport(cluster, list( + 'proc.cdf', + 'prof.to.features', + 'load.lcms', + 'adaptive.bin', + 'find.turn.point', + 'combine.seq.3', + 'cont.index', + 'interpol.area', + 'load_file', + 'load_data', + 'plot_raw_profile_histogram', + 'compute_mass_values', + 'compute_densities', + 'compute_breaks' + )) + + parLapply(cluster, filenames, function(filename) { + profile <- proc.cdf( + filename = filename, + min.pres = min_pres, + min.run = min_run, + tol = mz_tol, + baseline.correct = baseline_correct, + baseline.correct.noise.percentile = baseline_correct_noise_percentile, + intensity.weighted = intensity_weighted, + do.plot = FALSE, + cache = FALSE + ) + features <- prof.to.features( + a = profile, + min.bw = min_bandwidth, + max.bw = max_bandwidth, + sd.cut = sd_cut, + sigma.ratio.lim = sigma_ratio_lim, + shape.model = shape_model, + estim.method = peak_estim_method, + component.eliminate = component_eliminate, + power = moment_power, + BIC.factor = BIC_factor, + do.plot = FALSE + ) + }) +} diff --git a/R/plot.R b/R/plot.R new file mode 100644 index 00000000..972f35da --- /dev/null +++ b/R/plot.R @@ -0,0 +1,34 @@ +plot_raw_profile_histogram <- function( + raw.prof, + min.pres, + baseline.correct, + baseline.correct.noise.percentile, + tol, + new.prof +) { + h.1 <- log10(raw.prof$height.rec[raw.prof$height.rec[, 2] <= max(2, raw.prof$min.count.run * min.pres / 2), 3]) + h.2 <- log10(raw.prof$height.rec[raw.prof$height.rec[, 2] >= raw.prof$min.count.run * min.pres, 3]) + + if (is.na(baseline.correct)) { + baseline.correct <- 10^quantile(h.1, baseline.correct.noise.percentile) + message(c("maximal height cut is automatically set at the", 
baseline.correct.noise.percentile, "percentile of noise group heights: ", baseline.correct)) + } else { + message(c("maximal height cut is provided by user: ", baseline.correct)) + } + par(mfrow = c(2, 2)) + + plot(c(-1, 1), c(-1, 1), type = "n", xlab = "", ylab = "", main = "tolerance level loaded", axes = FALSE) + text(x = 0, y = 0, tol, cex = 1.2) + + if (length(h.1) > 50) { + plot(density(h.1), xlab = "maximum height of group (log scale)", xlim = range(c(h.1, h.2)), main = "Black - noise groups \n Blue - selected groups") + } else { + plot(NA, NA, xlab = "maximum height of group (log scale)", xlim = range(c(h.1, h.2)), ylim = c(0, 1), main = "Black - noise groups \n Blue - selected groups") + if (length(h.1) > 0) abline(v = h.1) + } + + abline(v = log10(baseline.correct), col = "red") + lines(density(log10(new.prof$height.rec)), col = "blue") + hist(new.prof$time.range.rec, xlab = "Range of retention time in the same group", ylab = "Density", freq = FALSE, nclass = 100, main = "Group retention time range distribution") + hist(new.prof$mz.pres.rec, xlab = "% signal present in the same group", ylab = "Density", freq = FALSE, nclass = 20, main = "Group % present signal distribution") +} \ No newline at end of file diff --git a/R/proc.cdf.R b/R/proc.cdf.R index ce82f8fc..7c97c159 100644 --- a/R/proc.cdf.R +++ b/R/proc.cdf.R @@ -1,59 +1,73 @@ -proc.cdf <- function(filename, min.pres = 0.5, min.run = 12, tol = 1e-5, baseline.correct = 0, baseline.correct.noise.percentile = 0, do.plot = TRUE, intensity.weighted = FALSE, cache = TRUE) { +load_file <- function(filename) { + this <- load.lcms(filename) + + # this could eventually be replaced using drop_na + na.sel <- c(which(is.na(this$masses)), which(is.na(this$labels)), which(is.na(this$intensi))) + if (length(na.sel) > 0) { + na.sel <- unique(na.sel) + this$masses <- this$masses[-na.sel] + this$labels <- this$labels[-na.sel] + this$intensi <- this$intensi[-na.sel] + + warning("there are NA values in the m/z or intensity. Check the file:", filename) + } + # TODO + # this <- tidyr::drop_na(as.data.frame(this)) + return(this) +} + + +load_data <- function(filename, + cache, + min.run, + min.pres, + tol, + baseline.correct, + intensity.weighted) { rawprof_filename <- paste(strsplit(tolower(filename), "\\.")[[1]][1], "_", min.run, "_", min.pres, "_", tol, ".rawprof", sep = "") if (cache && file.exists(rawprof_filename)) { load(rawprof_filename) } else { - this <- load.lcms(filename) - - na.sel <- c(which(is.na(this$masses)), which(is.na(this$labels)), which(is.na(this$intensi))) - if (length(na.sel) > 0) { - na.sel <- unique(na.sel) - this$masses <- this$masses[-na.sel] - this$labels <- this$labels[-na.sel] - this$intensi <- this$intensi[-na.sel] - - warning("there are NA values in the m/z or intensity. 
Check the file:", filename) - } - raw.prof <- adaptive.bin(this, min.run = min.run, min.pres = min.pres, tol = tol, baseline.correct = baseline.correct, weighted = intensity.weighted) + raw.data <- load_file(filename) + raw.prof <- adaptive.bin(raw.data, min.run = min.run, min.pres = min.pres, tol = tol, baseline.correct = baseline.correct, weighted = intensity.weighted) } if (cache && !file.exists(rawprof_filename)) { save(raw.prof, file = rawprof_filename) } - newprof <- cbind(raw.prof$masses, raw.prof$labels, raw.prof$intensi, raw.prof$grps) - h.1 <- log10(raw.prof$height.rec[raw.prof$height.rec[, 2] <= max(2, raw.prof$min.count.run * min.pres / 2), 3]) - h.2 <- log10(raw.prof$height.rec[raw.prof$height.rec[, 2] >= raw.prof$min.count.run * min.pres, 3]) + return(raw.prof) +} - if (is.na(baseline.correct)) { - baseline.correct <- 10^quantile(h.1, baseline.correct.noise.percentile) - message(c("maximal height cut is automatically set at the", baseline.correct.noise.percentile, "percentile of noise group heights: ", baseline.correct)) - } else { - message(c("maximal height cut is provided by user: ", baseline.correct)) - } +#' Process file and return the profile. +#' @export +proc.cdf <- function(filename, + min.pres = 0.5, + min.run = 12, + tol = 1e-05, + baseline.correct = 0.0, + baseline.correct.noise.percentile = 0.05, + do.plot = FALSE, + intensity.weighted = FALSE, + cache = FALSE) { + raw.prof <- load_data(filename, cache, min.run, min.pres, tol, baseline.correct, intensity.weighted) + newprof <- cbind(raw.prof$masses, raw.prof$labels, raw.prof$intensi, raw.prof$grps) run.sel <- raw.prof$height.rec[which(raw.prof$height.rec[, 2] >= raw.prof$min.count.run * min.pres & raw.prof$height.rec[, 3] > baseline.correct), 1] - newprof <- newprof[newprof[, 4] %in% run.sel,] + + newprof <- newprof[newprof[, 4] %in% run.sel, ] new.prof <- cont.index(newprof, min.pres = min.pres, min.run = min.run) if (do.plot) { - par(mfrow = c(2, 2)) - - plot(c(-1,1), c(-1,1), type = "n", xlab = "", ylab = "", main = "tolerance level loaded", axes = FALSE) - text(x = 0, y = 0, tol, cex = 1.2) - - if (length(h.1) > 50) { - plot(density(h.1), xlab = "maximum height of group (log scale)", xlim = range(c(h.1, h.2)), main = "Black - noise groups \n Blue - selected groups") - } else { - plot(NA, NA, xlab = "maximum height of group (log scale)", xlim = range(c(h.1, h.2)), ylim = c(0, 1), main = "Black - noise groups \n Blue - selected groups") - if (length(h.1) > 0) abline(v = h.1) - } - - abline(v = log10(baseline.correct), col = "red") - lines(density(log10(new.prof$height.rec)), col = "blue") - hist(new.prof$time.range.rec, xlab = "Range of retention time in the same group", ylab = "Density", freq = FALSE, nclass = 100, main = "Group retention time range distribution") - hist(new.prof$mz.pres.rec, xlab = "% signal present in the same group", ylab = "Density", freq = FALSE, nclass = 20, main = "Group % present signal distribution") + plot_raw_profile_histogram( + raw.prof, + min.pres, + baseline.correct, + baseline.correct.noise.percentile, + tol, + new.prof + ) } return(new.prof$new.rec) diff --git a/R/prof.to.features.R b/R/prof.to.features.R index 54084d9e..c3c7efac 100644 --- a/R/prof.to.features.R +++ b/R/prof.to.features.R @@ -1,712 +1,683 @@ -prof.to.features <- -function(a, bandwidth=0.5, min.bw=NA, max.bw=NA, sd.cut=c(0.1,100), sigma.ratio.lim=c(0.1, 10), shape.model="bi-Gaussian", estim.method="moment",do.plot=TRUE, power=1, component.eliminate=0.01, BIC.factor=2) -{ - if(sum(shape.model %in% 
c("bi-Gaussian", "Gaussian")) == 0) - { +prof.to.features <- function(a, + bandwidth = 0.5, + min.bw = NA, + max.bw = NA, + sd.cut = c(0.01, 500), + sigma.ratio.lim = c(0.01, 100), + shape.model = "bi-Gaussian", + estim.method = "moment", + do.plot = TRUE, + power = 1, + component.eliminate = 0.01, + BIC.factor = 2) { + if (sum(shape.model %in% c("bi-Gaussian", "Gaussian")) == 0) { print("Error: peak shape model has to be Gaussian or bi-Gaussian") return(0) } - if(sum(estim.method %in% c("moment", "EM")) == 0) - { + if (sum(estim.method %in% c("moment", "EM")) == 0) { print("Error: peak model estimation method has to be moment or EM") return(0) } - + ##################### - bigauss.esti.EM <- function(t, x, max.iter=50, epsilon=0.005, power=1, do.plot=FALSE, truth=NA, sigma.ratio.lim=c(0.3, 1)) - { - + bigauss.esti.EM <- function(t, x, max.iter = 50, epsilon = 0.005, power = 1, do.plot = FALSE, truth = NA, sigma.ratio.lim = c(0.3, 1)) { + ## function takes into x and t, and then computes the value of ## sigma.1, sigma.2 and a using iterative method. the returned ## values include estimated sigmas, a and a boolean variable on ## whether the termination criteria is satified upon the end of the ## program. - - sel<-which(x>1e-10) - if(length(sel)==0) - { - return(c(median(t),1,1,0)) + + sel <- which(x > 1e-10) + if (length(sel) == 0) { + return(c(median(t), 1, 1, 0)) } - if(length(sel)==1) - { - return(c(t[sel], 1,1,0)) + if (length(sel) == 1) { + return(c(t[sel], 1, 1, 0)) } - t<-t[sel] - x<-x[sel] - solve.sigma <- function(x,t,a){ - + t <- t[sel] + x <- x[sel] + solve.sigma <- function(x, t, a) { + ## this function takes the value intensity level x, retention time t ## and assumed breaking point a, calculates the square estimated of ## sigma.1 and sigma.2. - - prep.uv <- function(x,t,a){ - + + prep.uv <- function(x, t, a) { + ## this function prepares the parameters required for latter ## compuation. u, v, and sum of x. - - temp <- (t-a)^2 * x - u <- sum(temp * as.numeric(t=a)) - return(list(u=u, - v=v, - x.sum=sum(x))) + + temp <- (t - a)^2 * x + u <- sum(temp * as.numeric(t < a)) + v <- sum(temp * as.numeric(t >= a)) + return(list( + u = u, + v = v, + x.sum = sum(x) + )) } - tt <- prep.uv(x,t,a) - sigma.1 <- tt$u/tt$x.sum*((tt$v/tt$u)^(1/3)+1) - sigma.2 <- tt$v/tt$x.sum*((tt$u/tt$v)^(1/3)+1) - return(list(sigma.1=sigma.1, - sigma.2=sigma.2)) + tt <- prep.uv(x, t, a) + sigma.1 <- tt$u / tt$x.sum * ((tt$v / tt$u)^(1 / 3) + 1) + sigma.2 <- tt$v / tt$x.sum * ((tt$u / tt$v)^(1 / 3) + 1) + return(list( + sigma.1 = sigma.1, + sigma.2 = sigma.2 + )) } - - solve.a <- function(x,t,a,sigma.1,sigma.2){ - + + solve.a <- function(x, t, a, sigma.1, sigma.2) { + ## thif function solves the value of a using the x, t, a from the ## previous step, and sigma.1, and sigma.2 - - w <- x * (as.numeric(t=a)/sigma.2) - return(sum(t * w)/ sum(w)) + + w <- x * (as.numeric(t < a) / sigma.1 + as.numeric(t >= a) / sigma.2) + return(sum(t * w) / sum(w)) } - + ## epsilon is the threshold for continuing the iteration. change in ## a smaller than epsilon will terminate the iteration. ## epsilon <- min(diff(sort(t)))/2 - + ## using the median value of t as the initial value of a. - a.old <- t[which(x==max(x))[1]] + a.old <- t[which(x == max(x))[1]] a.new <- a.old - change <- 10*epsilon - + change <- 10 * epsilon + ## n.iter is the number of iteration covered so far. 
n.iter <- 0 - - while((change>epsilon) & (n.iter epsilon) & (n.iter < max.iter)) { ## print(c(n.iter,change)) a.old <- a.new - n.iter <- n.iter+1 - sigma <- solve.sigma(x,t,a.old) - if(n.iter == 1) sigma[is.na(sigma)]<-as.numeric(sigma[which(!is.na(sigma))])[1]/10 - a.new <- solve.a(x,t,a.old,sigma$sigma.1,sigma$sigma.2) - change <- abs(a.old-a.new) + n.iter <- n.iter + 1 + sigma <- solve.sigma(x, t, a.old) + if (n.iter == 1) sigma[is.na(sigma)] <- as.numeric(sigma[which(!is.na(sigma))])[1] / 10 + a.new <- solve.a(x, t, a.old, sigma$sigma.1, sigma$sigma.2) + change <- abs(a.old - a.new) } # return(list(a=a.new, # sigma.1=sigma$sigma.1, # sigma.2=sigma$sigma.2, # iter.end=(max.iter>n.iter) # )) - d<-x - sigma$sigma.2<-sqrt(sigma$sigma.2) - sigma$sigma.1<-sqrt(sigma$sigma.1) - - d[t=a.new]<-dnorm(t[t>=a.new],mean=a.new,sd=sigma$sigma.2)*sigma$sigma.2 - scale<-exp(sum(d[d>1e-3]^2*log(x[d>1e-3]/d[d>1e-3]))/sum(d[d>1e-3]^2)) + d <- x + sigma$sigma.2 <- sqrt(sigma$sigma.2) + sigma$sigma.1 <- sqrt(sigma$sigma.1) + + d[t < a.new] <- dnorm(t[t < a.new], mean = a.new, sd = sigma$sigma.1) * sigma$sigma.1 + d[t >= a.new] <- dnorm(t[t >= a.new], mean = a.new, sd = sigma$sigma.2) * sigma$sigma.2 + scale <- exp(sum(d[d > 1e-3]^2 * log(x[d > 1e-3] / d[d > 1e-3])) / sum(d[d > 1e-3]^2)) return(c(a.new, sigma$sigma.1, sigma$sigma.2, scale)) } - - bigauss.esti<-function(x,y,power=1, do.plot=FALSE, truth=NA, sigma.ratio.lim=c(0.3, 3)) - { - sel<-which(y>1e-10) - if(length(sel)<2) - { - to.return<-c(median(x),1,1,0) - }else{ - x<-x[sel] - y<-y[sel] - # sel<-order(x) - # y<-y[sel] - # x<-x[sel] - - y.0<-y - if(do.plot) plot(x,y) - if(do.plot & ! is.na(truth[1])) - { - true.y1<-dnorm(x[x=truth[1]],mean=truth[1],sd=truth[3])*truth[3]*truth[4] - lines(x, c(true.y1,true.y2),col="green") + + bigauss.esti <- function(x, y, power = 1, do.plot = FALSE, truth = NA, sigma.ratio.lim = c(0.3, 3)) { + sel <- which(y > 1e-10) + if (length(sel) < 2) { + to.return <- c(median(x), 1, 1, 0) + } else { + x <- x[sel] + y <- y[sel] + # sel<-order(x) + # y<-y[sel] + # x<-x[sel] + + y.0 <- y + if (do.plot) plot(x, y) + if (do.plot & !is.na(truth[1])) { + true.y1 <- dnorm(x[x < truth[1]], mean = truth[1], sd = truth[2]) * truth[2] * truth[4] + true.y2 <- dnorm(x[x >= truth[1]], mean = truth[1], sd = truth[3]) * truth[3] * truth[4] + lines(x, c(true.y1, true.y2), col = "green") } - max.y.0<-max(y.0,na.rm=TRUE) - y<-(y/max.y.0)^power - - l<-length(x) - min.d<-min(diff(x)) - dx<-c(x[2]-x[1], (x[3:l]-x[1:(l-2)])/2, x[l]-x[l-1]) - if(l ==2) dx=rep(diff(x),2) - dx[dx>4*min.d]<-4*min.d - - y.cum<-cumsum(y*dx) - x.y.cum<-cumsum(y*x*dx) - xsqr.y.cum<-cumsum(y*x^2*dx) - - y.cum.rev<-cumsum((y*dx)[l:1])[l:1] # reverse cum sum - x.y.cum.rev<-cumsum((x*y*dx)[l:1])[l:1] - xsqr.y.cum.rev<-cumsum((y*x^2*dx)[l:1])[l:1] - - sel<-which(y.cum >= sigma.ratio.lim[1]/(sigma.ratio.lim[1] + 1)*y.cum[l]) - if(length(sel)>0) - { - start<-max(1, min(sel)) - }else{ - start<-1 + max.y.0 <- max(y.0, na.rm = TRUE) + y <- (y / max.y.0)^power + + l <- length(x) + min.d <- min(diff(x)) + dx <- c(x[2] - x[1], (x[3:l] - x[1:(l - 2)]) / 2, x[l] - x[l - 1]) + if (l == 2) dx <- rep(diff(x), 2) + dx[dx > 4 * min.d] <- 4 * min.d + + y.cum <- cumsum(y * dx) + x.y.cum <- cumsum(y * x * dx) + xsqr.y.cum <- cumsum(y * x^2 * dx) + + y.cum.rev <- cumsum((y * dx)[l:1])[l:1] # reverse cum sum + x.y.cum.rev <- cumsum((x * y * dx)[l:1])[l:1] + xsqr.y.cum.rev <- cumsum((y * x^2 * dx)[l:1])[l:1] + + sel <- which(y.cum >= sigma.ratio.lim[1] / (sigma.ratio.lim[1] + 1) * y.cum[l]) + if 
(length(sel) > 0) { + start <- max(1, min(sel)) + } else { + start <- 1 } - sel<-which(y.cum <= sigma.ratio.lim[2]/(sigma.ratio.lim[2] + 1)*y.cum[l]) - if(length(sel)>0) - { - end<-min(l-1, max(sel)) - }else{ - end<-l-1 + sel <- which(y.cum <= sigma.ratio.lim[2] / (sigma.ratio.lim[2] + 1) * y.cum[l]) + if (length(sel) > 0) { + end <- min(l - 1, max(sel)) + } else { + end <- l - 1 } - if(end <= start) - { - m<-min(mean(x[start:end]), x[max(which(y.cum.rev>0))]) - }else{ - m.candi<-x[start:end]+diff(x[start:(end+1)])/2 - rec<-matrix(0, ncol=3, nrow=end-start+1) - - s1<-sqrt((xsqr.y.cum[start:end]+m.candi^2*y.cum[start:end]-2*m.candi*x.y.cum[start:end])/y.cum[start:end]) - s2<-sqrt((xsqr.y.cum.rev[start:end+1]+m.candi^2*y.cum.rev[start:end+1]-2*m.candi*x.y.cum.rev[start:end+1])/y.cum.rev[start:end+1]) - rec[,1]<-s1 - rec[,2]<-s2 - rec[,3]<-y.cum[start:end]/y.cum.rev[start:end+1] - - d<-log(rec[,1]/rec[,2])-log(rec[,3]) - if(min(d,na.rm=TRUE)*max(d,na.rm=TRUE) < 0) - { - sel<-c(which(d==max(d[d<0]))[1], which(d==min(d[d>=0]))) - m<-(sum(abs(d[sel])*m.candi[sel]))/(sum(abs(d[sel]))) - }else{ - d<-abs(d) - m<-m.candi[which(d==min(d,na.rm=TRUE))[1]] + if (end <= start) { + m <- min(mean(x[start:end]), x[max(which(y.cum.rev > 0))]) + } else { + m.candi <- x[start:end] + diff(x[start:(end + 1)]) / 2 + rec <- matrix(0, ncol = 3, nrow = end - start + 1) + + s1 <- sqrt((xsqr.y.cum[start:end] + m.candi^2 * y.cum[start:end] - 2 * m.candi * x.y.cum[start:end]) / y.cum[start:end]) + s2 <- sqrt((xsqr.y.cum.rev[start:end + 1] + m.candi^2 * y.cum.rev[start:end + 1] - 2 * m.candi * x.y.cum.rev[start:end + 1]) / y.cum.rev[start:end + 1]) + rec[, 1] <- s1 + rec[, 2] <- s2 + rec[, 3] <- y.cum[start:end] / y.cum.rev[start:end + 1] + + d <- log(rec[, 1] / rec[, 2]) - log(rec[, 3]) + if (min(d, na.rm = TRUE) * max(d, na.rm = TRUE) < 0) { + sel <- c(which(d == max(d[d < 0]))[1], which(d == min(d[d >= 0]))) + m <- (sum(abs(d[sel]) * m.candi[sel])) / (sum(abs(d[sel]))) + } else { + d <- abs(d) + m <- m.candi[which(d == min(d, na.rm = TRUE))[1]] } } - - if(do.plot) abline(v=m) - - sel1<-which(x=m) - s1<-sqrt(sum((x[sel1]-m)^2*y[sel1]*dx[sel1])/sum(y[sel1]*dx[sel1])) - s2<-sqrt(sum((x[sel2]-m)^2*y[sel2]*dx[sel2])/sum(y[sel2]*dx[sel2])) - - - if(power != 1) - { - s1<-s1*sqrt(power) - s2<-s2*sqrt(power) + + if (do.plot) abline(v = m) + + sel1 <- which(x < m) + sel2 <- which(x >= m) + s1 <- sqrt(sum((x[sel1] - m)^2 * y[sel1] * dx[sel1]) / sum(y[sel1] * dx[sel1])) + s2 <- sqrt(sum((x[sel2] - m)^2 * y[sel2] * dx[sel2]) / sum(y[sel2] * dx[sel2])) + + + if (power != 1) { + s1 <- s1 * sqrt(power) + s2 <- s2 * sqrt(power) } - - d1<-dnorm(x[sel1], sd=s1, mean=m) - d2<-dnorm(x[sel2], sd=s2, mean=m) - d<-c(d1*s1,d2*s2) # notice this "density" doesnt integrate to 1. Rather it integrates to (s1+s2)/2 - y<-y.0 - - dy.ratio<-d^2*log(y/d) - dy.ratio[is.na(dy.ratio)]<-0 - dy.ratio[dy.ratio == -Inf]<-0 - dy.ratio[dy.ratio == Inf]<-0 - - - scale<-exp(sum(dy.ratio)/sum(d^2)) - - if(do.plot) - { - fit.1<-d*scale - lines(x[y>0],fit.1,col="red") + + d1 <- dnorm(x[sel1], sd = s1, mean = m) + d2 <- dnorm(x[sel2], sd = s2, mean = m) + d <- c(d1 * s1, d2 * s2) # notice this "density" doesnt integrate to 1. 
Rather it integrates to (s1+s2)/2 + y <- y.0 + + dy.ratio <- d^2 * log(y / d) + dy.ratio[is.na(dy.ratio)] <- 0 + dy.ratio[dy.ratio == -Inf] <- 0 + dy.ratio[dy.ratio == Inf] <- 0 + + + scale <- exp(sum(dy.ratio) / sum(d^2)) + + if (do.plot) { + fit.1 <- d * scale + lines(x[y > 0], fit.1, col = "red") } - - to.return<-c(m,s1,s2,scale) - if(sum(is.na(to.return)) > 0) - { - m<-sum(x*y)/sum(y) - s1<-s2<-sum(y*(x-m)^2)/sum(y) - scale<-sum(y)/s1 - to.return<-c(m,s1,s2,scale) + + to.return <- c(m, s1, s2, scale) + if (sum(is.na(to.return)) > 0) { + m <- sum(x * y) / sum(y) + s1 <- s2 <- sum(y * (x - m)^2) / sum(y) + scale <- sum(y) / s1 + to.return <- c(m, s1, s2, scale) } } return(to.return) } - + ############## - bigauss.mix<-function(x,y,power=1, do.plot=FALSE, sigma.ratio.lim=c(0.1, 10), bw=c(15,30,60), eliminate=.05, max.iter=25, estim.method, BIC.factor=2) - { - all.bw<-bw[order(bw)] - - x.0<-x - y.0<-y - - sel<-y>1e-5 - x<-x[sel] - y<-y[sel] - sel<-order(x) - y<-y[sel] - x<-x[sel] - results<-new("list") - smoother.pk.rec<-smoother.vly.rec<-new("list") - bic.rec<-all.bw - - if(do.plot) - { - par(mfrow=c(ceiling(length(all.bw)/2),2)) - par(mar=c(1,1,1,1)) + bigauss.mix <- function(x, y, power = 1, do.plot = FALSE, sigma.ratio.lim = c(0.1, 10), bw = c(15, 30, 60), eliminate = .05, max.iter = 25, estim.method, BIC.factor = 2) { + all.bw <- bw[order(bw)] + + x.0 <- x + y.0 <- y + + sel <- y > 1e-5 + x <- x[sel] + y <- y[sel] + sel <- order(x) + y <- y[sel] + x <- x[sel] + results <- new("list") + smoother.pk.rec <- smoother.vly.rec <- new("list") + bic.rec <- all.bw + + if (do.plot) { + par(mfrow = c(ceiling(length(all.bw) / 2), 2)) + par(mar = c(1, 1, 1, 1)) } - - last.num.pks<-Inf - - for(bw.n in length(all.bw):1) + + last.num.pks <- Inf + + for (bw.n in length(all.bw):1) { - bw<-all.bw[bw.n] - this.smooth<-ksmooth(x.0,y.0, kernel="normal", bandwidth=bw) - turns<-find.turn.point(this.smooth$y) - pks<-this.smooth$x[turns$pks] - vlys<-c(-Inf, this.smooth$x[turns$vlys], Inf) - - smoother.pk.rec[[bw.n]]<-pks - smoother.vly.rec[[bw.n]]<-vlys - if(length(pks) != last.num.pks) - { - last.num.pks<-length(pks) - l<-length(x) - dx<-c(x[2]-x[1], (x[3:l]-x[1:(l-2)])/2, x[l]-x[l-1]) - if(l ==2) dx=rep(diff(x),2) - + bw <- all.bw[bw.n] + this.smooth <- ksmooth(x.0, y.0, kernel = "normal", bandwidth = bw) + turns <- find.turn.point(this.smooth$y) + pks <- this.smooth$x[turns$pks] + vlys <- c(-Inf, this.smooth$x[turns$vlys], Inf) + + smoother.pk.rec[[bw.n]] <- pks + smoother.vly.rec[[bw.n]] <- vlys + if (length(pks) != last.num.pks) { + last.num.pks <- length(pks) + l <- length(x) + dx <- c(x[2] - x[1], (x[3:l] - x[1:(l - 2)]) / 2, x[l] - x[l - 1]) + if (l == 2) dx <- rep(diff(x), 2) + # initiation - m<-s1<-s2<-delta<-pks - for(i in 1:length(m)) + m <- s1 <- s2 <- delta <- pks + for (i in 1:length(m)) { - sel.1<-which(x >= max(vlys[vlys < m[i]]) & x < m[i]) - s1[i]<-sqrt(sum((x[sel.1]-m[i])^2 * y[sel.1]*dx[sel.1])/sum(y[sel.1]*dx[sel.1])) - - sel.2<-which(x >= m[i] & x < min(vlys[vlys > m[i]])) - s2[i]<-sqrt(sum((x[sel.2]-m[i])^2 * y[sel.2] * dx[sel.2])/sum(y[sel.2]*dx[sel.2])) - - delta[i]<-(sum(y[sel.1]*dx[sel.1]) + sum(y[sel.2]*dx[sel.2]))/((sum(dnorm(x[sel.1], mean=m[i], sd=s1[i])) * s1[i] /2)+(sum(dnorm(x[sel.2], mean=m[i], sd=s2[i])) * s2[i] /2)) + sel.1 <- which(x >= max(vlys[vlys < m[i]]) & x < m[i]) + s1[i] <- sqrt(sum((x[sel.1] - m[i])^2 * y[sel.1] * dx[sel.1]) / sum(y[sel.1] * dx[sel.1])) + + sel.2 <- which(x >= m[i] & x < min(vlys[vlys > m[i]])) + s2[i] <- sqrt(sum((x[sel.2] - m[i])^2 * y[sel.2] * 
dx[sel.2]) / sum(y[sel.2] * dx[sel.2])) + + delta[i] <- (sum(y[sel.1] * dx[sel.1]) + sum(y[sel.2] * dx[sel.2])) / ((sum(dnorm(x[sel.1], mean = m[i], sd = s1[i])) * s1[i] / 2) + (sum(dnorm(x[sel.2], mean = m[i], sd = s2[i])) * s2[i] / 2)) } - delta[is.na(delta)]<-1e-10 - s1[is.na(s1)]<-1e-10 - s2[is.na(s2)]<-1e-10 - - - fit<-matrix(0,ncol=length(m), nrow=length(x)) # this is the matrix of fitted values - - this.change=Inf - counter=0 - - while(this.change > 0.1 & counter <= max.iter) - { - counter<-counter+1 - old.m<-m - + delta[is.na(delta)] <- 1e-10 + s1[is.na(s1)] <- 1e-10 + s2[is.na(s2)] <- 1e-10 + + + fit <- matrix(0, ncol = length(m), nrow = length(x)) # this is the matrix of fitted values + + this.change <- Inf + counter <- 0 + + while (this.change > 0.1 & counter <= max.iter) { + counter <- counter + 1 + old.m <- m + # E step - cuts<-c(-Inf, m, Inf) - for(j in 2:length(cuts)) + cuts <- c(-Inf, m, Inf) + for (j in 2:length(cuts)) { - sel<-which(x >= cuts[j-1] & x < cuts[j]) - use.s1<-which(1:length(m) >= (j-1)) - s.to.use<-s2 - s.to.use[use.s1]<-s1[use.s1] - for(i in 1:ncol(fit)) + sel <- which(x >= cuts[j - 1] & x < cuts[j]) + use.s1 <- which(1:length(m) >= (j - 1)) + s.to.use <- s2 + s.to.use[use.s1] <- s1[use.s1] + for (i in 1:ncol(fit)) { - fit[sel,i]<-dnorm(x[sel], mean=m[i], sd = s.to.use[i]) * s.to.use[i] *delta[i] + fit[sel, i] <- dnorm(x[sel], mean = m[i], sd = s.to.use[i]) * s.to.use[i] * delta[i] } } - fit[is.na(fit)]<-0 - sum.fit<-apply(fit, 1, sum) - + fit[is.na(fit)] <- 0 + sum.fit <- apply(fit, 1, sum) + # Elimination step - fit<-fit/sum.fit - fit2<-fit * y - perc.explained<-apply(fit2,2,sum)/sum(y) - max.erase<-max(1, round(length(perc.explained)/5)) - - to.erase<-which(perc.explained <= min(eliminate, perc.explained[order(perc.explained, na.last=FALSE)[max.erase]])) - - - if(length(to.erase) > 0) - { - m<-m[-to.erase] - s1<-s1[-to.erase] - s2<-s2[-to.erase] - delta<-delta[-to.erase] - fit<-fit[,-to.erase] - if(is.null(ncol(fit))) fit<-matrix(fit, ncol=1) - sum.fit<-apply(fit, 1, sum) - fit<-fit/sum.fit - old.m<-old.m[-to.erase] + fit <- fit / sum.fit + fit2 <- fit * y + perc.explained <- apply(fit2, 2, sum) / sum(y) + max.erase <- max(1, round(length(perc.explained) / 5)) + + to.erase <- which(perc.explained <= min(eliminate, perc.explained[order(perc.explained, na.last = FALSE)[max.erase]])) + + + if (length(to.erase) > 0) { + m <- m[-to.erase] + s1 <- s1[-to.erase] + s2 <- s2[-to.erase] + delta <- delta[-to.erase] + fit <- fit[, -to.erase] + if (is.null(ncol(fit))) fit <- matrix(fit, ncol = 1) + sum.fit <- apply(fit, 1, sum) + fit <- fit / sum.fit + old.m <- old.m[-to.erase] } - + # M step - for(i in 1:length(m)) + for (i in 1:length(m)) { - this.y<-y * fit[,i] - if(estim.method=="moment") - { - this.fit<-bigauss.esti(x,this.y,power=power,do.plot=FALSE, sigma.ratio.lim=sigma.ratio.lim) - }else{ - this.fit<-bigauss.esti.EM(x,this.y,power=power,do.plot=FALSE, sigma.ratio.lim=sigma.ratio.lim) + this.y <- y * fit[, i] + if (estim.method == "moment") { + this.fit <- bigauss.esti(x, this.y, power = power, do.plot = FALSE, sigma.ratio.lim = sigma.ratio.lim) + } else { + this.fit <- bigauss.esti.EM(x, this.y, power = power, do.plot = FALSE, sigma.ratio.lim = sigma.ratio.lim) } - m[i]<-this.fit[1] - s1[i]<-this.fit[2] - s2[i]<-this.fit[3] - delta[i]<-this.fit[4] + m[i] <- this.fit[1] + s1[i] <- this.fit[2] + s2[i] <- this.fit[3] + delta[i] <- this.fit[4] } - delta[is.na(delta)]<-0 - - #amount of change - this.change<-sum((old.m-m)^2) + delta[is.na(delta)] <- 0 + + # amount 
of change + this.change <- sum((old.m - m)^2) } - cuts<-c(-Inf, m, Inf) - fit<-fit*0 - for(j in 2:length(cuts)) + cuts <- c(-Inf, m, Inf) + fit <- fit * 0 + for (j in 2:length(cuts)) { - sel<-which(x >= cuts[j-1] & x < cuts[j]) - use.s1<-which(1:length(m) >= (j-1)) - s.to.use<-s2 - s.to.use[use.s1]<-s1[use.s1] - for(i in 1:ncol(fit)) + sel <- which(x >= cuts[j - 1] & x < cuts[j]) + use.s1 <- which(1:length(m) >= (j - 1)) + s.to.use <- s2 + s.to.use[use.s1] <- s1[use.s1] + for (i in 1:ncol(fit)) { - if(s.to.use[i] != 0) - { - fit[sel,i]<-dnorm(x[sel], mean=m[i], sd = s.to.use[i]) * s.to.use[i] *delta[i] + if (s.to.use[i] != 0) { + fit[sel, i] <- dnorm(x[sel], mean = m[i], sd = s.to.use[i]) * s.to.use[i] * delta[i] } } } - - if(do.plot) - { - plot(x,y,cex=.1,main=paste("bw=",bw)) - sum.fit<-apply(fit, 1, sum) - lines(x,sum.fit) - abline(v=m) - cols<-c("red","green","blue","cyan","brown","black",rep("grey",100)) - for(i in 1:length(m)) + + if (do.plot) { + plot(x, y, cex = .1, main = paste("bw=", bw)) + sum.fit <- apply(fit, 1, sum) + lines(x, sum.fit) + abline(v = m) + cols <- c("red", "green", "blue", "cyan", "brown", "black", rep("grey", 100)) + for (i in 1:length(m)) { - lines(x, fit[,i],col=cols[i]) + lines(x, fit[, i], col = cols[i]) } } - area<-delta*(s1+s2)/2 - rss<-sum((y-apply(fit,1,sum))^2) - l<-length(x) - bic<-l*log(rss/l)+4*length(m)*log(l)*BIC.factor - results[[bw.n]]<-cbind(m,s1,s2,delta,area) - bic.rec[bw.n]<-bic - }else{ - results[[bw.n]]<-NA - bic.rec[bw.n]<-Inf - results[[bw.n]]<-results[[bw.n+1]] - + area <- delta * (s1 + s2) / 2 + rss <- sum((y - apply(fit, 1, sum))^2) + l <- length(x) + bic <- l * log(rss / l) + 4 * length(m) * log(l) * BIC.factor + results[[bw.n]] <- cbind(m, s1, s2, delta, area) + bic.rec[bw.n] <- bic + } else { + results[[bw.n]] <- NA + bic.rec[bw.n] <- Inf + results[[bw.n]] <- results[[bw.n + 1]] } } - sel<-which(bic.rec == min(bic.rec, na.rm=TRUE)) - if(length(sel) > 1) sel<-sel[which(all.bw[sel]==max(all.bw[sel]))] - rec<-new("list") - rec$param<-results[[sel]] - rec$smoother.pks<-smoother.pk.rec - rec$smoother.vlys<-smoother.vly.rec - rec$all.param<-results - rec$bic<-bic.rec + sel <- which(bic.rec == min(bic.rec, na.rm = TRUE)) + if (length(sel) > 1) sel <- sel[which(all.bw[sel] == max(all.bw[sel]))] + rec <- new("list") + rec$param <- results[[sel]] + rec$smoother.pks <- smoother.pk.rec + rec$smoother.vlys <- smoother.vly.rec + rec$all.param <- results + rec$bic <- bic.rec return(rec) } - + ############# - normix<-function(that.curve, pks, vlys, ignore=0.1, max.iter=50, prob.cut=1e-2) - { - x<-that.curve[,1] - y<-that.curve[,2] - - if(length(pks)==1) - { - miu<-sum(x*y)/sum(y) - sigma<-sqrt(sum(y*(x-miu)^2)/sum(y)) - fitted<-dnorm(x, mean=miu, sd=sigma) - this.sel<-y>0 & fitted/dnorm(miu,mean=miu,sd=sigma)>prob.cut - sc<-exp(sum(fitted[this.sel]^2*log(y[this.sel]/fitted[this.sel])/sum(fitted[this.sel]^2))) - }else{ - pks<-pks[order(pks)] - vlys<-vlys[order(vlys)] - l<-length(pks) - miu<-sigma<-sc<-pks - w<-matrix(0,nrow=l, ncol=length(x)) - - for(m in 1:l) + normix <- function(that.curve, pks, vlys, ignore = 0.1, max.iter = 50, prob.cut = 1e-2) { + x <- that.curve[, 1] + y <- that.curve[, 2] + + if (length(pks) == 1) { + miu <- sum(x * y) / sum(y) + sigma <- sqrt(sum(y * (x - miu)^2) / sum(y)) + fitted <- dnorm(x, mean = miu, sd = sigma) + this.sel <- y > 0 & fitted / dnorm(miu, mean = miu, sd = sigma) > prob.cut + sc <- exp(sum(fitted[this.sel]^2 * log(y[this.sel] / fitted[this.sel]) / sum(fitted[this.sel]^2))) + } else { + pks <- 
pks[order(pks)] + vlys <- vlys[order(vlys)] + l <- length(pks) + miu <- sigma <- sc <- pks + w <- matrix(0, nrow = l, ncol = length(x)) + + for (m in 1:l) { - this.low<-max(vlys[vlys<=pks[m]]) - this.high<-min(vlys[vlys>=pks[m]]) - this.x<-x[x>=this.low & x<=this.high] - this.y<-y[x>=this.low & x<=this.high] - - miu[m]<-sum(this.x*this.y)/sum(this.y) - #if(sum(this.y>0) > 1) - #{ - sigma[m]<-sqrt(sum(this.y*(this.x-miu[m])^2)/sum(this.y)) - #}else{ + this.low <- max(vlys[vlys <= pks[m]]) + this.high <- min(vlys[vlys >= pks[m]]) + this.x <- x[x >= this.low & x <= this.high] + this.y <- y[x >= this.low & x <= this.high] + + miu[m] <- sum(this.x * this.y) / sum(this.y) + # if(sum(this.y>0) > 1) + # { + sigma[m] <- sqrt(sum(this.y * (this.x - miu[m])^2) / sum(this.y)) + # }else{ # sigma[m]<-mean(diff(this.x))/2 - #} - fitted<-dnorm(this.x, mean=miu[m], sd=sigma[m]) - this.sel<-this.y>0 & fitted/dnorm(miu[m],mean=miu[m],sd=sigma[m]) > prob.cut - sc[m]<-exp(sum(fitted[this.sel]^2*log(this.y[this.sel]/fitted[this.sel])/sum(fitted[this.sel]^2))) - #sc[m]<-lm(this.y[this.sel]~fitted[this.sel]+0)$coef + # } + fitted <- dnorm(this.x, mean = miu[m], sd = sigma[m]) + this.sel <- this.y > 0 & fitted / dnorm(miu[m], mean = miu[m], sd = sigma[m]) > prob.cut + sc[m] <- exp(sum(fitted[this.sel]^2 * log(this.y[this.sel] / fitted[this.sel]) / sum(fitted[this.sel]^2))) + # sc[m]<-lm(this.y[this.sel]~fitted[this.sel]+0)$coef } - - to.erase<-which(is.na(miu) | is.na(sigma) | sigma==0 | is.na(sc)) - if(length(to.erase)>0) - { - l<-l-length(to.erase) - miu<-miu[-to.erase] - sigma<-sigma[-to.erase] - sc<-sc[-to.erase] - w<-w[-to.erase,] + + to.erase <- which(is.na(miu) | is.na(sigma) | sigma == 0 | is.na(sc)) + if (length(to.erase) > 0) { + l <- l - length(to.erase) + miu <- miu[-to.erase] + sigma <- sigma[-to.erase] + sc <- sc[-to.erase] + w <- w[-to.erase, ] } - - direc<-1 - diff<-1000 - iter<-0 - - while(diff>0.05 & iter0 & fitted/dnorm(miu,mean=miu,sd=sigma) > prob.cut - sc<-exp(sum(fitted[this.sel]^2*log(y[this.sel]/fitted[this.sel])/sum(fitted[this.sel]^2))) - #sc<-lm(y[this.sel]~fitted[this.sel]+0)$coef - break; + + direc <- 1 + diff <- 1000 + iter <- 0 + + while (diff > 0.05 & iter < max.iter) { + iter <- iter + 1 + if (l == 1) { + miu <- sum(x * y) / sum(y) + sigma <- sqrt(sum(y * (x - miu)^2) / sum(y)) + fitted <- dnorm(x, mean = miu, sd = sigma) + this.sel <- y > 0 & fitted / dnorm(miu, mean = miu, sd = sigma) > prob.cut + sc <- exp(sum(fitted[this.sel]^2 * log(y[this.sel] / fitted[this.sel]) / sum(fitted[this.sel]^2))) + # sc<-lm(y[this.sel]~fitted[this.sel]+0)$coef + break } - miu.0<-miu - sigma.0<-sigma - sc.0<-sc - - all.w<-y*0 - for(m in 1:l) + miu.0 <- miu + sigma.0 <- sigma + sc.0 <- sc + + all.w <- y * 0 + for (m in 1:l) { - all.w<-all.w+dnorm(x,mean=miu[m],sd=sigma[m])*sc[m] + all.w <- all.w + dnorm(x, mean = miu[m], sd = sigma[m]) * sc[m] } - - for(m in 1:l) + + for (m in 1:l) { - w[m,]<-dnorm(x,mean=miu[m],sd=sigma[m])*sc[m]/all.w + w[m, ] <- dnorm(x, mean = miu[m], sd = sigma[m]) * sc[m] / all.w } - - if(sum(is.na(w)) >0) break; - - for(m in 1:l) + + if (sum(is.na(w)) > 0) break + + for (m in 1:l) { - this.y<-y*w[m,] - miu[m]<-sum(x*this.y)/sum(this.y) - sigma[m]<-sqrt(sum(this.y*(x-miu[m])^2)/sum(this.y)) - fitted<-dnorm(x,mean=miu[m],sd=sigma[m]) - this.sel<-this.y>0 & fitted/dnorm(miu[m],mean=miu[m],sd=sigma[m])> prob.cut - sc[m]<-exp(sum(fitted[this.sel]^2*log(this.y[this.sel]/fitted[this.sel])/sum(fitted[this.sel]^2))) - #sc[m]<-lm(this.y[this.sel]~fitted[this.sel]+0)$coef + this.y <- y 
* w[m, ] + miu[m] <- sum(x * this.y) / sum(this.y) + sigma[m] <- sqrt(sum(this.y * (x - miu[m])^2) / sum(this.y)) + fitted <- dnorm(x, mean = miu[m], sd = sigma[m]) + this.sel <- this.y > 0 & fitted / dnorm(miu[m], mean = miu[m], sd = sigma[m]) > prob.cut + sc[m] <- exp(sum(fitted[this.sel]^2 * log(this.y[this.sel] / fitted[this.sel]) / sum(fitted[this.sel]^2))) + # sc[m]<-lm(this.y[this.sel]~fitted[this.sel]+0)$coef } - diff<-sum((miu.0-miu)^2) - - www<-w - for(m in 1:l) + diff <- sum((miu.0 - miu)^2) + + www <- w + for (m in 1:l) { - www[m,]<-www[m,]*y + www[m, ] <- www[m, ] * y } - www<-apply(www,1,sum) - www[which(is.na(sc))]<-0 - www<-www/sum(www) - max.erase<-max(1, round(l/5)) - - to.erase<-which(www <= min(ignore, www[order(www, na.last=FALSE)[max.erase]])) - - if(length(to.erase)>0) - { - l<-l-length(to.erase) - miu<-miu[-to.erase] - sigma<-sigma[-to.erase] - sc<-sc[-to.erase] - w<-w[-to.erase,] - diff<-1000 + www <- apply(www, 1, sum) + www[which(is.na(sc))] <- 0 + www <- www / sum(www) + max.erase <- max(1, round(l / 5)) + + to.erase <- which(www <= min(ignore, www[order(www, na.last = FALSE)[max.erase]])) + + if (length(to.erase) > 0) { + l <- l - length(to.erase) + miu <- miu[-to.erase] + sigma <- sigma[-to.erase] + sc <- sc[-to.erase] + w <- w[-to.erase, ] + diff <- 1000 } } } - l<-length(miu) - if(l==1) - { - rec<-matrix(c(miu, sigma, sc),nrow=1) - }else{ - rec<-cbind(miu,sigma,sc) + l <- length(miu) + if (l == 1) { + rec <- matrix(c(miu, sigma, sc), nrow = 1) + } else { + rec <- cbind(miu, sigma, sc) } - colnames(rec)<-c("miu","sigma","scale") + colnames(rec) <- c("miu", "sigma", "scale") return(rec) } - + ########## - - normix.bic<-function(x,y,power=2, do.plot=FALSE, bw=c(15,30,60), eliminate=.05, max.iter=50, BIC.factor=2) - { - all.bw<-bw[order(bw)] - sel<-y>1e-5 - x<-x[sel] - y<-y[sel] - sel<-order(x) - y<-y[sel] - x<-x[sel] - results<-new("list") - smoother.pk.rec<-smoother.vly.rec<-new("list") - bic.rec<-all.bw - - if(do.plot) - { - par(mfrow=c(ceiling(length(all.bw)/2),2)) - par(mar=c(1,1,1,1)) + + normix.bic <- function(x, y, power = 2, do.plot = FALSE, bw = c(15, 30, 60), eliminate = .05, max.iter = 50, BIC.factor = 2) { + all.bw <- bw[order(bw)] + sel <- y > 1e-5 + x <- x[sel] + y <- y[sel] + sel <- order(x) + y <- y[sel] + x <- x[sel] + results <- new("list") + smoother.pk.rec <- smoother.vly.rec <- new("list") + bic.rec <- all.bw + + if (do.plot) { + par(mfrow = c(ceiling(length(all.bw) / 2), 2)) + par(mar = c(1, 1, 1, 1)) } - - last.num.pks<-Inf - - for(bw.n in length(all.bw):1) + + last.num.pks <- Inf + + for (bw.n in length(all.bw):1) { - bw<-all.bw[bw.n] - this.smooth<-ksmooth(x,y, kernel="normal", bandwidth=bw) - turns<-find.turn.point(this.smooth$y) - pks<-this.smooth$x[turns$pks] - vlys<-c(-Inf, this.smooth$x[turns$vlys], Inf) - - smoother.pk.rec[[bw.n]]<-pks - smoother.vly.rec[[bw.n]]<-vlys - if(length(pks) != last.num.pks) - { - last.num.pks<-length(pks) - aaa<-normix(cbind(x,y), pks=pks, vlys=vlys, ignore=eliminate, max.iter=max.iter) - - total.fit<-x*0 - for(i in 1:nrow(aaa)) + bw <- all.bw[bw.n] + this.smooth <- ksmooth(x, y, kernel = "normal", bandwidth = bw) + turns <- find.turn.point(this.smooth$y) + pks <- this.smooth$x[turns$pks] + vlys <- c(-Inf, this.smooth$x[turns$vlys], Inf) + + smoother.pk.rec[[bw.n]] <- pks + smoother.vly.rec[[bw.n]] <- vlys + if (length(pks) != last.num.pks) { + last.num.pks <- length(pks) + aaa <- normix(cbind(x, y), pks = pks, vlys = vlys, ignore = eliminate, max.iter = max.iter) + + total.fit <- x * 0 + for (i in 
1:nrow(aaa)) { - total.fit<-total.fit+dnorm(x,mean=aaa[i,1], sd=aaa[i,2])*aaa[i,3] + total.fit <- total.fit + dnorm(x, mean = aaa[i, 1], sd = aaa[i, 2]) * aaa[i, 3] } - - if(do.plot) - { - plot(x,y,cex=.1,main=paste("bw=",bw)) - abline(v=aaa[,1]) - cols<-c("red","green","blue","cyan","brown","black",rep("grey",100)) - for(i in 1:nrow(aaa)) + + if (do.plot) { + plot(x, y, cex = .1, main = paste("bw=", bw)) + abline(v = aaa[, 1]) + cols <- c("red", "green", "blue", "cyan", "brown", "black", rep("grey", 100)) + for (i in 1:nrow(aaa)) { - lines(x, dnorm(x,mean=aaa[i,1], sd=aaa[i,2])*aaa[i,3],col=cols[i]) + lines(x, dnorm(x, mean = aaa[i, 1], sd = aaa[i, 2]) * aaa[i, 3], col = cols[i]) } } - - rss<-sum((y-total.fit)^2) - l<-length(x) - bic<-l*log(rss/l)+3*nrow(aaa)*log(l)*BIC.factor - results[[bw.n]]<-aaa - bic.rec[bw.n]<-bic - }else{ - bic.rec[bw.n]<-Inf - results[[bw.n]]<-results[[bw.n+1]] - + + rss <- sum((y - total.fit)^2) + l <- length(x) + bic <- l * log(rss / l) + 3 * nrow(aaa) * log(l) * BIC.factor + results[[bw.n]] <- aaa + bic.rec[bw.n] <- bic + } else { + bic.rec[bw.n] <- Inf + results[[bw.n]] <- results[[bw.n + 1]] } } - sel<-which(bic.rec == min(bic.rec)) - if(length(sel) > 1) sel<-sel[which(all.bw[sel]==max(all.bw[sel]))] - rec<-new("list") - rec$param<-results[[sel]] - rec$smoother.pks<-smoother.pk.rec - rec$smoother.vlys<-smoother.vly.rec - rec$all.param<-results - rec$bic<-bic.rec + sel <- which(bic.rec == min(bic.rec)) + if (length(sel) > 1) sel <- sel[which(all.bw[sel] == max(all.bw[sel]))] + rec <- new("list") + rec$param <- results[[sel]] + rec$smoother.pks <- smoother.pk.rec + rec$smoother.vlys <- smoother.vly.rec + rec$all.param <- results + rec$bic <- bic.rec return(rec) } - + ########## - - if(is.na(min.bw)) min.bw<-diff(range(a[,2], na.rm=TRUE))/60 - if(is.na(max.bw)) max.bw<-diff(range(a[,2], na.rm=TRUE))/15 - if(min.bw >= max.bw) min.bw<-max.bw/4 - - base.curve<-unique(a[,2]) - all.span<-range(base.curve) - base.curve<-base.curve[order(base.curve)] - base.curve<-cbind(base.curve, base.curve*0) - all.times<-base.curve[,1] - if(all.times[1]>0) all.times<-c(0,all.times) - all.times<-c(all.times, 2*all.times[length(all.times)]-all.times[length(all.times)-1]) - all.times<-(all.times[1:(length(all.times)-1)]+all.times[2:length(all.times)])/2 - all.times<-all.times[2:length(all.times)]-all.times[1:(length(all.times)-1)] - - this.features<-matrix(0,nrow=1,ncol=5) - colnames(this.features)<-c("mz","pos","sd1","sd2","area") - n=1 - start=1 - nrowa<-nrow(a) - - a.breaks<-c(0, which(a[1:(nrowa-1),4] != a[2:nrowa,4]), nrowa) - mz.sd.rec<-NA - - for(nnn in 1:(length(a.breaks)-1)) + + if (is.na(min.bw)) min.bw <- diff(range(a[, 2], na.rm = TRUE)) / 60 + if (is.na(max.bw)) max.bw <- diff(range(a[, 2], na.rm = TRUE)) / 15 + if (min.bw >= max.bw) min.bw <- max.bw / 4 + + base.curve <- unique(a[, 2]) + all.span <- range(base.curve) + base.curve <- base.curve[order(base.curve)] + base.curve <- cbind(base.curve, base.curve * 0) + all.times <- base.curve[, 1] + if (all.times[1] > 0) all.times <- c(0, all.times) + all.times <- c(all.times, 2 * all.times[length(all.times)] - all.times[length(all.times) - 1]) + all.times <- (all.times[1:(length(all.times) - 1)] + all.times[2:length(all.times)]) / 2 + all.times <- all.times[2:length(all.times)] - all.times[1:(length(all.times) - 1)] + + this.features <- matrix(0, nrow = 1, ncol = 5) + colnames(this.features) <- c("mz", "pos", "sd1", "sd2", "area") + n <- 1 + start <- 1 + nrowa <- nrow(a) + + a.breaks <- c(0, which(a[1:(nrowa - 1), 4] != 
a[2:nrowa, 4]), nrowa) + mz.sd.rec <- NA + + for (nnn in 1:(length(a.breaks) - 1)) { - this<-a[(a.breaks[nnn]+1):a.breaks[nnn+1],] - if(is.null(nrow(this))) this<-matrix(this, nrow=1) - this<-this[order(this[,2]),] - if(is.null(nrow(this))) this<-matrix(this, nrow=1) - mz.sd.rec<-c(mz.sd.rec, sd(this[,1])) - - this.count.1<-nrow(this) - if(this.count.1<=10) - { - if(this.count.1>1) - { - this.inte<-interpol.area(this[,2], this[,3], base.curve[,1], all.times) - xxx<-c(median(this[,1]),median(this[,2]), sd(this[,2]), sd(this[,2]), this.inte) - }else{ - this.time.weights<-all.times[which(base.curve[,1] %in% this[2])] - xxx<-c(this[1], this[2], NA, NA, this[3]*this.time.weights) + this <- a[(a.breaks[nnn] + 1):a.breaks[nnn + 1], ] + if (is.null(nrow(this))) this <- matrix(this, nrow = 1) + this <- this[order(this[, 2]), ] + if (is.null(nrow(this))) this <- matrix(this, nrow = 1) + mz.sd.rec <- c(mz.sd.rec, sd(this[, 1])) + + this.count.1 <- nrow(this) + if (this.count.1 <= 10) { + if (this.count.1 > 1) { + this.inte <- interpol.area(this[, 2], this[, 3], base.curve[, 1], all.times) + xxx <- c(median(this[, 1]), median(this[, 2]), sd(this[, 2]), sd(this[, 2]), this.inte) + } else { + this.time.weights <- all.times[which(base.curve[, 1] %in% this[2])] + xxx <- c(this[1], this[2], NA, NA, this[3] * this.time.weights) } - this.features<-rbind(this.features,xxx) - }else{ - this.span<-range(this[,2]) - this.curve<-base.curve[base.curve[,1]>=this.span[1] & base.curve[,1] <=this.span[2],] - this.curve[this.curve[,1] %in% this[,2],2]<-this[,3] - - bw<-min(max(bandwidth*(this.span[2]-this.span[1]),min.bw), max.bw) - bw<-seq(bw, 2*bw, length.out=3) - if(bw[1] > 1.5*min.bw) bw<-c(max(min.bw, bw[1]/2), bw) - - if(shape.model == "Gaussian") - { - xxx<-normix.bic(this.curve[,1], this.curve[,2], power=power, bw=bw, eliminate=component.eliminate, BIC.factor=BIC.factor)$param - if(nrow(xxx) == 1) - { - xxx<-c(xxx[1,1:2],xxx[1,2],xxx[1,3]) - }else{ - xxx<-cbind(xxx[,1:2],xxx[,2],xxx[,3]) + this.features <- rbind(this.features, xxx) + } else { + this.span <- range(this[, 2]) + this.curve <- base.curve[base.curve[, 1] >= this.span[1] & base.curve[, 1] <= this.span[2], ] + this.curve[this.curve[, 1] %in% this[, 2], 2] <- this[, 3] + + bw <- min(max(bandwidth * (this.span[2] - this.span[1]), min.bw), max.bw) + bw <- seq(bw, 2 * bw, length.out = 3) + if (bw[1] > 1.5 * min.bw) bw <- c(max(min.bw, bw[1] / 2), bw) + + if (shape.model == "Gaussian") { + xxx <- normix.bic(this.curve[, 1], this.curve[, 2], power = power, bw = bw, eliminate = component.eliminate, BIC.factor = BIC.factor)$param + if (nrow(xxx) == 1) { + xxx <- c(xxx[1, 1:2], xxx[1, 2], xxx[1, 3]) + } else { + xxx <- cbind(xxx[, 1:2], xxx[, 2], xxx[, 3]) } - }else{ - xxx<-bigauss.mix(this.curve[,1], this.curve[,2], sigma.ratio.lim=sigma.ratio.lim, bw=bw, power=power, estim.method=estim.method, eliminate=component.eliminate, BIC.factor=BIC.factor)$param[,c(1,2,3,5)] + } else { + xxx <- bigauss.mix(this.curve[, 1], this.curve[, 2], sigma.ratio.lim = sigma.ratio.lim, bw = bw, power = power, estim.method = estim.method, eliminate = component.eliminate, BIC.factor = BIC.factor)$param[, c(1, 2, 3, 5)] } - - if(is.null(nrow(xxx))) - { - this.features<-rbind(this.features,c(median(this[,1]),xxx)) - }else{ - for(m in 1:nrow(xxx)) + + if (is.null(nrow(xxx))) { + this.features <- rbind(this.features, c(median(this[, 1]), xxx)) + } else { + for (m in 1:nrow(xxx)) { - this.d<-abs(this[,2]-xxx[m,1]) - this.features<-rbind(this.features, 
c(mean(this[which(this.d==min(this.d)),1]),xxx[m,])) + this.d <- abs(this[, 2] - xxx[m, 1]) + this.features <- rbind(this.features, c(mean(this[which(this.d == min(this.d)), 1]), xxx[m, ])) } } } - - #message(nnn) + + # message(nnn) } - this.features<-this.features[-1,] - this.features<-this.features[order(this.features[,1], this.features[,2]),] - this.features<-this.features[which(apply(this.features[,3:4],1,min)>sd.cut[1] & apply(this.features[,3:4],1,max) sd.cut[1] & apply(this.features[, 3:4], 1, max) < sd.cut[2]), ] + rownames(this.features) <- NULL + + if (do.plot) { + par(mfrow = c(2, 2)) + plot(c(-1, 1), c(-1, 1), type = "n", xlab = "", ylab = "", main = "", axes = FALSE) + text(x = 0, y = 0, "Estimate peak \n area/location", cex = 1.5) + hist(mz.sd.rec, xlab = "m/z SD", ylab = "Frequency", main = "m/z SD distribution") + hist(c(this.features[, 3], this.features[, 4]), xlab = "Retention time SD", ylab = "Frequency", main = "Retention time SD distribution") + hist(log10(this.features[, 5]), xlab = "peak strength (log scale)", ylab = "Frequency", main = "Peak strength distribution") } - + return(this.features) } diff --git a/R/unsupervised.R b/R/unsupervised.R index 2de6b8b3..61b2aabf 100644 --- a/R/unsupervised.R +++ b/R/unsupervised.R @@ -48,64 +48,6 @@ sort_samples_by_acquisition_number <- function (filenames) { sort(unlist(filenames)) } -extract_features <- function( - cluster, - filenames, - min_pres, - min_run, - mz_tol, - baseline_correct, - baseline_correct_noise_percentile, - intensity_weighted, - min_bandwidth, - max_bandwidth, - sd_cut, - sigma_ratio_lim, - shape_model, - peak_estim_method, - component_eliminate, - moment_power, - BIC_factor -) { - clusterExport(cluster, list( - 'proc.cdf', - 'prof.to.features', - 'load.lcms', - 'adaptive.bin', - 'find.turn.point', - 'combine.seq.3', - 'cont.index', - 'interpol.area' - )) - - parLapply(cluster, filenames, function(filename) { - profile <- proc.cdf( - filename = filename, - min.pres = min_pres, - min.run = min_run, - tol = mz_tol, - baseline.correct = baseline_correct, - baseline.correct.noise.percentile = baseline_correct_noise_percentile, - intensity.weighted = intensity_weighted, - do.plot = FALSE, - cache = FALSE - ) - features <- prof.to.features( - a = profile, - min.bw = min_bandwidth, - max.bw = max_bandwidth, - sd.cut = sd_cut, - sigma.ratio.lim = sigma_ratio_lim, - shape.model = shape_model, - estim.method = peak_estim_method, - component.eliminate = component_eliminate, - power = moment_power, - BIC.factor = BIC_factor, - do.plot = FALSE - ) - }) -} - align_features <- function(sample_names, ...) { aligned <- feature.align(...) 
feature_names <- seq_len(nrow(aligned$pk.times)) diff --git a/conda/environment-dev.yaml b/conda/environment-dev.yaml index 663bf593..7b7b3b83 100644 --- a/conda/environment-dev.yaml +++ b/conda/environment-dev.yaml @@ -26,4 +26,9 @@ dependencies: - r-tidyr - r-stringr - r-datacomparer - - r-tibble \ No newline at end of file + - r-tibble + - r-languageserver + - r-jsonlite + - r-styler + - r-datacomparer + - radian diff --git a/tests/testdata/extracted_features/extracted_0.parquet b/tests/testdata/extracted_features/extracted_0.parquet new file mode 100644 index 00000000..7a8661b8 Binary files /dev/null and b/tests/testdata/extracted_features/extracted_0.parquet differ diff --git a/tests/testdata/extracted_features/extracted_1.parquet b/tests/testdata/extracted_features/extracted_1.parquet new file mode 100644 index 00000000..7a8661b8 Binary files /dev/null and b/tests/testdata/extracted_features/extracted_1.parquet differ diff --git a/tests/testdata/extracted_features/extracted_2.parquet b/tests/testdata/extracted_features/extracted_2.parquet new file mode 100644 index 00000000..7a8661b8 Binary files /dev/null and b/tests/testdata/extracted_features/extracted_2.parquet differ diff --git a/tests/testdata/proc_cdf_expected.Rds b/tests/testdata/proc_cdf_expected.Rds new file mode 100644 index 00000000..b64a84d8 Binary files /dev/null and b/tests/testdata/proc_cdf_expected.Rds differ diff --git a/tests/testdata/prof_to_features_expected.Rds b/tests/testdata/prof_to_features_expected.Rds new file mode 100644 index 00000000..97791b3b Binary files /dev/null and b/tests/testdata/prof_to_features_expected.Rds differ diff --git a/tests/testthat/test-extract_features.R b/tests/testthat/test-extract_features.R new file mode 100644 index 00000000..051d1f44 --- /dev/null +++ b/tests/testthat/test-extract_features.R @@ -0,0 +1,33 @@ +test_that("extract single feature works", { + filename <- c("../testdata/mbr_test0.mzml") + + extracted_features <- proc.cdf(filename, cache = FALSE) + actual <- prof.to.features(extracted_features, do.plot = FALSE) + + expected <- as.data.frame(arrow::read_parquet("../testdata/extracted_features/extracted_0.parquet")) + actual <- as.data.frame(actual) + + actual <- unique(actual) + expected <- unique(expected) + + keys <- c("mz", "pos", "sd1", "sd2") + + actual <- dplyr::arrange_at( + actual, keys + ) + expected <- dplyr::arrange_at( + expected, keys + ) + + report <- dataCompareR::rCompare(actual, expected, keys = keys, roundDigits = 4, mismatches = 100000) + dataCompareR::saveReport(report, reportName = "extract_features_0", showInViewer = FALSE, HTMLReport = FALSE, mismatchCount = 10000) + + expect_true(nrow(report$rowMatching$inboth) >= 0.9 * nrow(expected)) + + incommon <- as.numeric(rownames(report$rowMatching$inboth)) + + subset_actual <- actual %>% dplyr::slice(incommon) + subset_expected <- expected %>% dplyr::slice(incommon) + + expect_equal(subset_actual$area, subset_expected$area, tolerance = 0.01 * max(expected$area)) +}) diff --git a/tests/testthat/test-proc.cdf.R b/tests/testthat/test-proc.cdf.R new file mode 100644 index 00000000..5d987406 --- /dev/null +++ b/tests/testthat/test-proc.cdf.R @@ -0,0 +1,6 @@ +test_that("test proc.cdf test", { + filename <- c('../testdata/mbr_test0.mzml') + actual <- proc.cdf(filename, cache = FALSE) + expected <- readRDS('../testdata/proc_cdf_expected.Rds') + expect_equal(actual, expected) +}) \ No newline at end of file diff --git a/tests/testthat/test-prof.to.features.R b/tests/testthat/test-prof.to.features.R new file mode 
100644 index 00000000..5b43dcdf --- /dev/null +++ b/tests/testthat/test-prof.to.features.R @@ -0,0 +1,11 @@ +test_that("prof.to.features works", { + extracted_features <- readRDS('../testdata/proc_cdf_expected.Rds') + actual <- prof.to.features( + extracted_features, + sd.cut = c(0.1, 100), + sigma.ratio.lim = c(0.1, 10), + do.plot = FALSE) + + expected <- readRDS("../testdata/prof_to_features_expected.Rds") + expect_equal(actual, expected) +}) \ No newline at end of file
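
Reviewer note, not part of the patch: a minimal usage sketch of how the refactored pieces fit together after this change, mirroring the calls made in the new tests. The file path, cluster size, and parameter values below are illustrative assumptions lifted from the defaults and test fixtures shown above, not a definitive pipeline.

# R usage sketch (assumes the package functions from this patch are attached)
library(parallel)

filename <- "../testdata/mbr_test0.mzml"        # test file referenced by the added tests (placeholder path)

# single-file path: load_data() -> adaptive.bin() -> cont.index(), then peak fitting
profile  <- proc.cdf(filename, cache = FALSE)
features <- prof.to.features(profile,
                             sd.cut = c(0.1, 100),
                             sigma.ratio.lim = c(0.1, 10),
                             do.plot = FALSE)

# parallel path: extract_features() runs the same two calls per file on a cluster;
# note it now also exports the new helpers (load_file, load_data, compute_breaks, ...)
cl <- makeCluster(2)                             # cluster size is arbitrary here
feature_tables <- extract_features(
  cluster = cl, filenames = list(filename),
  min_pres = 0.5, min_run = 12, mz_tol = 1e-05,
  baseline_correct = 0.0, baseline_correct_noise_percentile = 0.05,
  intensity_weighted = FALSE,
  min_bandwidth = NA, max_bandwidth = NA,
  sd_cut = c(0.1, 100), sigma_ratio_lim = c(0.1, 10),
  shape_model = "bi-Gaussian", peak_estim_method = "moment",
  component_eliminate = 0.01, moment_power = 1, BIC_factor = 2
)
stopCluster(cl)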