Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

This is an attempt at iregnet pred #2

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
76 changes: 31 additions & 45 deletions iregnet.predictions.R
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
source("packages.R")

testFold.csv.vec <- Sys.glob(file.path(
testFold.dir.vec <- Sys.glob(file.path(
"data", "*", "cv", "*", "testFolds",
"*"))

n.pred.vec <- sapply(testFold.csv.vec, function(testFold.csv){
path <- file.path(dirname(testFold.csv), "models", "*", "predictions.csv")
n.pred.vec <- sapply(testFold.dir.vec, function(testFold.dir){
path <- file.path(dirname(testFold.dir), "models", "*", "predictions.csv")
length(Sys.glob(path))
})
table(n.pred.vec)

OneFold <- function(testFold.csv){
OneFold <- function(testFold.dir){
library(data.table)
test.fold <- as.integer(basename(testFold.csv))
cv.type.dir <- dirname(dirname(testFold.csv))
test.fold <- as.integer(basename(testFold.dir))
cv.type.dir <- dirname(dirname(testFold.dir))
data.dir <- dirname(dirname(cv.type.dir))
folds.csv <- file.path(cv.type.dir, "folds.csv")
folds.dt <- fread(folds.csv)
data.list <- list()
for(data.type in c("inputs", "outputs")){
csv.xz <- file.path(data.dir, paste0(data.type, ".csv.xz"))
dt <- fread(cmd=paste("xzcat", csv.xz))
stopifnot(nrow(dt) == nrow(folds.dt))
m <- as.matrix(dt[, -1, with=FALSE])
rownames(m) <- dt$sequenceID
data.list[[data.type]] <- m
Expand All @@ -34,14 +37,9 @@ OneFold <- function(testFold.csv){
}
keep.inputs <- apply(is.finite(data.list$inputs), 2, all)
data.list$inputs <- data.list$inputs[, keep.inputs, drop=FALSE]
#order.dt <- fread(testFold.csv)
testFold.dt <- fread(testFold.csv)
all.id.vec <- rownames(data.list$inputs)
id.list <- list(
#train=order.dt$sequenceID,
train=testFold.dt$sequenceID,
#test=all.id.vec[!all.id.vec %in% order.dt$sequenceID])
test=all.id.vec[!all.id.vec %in% testFold.dt$sequenceID])
train=folds.dt[fold != test.fold, sequenceID],
test=folds.dt[fold == test.fold, sequenceID])
set.list <- list()
for(set.name in names(id.list)){
set.id.vec <- id.list[[set.name]]
Expand All @@ -50,13 +48,28 @@ OneFold <- function(testFold.csv){
})
}

#### Stopped HERE


result.list <- list()
pred.mat.list <- list()
size.i.vec <- seq_along(train.size.vec)
##size.i.vec <- length(train.size.vec)

####

fit.list <- list()
scale.i.list <- list( estimated = list( init= NA, estimate = TRUE),
fixed = list( init= 1, estimate = FALSE) )

X.train <- matrix(set.list$train$inputs, nrow(set.list$train$inputs), ncol(set.list$train$inputs))
Y.train <- matrix( set.list$train$outputs , nrow(set.list$train$outputs) , ncol(set.list$train$outputs))

for( model.type in c( "gaussian", "logistic", "extreme_value")){
for( scale.type in scale.i.list){
fit.list[length(fit.list) + 1] <- cv.iregnet(X.train, Y.train , family = model.type,
Copy link
Author

@andruuhurst andruuhurst Nov 1, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @tdhock,
the for loops iterate until they reach extreme_value; then I get this error:

Error in stopifnot_error(paste("family must be one of", paste(names(pfun.list), : family must be one of gaussian, logistic, exponential

Copy link
Author

@andruuhurst andruuhurst Nov 1, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried using iregnet( ...., extreme_value,..) instead, and it returned this error:

Error in dimnames(x) <- dn : 
  length of 'dimnames' [1] not equal to array extent

Any thoughts?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First of all, you should use cv.iregnet (which automatically performs selection of the regularization/penalty parameter) and not iregnet (which does not perform selection).

For a quick fix, I would suggest using only logistic and gaussian (remove extreme_value).

Also, please post a minimal reproducible example as an issue on https://github.com/anujkhare/iregnet/issues . Make sure to include the version of the software you are using, as reported by the packageDescription command. This is what it looks like on my system:

> packageDescription("iregnet")
Package: iregnet
Type: Package
Title: Regularized interval regression
Version: 0.1.0.9000
Author: Anuj Khare <khareanuj18@gmail.com>, Toby D Hocking
        <toby.hocking@r-project.org>, Jelle Goeman, Aditya Samantaray
        <aditya.samantaray1@gmail.com>
Maintainer: Anuj Khare <khareanuj18@gmail.com>, Aditya Samantaray
        <aditya.samantaray1@gmail.com>
Description: Interval regression with four types of censoring and
        elastic net regularization.
License: GPL-3
LazyData: TRUE
Suggests: ElemStatLearn, glmnet, testthat, knitr, rmarkdown
LinkingTo: Rcpp
Depends: R (>= 2.10)
Imports: ggplot2, utils, methods, stats, survival, foreach, doParallel,
        data.table, Matrix, namedCapture, penaltyLearning
RoxygenNote: 6.1.1
Remotes: tdhock/penaltyLearning
VignetteBuilder: knitr
RemoteType: github
RemoteHost: api.github.com
RemoteRepo: iregnet
RemoteUsername: anujkhare
RemoteRef: 4d77f047c3a00a5524e1cbe140226417e3aedd92
RemoteSha: 4d77f047c3a00a5524e1cbe140226417e3aedd92
GithubRepo: iregnet
GithubUsername: anujkhare
GithubRef: 4d77f047c3a00a5524e1cbe140226417e3aedd92
GithubSHA1: 4d77f047c3a00a5524e1cbe140226417e3aedd92
NeedsCompilation: yes
Packaged: 2019-07-19 18:40:17 UTC; th798
Built: R 3.6.1; x86_64-w64-mingw32; 2019-07-19 18:40:18 UTC; windows

-- File: C:/Users/th798/R/win-library/3.6/iregnet/Meta/package.rds 
> 

scale_init= scale.type$init ,estimate_scale= scale.type$estimate)
}
}


######

for(size.i in size.i.vec){
train.size <- train.size.vec[[size.i]]
maybe.both.inf <- set.list$train$outputs[1:train.size, ]
Expand Down Expand Up @@ -187,31 +200,4 @@ consistent.dt[pred.in.test != test.seqIDs]








folds.csv.vec <- Sys.glob("data/*/cv/*/folds.csv")

fit.list <- list()
scale.i.list <- list( estimated = list( init= "NA", estimate = TRUE), fixed = list( init= 1, estimate = FALSE) )
model.list <- list( "guassian", "logistic", "extreme_value")



table(n.pred.vec)
#take in test fold
#one fold



#for( model.type in model.i.list){
# for( scale.type in scale.i.list){
# fit.list <- c( fit.list , cv.iregnet(X.train, Y.train , family = model.type,
# scale_init= scale.type$init ,estimate_scale = scale.type$estimate))
# }
#}


## fwrite(result, iregnet.csv)