-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_sas_import.R
executable file
·54 lines (45 loc) · 2.07 KB
/
generate_sas_import.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#! /usr/bin/env Rscript
# Command-line interface, built with optparse. Package vignette:
# http://cran.r-project.org/web/packages/optparse/vignettes/optparse.pdf
suppressPackageStartupMessages(library("optparse"))

# Three options are accepted:
#   --dlm         field separator handed to read.csv() for the description file
#   -i / --input  description file; NA (the default) selects standard input
#   -o / --output generated SAS program; NA (the default) selects standard output
option_list <- list(
  make_option(
    "--dlm",
    action = "store",
    dest = "dlm",
    type = "character",
    default = "|",
    metavar = "'|'",
    help = "delimiter; defaults to pipe (|)"
  ),
  make_option(
    c("-i", "--input"),
    action = "store",
    dest = "input",
    type = "character",
    default = NA,
    metavar = "table_description.csv",
    help = "input file with 3 columns: variable, data_type, data_length (typically generated by csvdescribe.py); defaults to standard input"
  ),
  make_option(
    c("-o", "--output"),
    action = "store",
    dest = "output",
    type = "character",
    default = NA,
    metavar = "data_import.sas",
    help = "output sas program; defaults to standard output"
  )
)

# Parse the command line once; `opt` holds one element per `dest` above.
cli_parser <- OptionParser(option_list = option_list)
opt <- parse_args(cli_parser)
dlm <- opt$dlm
# Read the table description into `d1`, using `dlm` as the field separator.
# The description comes from --input, or from standard input when --input
# was not given (opt$input is NA).
if (is.na(opt$input)) {
  ifile <- file('stdin')
} else {
  ifile <- file(opt$input)
}

# read.csv() does not close a connection that was already open when it was
# handed over, so close it here whether or not the read succeeds.
d1 <- tryCatch({
  open(ifile)
  read.csv(ifile, sep = dlm)
}, finally = close(ifile))

# Fail early with a readable message when the expected columns are absent
# (typically because --dlm does not match the file); otherwise the script
# would silently emit an empty or malformed SAS program. `$` is used on
# purpose so this check accepts exactly what the later `d1$...` lookups accept.
if (is.null(d1$variable) || is.null(d1$data_type) || is.null(d1$data_length)) {
  stop("input must have the columns variable, data_type and data_length; ",
       "found: ", paste(names(d1), collapse = ", "),
       " (does --dlm match the input file?)",
       call. = FALSE)
}

# Open the output only after the input has been read and validated: opening
# with 'w' truncates the file, and a failed read should not destroy an
# existing program. `ofile` stays NA when the program goes to standard output.
if (is.na(opt$output)) {
  ofile <- NA
} else {
  ofile <- file(opt$output, open = 'w')
}
# Build the INFORMAT and INPUT statements of the generated data step from
# the description rows in `d1` (columns: variable, data_type, data_length).

# SAS variable names may contain only letters, digits and underscores and
# may not begin with a digit: replace every other character with "_", then
# prefix an underscore to names that start with a digit.
d1$variable <- gsub(pattern = '[^a-zA-Z0-9_]', replacement = '_',
                    x = d1$variable, perl = TRUE)
d1$variable <- sub(pattern = '^([0-9])', replacement = '_\\1',
                   x = d1$variable, perl = TRUE)

# %in% never returns NA, so a row with a missing data_type is treated as
# numeric instead of leaking an "NA" token into the generated code.
is_char <- d1$data_type %in% 'char'

# Informat widths. Character widths are at least 1; numeric widths are kept
# within 1..32. na.rm = TRUE makes a missing data_length fall back to the
# bound it is compared with (1 for character, 32 for numeric) rather than
# producing "NA.". as.integer() keeps large widths from being pasted in
# scientific notation (e.g. "1e+05").
char_width <- as.integer(pmax(d1$data_length, 1, na.rm = TRUE))
num_width <- as.integer(pmax(pmin(d1$data_length, 32, na.rm = TRUE), 1))

# One "<name> <informat>" line per variable, e.g. "id 8." or "name $20.".
informat_statement <- paste(
  d1$variable,
  ifelse(is_char, paste0('$', char_width, '.'), paste0(num_width, '.')),
  sep = ' ', collapse = '\n'
)
informat_statement <- paste('informat\n', informat_statement, '\n;\n')

# One "<name> [$]" line per variable; "$" marks character variables.
input_statement <- paste(
  d1$variable,
  ifelse(is_char, '$', ''),
  sep = ' ', collapse = '\n'
)
input_statement <- paste('input\n', input_statement, '\n;\n')
# Emit the generated SAS data step to --output, or to standard output when
# --output was not given. &dsn, &ifile and &dlm are left as SAS macro
# variable references in the generated program.

# Record length: the summed column widths plus 10, but never below 32767.
# Missing widths are ignored so the value cannot become "NA".
lrecl <- max(sum(d1$data_length, na.rm = TRUE) + 10, 32767)

sas_program <- paste0(
  "data &dsn ;\ninfile &ifile DLM=&dlm DSD MISSOVER LRECL=",
  sprintf('%d ', lrecl),
  "FIRSTOBS=2 ;\n",
  informat_statement,
  input_statement,
  'run ;\n'
)

# Write straight to the target instead of diverting stdout with sink(), and
# close the output connection afterwards (even if the write fails) so the
# file is flushed and the connection is not leaked.
to_file <- !is.na(opt$output)
tryCatch(
  cat(sas_program, file = if (to_file) ofile else ""),
  finally = if (to_file) close(ofile)
)