diff --git a/CHANGELOG.md b/CHANGELOG.md index b2834128091..a33579bc384 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,12 +17,21 @@ For more information about this file see also [Keep a Changelog](http://keepacha ### Added - Expanded initial conditions workflow for pool-based models, including PEcAn.data.land::prepare_pools to calculate pools from IC file (to be coupled with write.configs) + +- #1594 shiny/workflowPlot Adding interactiveness using ggploltly +- #1594 shiny/workflowPlot Load outputs from multiple runs of the model +- #1594 shiny/workflowPlot Ways to toggle geometries (e.g. geom_point vs. geom_line). +- #1594 shiny/workflowPlot Smoothing using geom_smooth (Slider for specifying moving window width) +- #1594 shiny/workflowPlot Comparing model output vs loaded data according to [tutorial](https://github.com/PecanProject/pecan/blob/develop/documentation/tutorials/AnalyzeOutput/modelVSdata.Rmd) + - Allow SIPNET and DALEC met files and model2netcdf to start or end mid year + ### Changed - Clean up directory structure: * Move `base` packages (`utils`, `settings`, `db`, `visualizaton`) to a `base` directory, for consistency with `modules` and `models` * Move `logger.*` functions out of the `PEcAn.utils` package and into the `pecan.logger` package +- #1594 shiny/workflowPlot Refactoring of code. `get_workflow_ids` in db/R/query.dplyr.R changed with `ensemble = FALSE`. Also allowing to load all workflow IDs. `load_data_single_run` and `var_names_all` also moved from shiny/workflowPlot/server.R to query.dplyr.R ## [1.5.10] - Prerelease ### Added diff --git a/base/db/R/query.dplyr.R b/base/db/R/query.dplyr.R index d9a26dd090b..46e032db099 100644 --- a/base/db/R/query.dplyr.R +++ b/base/db/R/query.dplyr.R @@ -143,10 +143,16 @@ get_workflow_ids <- function(bety, session, all.ids=FALSE) { ids <- unlist(query[names(query) == "workflow_id"], use.names = FALSE) } else { # Get all workflow IDs - ids <- workflows(bety, ensemble = TRUE) %>% - dplyr::distinct(workflow_id) %>% - dplyr::pull() %>% - sort(decreasing = TRUE) + + ids <- workflows(bety, ensemble = FALSE) %>% distinct(workflow_id) %>% collect %>% + .[["workflow_id"]] %>% sort(decreasing = TRUE) + # pull(.,workflow_id) %>% sort(decreasing = TRUE) + +# ids <- workflows(bety, ensemble = TRUE) %>% +# dplyr::distinct(workflow_id) %>% +# dplyr::pull() %>% +# sort(decreasing = TRUE) + } return(ids) } # get_workflow_ids @@ -271,9 +277,11 @@ load_data_single_run <- function(bety, workflow_id, run_id) { x <- ncdays2date(ncdf4::ncvar_get(nc, 'time'), ncdf4::ncatt_get(nc, 'time')) y <- ncdf4::ncvar_get(nc, var_name) b <- !is.na(x) & !is.na(y) & sw != 0 - dates <- if (is.na(dates)) x[b] else c(dates, x[b]) - dates <- as.Date(dates) - vals <- if (is.na(vals)) y[b] else c(vals, y[b]) + + dates <- if(is.na(dates)) x[b] else c(dates, x[b]) + dates <- as.POSIXct(dates) + vals <- if(is.na(vals)) y[b] else c(vals, y[b]) + xlab <- "Time" # Values of the data which we will plot valuesDF <- data.frame(dates,vals) diff --git a/shiny/workflowPlot/helper.R b/shiny/workflowPlot/helper.R index c4f2c9d28c4..7115f781bca 100644 --- a/shiny/workflowPlot/helper.R +++ b/shiny/workflowPlot/helper.R @@ -1,3 +1,4 @@ +# Helper function which checks and downloads required packages checkAndDownload<-function(packageNames) { for(packageName in packageNames) { if(!isInstalled(packageName)) { @@ -9,9 +10,11 @@ checkAndDownload<-function(packageNames) { isInstalled <- function(mypkg){ is.element(mypkg, installed.packages()[,1]) } -checkAndDownload(c('plotly','scales','dplyr')) +# checkAndDownload(c('plotly','scales','dplyr')) + # Stashing Code for file upload to shiny app # Based on https://shiny.rstudio.com/gallery/file-upload.html + # ui.R # tags$hr(), # fileInput('file1', 'Choose CSV File to upload data', @@ -29,7 +32,10 @@ checkAndDownload(c('plotly','scales','dplyr')) # 'Double Quote'='"', # 'Single Quote'="'"), # ''), +# textInput("inputRecordID", "Input Record ID for file", "1000011260"), +# textInput("formatID", "Format ID for file (Default CSV)", "5000000002"), # actionButton("load_data", "Load External Data") + # server.R # loadExternalData <-eventReactive(input$load_data,{ # inFile <- input$file1 diff --git a/shiny/workflowPlot/server.R b/shiny/workflowPlot/server.R index c0edc16f98f..d78bf8774cf 100644 --- a/shiny/workflowPlot/server.R +++ b/shiny/workflowPlot/server.R @@ -2,15 +2,17 @@ library(PEcAn.visualization) library(PEcAn.DB) library(PEcAn.settings) library(PEcAn.benchmark) +library(PEcAn.utils) library(shiny) library(ncdf4) library(ggplot2) # Helper allows to load functions and variables that could be shared both by server.R and ui.R -source('helper.R') +# source('helper.R') library(plotly) library(scales) library(lubridate) library(dplyr) +library(reshape2) # Maximum size of file allowed to be uploaded: 100MB options(shiny.maxRequestSize=100*1024^2) # Define server logic @@ -18,7 +20,7 @@ server <- shinyServer(function(input, output, session) { bety <- betyConnect() # Update all workflow ids observe({ - # Ideally get_workflow_ids function (line 137) in db/R/query.dplyr.R should take a flag to check + # get_workflow_ids function (line 137) in db/R/query.dplyr.R takes a flag to check # if we want to load all workflow ids. # get_workflow_id function from query.dplyr.R all_ids <- get_workflow_ids(bety, session,all.ids=TRUE) @@ -33,7 +35,7 @@ server <- shinyServer(function(input, output, session) { # Will return a list run_id_list <- c() for(w_id in w_ids){ - # For all the workflow ids + # For all the workflow ids r_ids <- get_run_ids(bety, w_id) for(r_id in r_ids){ # Each workflow id can have more than one run ids @@ -98,27 +100,25 @@ server <- shinyServer(function(input, output, session) { # Allows to load actual data (different from model output) following the tutorial # https://github.com/PecanProject/pecan/blob/develop/documentation/tutorials/AnalyzeOutput/modelVSdata.Rmd # @params: bety,settings,File_path,File_format - loadObservationData <- function(bety,settings,File_path,File_format){ - start.year<-as.numeric(lubridate::year(settings$run$start.date)) - end.year<-as.numeric(lubridate::year(settings$run$end.date)) - site.id<-settings$run$site$id + # loadObservationData <- function(bety,settings,File_path,File_format){ + loadObservationData <- function(bety,inputs_df){ + input_id <- inputs_df$input_id + # File_format <- getFileFormat(bety,input_id) + File_format <- PEcAn.DB::query.format.vars(bety = bety, input.id = input_id) + start.year <- as.numeric(lubridate::year(inputs_df$start_date)) + end.year <- as.numeric(lubridate::year(inputs_df$end_date)) + File_path <- inputs_df$filePath + # TODO There is an issue with the db where file names are not saved properly. + # To make it work with the VM, uncomment the line below + # File_path <- paste0(inputs_df$filePath,'.csv') + site.id <- inputs_df$site_id site<-PEcAn.DB::query.site(site.id,bety$con) observations<-PEcAn.benchmark::load_data(data.path = File_path, format= File_format, time.row = File_format$time.row, site = site, start_year = start.year, end_year = end.year) return(observations) } - # This function as a wrapper over PEcAn.DB::query.format.vars where - # file format can be retrieved using either by input or format id. - getFileFormat <- function(bety,input.id,format.id=NULL){ - # Retaining the code for getting file format using format Id as in tutorial - # File_format <- PEcAn.DB::query.format.vars(bety = bety, format.id = format.id) - File_format <- PEcAn.DB::query.format.vars(bety = bety, input.id = input.id) - return(File_format) - } getSettingsFromWorkflowId <- function(bety,workflowID){ - basePath <- tbl(bety, 'workflows') %>% filter(id %in% workflowID) %>% pull(folder) + basePath <- dplyr::tbl(bety, 'workflows') %>% dplyr::filter(id %in% workflowID) %>% dplyr::pull(folder) configPath <- file.path(basePath, 'pecan.CONFIGS.xml') - # Second way of proving configPath. More of a hack - # configPath <- paste0("~/output/PEcAn_",workflowID,"/pecan.CONFIGS.xml") settings<-PEcAn.settings::read.settings(configPath) return(settings) } @@ -134,15 +134,33 @@ server <- shinyServer(function(input, output, session) { } updateSelectizeInput(session, "all_site_id", choices=site_id_list) }) - # Get input id from selected site id + # Get input id from selected site id. Returns inputs_df which is used to load observation data getInputs <- function(bety,site_Id){ - inputIds <- tbl(bety, 'inputs') %>% filter(site_id %in% site_Id) %>% distinct(id) %>% pull(id) - inputIds <- sort(inputIds) - return(inputIds) + # Subsetting the input id list based on the current (VM) machine + my_hostname <- PEcAn.utils::fqdn() + my_machine_id <- dplyr::tbl(bety, 'machines') %>% dplyr::filter(hostname == my_hostname) %>% dplyr::pull(id) + # Inner join 'inputs' table with 'dbfiles' table + # inputs_df would contain all the information about the site and input id required for + # the tutorial mentioned above to compare model run with actual observations + inputs_df <- dplyr::tbl(bety, 'dbfiles') %>% + dplyr::filter(container_type == 'Input', machine_id == my_machine_id) %>% + dplyr::inner_join(tbl(bety, 'inputs') %>% dplyr::filter(site_id %in% site_Id), by = c('container_id' = 'id')) %>% + dplyr::collect() + # Order by container id (==input id) + inputs_df <- inputs_df[order(inputs_df$container_id),] + # Mutate column as (input id, name) to be shown to the user + inputs_df <- inputs_df %>% + dplyr::mutate(input_selection_list = paste(inputs_df$container_id, inputs_df$name), + filePath = paste0(inputs_df$file_path,'/', inputs_df$file_name)) %>% + dplyr::select(input_id = container_id,filePath,input_selection_list,start_date,end_date,site_id,name, + machine_id,file_name,file_path) + return(inputs_df) } + # Update input id list as (input id, name) observe({ req(input$all_site_id) - updateSelectizeInput(session, "all_input_id", choices=getInputs(bety,input$all_site_id)) + inputs_df <- getInputs(bety,c(input$all_site_id)) + updateSelectizeInput(session, "all_input_id", choices=inputs_df$input_selection_list) }) # Renders ggplotly output$outputPlot <- renderPlotly({ @@ -154,15 +172,14 @@ server <- shinyServer(function(input, output, session) { ) # Load data masterDF <- loadNewData() - # masterDF <- rbind(modelData,externalData) - # Convert from factor to character. For subsetting + # Convert from factor to character. For subsetting masterDF$var_name <- as.character(masterDF$var_name) - # Convert to factor. Required for ggplot + # Convert to factor. Required for ggplot masterDF$run_id <- as.factor(as.character(masterDF$run_id)) # Filter by variable name df <- masterDF %>% dplyr::filter(var_name == input$variable_name) - # make dynamic slider + # Another way to make dynamic slider # https://stackoverflow.com/questions/18700589/interactive-reactive-change-of-min-max-values-of-sliderinput # output$slider <- renderUI({ # sliderInput("smooth_n", "Value for smoothing:", min=0, max=nrow(df), value=80) @@ -172,8 +189,10 @@ server <- shinyServer(function(input, output, session) { title <- unique(df$title) xlab <- unique(df$xlab) ylab <- unique(df$ylab) - # ggplot function for now scatter plots. - plt <- ggplot(df, aes(x=dates, y=vals, color=run_id)) + # ggplot function for scatter plots. + plt <- ggplot(df, aes(x=dates, y=vals, color=run_id)) + # model_geom <- switch(input$plotType, scatterPlot = geom_point, lineChart = geom_line) + # plt <- plt + model_geom() # Toggle chart type using switch switch(input$plotType, "scatterPlot" = { @@ -183,48 +202,51 @@ server <- shinyServer(function(input, output, session) { plt <- plt + geom_line() } ) - plt <- plt + labs(title=title, x=xlab, y=ylab) + geom_smooth(n=input$smooth_n) - # Check if user wants to load external data + # Check if user wants to load external data (==observations) # Similar to using event reactive - if (input$load_data>0) { - # File_format <- getFileFormat(bety,input$formatID) - # Retaining the code for getting file format using inputRecordID - File_format <- getFileFormat(bety,input$all_input_id) - ids_DF <- parse_ids_from_input_runID(input$all_run_id) - settings <- getSettingsFromWorkflowId(bety,ids_DF$wID[1]) - inFile <- input$fileUploaded - filePath <- PEcAn.DB::dbfile.file(type = 'Input', id = input$all_input_id,con = bety$con) - externalData <- loadObservationData(bety,settings,filePath,File_format) - # If variable found in the uploaded file + if (input$load_data>0) { + # Input ID is of the form (input id, Name). Split by space and use the first element + inputs_df <- getInputs(bety,c(input$all_site_id)) + inputs_df <- inputs_df %>% dplyr::filter(input_selection_list == input$all_input_id) + externalData <- loadObservationData(bety,inputs_df) + # If variable found in the uploaded file. + # TODO for now, actual observations can be plotted again a single model run (particular run id) + # Have to enhance to allow multiple run ids if (input$variable_name %in% names(externalData)){ - externalData <- externalData %>% dplyr::select(posix,dplyr::one_of(input$variable_name)) - names(externalData) <- c("dates","vals") - externalData$dates <- as.Date(externalData$dates) + # No need for subsetting though as align data returns for now only the provided variable name + # externalData <- externalData %>% dplyr::select(posix,dplyr::one_of(input$variable_name)) + var = input$variable_name + df = df %>% select(posix = dates, var = vals) + colnames(df)[2]<-paste0(var) # Required for align data to work + aligned_data = PEcAn.benchmark::align_data(model.calc = df, obvs.calc = externalData, var =var, align_method = "match_timestep") + colnames(aligned_data) <- c("model","observations","Date") # Order returned by align_data + # Melt dataframe to plot two types of columns together + aligned_data <- reshape2::melt(aligned_data, "Date") data_geom <- switch(input$data_geom, point = geom_point, line = geom_line) - plt <- plt + data_geom(data = externalData,aes(x=dates, y=vals),color='black', linetype = 'dashed') + plt <- ggplot(aligned_data, aes(x=Date, y=value, color=variable)) + data_geom() output$outputNoVariableFound <- renderText({ - paste0("Plotting data outputs in black") + paste0("Plotting data outputs.") }) } # Shiny output if variable not found else { output$outputNoVariableFound <- renderText({ - paste0("Not plotting uploaded data because the column is absent. Select another variable") + paste0("Data related to variable not found in the observations uploaded. Select another variable") }) } } - # Earlier smoothing and y labels + plt <- plt + labs(title=title, x=xlab, y=ylab) + geom_smooth(n=input$smooth_n) + # Earlier code for smoothing, y labels, color and fill values + # Retaining if we want to use ggplot instead of ggplotly # geom_smooth(aes(fill = "Spline fit")) + # scale_y_continuous(labels=fancy_scientific) + - # Earlier color and fill values # scale_color_manual(name = "", values = "black") + - # scale_fill_manual(name = "", values = "grey50") + # scale_fill_manual(name = "", values = "grey50") plt<-ggplotly(plt) # Not able to add icon over ggplotly # add_icon() }) - # Shiny server closes here -}) - +}) # Shiny server closes here +# To run the shiny app locally # runApp(port=6480, launch.browser=FALSE) # runApp(port=5658, launch.browser=FALSE) diff --git a/shiny/workflowPlot/ui.R b/shiny/workflowPlot/ui.R index d9ae275a96b..b2a88379968 100644 --- a/shiny/workflowPlot/ui.R +++ b/shiny/workflowPlot/ui.R @@ -1,13 +1,13 @@ library(shiny) +library(plotly) # Helper allows to load functions and variables that could be shared both by server.R and ui.R -source('helper.R') +# source('helper.R') # Define UI ui <- shinyUI(fluidPage( # Application title titlePanel("Workflow Plots"), sidebarLayout( sidebarPanel( - # helpText(), p("Please select the workflow IDs to continue. You can select multiple IDs"), selectizeInput("all_workflow_id", "Mutliple Workflow IDs", c(),multiple=TRUE), p("Please select the run IDs. You can select multiple IDs"), @@ -24,14 +24,9 @@ ui <- shinyUI(fluidPage( tags$hr(), tags$hr(), selectizeInput("all_site_id", "Select Site ID", c()), + # If loading multiple sites in future + # selectizeInput("all_site_id", "Select Site ID", c(), multiple=TRUE), selectizeInput("all_input_id", "Select Input ID", c()), - # fileInput('fileUploaded', 'Choose file to upload data' - # # accept=c('text/csv', - # # 'text/comma-separated-values,text/plain', - # # '.csv') - # ), - # textInput("inputRecordID", "Input Record ID for file", "1000011260"), - # textInput("formatID", "Format ID for file (Default CSV)", "5000000002"), radioButtons("data_geom", "Plot Type (for loaded data)", c("Scatter Plot" = "point", "Line Chart" = "line"),