refactor: performance logs are now captured directly instead of through console logs #776

Open
Wants to merge 1 commit into base: develop
R/Achilles.R (44 additions, 17 deletions)
@@ -151,6 +151,8 @@ achilles <- function(connectionDetails,
totalStart <- Sys.time()
achillesSql <- c()

+ performanceTable <- data.frame(analysis_id = integer(), elapsed_seconds = numeric(), start_time = numeric(), end_time = numeric())

# Check if the correct parameters are supplied when running in sqlOnly mode
if (sqlOnly && missing(connectionDetails) && is.null(sqlDialect)) {
stop("Error: When specifying sqlOnly = TRUE, sqlDialect or connectionDetails must be supplied.")
@@ -325,11 +327,18 @@ achilles <- function(connectionDetails,
resultsDatabaseSchema = resultsDatabaseSchema
)

+ sqlPerformanceTable <- SqlRender::loadRenderTranslateSql(
+   sqlFilename = "achilles_performance_ddl.sql",
+   packageName = "Achilles",
+   dbms = connectionDetails$dbms,
+   resultsDatabaseSchema = resultsDatabaseSchema
+ )

# Populate achilles_analysis without the "distribution" and "distributed_field"
# columns from achilles_analysis_details.csv
analysisDetailsCsv <- Achilles::getAnalysisDetails()
analysisDetailsCsv <- analysisDetailsCsv[,-c(2, 3)]

if (!sqlOnly) {
# Create empty achilles_analysis
DatabaseConnector::executeSql(
@@ -342,6 +351,16 @@ achilles <- function(connectionDetails,
progressBar = F,
reportOverallTime = F
)
+ DatabaseConnector::executeSql(
+   connection = connection,
+   sql = sqlPerformanceTable,
+   errorReportFile = file.path(
+     outputFolder,
+     "achillesErrorCreateAchillesPerformance.txt"
+   ),
+   progressBar = F,
+   reportOverallTime = F
+ )

# Populate achilles_analysis with data from achilles_analysis_details.csv from above
suppressMessages(
@@ -381,7 +400,7 @@ achilles <- function(connectionDetails,

# Generate Main Analyses
mainAnalysisIds <- analysisDetails$analysis_id

mainSqls <- lapply(mainAnalysisIds, function(analysisId) {
list(
analysisId = analysisId,
@@ -435,7 +454,10 @@ achilles <- function(connectionDetails,
progressBar = FALSE,
reportOverallTime = FALSE
)
- delta <- Sys.time() - start
+ endTime <- Sys.time()
+ delta <- endTime - start
+ analysisId <- as.integer(mainSql$analysisId)
+ performanceTable[nrow(performanceTable) + 1, ] <- c(analysisId, as.numeric(delta, units = "secs"), as.numeric(start), as.numeric(endTime))
ParallelLogger::logInfo(sprintf(
"[Main Analysis] [COMPLETE] %d (%f %s)",
as.integer(mainSql$analysisId),
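One subtlety: Sys.time() - start returns a difftime whose units ("secs", "mins", "hours", ...) are chosen automatically from the size of the gap, so the appended row above converts it explicitly to seconds before it lands in elapsed_seconds. A standalone R illustration (not part of the diff):

start <- Sys.time()
Sys.sleep(2)
endTime <- Sys.time()
delta <- endTime - start            # difftime; units picked automatically
as.numeric(delta, units = "secs")   # always seconds, here roughly 2.0
as.numeric(start)                   # POSIXct coerces to seconds since the epoch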
@@ -487,6 +509,17 @@ achilles <- function(connectionDetails,

ParallelLogger::stopCluster(cluster = cluster)
}

+ DatabaseConnector::insertTable(
+   connection = connection,
+   databaseSchema = resultsDatabaseSchema,
+   tableName = "achilles_performance",
+   data = performanceTable,
+   dropTableIfExists = FALSE,
+   createTable = FALSE,
+   tempTable = FALSE,
+   progressBar = F
+ )
}
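Because the table already exists from the DDL step, insertTable runs with dropTableIfExists = FALSE and createTable = FALSE, so it only appends the collected rows. Afterwards the timings can be inspected with a plain query; a sketch, assuming an open connection and that @results_database_schema rendered to "results":

DatabaseConnector::querySql(
  connection = connection,
  sql = "SELECT analysis_id, elapsed_seconds
         FROM results.achilles_performance
         ORDER BY elapsed_seconds DESC"
)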

# Merge scratch tables into final analysis tables
@@ -511,7 +544,8 @@ achilles <- function(connectionDetails,
smallCellCount = smallCellCount,
outputFolder = outputFolder,
sqlOnly = sqlOnly,
- logFile = logFile
+ logFile = logFile,
+ performanceTable = performanceTable
)
})

@@ -1150,7 +1184,8 @@ optimizeAtlasCache <- function(connectionDetails,
smallCellCount,
outputFolder,
sqlOnly,
- logFile) {
+ logFile,
+ performanceTable) {
castedNames <- apply(resultsTable$schema, 1, function(field) {
SqlRender::render(
"cast(@fieldName as @fieldType) as @fieldName",
@@ -1307,21 +1342,13 @@ optimizeAtlasCache <- function(connectionDetails,
}

.getAchillesResultBenchmark <- function(analysisId, logs) {
- logs <- logs[logs$analysisId == analysisId,]
+ logs <- logs[logs$analysis_id == analysisId,]
if (nrow(logs) == 1) {
- runTime <- strsplit(logs[1,]$runTime, " ")[[1]]
- runTimeValue <- round(as.numeric(runTime[1]), 2)
- runTimeUnit <- runTime[2]
- if (runTimeUnit == "mins") {
-   runTimeValue <- runTimeValue * 60
- } else if (runTimeUnit == "hours") {
-   runTimeValue <- runTimeValue * 60 * 60
- } else if (runTimeUnit == "days") {
-   runTimeValue <- runTimeValue * 60 * 60 * 24
- }
+ runTime <- logs[1,]$elapsed_seconds
+ runTimeValue <- round(runTime[1], 2)
runTimeValue
} else {
- "ERROR: no runtime found in log file"
+ "ERROR: no runtime found"
}
}
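With the unit parsing gone, .getAchillesResultBenchmark simply looks up elapsed_seconds for the matching analysis_id and rounds it. A quick sketch with hypothetical values (the function is internal to the package):

logs <- data.frame(analysis_id = c(101, 102), elapsed_seconds = c(1.2345, 65.4))
.getAchillesResultBenchmark(101, logs)   # 1.23
.getAchillesResultBenchmark(999, logs)   # "ERROR: no runtime found"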

(second changed file: the SQL query that reports analysis runtimes; filename not shown in this view)
@@ -1,7 +1,9 @@
- select aa.analysis_id, aa.analysis_name, aa.category, stratum_1 elapsed_seconds
+ select ap.analysis_id, aa.analysis_name, aa.category, ap.elapsed_seconds elapsed_seconds
from @results_database_schema.ACHILLES_ANALYSIS aa
join @results_database_schema.ACHILLES_RESULTS ar on aa.analysis_id + 2000000 = ar.analysis_id
+ join @results_database_schema.ACHILLES_PERFORMANCE ap on ap.analysis_id = aa.analysis_id
union
- select aa.analysis_id, aa.analysis_name, aa.category, stratum_1 elapsed_seconds
+ select ap.analysis_id, aa.analysis_name, aa.category, ap.elapsed_seconds elapsed_seconds
from @results_database_schema.ACHILLES_ANALYSIS aa
join @results_database_schema.ACHILLES_RESULTS_DIST ar on aa.analysis_id + 2000000 = ar.analysis_id
+ join @results_database_schema.ACHILLES_PERFORMANCE ap on ap.analysis_id = aa.analysis_id