diff --git a/Gemfile.lock b/Gemfile.lock index f230ace8..599d878b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ncbo/goo.git - revision: 15023141f6051d4fa6cba6081d082c720327b0c9 + revision: b8c7867450ec6ea2d3167eb9d9b1aed5614a1ce3 branch: develop specs: goo (0.0.2) @@ -26,7 +26,7 @@ GIT GIT remote: https://github.com/ncbo/ontologies_linked_data.git - revision: 297f630ee5a35a78b015adf32fdb1e3af59ca652 + revision: 79527b94fbb59081ba58281a5cd51ec3448fadb0 branch: develop specs: ontologies_linked_data (0.0.1) @@ -46,7 +46,7 @@ GIT GIT remote: https://github.com/ncbo/sparql-client.git - revision: fb4a89b420f8eb6dda5190a126b6c62e32c4c0c9 + revision: 55e7dbf858eb571c767bc67868f9af61663859cb branch: develop specs: sparql-client (1.0.1) @@ -235,9 +235,7 @@ GEM webrick (1.8.1) PLATFORMS - ruby x86_64-darwin-18 - x86_64-darwin-21 DEPENDENCIES cube-ruby diff --git a/bin/ncbo_ontology_archive_old_submissions b/bin/ncbo_ontology_archive_old_submissions index 3dc5c87c..535c129e 100755 --- a/bin/ncbo_ontology_archive_old_submissions +++ b/bin/ncbo_ontology_archive_old_submissions @@ -11,31 +11,121 @@ require_relative '../lib/ncbo_cron' config_exists = File.exist?(File.expand_path('../../config/config.rb', __FILE__)) abort("Please create a config/config.rb file using the config/config.rb.sample as a template") unless config_exists require_relative '../config/config' +require 'optparse' -logfile = 'archive_old_submissions.log' +options = { delete: false } +opt_parser = OptionParser.new do |opts| + # Set a banner, displayed at the top of the help screen. 
+ opts.banner = "Usage: #{File.basename(__FILE__)} [options]" + + options[:logfile] = STDOUT + opts.on( '-l', '--logfile FILE', "Write log to FILE (default is STDOUT)" ) do |filename| + options[:logfile] = filename + end + + # Delete submission if it contains bad data + opts.on( '-d', '--delete', "Delete submissions that contain bad data" ) do + options[:delete] = true + end + + # Display the help screen, all programs are assumed to have this option. + opts.on( '-h', '--help', 'Display this screen' ) do + puts opts + exit + end +end + +opt_parser.parse! +logfile = options[:logfile] if File.file?(logfile); File.delete(logfile); end logger = Logger.new(logfile) -options = { process_rdf: false, index_search: false, index_commit: false, - run_metrics: false, reasoning: false, archive: true } +process_actions = { process_rdf: false, generate_labels: false, index_search: false, index_commit: false, + process_annotator: false, diff: false, run_metrics: false, archive: true } onts = LinkedData::Models::Ontology.all onts.each { |ont| ont.bring(:acronym, :submissions) } -onts.sort! { |a,b| a.acronym <=> b.acronym } +onts.sort! { |a, b| a.acronym <=> b.acronym } +bad_submissions = {} onts.each do |ont| latest_sub = ont.latest_submission - if not latest_sub.nil? + + unless latest_sub.nil? id = latest_sub.submissionId subs = ont.submissions - old_subs = subs.reject { |sub| sub.submissionId >= id } - old_subs.sort! 
{ |a,b| a.submissionId <=> b.submissionId } + + old_subs = subs.reject { |sub| + begin + sub.submissionId >= id + rescue => e + msg = "Invalid submission ID detected (String instead of Integer): #{ont.acronym}/#{sub.submissionId} - #{e.class}:\n#{e.backtrace.join("\n")}" + puts msg + logger.error(msg) + + if options[:delete] + sub.delete if options[:delete] + msg = "Deleted submission #{ont.acronym}/#{sub.submissionId} due to invalid Submission ID" + puts msg + logger.error(msg) + end + bad_submissions["#{ont.acronym}/#{sub.submissionId}"] = "Invalid Submission ID" + true + end + } + old_subs.sort! { |a, b| a.submissionId <=> b.submissionId } old_subs.each do |sub| - if not sub.archived? + unless sub.archived? msg = "#{ont.acronym}: found un-archived old submission with ID #{sub.submissionId}." puts msg logger.info msg - NcboCron::Models::OntologySubmissionParser.new.process_submission(logger, sub.id.to_s, options) + + begin + NcboCron::Models::OntologySubmissionParser.new.process_submission(logger, sub.id.to_s, process_actions) + rescue => e + if e.class == Goo::Base::NotValidException + if sub.valid? 
+ msg = "Error archiving submission #{ont.acronym}/#{sub.submissionId} - #{e.class}:\n#{e.backtrace.join("\n")}" + puts msg + logger.error(msg) + bad_submissions["#{ont.acronym}/#{sub.submissionId}"] = "Submission passes valid check but cannot be saved" + else + msg = "Error archiving submission #{ont.acronym}/#{sub.submissionId}:\n#{JSON.pretty_generate(sub.errors)}" + puts msg + logger.error(msg) + + if options[:delete] + sub.delete if options[:delete] + msg = "Deleted submission #{ont.acronym}/#{sub.submissionId} due to invalid data" + puts msg + logger.error(msg) + end + bad_submissions["#{ont.acronym}/#{sub.submissionId}"] = "Submission is not valid to be saved" + end + else + msg = "Error archiving submission #{ont.acronym}/#{sub.submissionId} - #{e.class}:\n#{e.backtrace.join("\n")}" + puts msg + logger.error(msg) + + if options[:delete] && (e.class == Net::HTTPBadResponse || e.class == Errno::ECONNREFUSED) + sub.delete + msg = "Deleted submission #{ont.acronym}/#{sub.submissionId} due to a non-working pull URL" + puts msg + logger.error(msg) + end + bad_submissions["#{ont.acronym}/#{sub.submissionId}"] = "#{e.class} - Runtime error" + end + end end end end end +msg = JSON.pretty_generate(bad_submissions) +puts +puts msg +logger.error(msg) + +msg = "Number of errored submissions: #{bad_submissions.length}" +puts msg +logger.error(msg) + + diff --git a/lib/ncbo_cron/ontology_submission_parser.rb b/lib/ncbo_cron/ontology_submission_parser.rb index dfa7b320..a6512312 100644 --- a/lib/ncbo_cron/ontology_submission_parser.rb +++ b/lib/ncbo_cron/ontology_submission_parser.rb @@ -166,7 +166,7 @@ def process_submission(logger, submission_id, actions=ACTIONS) # Check to make sure the file has been downloaded if sub.pullLocation && (!sub.uploadFilePath || !File.exist?(sub.uploadFilePath)) - multi_logger.debug "Pull location found, but no file in the upload file path. Retrying download." 
+ multi_logger.debug "Pull location found (#{sub.pullLocation}), but no file in the upload file path (#{sub.uploadFilePath}). Retrying download." file, filename = sub.download_ontology_file file_location = sub.class.copy_file_repository(sub.ontology.acronym, sub.submissionId, file, filename) file_location = "../" + file_location if file_location.start_with?(".") # relative path fix