From c70eed871b30401b040ad4d2c32d1635816609f9 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 11 Aug 2022 16:28:13 +0200 Subject: [PATCH 01/10] extract do_ontology_pull function --- lib/ncbo_cron/ontology_pull.rb | 55 ++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/lib/ncbo_cron/ontology_pull.rb b/lib/ncbo_cron/ontology_pull.rb index ac6da70e..732cb892 100644 --- a/lib/ncbo_cron/ontology_pull.rb +++ b/lib/ncbo_cron/ontology_pull.rb @@ -93,6 +93,61 @@ def do_remote_ontology_pull(options = {}) new_submissions end + def do_ontology_pull(ontology_acronym, enable_pull_umls: false, umls_download_url: '' , isLong: false, logger:nil) + ont = LinkedData::Models::Ontology.find(ontology_acronym).include(:acronym).first + raise StandardError, "Ontology #{ontology_acronym} not found" if ont.nil? + + last = ont.latest_submission(status: :any) + raise StandardError, "No submission found for #{ontology_acronym}" if last.nil? + + last.bring(:hasOntologyLanguage) if last.bring?(:hasOntologyLanguage) + if !enable_pull_umls && last.hasOntologyLanguage.umls? + raise StandardError, "Pull umls not enabled" + end + + last.bring(:pullLocation) if last.bring?(:pullLocation) + raise StandardError, "#{ontology_acronym} has no pullLocation" if last.pullLocation.nil? + + last.bring(:uploadFilePath) if last.bring?(:uploadFilePath) + + if last.hasOntologyLanguage.umls? && umls_download_url + last.pullLocation= RDF::URI.new(umls_download_url + last.pullLocation.split("/")[-1]) + logger.info("Using alternative download for umls #{last.pullLocation.to_s}") + logger.flush + end + + if last.remote_file_exists?(last.pullLocation.to_s) + logger.info "Checking download for #{ont.acronym}" + logger.info "Location: #{last.pullLocation.to_s}"; logger.flush + file, filename = last.download_ontology_file() + file = File.open(file.path, "rb") + remote_contents = file.read + md5remote = Digest::MD5.hexdigest(remote_contents) + + if last.uploadFilePath && File.exist?(last.uploadFilePath) + file_contents = open(last.uploadFilePath) { |f| f.read } + md5local = Digest::MD5.hexdigest(file_contents) + new_file_exists = (not md5remote.eql?(md5local)) + else + # There is no existing file, so let's create a submission with the downloaded one + new_file_exists = true + end + + if new_file_exists + logger.info "New file found for #{ont.acronym}\nold: #{md5local}\nnew: #{md5remote}" + logger.flush() + new_submissions << create_submission(ont, last, file, filename, logger) + else + logger.info "There is no new file found for #{ont.acronym}" + logger.flush() + end + + file.close + else + raise RemoteFileException + end + end + def create_submission(ont, sub, file, filename, logger=nil, add_to_pull=true,new_version=nil,new_released=nil) logger ||= Kernel.const_defined?("LOGGER") ? Kernel.const_get("LOGGER") : Logger.new(STDOUT) From d47dc9b0154f1bafe720bdea22ee530d91fbad1a Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 11 Aug 2022 16:28:50 +0200 Subject: [PATCH 02/10] some simple code refactor in the ontology_pull --- lib/ncbo_cron/ontology_pull.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/ncbo_cron/ontology_pull.rb b/lib/ncbo_cron/ontology_pull.rb index 732cb892..b9515db8 100644 --- a/lib/ncbo_cron/ontology_pull.rb +++ b/lib/ncbo_cron/ontology_pull.rb @@ -38,7 +38,7 @@ def do_remote_ontology_pull(options = {}) next if last.pullLocation.nil? last.bring(:uploadFilePath) if last.bring?(:uploadFilePath) - if last.hasOntologyLanguage.umls? && umls_download_url + if last.hasOntologyLanguage.umls? && !umls_download_url.empty? last.pullLocation= RDF::URI.new(umls_download_url + last.pullLocation.split("/")[-1]) logger.info("Using alternative download for umls #{last.pullLocation.to_s}") logger.flush @@ -66,7 +66,6 @@ def do_remote_ontology_pull(options = {}) logger.flush() new_submissions << create_submission(ont, last, file, filename, logger) end - file.close else begin From 42dd776f17940e96b18efc1329c9b11ca20397c8 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 11 Aug 2022 16:33:41 +0200 Subject: [PATCH 03/10] simple code refactor of test_ontology_pull --- test/test_ontology_pull.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_ontology_pull.rb b/test/test_ontology_pull.rb index 57fa9f47..74923677 100644 --- a/test/test_ontology_pull.rb +++ b/test/test_ontology_pull.rb @@ -41,14 +41,14 @@ def self.after_suite @@redis.del NcboCron::Models::OntologySubmissionParser::QUEUE_HOLDER end - def test_remote_ontology_pull() + def test_remote_ontology_pull ontologies = init_ontologies(1) ont = LinkedData::Models::Ontology.find(ontologies[0].id).first ont.bring(:submissions) if ont.bring?(:submissions) assert_equal 1, ont.submissions.length pull = NcboCron::Models::OntologyPull.new - pull.do_remote_ontology_pull() + pull.do_remote_ontology_pull # check that the pull creates a new submission when the file has changed ont = LinkedData::Models::Ontology.find(ontologies[0].id).first @@ -72,7 +72,7 @@ def test_remote_ontology_pull() ont = LinkedData::Models::Ontology.find(ontologies[0].id).first ont.bring(:submissions) if ont.bring?(:submissions) assert_equal 2, ont.submissions.length - pull.do_remote_ontology_pull() + pull.do_remote_ontology_pull assert_equal 2, ont.submissions.length end @@ -172,7 +172,7 @@ def init_ontologies(submission_count) sub.pullLocation = RDF::IRI.new(@@url) sub.save() rescue binding.pry end - return ontologies + ontologies end end From ffd6c8a7a790fb4a514d29422f9a4b72bd433ae6 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 11 Aug 2022 16:34:21 +0200 Subject: [PATCH 04/10] add a script to do a ontology pull on an ontology on demand --- bin/ncbo_ontology_pull | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100755 bin/ncbo_ontology_pull diff --git a/bin/ncbo_ontology_pull b/bin/ncbo_ontology_pull new file mode 100755 index 00000000..0af734be --- /dev/null +++ b/bin/ncbo_ontology_pull @@ -0,0 +1,46 @@ +#!/usr/bin/env ruby + +$0 = "ncbo_cron" + +# Exit cleanly from an early interrupt +Signal.trap("INT") { exit 1 } + +# Setup the bundled gems in our environment +require 'bundler/setup' +# redis store for looking up queued jobs +require 'redis' + +require_relative '../lib/ncbo_cron' +require_relative '../config/config' +require 'optparse' + +ontology_acronym = '' +opt_parser = OptionParser.new do |opts| + opts.on('-o', '--ontology ACRONYM', 'Ontology acronym to pull if new version exist') do |acronym| + ontology_acronym = acronym + end + + # Display the help screen, all programs are assumed to have this option. + opts.on( '-h', '--help', 'Display this screen') do + puts opts + exit + end +end +opt_parser.parse! + +logger = Logger.new($stdout) +logger.info "Starting ncbo pull"; logger.flush +puller = NcboCron::Models::OntologyPull.new +begin + puller.do_ontology_pull(ontology_acronym, logger: logger , enable_pull_umls:true ) +rescue NcboCron::Models::OntologyPull::RemoteFileException => e + logger.error "RemoteFileException: No submission file at pull location #{last.pullLocation.to_s} for ontology #{ont.acronym}." + logger.flush +rescue StandardError => e + e.backtrace + logger.error e.message + logger.flush +end +logger.info "Finished ncbo pull"; logger.flush + + From 857ef76706eea431e07b01d3549e64f1c3d65f9f Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 11 Aug 2022 16:40:05 +0200 Subject: [PATCH 05/10] set the name of the new script in $0 --- bin/ncbo_ontology_pull | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/ncbo_ontology_pull b/bin/ncbo_ontology_pull index 0af734be..131ef543 100755 --- a/bin/ncbo_ontology_pull +++ b/bin/ncbo_ontology_pull @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -$0 = "ncbo_cron" +$0 = "ncbo_ontology_pull" # Exit cleanly from an early interrupt Signal.trap("INT") { exit 1 } From 96cd18524974b20541c5cf24e4e5c2062ca4c301 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 11 Aug 2022 19:14:04 +0200 Subject: [PATCH 06/10] extract new_file_exists? method from do_ontology_pull --- lib/ncbo_cron/ontology_pull.rb | 46 +++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/lib/ncbo_cron/ontology_pull.rb b/lib/ncbo_cron/ontology_pull.rb index b9515db8..2f8da64e 100644 --- a/lib/ncbo_cron/ontology_pull.rb +++ b/lib/ncbo_cron/ontology_pull.rb @@ -92,9 +92,10 @@ def do_remote_ontology_pull(options = {}) new_submissions end - def do_ontology_pull(ontology_acronym, enable_pull_umls: false, umls_download_url: '' , isLong: false, logger:nil) - ont = LinkedData::Models::Ontology.find(ontology_acronym).include(:acronym).first - raise StandardError, "Ontology #{ontology_acronym} not found" if ont.nil? + def do_ontology_pull(ontology_acronym, enable_pull_umls: false, umls_download_url: '', logger: nil) + ont = LinkedData::Models::Ontology.find(ontology_acronym).include(:acronym).first + new_submission = nil + raise StandardError, "Ontology #{ontology_acronym} not found" if ont.nil? last = ont.latest_submission(status: :any) raise StandardError, "No submission found for #{ontology_acronym}" if last.nil? @@ -118,32 +119,22 @@ def do_ontology_pull(ontology_acronym, enable_pull_umls: false, umls_download_ur if last.remote_file_exists?(last.pullLocation.to_s) logger.info "Checking download for #{ont.acronym}" logger.info "Location: #{last.pullLocation.to_s}"; logger.flush - file, filename = last.download_ontology_file() - file = File.open(file.path, "rb") - remote_contents = file.read - md5remote = Digest::MD5.hexdigest(remote_contents) - - if last.uploadFilePath && File.exist?(last.uploadFilePath) - file_contents = open(last.uploadFilePath) { |f| f.read } - md5local = Digest::MD5.hexdigest(file_contents) - new_file_exists = (not md5remote.eql?(md5local)) - else - # There is no existing file, so let's create a submission with the downloaded one - new_file_exists = true - end + file, filename = last.download_ontology_file + file, md5local, md5remote, new_file_exists = new_file_exists?(file, last) if new_file_exists logger.info "New file found for #{ont.acronym}\nold: #{md5local}\nnew: #{md5remote}" logger.flush() - new_submissions << create_submission(ont, last, file, filename, logger) + new_submission = create_submission(ont, last, file, filename, logger) else logger.info "There is no new file found for #{ont.acronym}" logger.flush() end file.close + new_submission else - raise RemoteFileException + raise RemoteFileException.new(last) end end @@ -212,6 +203,25 @@ def create_submission(ont, sub, file, filename, logger=nil, new_sub end + + private + + def new_file_exists?(file, last) + file = File.open(file.path, "rb") + remote_contents = file.read + md5remote = Digest::MD5.hexdigest(remote_contents) + + if last.uploadFilePath && File.exist?(last.uploadFilePath) + file_contents = open(last.uploadFilePath) { |f| f.read } + md5local = Digest::MD5.hexdigest(file_contents) + new_file_exists = (not md5remote.eql?(md5local)) + else + # There is no existing file, so let's create a submission with the downloaded one + new_file_exists = true + end + return file, md5local, md5remote, new_file_exists + end + def redis_goo Redis.new(host: LinkedData.settings.goo_redis_host, port: LinkedData.settings.goo_redis_port, timeout: 30) end From ee5ca71480d412aad1f570337faca9e4590d11dd Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 11 Aug 2022 19:23:15 +0200 Subject: [PATCH 07/10] save the submission in the RemoteFileException --- lib/ncbo_cron/ontology_pull.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/ncbo_cron/ontology_pull.rb b/lib/ncbo_cron/ontology_pull.rb index 2f8da64e..c072b2ca 100644 --- a/lib/ncbo_cron/ontology_pull.rb +++ b/lib/ncbo_cron/ontology_pull.rb @@ -8,11 +8,15 @@ module Models class OntologyPull class RemoteFileException < StandardError - end + attr_reader :submission - def initialize() + def initialize(submission) + super + @submission = submission + end end + def do_remote_ontology_pull(options = {}) logger = options[:logger] || Logger.new($stdout) logger.info "UMLS auto-pull #{options[:enable_pull_umls] == true}" From 1627331fe3bddc2169a7c9c487c2d11f5a2ba5da Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 11 Aug 2022 19:25:55 +0200 Subject: [PATCH 08/10] some automatic code refactor/lint --- lib/ncbo_cron/ontology_pull.rb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/ncbo_cron/ontology_pull.rb b/lib/ncbo_cron/ontology_pull.rb index c072b2ca..3afa3a17 100644 --- a/lib/ncbo_cron/ontology_pull.rb +++ b/lib/ncbo_cron/ontology_pull.rb @@ -27,7 +27,7 @@ def do_remote_ontology_pull(options = {}) ontologies.select! { |ont| ont_to_include.include?(ont.acronym) } unless ont_to_include.empty? enable_pull_umls = options[:enable_pull_umls] umls_download_url = options[:pull_umls_url] - ontologies.sort! {|a, b| a.acronym.downcase <=> b.acronym.downcase} + ontologies.sort! { |a, b| a.acronym.downcase <=> b.acronym.downcase } new_submissions = [] ontologies.each do |ont| @@ -115,7 +115,7 @@ def do_ontology_pull(ontology_acronym, enable_pull_umls: false, umls_download_ur last.bring(:uploadFilePath) if last.bring?(:uploadFilePath) if last.hasOntologyLanguage.umls? && umls_download_url - last.pullLocation= RDF::URI.new(umls_download_url + last.pullLocation.split("/")[-1]) + last.pullLocation = RDF::URI.new(umls_download_url + last.pullLocation.split("/")[-1]) logger.info("Using alternative download for umls #{last.pullLocation.to_s}") logger.flush end @@ -142,8 +142,8 @@ def do_ontology_pull(ontology_acronym, enable_pull_umls: false, umls_download_ur end end - def create_submission(ont, sub, file, filename, logger=nil, - add_to_pull=true,new_version=nil,new_released=nil) + def create_submission(ont, sub, file, filename, logger = nil, + add_to_pull = true, new_version = nil, new_released = nil) logger ||= Kernel.const_defined?("LOGGER") ? Kernel.const_get("LOGGER") : Logger.new(STDOUT) new_sub = LinkedData::Models::OntologySubmission.new @@ -172,9 +172,9 @@ def create_submission(ont, sub, file, filename, logger=nil, # check if OWLAPI is able to parse the file before creating a new submission owlapi = LinkedData::Parser::OWLAPICommand.new( - full_file_path, - File.expand_path(new_sub.data_folder.to_s), - logger: logger) + full_file_path, + File.expand_path(new_sub.data_folder.to_s), + logger: logger) owlapi.disable_reasoner parsable = true @@ -193,7 +193,7 @@ def create_submission(ont, sub, file, filename, logger=nil, if add_to_pull submission_queue = NcboCron::Models::OntologySubmissionParser.new - submission_queue.queue_submission(new_sub, {all: true}) + submission_queue.queue_submission(new_sub, { all: true }) logger.info("OntologyPull created a new submission (#{submission_id}) for ontology #{ont.acronym}") end else From fe303438f6ffc8a8db6417950aa8a13fecae947f Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 11 Aug 2022 19:27:15 +0200 Subject: [PATCH 09/10] use the new do_ontology_pull in the old do_remote_ontology_pull --- lib/ncbo_cron/ontology_pull.rb | 62 +++++++--------------------------- 1 file changed, 12 insertions(+), 50 deletions(-) diff --git a/lib/ncbo_cron/ontology_pull.rb b/lib/ncbo_cron/ontology_pull.rb index 3afa3a17..7aa9fc23 100644 --- a/lib/ncbo_cron/ontology_pull.rb +++ b/lib/ncbo_cron/ontology_pull.rb @@ -32,59 +32,21 @@ def do_remote_ontology_pull(options = {}) ontologies.each do |ont| begin - last = ont.latest_submission(status: :any) - next if last.nil? - last.bring(:hasOntologyLanguage) if last.bring?(:hasOntologyLanguage) - if !enable_pull_umls && last.hasOntologyLanguage.umls? - next - end - last.bring(:pullLocation) if last.bring?(:pullLocation) - next if last.pullLocation.nil? - last.bring(:uploadFilePath) if last.bring?(:uploadFilePath) - - if last.hasOntologyLanguage.umls? && !umls_download_url.empty? - last.pullLocation= RDF::URI.new(umls_download_url + last.pullLocation.split("/")[-1]) - logger.info("Using alternative download for umls #{last.pullLocation.to_s}") + begin + new_submissions << self.do_ontology_pull(ont.acronym, + enable_pull_umls: enable_pull_umls, + umls_download_url: umls_download_url, + logger: logger) + rescue RemoteFileException => error + logger.info "RemoteFileException: No submission file at pull location #{error.submission.pullLocation.to_s} for ontology #{ont.acronym}." logger.flush + LinkedData::Utils::Notifications.remote_ontology_pull(error.submission) end - - if last.remote_file_exists?(last.pullLocation.to_s) - logger.info "Checking download for #{ont.acronym}" - logger.info "Location: #{last.pullLocation.to_s}"; logger.flush - file, filename = last.download_ontology_file() - file = File.open(file.path, "rb") - remote_contents = file.read - md5remote = Digest::MD5.hexdigest(remote_contents) - - if last.uploadFilePath && File.exist?(last.uploadFilePath) - file_contents = open(last.uploadFilePath) { |f| f.read } - md5local = Digest::MD5.hexdigest(file_contents) - new_file_exists = (not md5remote.eql?(md5local)) - else - # There is no existing file, so let's create a submission with the downloaded one - new_file_exists = true - end - - if new_file_exists - logger.info "New file found for #{ont.acronym}\nold: #{md5local}\nnew: #{md5remote}" - logger.flush() - new_submissions << create_submission(ont, last, file, filename, logger) - end - file.close - else - begin - raise RemoteFileException - rescue RemoteFileException - logger.info "RemoteFileException: No submission file at pull location #{last.pullLocation.to_s} for ontology #{ont.acronym}." - logger.flush - LinkedData::Utils::Notifications.remote_ontology_pull(last) - end - end - rescue Exception => e - logger.error "Problem retrieving #{ont.acronym} in OntologyPull:\n" + e.message + "\n" + e.backtrace.join("\n\t") - logger.flush() - next end + rescue Exception => e + logger.error "Problem retrieving #{ont.acronym} in OntologyPull:\n" + e.message + "\n" + e.backtrace.join("\n\t") + logger.flush() + next end if options[:cache_clear] == true From 8535b30cd3347e56d9b28255f4806f604ff668db Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 5 Dec 2022 10:03:06 +0100 Subject: [PATCH 10/10] remove forgot variables --- bin/ncbo_ontology_pull | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bin/ncbo_ontology_pull b/bin/ncbo_ontology_pull index 131ef543..a017e4d7 100755 --- a/bin/ncbo_ontology_pull +++ b/bin/ncbo_ontology_pull @@ -33,11 +33,7 @@ logger.info "Starting ncbo pull"; logger.flush puller = NcboCron::Models::OntologyPull.new begin puller.do_ontology_pull(ontology_acronym, logger: logger , enable_pull_umls:true ) -rescue NcboCron::Models::OntologyPull::RemoteFileException => e - logger.error "RemoteFileException: No submission file at pull location #{last.pullLocation.to_s} for ontology #{ont.acronym}." - logger.flush rescue StandardError => e - e.backtrace logger.error e.message logger.flush end