diff --git a/bin/ncbo_ontology_pull b/bin/ncbo_ontology_pull new file mode 100755 index 00000000..a017e4d7 --- /dev/null +++ b/bin/ncbo_ontology_pull @@ -0,0 +1,42 @@ +#!/usr/bin/env ruby + +$0 = "ncbo_ontology_pull" + +# Exit cleanly from an early interrupt +Signal.trap("INT") { exit 1 } + +# Setup the bundled gems in our environment +require 'bundler/setup' +# redis store for looking up queued jobs +require 'redis' + +require_relative '../lib/ncbo_cron' +require_relative '../config/config' +require 'optparse' + +ontology_acronym = '' +opt_parser = OptionParser.new do |opts| + opts.on('-o', '--ontology ACRONYM', 'Ontology acronym to pull if new version exist') do |acronym| + ontology_acronym = acronym + end + + # Display the help screen, all programs are assumed to have this option. + opts.on( '-h', '--help', 'Display this screen') do + puts opts + exit + end +end +opt_parser.parse! + +logger = Logger.new($stdout) +logger.info "Starting ncbo pull"; logger.flush +puller = NcboCron::Models::OntologyPull.new +begin + puller.do_ontology_pull(ontology_acronym, logger: logger , enable_pull_umls:true ) +rescue StandardError => e + logger.error e.message + logger.flush +end +logger.info "Finished ncbo pull"; logger.flush + + diff --git a/lib/ncbo_cron/ontology_pull.rb b/lib/ncbo_cron/ontology_pull.rb index ac6da70e..7aa9fc23 100644 --- a/lib/ncbo_cron/ontology_pull.rb +++ b/lib/ncbo_cron/ontology_pull.rb @@ -8,11 +8,15 @@ module Models class OntologyPull class RemoteFileException < StandardError - end + attr_reader :submission - def initialize() + def initialize(submission) + super + @submission = submission + end end + def do_remote_ontology_pull(options = {}) logger = options[:logger] || Logger.new($stdout) logger.info "UMLS auto-pull #{options[:enable_pull_umls] == true}" @@ -23,65 +27,26 @@ def do_remote_ontology_pull(options = {}) ontologies.select! { |ont| ont_to_include.include?(ont.acronym) } unless ont_to_include.empty? enable_pull_umls = options[:enable_pull_umls] umls_download_url = options[:pull_umls_url] - ontologies.sort! {|a, b| a.acronym.downcase <=> b.acronym.downcase} + ontologies.sort! { |a, b| a.acronym.downcase <=> b.acronym.downcase } new_submissions = [] ontologies.each do |ont| begin - last = ont.latest_submission(status: :any) - next if last.nil? - last.bring(:hasOntologyLanguage) if last.bring?(:hasOntologyLanguage) - if !enable_pull_umls && last.hasOntologyLanguage.umls? - next - end - last.bring(:pullLocation) if last.bring?(:pullLocation) - next if last.pullLocation.nil? - last.bring(:uploadFilePath) if last.bring?(:uploadFilePath) - - if last.hasOntologyLanguage.umls? && umls_download_url - last.pullLocation= RDF::URI.new(umls_download_url + last.pullLocation.split("/")[-1]) - logger.info("Using alternative download for umls #{last.pullLocation.to_s}") + begin + new_submissions << self.do_ontology_pull(ont.acronym, + enable_pull_umls: enable_pull_umls, + umls_download_url: umls_download_url, + logger: logger) + rescue RemoteFileException => error + logger.info "RemoteFileException: No submission file at pull location #{error.submission.pullLocation.to_s} for ontology #{ont.acronym}." logger.flush + LinkedData::Utils::Notifications.remote_ontology_pull(error.submission) end - - if last.remote_file_exists?(last.pullLocation.to_s) - logger.info "Checking download for #{ont.acronym}" - logger.info "Location: #{last.pullLocation.to_s}"; logger.flush - file, filename = last.download_ontology_file() - file = File.open(file.path, "rb") - remote_contents = file.read - md5remote = Digest::MD5.hexdigest(remote_contents) - - if last.uploadFilePath && File.exist?(last.uploadFilePath) - file_contents = open(last.uploadFilePath) { |f| f.read } - md5local = Digest::MD5.hexdigest(file_contents) - new_file_exists = (not md5remote.eql?(md5local)) - else - # There is no existing file, so let's create a submission with the downloaded one - new_file_exists = true - end - - if new_file_exists - logger.info "New file found for #{ont.acronym}\nold: #{md5local}\nnew: #{md5remote}" - logger.flush() - new_submissions << create_submission(ont, last, file, filename, logger) - end - - file.close - else - begin - raise RemoteFileException - rescue RemoteFileException - logger.info "RemoteFileException: No submission file at pull location #{last.pullLocation.to_s} for ontology #{ont.acronym}." - logger.flush - LinkedData::Utils::Notifications.remote_ontology_pull(last) - end - end - rescue Exception => e - logger.error "Problem retrieving #{ont.acronym} in OntologyPull:\n" + e.message + "\n" + e.backtrace.join("\n\t") - logger.flush() - next end + rescue Exception => e + logger.error "Problem retrieving #{ont.acronym} in OntologyPull:\n" + e.message + "\n" + e.backtrace.join("\n\t") + logger.flush() + next end if options[:cache_clear] == true @@ -93,8 +58,54 @@ def do_remote_ontology_pull(options = {}) new_submissions end - def create_submission(ont, sub, file, filename, logger=nil, - add_to_pull=true,new_version=nil,new_released=nil) + def do_ontology_pull(ontology_acronym, enable_pull_umls: false, umls_download_url: '', logger: nil) + ont = LinkedData::Models::Ontology.find(ontology_acronym).include(:acronym).first + new_submission = nil + raise StandardError, "Ontology #{ontology_acronym} not found" if ont.nil? + + last = ont.latest_submission(status: :any) + raise StandardError, "No submission found for #{ontology_acronym}" if last.nil? + + last.bring(:hasOntologyLanguage) if last.bring?(:hasOntologyLanguage) + if !enable_pull_umls && last.hasOntologyLanguage.umls? + raise StandardError, "Pull umls not enabled" + end + + last.bring(:pullLocation) if last.bring?(:pullLocation) + raise StandardError, "#{ontology_acronym} has no pullLocation" if last.pullLocation.nil? + + last.bring(:uploadFilePath) if last.bring?(:uploadFilePath) + + if last.hasOntologyLanguage.umls? && umls_download_url + last.pullLocation = RDF::URI.new(umls_download_url + last.pullLocation.split("/")[-1]) + logger.info("Using alternative download for umls #{last.pullLocation.to_s}") + logger.flush + end + + if last.remote_file_exists?(last.pullLocation.to_s) + logger.info "Checking download for #{ont.acronym}" + logger.info "Location: #{last.pullLocation.to_s}"; logger.flush + file, filename = last.download_ontology_file + file, md5local, md5remote, new_file_exists = new_file_exists?(file, last) + + if new_file_exists + logger.info "New file found for #{ont.acronym}\nold: #{md5local}\nnew: #{md5remote}" + logger.flush() + new_submission = create_submission(ont, last, file, filename, logger) + else + logger.info "There is no new file found for #{ont.acronym}" + logger.flush() + end + + file.close + new_submission + else + raise RemoteFileException.new(last) + end + end + + def create_submission(ont, sub, file, filename, logger = nil, + add_to_pull = true, new_version = nil, new_released = nil) logger ||= Kernel.const_defined?("LOGGER") ? Kernel.const_get("LOGGER") : Logger.new(STDOUT) new_sub = LinkedData::Models::OntologySubmission.new @@ -123,9 +134,9 @@ def create_submission(ont, sub, file, filename, logger=nil, # check if OWLAPI is able to parse the file before creating a new submission owlapi = LinkedData::Parser::OWLAPICommand.new( - full_file_path, - File.expand_path(new_sub.data_folder.to_s), - logger: logger) + full_file_path, + File.expand_path(new_sub.data_folder.to_s), + logger: logger) owlapi.disable_reasoner parsable = true @@ -144,7 +155,7 @@ def create_submission(ont, sub, file, filename, logger=nil, if add_to_pull submission_queue = NcboCron::Models::OntologySubmissionParser.new - submission_queue.queue_submission(new_sub, {all: true}) + submission_queue.queue_submission(new_sub, { all: true }) logger.info("OntologyPull created a new submission (#{submission_id}) for ontology #{ont.acronym}") end else @@ -158,6 +169,25 @@ def create_submission(ont, sub, file, filename, logger=nil, new_sub end + + private + + def new_file_exists?(file, last) + file = File.open(file.path, "rb") + remote_contents = file.read + md5remote = Digest::MD5.hexdigest(remote_contents) + + if last.uploadFilePath && File.exist?(last.uploadFilePath) + file_contents = open(last.uploadFilePath) { |f| f.read } + md5local = Digest::MD5.hexdigest(file_contents) + new_file_exists = (not md5remote.eql?(md5local)) + else + # There is no existing file, so let's create a submission with the downloaded one + new_file_exists = true + end + return file, md5local, md5remote, new_file_exists + end + def redis_goo Redis.new(host: LinkedData.settings.goo_redis_host, port: LinkedData.settings.goo_redis_port, timeout: 30) end diff --git a/test/test_ontology_pull.rb b/test/test_ontology_pull.rb index 57fa9f47..74923677 100644 --- a/test/test_ontology_pull.rb +++ b/test/test_ontology_pull.rb @@ -41,14 +41,14 @@ def self.after_suite @@redis.del NcboCron::Models::OntologySubmissionParser::QUEUE_HOLDER end - def test_remote_ontology_pull() + def test_remote_ontology_pull ontologies = init_ontologies(1) ont = LinkedData::Models::Ontology.find(ontologies[0].id).first ont.bring(:submissions) if ont.bring?(:submissions) assert_equal 1, ont.submissions.length pull = NcboCron::Models::OntologyPull.new - pull.do_remote_ontology_pull() + pull.do_remote_ontology_pull # check that the pull creates a new submission when the file has changed ont = LinkedData::Models::Ontology.find(ontologies[0].id).first @@ -72,7 +72,7 @@ def test_remote_ontology_pull() ont = LinkedData::Models::Ontology.find(ontologies[0].id).first ont.bring(:submissions) if ont.bring?(:submissions) assert_equal 2, ont.submissions.length - pull.do_remote_ontology_pull() + pull.do_remote_ontology_pull assert_equal 2, ont.submissions.length end @@ -172,7 +172,7 @@ def init_ontologies(submission_count) sub.pullLocation = RDF::IRI.new(@@url) sub.save() rescue binding.pry end - return ontologies + ontologies end end