Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a script to pull a new version of an ontology on demand #59

Merged
42 changes: 42 additions & 0 deletions bin/ncbo_ontology_pull
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env ruby

# Command-line entry point that pulls a new version of a single ontology on demand.
#
# Usage: ncbo_ontology_pull -o ACRONYM
#
# Exit status is 0 when the pull ran without raising, and 1 when the options
# are invalid, the acronym is missing, or the pull raised an error.

$0 = "ncbo_ontology_pull"

# Exit cleanly from an early interrupt
Signal.trap("INT") { exit 1 }

# Setup the bundled gems in our environment
require 'bundler/setup'
# redis store for looking up queued jobs
require 'redis'

require_relative '../lib/ncbo_cron'
require_relative '../config/config'
require 'optparse'

ontology_acronym = nil
opt_parser = OptionParser.new do |opts|
  opts.on('-o', '--ontology ACRONYM', 'Ontology acronym to pull if new version exist') do |acronym|
    ontology_acronym = acronym
  end

  # Display the help screen, all programs are assumed to have this option.
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
end

# Report malformed options with the usage text instead of a raw stack trace.
begin
  opt_parser.parse!
rescue OptionParser::ParseError => e
  warn e.message
  warn opt_parser.help
  exit 1
end

# The acronym is mandatory: without it there is nothing to pull.
if ontology_acronym.nil? || ontology_acronym.strip.empty?
  warn 'An ontology acronym is required (-o ACRONYM)'
  warn opt_parser.help
  exit 1
end

logger = Logger.new($stdout)
logger.info "Starting ncbo pull"; logger.flush
puller = NcboCron::Models::OntologyPull.new
succeeded = true
begin
  puller.do_ontology_pull(ontology_acronym, logger: logger, enable_pull_umls: true)
rescue StandardError => e
  succeeded = false
  logger.error e.message
  logger.flush
end
logger.info "Finished ncbo pull"; logger.flush

# Let callers (cron, shell scripts) detect a failed pull.
exit 1 unless succeeded


150 changes: 90 additions & 60 deletions lib/ncbo_cron/ontology_pull.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@ module Models
class OntologyPull

# Raised when no file can be found at a submission's pull location.
# Carries the offending submission so that rescuers can log or notify about it.
class RemoteFileException < StandardError
  # The submission whose remote file could not be found.
  attr_reader :submission

  # @param submission [Object] the submission whose pull location has no file;
  #   its pullLocation (when it responds to it) is included in the message.
  #   NOTE(review): assumes pullLocation is already loaded on the submission
  #   when the exception is built — confirm for any new raise sites.
  def initialize(submission)
    @submission = submission
    location = submission.respond_to?(:pullLocation) ? submission.pullLocation.to_s : ''
    # Pass an explicit message: a bare `super` would forward the submission
    # object itself as the exception message.
    if location.empty?
      super('No submission file at pull location')
    else
      super("No submission file at pull location #{location}")
    end
  end
end


def do_remote_ontology_pull(options = {})
logger = options[:logger] || Logger.new($stdout)
logger.info "UMLS auto-pull #{options[:enable_pull_umls] == true}"
Expand All @@ -23,65 +27,26 @@ def do_remote_ontology_pull(options = {})
ontologies.select! { |ont| ont_to_include.include?(ont.acronym) } unless ont_to_include.empty?
enable_pull_umls = options[:enable_pull_umls]
umls_download_url = options[:pull_umls_url]
ontologies.sort! {|a, b| a.acronym.downcase <=> b.acronym.downcase}
ontologies.sort! { |a, b| a.acronym.downcase <=> b.acronym.downcase }
new_submissions = []

ontologies.each do |ont|
begin
last = ont.latest_submission(status: :any)
next if last.nil?
last.bring(:hasOntologyLanguage) if last.bring?(:hasOntologyLanguage)
if !enable_pull_umls && last.hasOntologyLanguage.umls?
next
end
last.bring(:pullLocation) if last.bring?(:pullLocation)
next if last.pullLocation.nil?
last.bring(:uploadFilePath) if last.bring?(:uploadFilePath)

if last.hasOntologyLanguage.umls? && umls_download_url
last.pullLocation= RDF::URI.new(umls_download_url + last.pullLocation.split("/")[-1])
logger.info("Using alternative download for umls #{last.pullLocation.to_s}")
begin
new_submissions << self.do_ontology_pull(ont.acronym,
enable_pull_umls: enable_pull_umls,
umls_download_url: umls_download_url,
logger: logger)
rescue RemoteFileException => error
logger.info "RemoteFileException: No submission file at pull location #{error.submission.pullLocation.to_s} for ontology #{ont.acronym}."
logger.flush
LinkedData::Utils::Notifications.remote_ontology_pull(error.submission)
end

if last.remote_file_exists?(last.pullLocation.to_s)
logger.info "Checking download for #{ont.acronym}"
logger.info "Location: #{last.pullLocation.to_s}"; logger.flush
file, filename = last.download_ontology_file()
file = File.open(file.path, "rb")
remote_contents = file.read
md5remote = Digest::MD5.hexdigest(remote_contents)

if last.uploadFilePath && File.exist?(last.uploadFilePath)
file_contents = open(last.uploadFilePath) { |f| f.read }
md5local = Digest::MD5.hexdigest(file_contents)
new_file_exists = (not md5remote.eql?(md5local))
else
# There is no existing file, so let's create a submission with the downloaded one
new_file_exists = true
end

if new_file_exists
logger.info "New file found for #{ont.acronym}\nold: #{md5local}\nnew: #{md5remote}"
logger.flush()
new_submissions << create_submission(ont, last, file, filename, logger)
end

file.close
else
begin
raise RemoteFileException
rescue RemoteFileException
logger.info "RemoteFileException: No submission file at pull location #{last.pullLocation.to_s} for ontology #{ont.acronym}."
logger.flush
LinkedData::Utils::Notifications.remote_ontology_pull(last)
end
end
rescue Exception => e
logger.error "Problem retrieving #{ont.acronym} in OntologyPull:\n" + e.message + "\n" + e.backtrace.join("\n\t")
logger.flush()
next
end
rescue Exception => e
logger.error "Problem retrieving #{ont.acronym} in OntologyPull:\n" + e.message + "\n" + e.backtrace.join("\n\t")
logger.flush()
next
end

if options[:cache_clear] == true
Expand All @@ -93,8 +58,54 @@ def do_remote_ontology_pull(options = {})
new_submissions
end

def create_submission(ont, sub, file, filename, logger=nil,
add_to_pull=true,new_version=nil,new_released=nil)
# Checks the pull location of one ontology's latest submission and creates a
# new submission when the remote file differs from the stored upload.
#
# @param ontology_acronym [String] acronym of the ontology to check
# @param enable_pull_umls [Boolean] when false, ontologies whose language
#   reports umls? are refused with an error
# @param umls_download_url [String, nil] base URL prepended to the last path
#   segment of the pull location for UMLS ontologies; nil or an empty string
#   leaves the pull location untouched
# @param logger [Logger, nil] receives progress messages; defaults to a
#   logger on $stdout.
#   NOTE(review): the logger must respond to #flush; the stdlib Logger does
#   not define it, so this presumably relies on a project-level extension.
# @return [Object, nil] whatever create_submission returns when a new file was
#   found, nil when the remote file is unchanged
# @raise [StandardError] when the ontology or its latest submission is missing,
#   UMLS pulling is disabled for a UMLS ontology, or there is no pullLocation
# @raise [RemoteFileException] when no file exists at the pull location
def do_ontology_pull(ontology_acronym, enable_pull_umls: false, umls_download_url: '', logger: nil)
  logger ||= Logger.new($stdout)

  ont = LinkedData::Models::Ontology.find(ontology_acronym).include(:acronym).first
  raise StandardError, "Ontology #{ontology_acronym} not found" if ont.nil?

  last = ont.latest_submission(status: :any)
  raise StandardError, "No submission found for #{ontology_acronym}" if last.nil?

  last.bring(:hasOntologyLanguage) if last.bring?(:hasOntologyLanguage)
  is_umls = last.hasOntologyLanguage.umls?
  raise StandardError, "Pull umls not enabled" if is_umls && !enable_pull_umls

  last.bring(:pullLocation) if last.bring?(:pullLocation)
  raise StandardError, "#{ontology_acronym} has no pullLocation" if last.pullLocation.nil?

  last.bring(:uploadFilePath) if last.bring?(:uploadFilePath)

  # An empty string is truthy in Ruby, so test for blankness explicitly;
  # otherwise the default '' would rewrite the location to a bare file name.
  if is_umls && !umls_download_url.to_s.empty?
    last.pullLocation = RDF::URI.new(umls_download_url + last.pullLocation.split("/")[-1])
    logger.info("Using alternative download for umls #{last.pullLocation.to_s}")
    logger.flush
  end

  raise RemoteFileException.new(last) unless last.remote_file_exists?(last.pullLocation.to_s)

  logger.info "Checking download for #{ont.acronym}"
  logger.info "Location: #{last.pullLocation.to_s}"; logger.flush
  file, filename = last.download_ontology_file
  file, md5local, md5remote, new_file_exists = new_file_exists?(file, last)

  begin
    new_submission = nil
    if new_file_exists
      logger.info "New file found for #{ont.acronym}\nold: #{md5local}\nnew: #{md5remote}"
      logger.flush
      new_submission = create_submission(ont, last, file, filename, logger)
    else
      logger.info "There is no new file found for #{ont.acronym}"
      logger.flush
    end
    new_submission
  ensure
    # Release the handle even when create_submission raises.
    file.close
  end
end

def create_submission(ont, sub, file, filename, logger = nil,
add_to_pull = true, new_version = nil, new_released = nil)
logger ||= Kernel.const_defined?("LOGGER") ? Kernel.const_get("LOGGER") : Logger.new(STDOUT)
new_sub = LinkedData::Models::OntologySubmission.new

Expand Down Expand Up @@ -123,9 +134,9 @@ def create_submission(ont, sub, file, filename, logger=nil,

# check if OWLAPI is able to parse the file before creating a new submission
owlapi = LinkedData::Parser::OWLAPICommand.new(
full_file_path,
File.expand_path(new_sub.data_folder.to_s),
logger: logger)
full_file_path,
File.expand_path(new_sub.data_folder.to_s),
logger: logger)
owlapi.disable_reasoner
parsable = true

Expand All @@ -144,7 +155,7 @@ def create_submission(ont, sub, file, filename, logger=nil,

if add_to_pull
submission_queue = NcboCron::Models::OntologySubmissionParser.new
submission_queue.queue_submission(new_sub, {all: true})
submission_queue.queue_submission(new_sub, { all: true })
logger.info("OntologyPull created a new submission (#{submission_id}) for ontology #{ont.acronym}")
end
else
Expand All @@ -158,6 +169,25 @@ def create_submission(ont, sub, file, filename, logger=nil,
new_sub
end


private

# Compares the MD5 of a freshly downloaded file with the MD5 of the file
# already stored for a submission.
#
# @param file [#path] the downloaded file; it is reopened in binary mode
# @param last [#uploadFilePath] the submission holding the stored upload path
# @return [Array] [handle, md5local, md5remote, new_file_exists] where handle
#   is the still-open binary File positioned at end of file (the caller must
#   close it), md5local is nil when there is no stored file, and
#   new_file_exists is true when the digests differ or no stored file exists
def new_file_exists?(file, last)
  chunk_size = 1024 * 1024
  remote = File.open(file.path, "rb")

  # Hash in chunks so large ontology files are never held in memory at once.
  remote_digest = Digest::MD5.new
  while (chunk = remote.read(chunk_size))
    remote_digest << chunk
  end
  md5remote = remote_digest.hexdigest

  local_path = last.uploadFilePath
  if local_path && File.exist?(local_path)
    # Digest::MD5.file reads the bytes straight from the path; unlike
    # Kernel#open it never treats a leading "|" as a command to run.
    md5local = Digest::MD5.file(local_path).hexdigest
    new_file_exists = !md5remote.eql?(md5local)
  else
    # There is no existing file, so let's create a submission with the downloaded one
    md5local = nil
    new_file_exists = true
  end

  [remote, md5local, md5remote, new_file_exists]
rescue StandardError
  # Do not leak the handle when hashing fails part-way through.
  remote.close if remote
  raise
end

# Builds a Redis client from the goo_redis_host and goo_redis_port values in
# LinkedData.settings, using a 30 second timeout.
def redis_goo
  settings = LinkedData.settings
  connection = {
    host: settings.goo_redis_host,
    port: settings.goo_redis_port,
    timeout: 30
  }
  Redis.new(**connection)
end
Expand Down
8 changes: 4 additions & 4 deletions test/test_ontology_pull.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ def self.after_suite
@@redis.del NcboCron::Models::OntologySubmissionParser::QUEUE_HOLDER
end

def test_remote_ontology_pull()
def test_remote_ontology_pull
ontologies = init_ontologies(1)
ont = LinkedData::Models::Ontology.find(ontologies[0].id).first
ont.bring(:submissions) if ont.bring?(:submissions)
assert_equal 1, ont.submissions.length

pull = NcboCron::Models::OntologyPull.new
pull.do_remote_ontology_pull()
pull.do_remote_ontology_pull

# check that the pull creates a new submission when the file has changed
ont = LinkedData::Models::Ontology.find(ontologies[0].id).first
Expand All @@ -72,7 +72,7 @@ def test_remote_ontology_pull()
ont = LinkedData::Models::Ontology.find(ontologies[0].id).first
ont.bring(:submissions) if ont.bring?(:submissions)
assert_equal 2, ont.submissions.length
pull.do_remote_ontology_pull()
pull.do_remote_ontology_pull
assert_equal 2, ont.submissions.length
end

Expand Down Expand Up @@ -172,7 +172,7 @@ def init_ontologies(submission_count)
sub.pullLocation = RDF::IRI.new(@@url)
sub.save() rescue binding.pry
end
return ontologies
ontologies
end

end