Skip to content

Commit

Permalink
Merge pull request #174 from ontoportal-lirmm/pl/enhance-zipped-submi…
Browse files Browse the repository at this point in the history
…ssions-support

Fix: Enhance zipped submissions support - follow up
  • Loading branch information
alexskr authored Sep 11, 2023
2 parents e33a0e4 + 299a01c commit 711ebf2
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 13 deletions.
33 changes: 20 additions & 13 deletions lib/ontologies_linked_data/utils/file.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require 'net/http'
require 'uri'
require 'zip'
require 'zlib'
require 'tmpdir'

module LinkedData
Expand All @@ -20,7 +21,7 @@ def self.gzip?(file_path)
raise ArgumentError, "File path #{file_path} not found" unless File.exist? file_path

file_type = `file --mime -b #{Shellwords.escape(file_path)}`
file_type.split(';')[0] == 'application/x-gzip'
file_type.split(';')[0] == 'application/gzip' || file_type.split(';')[0] == 'application/x-gzip'
end

def self.files_from_zip(file_path)
Expand All @@ -30,15 +31,22 @@ def self.files_from_zip(file_path)
end

files = []
Zip::File.open(file_path) do |zipfile|
zipfile.each do |file|
if not file.directory?
if not file.name.split('/')[-1].start_with? '.' #a hidden file in __MACOSX or .DS_Store
if gzip?(file_path)
Zlib::GzipReader.open(file_path) do |file|
files << file.orig_name unless File.directory?(file) || file.orig_name.split('/')[-1].start_with?('.') # a hidden file in __MACOSX or .DS_Store
end
elsif zip?(file_path)
Zip::File.open(file_path) do |zip_files|
zip_files.each do |file|
unless file.directory? || file.name.split('/')[-1].start_with?('.') # a hidden file in __MACOSX or .DS_Store
files << file.name
end
end
end
else
raise StandardError, "Unsupported file format: #{File.extname(file_path)}"
end

return files
end

Expand All @@ -52,21 +60,20 @@ def self.unzip(file_path, dst_folder)
if gzip?(file_path)
Zlib::GzipReader.open(file_path) do |gz|
File.open([dst_folder, gz.orig_name].join('/'), "w") { |file| file.puts(gz.read) }
extracted_files << GzipFile.new(gz)
extracted_files << gz
end
else
elsif zip?(file_path)
Zip::File.open(file_path) do |zipfile|
zipfile.each do |file|
if file.name.split('/').length > 1
sub_folder = File.join(dst_folder,
file.name.split('/')[0..-2].join('/'))
unless Dir.exist?(sub_folder)
FileUtils.mkdir_p sub_folder
end
sub_folder = File.join(dst_folder, file.name.split('/')[0..-2].join('/'))
FileUtils.mkdir_p sub_folder unless Dir.exist?(sub_folder)
end
extracted_files << file.extract(File.join(dst_folder,file.name))
extracted_files << file.extract(File.join(dst_folder, file.name))
end
end
else
raise StandardError, "Unsupported file format: #{File.extname(file_path)}"
end
extracted_files
end
Expand Down
Binary file added test/data/ontology_files/BRO_v3.2.owl.gz
Binary file not shown.
44 changes: 44 additions & 0 deletions test/models/test_ontology_submission.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ def test_automaster_from_zip
assert_equal nil, LinkedData::Utils::FileHelpers.automaster(zipfile, ".obo")
end

# gzip? must accept a gzip-compressed fixture and reject a plain zip archive.
def test_is_gzip
  helpers = LinkedData::Utils::FileHelpers
  gz_fixture = "./test/data/ontology_files/BRO_v3.2.owl.gz"
  zip_fixture = "./test/data/ontology_files/evoc_v2.9.zip"

  assert helpers.gzip?(gz_fixture)
  refute helpers.gzip?(zip_fixture)
end

def test_duplicated_file_names

acronym = "DUPTEST"
Expand Down Expand Up @@ -501,6 +508,43 @@ def test_submission_parse_zip
puts "#{ctr} classes with no label"
end

# Parses a gzip-compressed OWL ontology through the submission pipeline.
#
# Steps: wipe the backend, check that a submission carrying only a
# submissionId is invalid with four errors, copy the .gz fixture into the
# repository, fill in the remaining attributes, save, process, and finally
# check that the submission reached the :uploaded/:rdf/:rdf_labels statuses
# and that classes can be read back.
#
# Skipped when BP_SKIP_HEAVY_TESTS=1.
def test_submission_parse_gzip
  skip if ENV["BP_SKIP_HEAVY_TESTS"] == "1"

  acronym = "BROGZ"
  name = "BRO GZIPPED"
  ontology_file = "./test/data/ontology_files/BRO_v3.2.owl.gz"
  id = 11

  LinkedData::TestCase.backend_4s_delete

  ont_submission = LinkedData::Models::OntologySubmission.new({submissionId: id})
  refute ont_submission.valid?
  assert_equal 4, ont_submission.errors.length

  upload_file_path = LinkedData::Models::OntologySubmission.copy_file_repository(acronym, id, ontology_file)
  ont_submission.uploadFilePath = upload_file_path
  # The third returned object (the user) is not needed by this test.
  owl, bro, _user, contact = submission_dependent_objects("OWL", acronym, "test_linked_models", name)
  ont_submission.released = DateTime.now - 4
  ont_submission.hasOntologyLanguage = owl
  ont_submission.prefLabelProperty = RDF::URI.new("http://bioontology.org/projects/ontologies/radlex/radlexOwl#Preferred_name")
  ont_submission.ontology = bro
  ont_submission.contact = [contact]
  assert ont_submission.valid?
  ont_submission.save

  parse_options = {process_rdf: true, reasoning: true, index_search: false, run_metrics: false, diff: false}
  # The logger is built before the begin block: if its construction fails,
  # that error must surface as-is instead of being masked by a NoMethodError
  # raised from the rescue handler dereferencing a nil tmp_log.
  tmp_log = Logger.new(TestLogFile.new)
  begin
    ont_submission.process_submission(tmp_log, parse_options)
  rescue StandardError
    # Point at the log file before re-raising so the failure can be diagnosed.
    puts "Error, logged in #{tmp_log.instance_variable_get("@logdev").dev.path}"
    raise
  end

  assert ont_submission.ready?({status: [:uploaded, :rdf, :rdf_labels]})
  read_only_classes = LinkedData::Models::Class.in(ont_submission).include(:prefLabel).read_only
  refute read_only_classes.empty?
end

def test_download_ontology_file
begin
server_port = Random.rand(55000..65535) # http://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers#Dynamic.2C_private_or_ephemeral_ports
Expand Down

0 comments on commit 711ebf2

Please sign in to comment.