Implement the ontologies content search endpoint (GET /search/ontologies/content)
syphax-bouazzouni committed Feb 29, 2024
1 parent e33a97b · commit e378e64
Showing 5 changed files with 143 additions and 46 deletions.
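
As a quick orientation before the diff: the commit adds a GET /search/ontologies/content route that queries the :ontology_data Solr collection. Below is a minimal client-side sketch, assuming a locally running API; the host and port are placeholders, the parameter names (q/query, ontologies, qf) come from the route, and the totalCount field is the one asserted in the new tests.

require 'net/http'
require 'json'

# Placeholder host/port; adjust for your deployment.
uri = URI('http://localhost:9393/search/ontologies/content')
uri.query = URI.encode_www_form(
  q: '*',                                         # match everything, as in the tests; replace with a free-text query
  ontologies: 'BROSEARCHTEST-0,MCCLSEARCHTEST-0'  # optional comma-separated list of ontology acronyms
)

res = JSON.parse(Net::HTTP.get(uri))
puts res['totalCount'] # total number of matching documents
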
4 changes: 2 additions & 2 deletions Gemfile
@@ -14,7 +14,7 @@ gem 'sinatra', '~> 1.0'
gem 'sinatra-advanced-routes'
gem 'sinatra-contrib', '~> 1.0'
gem 'request_store'

gem 'addressable', '~> 2.8'
# Rack middleware
gem 'ffi'
gem 'rack-accept', '~> 0.4'
@@ -74,5 +74,5 @@ group :test do
gem 'rack-test'
gem 'simplecov', require: false
gem 'simplecov-cobertura' # for codecov.io
gem 'webmock'
gem 'webmock', '~> 3.19.1'
end
63 changes: 42 additions & 21 deletions Gemfile.lock
@@ -11,13 +11,16 @@ GIT

GIT
remote: https://github.com/ontoportal-lirmm/goo.git
revision: ec99ed8d742fc8d458911cb0e74fa23d31cdd158
revision: 10b90c17af12c71bfc95bfb4fc0bba5e47ff77af
branch: feature/add-model-based-search
specs:
goo (0.0.2)
addressable (~> 2.8)
pry
rdf (= 1.0.8)
rdf (= 3.2.11)
rdf-raptor
rdf-rdfxml
rdf-vocab
redis
rest-client
rsolr
@@ -54,7 +57,7 @@ GIT

GIT
remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git
revision: 4773e7f90a1afba33f3d84d4b05105826465d8a9
revision: 4e74b918119a246b908634c152b22a3f732d6abd
branch: feature/index-ontology-agent-metadata
specs:
ontologies_linked_data (0.0.1)
@@ -74,7 +77,7 @@ GIT

GIT
remote: https://github.com/ontoportal-lirmm/sparql-client.git
revision: aed51baf4106fd0f3d0e3f9238f0aad9406aa3f0
revision: 180c818f7715baac64b2699bb452ef5c756f62c5
branch: master
specs:
sparql-client (1.0.1)
@@ -109,6 +112,7 @@ GEM
airbrussh (1.5.1)
sshkit (>= 1.6.1, != 1.7.0)
backports (3.24.1)
base64 (0.2.0)
bcrypt (3.1.20)
bcrypt_pbkdf (1.1.0)
bigdecimal (1.4.2)
@@ -171,15 +175,15 @@ GEM
grpc (~> 1.59)
get_process_mem (0.2.7)
ffi (~> 1.0)
google-analytics-data (0.5.0)
google-analytics-data (0.6.0)
google-analytics-data-v1beta (>= 0.11, < 2.a)
google-cloud-core (~> 1.6)
google-analytics-data-v1beta (0.11.2)
google-analytics-data-v1beta (0.12.0)
gapic-common (>= 0.21.1, < 2.a)
google-cloud-errors (~> 1.0)
google-apis-analytics_v3 (0.14.0)
google-apis-core (>= 0.12.0, < 2.a)
google-apis-core (0.13.0)
google-apis-analytics_v3 (0.15.0)
google-apis-core (>= 0.14.0, < 2.a)
google-apis-core (0.14.0)
addressable (~> 2.5, >= 2.5.1)
googleauth (~> 1.9)
httpclient (>= 2.8.1, < 3.a)
@@ -195,11 +199,11 @@ GEM
google-cloud-errors (1.3.1)
google-protobuf (3.25.3-x86_64-darwin)
google-protobuf (3.25.3-x86_64-linux)
googleapis-common-protos (1.4.0)
google-protobuf (~> 3.14)
googleapis-common-protos-types (~> 1.2)
grpc (~> 1.27)
googleapis-common-protos-types (1.12.0)
googleapis-common-protos (1.5.0)
google-protobuf (~> 3.18)
googleapis-common-protos-types (~> 1.7)
grpc (~> 1.41)
googleapis-common-protos-types (1.13.0)
google-protobuf (~> 3.18)
googleauth (1.11.0)
faraday (>= 1.0, < 3.a)
@@ -208,16 +212,17 @@ GEM
multi_json (~> 1.11)
os (>= 0.9, < 2.0)
signet (>= 0.16, < 2.a)
grpc (1.61.0-x86_64-darwin)
grpc (1.62.0-x86_64-darwin)
google-protobuf (~> 3.25)
googleapis-common-protos-types (~> 1.0)
grpc (1.61.0-x86_64-linux)
grpc (1.62.0-x86_64-linux)
google-protobuf (~> 3.25)
googleapis-common-protos-types (~> 1.0)
haml (5.2.2)
temple (>= 0.8.0)
tilt
hashdiff (1.1.0)
htmlentities (4.3.4)
http-accept (1.7.0)
http-cookie (1.0.5)
domain_name (~> 0.5)
@@ -228,9 +233,11 @@ GEM
json-schema (2.8.1)
addressable (>= 2.4)
json_pure (2.7.1)
jwt (2.7.1)
jwt (2.8.0)
base64
kgio (2.11.4)
libxml-ruby (5.0.2)
link_header (0.0.8)
logger (1.6.0)
macaddr (1.7.2)
systemu (~> 2.6.5)
@@ -297,8 +304,21 @@ GEM
rack-timeout (0.6.3)
raindrops (0.20.1)
rake (10.5.0)
rdf (1.0.8)
addressable (>= 2.2)
rdf (3.2.11)
link_header (~> 0.0, >= 0.0.8)
rdf-raptor (3.2.0)
ffi (~> 1.15)
rdf (~> 3.2)
rdf-rdfxml (3.2.2)
builder (~> 3.2)
htmlentities (~> 4.3)
rdf (~> 3.2)
rdf-xsd (~> 3.2)
rdf-vocab (3.2.7)
rdf (~> 3.2, >= 3.2.4)
rdf-xsd (3.2.1)
rdf (~> 3.2)
rexml (~> 3.2)
redcarpet (3.6.0)
redis (4.8.1)
redis-activesupport (5.3.0)
@@ -378,7 +398,7 @@ GEM
unicorn (>= 4, < 7)
uuid (2.3.9)
macaddr (~> 1.0)
webmock (3.20.0)
webmock (3.19.1)
addressable (>= 2.8.0)
crack (>= 0.3.2)
hashdiff (>= 0.4.0, < 2.0.0)
@@ -389,6 +409,7 @@ PLATFORMS

DEPENDENCIES
activesupport (~> 3.2)
addressable (~> 2.8)
bcrypt_pbkdf (>= 1.0, < 2.0)
bigdecimal (= 1.4.2)
capistrano (~> 3)
@@ -438,7 +459,7 @@ DEPENDENCIES
sparql-client!
unicorn
unicorn-worker-killer
webmock
webmock (~> 3.19.1)

BUNDLED WITH
2.4.21
62 changes: 41 additions & 21 deletions controllers/search_controller.rb
@@ -31,18 +31,18 @@ class SearchController < ApplicationController
'resource_model:"ontology_submission"',
'submissionStatus_txt:ERROR_* OR submissionStatus_txt:"RDF" OR submissionStatus_txt:"UPLOADED"',
"ontology_viewingRestriction_t:#{visibility}",
groups.map{|x| "ontology_group_txt:\"http://data.bioontology.org/groups/#{x.upcase}\""}.join(' OR '),
categories.map{|x| "ontology_hasDomain_txt:\"http://data.bioontology.org/categories/#{x.upcase}\""}.join(' OR '),
languages.map{|x| "naturalLanguage_txt:\"#{x.downcase}\""}.join(' OR '),
groups.map { |x| "ontology_group_txt:\"http://data.bioontology.org/groups/#{x.upcase}\"" }.join(' OR '),
categories.map { |x| "ontology_hasDomain_txt:\"http://data.bioontology.org/categories/#{x.upcase}\"" }.join(' OR '),
languages.map { |x| "naturalLanguage_txt:\"#{x.downcase}\"" }.join(' OR '),
]

fq << "!ontology_viewOf_t:*" unless show_views

fq << format.map{|x| "hasOntologyLanguage_t:\"http://data.bioontology.org/ontology_formats/#{x}\""}.join(' OR ') unless format.blank?
fq << format.map { |x| "hasOntologyLanguage_t:\"http://data.bioontology.org/ontology_formats/#{x}\"" }.join(' OR ') unless format.blank?

fq << status.map{|x| "status_t:#{x}"}.join(' OR ') unless status.blank?
fq << is_of_type.map{|x| "isOfType_t:#{x}"}.join(' OR ') unless is_of_type.blank?
fq << has_format.map{|x| "hasFormalityLevel_t:#{x}"}.join(' OR ') unless has_format.blank?
fq << status.map { |x| "status_t:#{x}" }.join(' OR ') unless status.blank?
fq << is_of_type.map { |x| "isOfType_t:#{x}" }.join(' OR ') unless is_of_type.blank?
fq << has_format.map { |x| "hasFormalityLevel_t:#{x}" }.join(' OR ') unless has_format.blank?

fq.reject!(&:blank?)

@@ -63,8 +63,7 @@ class SearchController < ApplicationController
page_size: page_size,
sort: sort
})

#resp = Ontology.search(query, search_params)

total_found = page_data.aggregate
ontology_rank = LinkedData::Models::Ontology.rank
docs = {}
@@ -77,25 +76,48 @@ class SearchController < ApplicationController
old_id = old_resource_id.split('/').last.to_i rescue 0

if acronym.blank? || old_id && id && (id <= old_id)
total_found-= 1
total_found -= 1
next
end

docs.delete(old_resource_id)
acronyms_ids[acronym] = resource_id

doc["ontology_rank"] = ontology_rank.dig(doc["ontology_acronym_text"], :normalizedScore) || 0.0
docs[resource_id] = doc
docs[resource_id] = doc
end

docs = docs.values

docs.sort! {|a, b| [b["score"], b["ontology_rank"]] <=> [a["score"], a["ontology_rank"]]} unless params[:sort].present?
docs.sort! { |a, b| [b["score"], b["ontology_rank"]] <=> [a["score"], a["ontology_rank"]] } unless params[:sort].present?

page = page_object(docs, total_found)

reply 200, page
end

get '/content' do
query = params[:query] || params[:q]
page, page_size = page_params
ontologies = params.fetch("ontologies", "").split(',')
qf = params.fetch("qf", "")

fq = []

fq << ontologies.map { |x| "ontology_t:\"#{x}\"" }.join(' OR ') unless ontologies.blank?


conn = SOLR::SolrConnector.new(Goo.search_conf, :ontology_data)

resp = conn.search(query, fq: fq, qf: qf,
page: page, page_size: page_size)

total_found = resp["response"]["numFound"]
docs = resp["response"]["docs"]


reply 200,page_object(docs, total_found)
end
end

namespace "/agents" do
@@ -104,7 +126,7 @@ class SearchController < ApplicationController
page, page_size = page_params
type = params[:agentType].blank? ? nil : params[:agentType]

fq = "agentType_t:#{type}" if type
fq = "agentType_t:#{type}" if type

qf = [
"acronymSuggestEdge^25 nameSuggestEdge^15 emailSuggestEdge^15 identifiersSuggestEdge^10 ", # start of the word first
@@ -118,7 +140,6 @@ class SearchController < ApplicationController
sort = "score desc, acronym_sort asc, name_sort asc"
end


reply 200, search(LinkedData::Models::Agent,
query,
fq: fq, qf: qf,
@@ -132,15 +153,15 @@ class SearchController < ApplicationController
def search(model, query, params = {})
query = query.blank? ? "*" : query

resp = model.search(query, search_params(params))
resp = model.search(query, search_params(params))

total_found = resp["response"]["numFound"]
docs = resp["response"]["docs"]

page_object(docs, total_found)
end

def search_params(defType: "edismax", fq: , qf: , stopwords: "true", lowercaseOperators: "true", page: , page_size: , fl: '*,score', sort: )
def search_params(defType: "edismax", fq:, qf:, stopwords: "true", lowercaseOperators: "true", page:, page_size:, fl: '*,score', sort:)
{
defType: defType,
fq: fq,
@@ -154,8 +175,7 @@ def search_params(defType: "edismax", fq: , qf: , stopwords: "true", lowercaseOp
}
end


def process_search(params=nil)
def process_search(params = nil)
params ||= @params
text = params["q"]

@@ -191,13 +211,13 @@ def process_search(params=nil)

unless params['sort']
if !text.nil? && text[-1] == '*'
docs.sort! {|a, b| [b[:score], a[:prefLabelExact].downcase, b[:ontology_rank]] <=> [a[:score], b[:prefLabelExact].downcase, a[:ontology_rank]]}
docs.sort! { |a, b| [b[:score], a[:prefLabelExact].downcase, b[:ontology_rank]] <=> [a[:score], b[:prefLabelExact].downcase, a[:ontology_rank]] }
else
docs.sort! {|a, b| [b[:score], b[:ontology_rank]] <=> [a[:score], a[:ontology_rank]]}
docs.sort! { |a, b| [b[:score], b[:ontology_rank]] <=> [a[:score], a[:ontology_rank]] }
end
end

#need to return a Page object
# need to return a Page object
page = page_object(docs, total_found)

reply 200, page
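
To make the new /content route's filtering concrete, here is a small standalone sketch of how the ontologies parameter is expanded into a Solr filter query (plain Ruby, no Solr connection; the acronyms are the ones created by the tests, and .empty? stands in for ActiveSupport's .blank? used in the controller):

ontologies = 'BROSEARCHTEST-0,MCCLSEARCHTEST-0'.split(',')

fq = []
fq << ontologies.map { |x| "ontology_t:\"#{x}\"" }.join(' OR ') unless ontologies.empty?

# fq => ["ontology_t:\"BROSEARCHTEST-0\" OR ontology_t:\"MCCLSEARCHTEST-0\""]
# This restricts the :ontology_data collection to documents indexed for the listed submissions;
# with no acronyms given, fq stays empty and the query spans all indexed ontologies.
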
2 changes: 1 addition & 1 deletion docker-compose.yml
@@ -87,7 +87,7 @@ services:
# volumes:
#- solr_data:/var/solr/data
agraph-ut:
image: franzinc/agraph:v8.0.0.rc1
image: franzinc/agraph:v8.1.0
platform: linux/amd64
environment:
- AGRAPH_SUPER_USER=test
58 changes: 57 additions & 1 deletion test/controllers/test_search_models_controller.rb
@@ -18,7 +18,7 @@ def test_show_all_collection
get '/admin/search/collections'
assert last_response.ok?
res = MultiJson.load(last_response.body)
assert_equal res["collections"], Goo.search_connections.keys.map(&:to_s)
assert_equal res["collections"].sort, Goo.search_connections.keys.map(&:to_s).sort
end

def test_collection_schema
@@ -341,4 +341,60 @@ def test_agents_search
agents = MultiJson.load(last_response.body)
assert_equal agent_org.id.to_s, agents["collection"].first["id"]
end

def test_search_data
count, acronyms, bro = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({
process_submission: true,
process_options: { process_rdf: true, extract_metadata: false, generate_missing_labels: false},
acronym: "BROSEARCHTEST",
name: "BRO Search Test",
file_path: "./test/data/ontology_files/BRO_v3.2.owl",
ont_count: 1,
submission_count: 1,
ontology_type: "VALUE_SET_COLLECTION"
})

count, acronyms, mccl = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({
process_submission: true,
process_options: { process_rdf: true, extract_metadata: false, generate_missing_labels: false},
acronym: "MCCLSEARCHTEST",
name: "MCCL Search Test",
file_path: "./test/data/ontology_files/CellLine_OWL_BioPortal_v1.0.owl",
ont_count: 1,
submission_count: 1
})


subs = LinkedData::Models::OntologySubmission.all
count = []
subs.each do |s|
s.bring_remaining
s.index_all_data(Logger.new($stdout))
count << Goo.sparql_query_client.query("SELECT (COUNT( DISTINCT ?id) as ?c) FROM <#{s.id}> WHERE {?id ?p ?v}")
.first[:c]
.to_i
end

get "/search/ontologies/content?q=*"
assert last_response.ok?
res = MultiJson.load(last_response.body)
assert_equal count.sum, res['totalCount']


get "/search/ontologies/content?q=*&ontologies=MCCLSEARCHTEST-0,BROSEARCHTEST-0"
assert last_response.ok?
res = MultiJson.load(last_response.body)
assert_equal count.sum, res['totalCount']

get "/search/ontologies/content?q=*&ontologies=BROSEARCHTEST-0"
assert last_response.ok?
res = MultiJson.load(last_response.body)
assert_includes count, res['totalCount']

get "/search/ontologies/content?q=*&ontologies=MCCLSEARCHTEST-0"
assert last_response.ok?
res = MultiJson.load(last_response.body)
assert_includes count, res['totalCount']

end
end
