Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fielded search #152

Merged
merged 11 commits into from
Feb 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions app/controllers/catalog_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class CatalogController < ApplicationController
# config.per_page = [10,20,50,100]

# solr field configuration for search results/index views
config.index.title_field = 'title_tsim'
config.index.title_field = 'title_tesim'
# config.index.display_type_field = 'format'
# config.index.thumbnail_field = 'thumbnail_path_ss'

Expand All @@ -51,7 +51,6 @@ class CatalogController < ApplicationController
config.add_nav_action(:search_history, partial: 'blacklight/nav/search_history')

# solr field configuration for document/show views
# config.show.title_field = 'title_tsim'
# config.show.display_type_field = 'format'
# config.show.thumbnail_field = 'thumbnail_path_ss'

Expand Down Expand Up @@ -102,7 +101,6 @@ class CatalogController < ApplicationController
# solr fields to be displayed in the index (search results) view
# The ordering of the field names is the order of the display
config.add_index_field 'author_tesim', label: 'Author(s)'
config.add_index_field 'author_vern_ssim', label: 'Author(s)'
config.add_index_field 'format', label: 'Format'
config.add_index_field 'abstract_tsim', label: 'Abstract'
config.add_index_field 'published_ssim', label: 'Published'
Expand All @@ -112,11 +110,7 @@ class CatalogController < ApplicationController

# solr fields to be displayed in the show (single result) view
# The ordering of the field names is the order of the display
config.add_show_field 'title_vern_ssim', label: 'Title'
config.add_show_field 'subtitle_tsim', label: 'Subtitle'
config.add_show_field 'subtitle_vern_ssim', label: 'Subtitle'
config.add_show_field 'author_tesim', label: 'Author'
config.add_show_field 'author_vern_ssim', label: 'Author'
config.add_show_field 'format', label: 'Format'
config.add_show_field 'url_fulltext_ssim', label: 'URL'
config.add_show_field 'url_suppl_ssim', label: 'More Information'
Expand Down Expand Up @@ -191,10 +185,10 @@ class CatalogController < ApplicationController
# whether the sort is ascending or descending (it must be asc or desc
# except in the relevancy case). Add the sort: option to configure a
# custom Blacklight url parameter value separate from the Solr sort fields.
config.add_sort_field 'relevance', sort: 'score desc, pub_date_si desc, title_si asc', label: 'relevance'
config.add_sort_field 'year-desc', sort: 'pub_date_si desc, title_si asc', label: 'year'
config.add_sort_field 'relevance', sort: 'score desc, year_available_itsi desc, title_si asc', label: 'relevance'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

config.add_sort_field 'year', sort: 'year_available_itsi desc, title_si asc', label: 'year'
config.add_sort_field 'author', sort: 'author_si asc, title_si asc', label: 'author'
config.add_sort_field 'title_si asc, pub_date_si desc', label: 'title'
config.add_sort_field 'title', sort: 'title_si asc, year_available_itsi desc', label: 'title'

# If there are more than this many search results, no spelling ("did you
# mean") suggestion is offered.
Expand Down
6 changes: 3 additions & 3 deletions app/models/solr_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class SolrDocument
end

field_semantics.merge!(
title: 'title_tsim',
title: 'title_tesim',
contributor: 'author_tesim',
format: 'genre_ssim',
date: 'issue_date_ssim'
Expand All @@ -36,7 +36,7 @@ class SolrDocument
DESCRIPTION_FIELD = 'description_tsim'
ISSUED_DATE_FIELD = 'issue_date_ssim'
METHODS_FIELD = 'methods_tsim'
TITLE_FIELD = 'title_tsim'
TITLE_FIELD = 'title_tesim'

# These icons map to CSS classes in Bootstrap
ICONS = {
Expand Down Expand Up @@ -293,7 +293,7 @@ def subject_other
end

def alternative_title
fetch("alternative_title_ssim", [])
fetch("alternative_title_tesim", [])
end

def genres
Expand Down
67 changes: 49 additions & 18 deletions lib/traject/dataspace_research_data_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,46 @@

to_field 'abstract_tsim', extract_xpath("/item/metadata/key[text()='dc.description.abstract']/../value")
to_field 'abstract_tsim', extract_xpath("/item/metadata/key[text()='dcterms.abstract']/../value")
to_field 'author_tesim', extract_xpath("/item/metadata/key[text()='dc.contributor.author']/../value")
to_field 'creator_tesim', extract_xpath("/item/metadata/key[text()='dcterms.creator']/../value")
to_field 'contributor_tsim', extract_xpath("/item/metadata/key[text()='dc.contributor']/../value")
to_field 'contributor_tsim', extract_xpath("/item/metadata/key[text()='dcterms.contributor']/../value")
to_field 'description_tsim', extract_xpath("/item/metadata/key[text()='dc.description']/../value")
to_field 'description_tsim', extract_xpath("/item/metadata/key[text()='dcterms.description']/../value")
to_field 'handle_ssim', extract_xpath('/item/handle')
to_field 'id', extract_xpath('/item/id')
to_field 'title_ssim', extract_xpath('/item/name')
to_field 'title_tsim', extract_xpath('/item/name')
to_field 'title_ssim', extract_xpath("/item/metadata/key[text()='dcterms.title']/../value")
to_field 'title_tsim', extract_xpath("/item/metadata/key[text()='dcterms.title']/../value")
to_field 'uri_tesim', extract_xpath("/item/metadata/key[text()='dc.identifier.uri']/../value")

to_field 'collection_id_ssi', extract_xpath('/item/parentCollection/id')
to_field 'handle_ssi', extract_xpath('/item/handle')

# ==================
# author fields

to_field 'author_tesim', extract_xpath("/item/metadata/key[text()='dc.contributor.author']/../value")

# Single-valued author key: the alphabetically first dc.contributor.author
# value of the record.
to_field 'author_si' do |record, accumulator, _c|
  authors = record.xpath("/item/metadata/key[text()='dc.contributor.author']/../value").map(&:text)
  # `min` gives the same value as uniq.sort.first without building a sorted copy.
  first_author = authors.min
  # Records without any author contribute nothing instead of a nil entry.
  accumulator << first_author if first_author
end

# ==================
# title fields

to_field 'title_tesim', extract_xpath('/item/name')
to_field 'title_tesim', extract_xpath("/item/metadata/key[text()='dcterms.title']/../value")

# Single-valued title key: the item name when present, otherwise the first
# dcterms.title value.
to_field 'title_si' do |record, accumulator, _c|
  title_xpaths = ['/item/name', "/item/metadata/key[text()='dcterms.title']/../value"]
  # Order matters: /item/name is consulted before dcterms.title.
  first_title = title_xpaths.flat_map { |xpath| record.xpath(xpath).map(&:text) }.first
  # Records with no title at all contribute nothing instead of a nil entry.
  accumulator << first_title if first_title
end

to_field 'alternative_title_tesim', extract_xpath("/item/metadata/key[text()='dc.title.alternative']/../value")
to_field 'alternative_title_tesim', extract_xpath("/item/metadata/key[text()='dcterms.alternative']/../value")

# ==================
# Calculate domain from the communities

to_field 'domain_ssi' do |record, accumulator, _context|
communities = record.xpath("/item/parentCommunityList/type[text()='community']/../name").map(&:text)
domains = Domain.from_communities(communities)
Expand Down Expand Up @@ -271,23 +293,32 @@
to_field 'subject_other_tesim', extract_xpath("/item/metadata/key[text()='dc.subject.other']/../value")

# subject_all_ssim is used for faceting (must be string)
to_field 'subject_all_ssim', extract_xpath("/item/metadata/key[text()='dc.subject']/../value")
to_field 'subject_all_ssim', extract_xpath("/item/metadata/key[text()='dcterms.subject']/../value")
to_field 'subject_all_ssim', extract_xpath("/item/metadata/key[text()='dc.subject.classification']/../value")
to_field 'subject_all_ssim', extract_xpath("/item/metadata/key[text()='dc.subject.ddc']/../value")
to_field 'subject_all_ssim', extract_xpath("/item/metadata/key[text()='dc.subject.lcc']/../value")
to_field 'subject_all_ssim', extract_xpath("/item/metadata/key[text()='dc.subject.lcsh']/../value")
to_field 'subject_all_ssim', extract_xpath("/item/metadata/key[text()='dc.subject.mesh']/../value")
to_field 'subject_all_ssim', extract_xpath("/item/metadata/key[text()='dc.subject.other']/../value")
# subject_all_tesim is used for searching (indexed as English text)
to_field ['subject_all_ssim', 'subject_all_tesim'] do |record, accumulator, _context|
  # XPath for every metadata key whose values are collected as subjects.
  subject_xpaths = [
    "/item/metadata/key[text()='dc.subject']/../value",
    "/item/metadata/key[text()='dcterms.subject']/../value",
    "/item/metadata/key[text()='dc.subject.classification']/../value",
    "/item/metadata/key[text()='dc.subject.ddc']/../value",
    "/item/metadata/key[text()='dc.subject.lcc']/../value",
    "/item/metadata/key[text()='dc.subject.lcsh']/../value",
    "/item/metadata/key[text()='dc.subject.mesh']/../value",
    "/item/metadata/key[text()='dc.subject.other']/../value"
  ]

  # Gather the text of every matching node, in XPath order.
  subjects = subject_xpaths.flat_map { |subject_xpath| record.xpath(subject_xpath).map(&:text) }

  # The same de-duplicated list is written to both target fields.
  accumulator.concat subjects.uniq
end

# ==================
# genre, provenance, peer review, alternative title fields
# genre, provenance, peer review fields
to_field 'genre_ssim', extract_xpath("/item/metadata/key[text()='dc.type']/../value")
to_field 'genre_ssim', extract_xpath("/item/metadata/key[text()='dcterms.type']/../value")
to_field 'provenance_ssim', extract_xpath("/item/metadata/key[text()='dc.provenance']/../value")
to_field 'peer_review_status_ssim', extract_xpath("/item/metadata/key[text()='dc.description.version']/../value")
to_field 'alternative_title_ssim', extract_xpath("/item/metadata/key[text()='dc.title.alternative']/../value")
to_field 'alternative_title_ssim', extract_xpath("/item/metadata/key[text()='dcterms.alternative']/../value")

# ==================
# contributor fields
Expand Down Expand Up @@ -333,7 +364,7 @@
end

# Indexes the entire text in a catch-all field.
to_field 'all_text_timv' do |record, accumulator, _context|
to_field 'all_text_teimv' do |record, accumulator, _context|
  # Collect every text node in the record and join them into a single
  # space-separated string, stored as one value.
  text_nodes = record.xpath("//text()")
  accumulator << text_nodes.map(&:to_s).join(" ")
end
8 changes: 4 additions & 4 deletions solr/conf/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@
<dynamicField name="*suggest" type="textSuggest" indexed="true" stored="false" multiValued="true" />

<!-- We populate this field manually via Traject (rather than via copyFields) -->
<field name="all_text_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
<field name="all_text_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


</fields>

Expand All @@ -215,9 +215,9 @@
<copyField source="*_ssim" dest="spell"/>
<copyField source="*_si" dest="spell"/>

<copyField source="author_tsim" dest="author_spell"/>
<copyField source="subject_ssim" dest="subject_spell"/>
<copyField source="title_tsim" dest="title_spell"/>
<copyField source="author_tesim" dest="author_spell"/>
<copyField source="subject_all_tesim" dest="subject_spell"/>
<copyField source="title_tesim" dest="title_spell"/>

<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
Expand Down
25 changes: 10 additions & 15 deletions solr/conf/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -43,34 +43,29 @@
-->
<str name="qf">
id
full_title_tsim
short_title_tsim
alternative_title_tsim
active_fedora_model_ssi
title_tsim
author_tsim
subject_tsim
all_text_timv
title_tesim
alternative_title_tesim
author_tesim
subject_all_tesim
all_text_teimv
</str>
<str name="pf">
all_text_timv^10
all_text_teimv^10
</str>

<str name="author_qf">
author_tsim
author_tesim
</str>
<str name="author_pf">
</str>
<str name="title_qf">
title_tsim
full_title_tsim
short_title_tsim
alternative_title_tsim
title_tesim
alternative_title_tesim
</str>
<str name="title_pf">
</str>
<str name="subject_qf">
subject_tsim
subject_all_tesim
</str>
<str name="subject_pf">
</str>
Expand Down
Loading