Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allowing sort by year and first author #574

Merged
merged 1 commit into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions app/controllers/catalog_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,10 @@ def retry_on_exception
# whether the sort is ascending or descending (it must be asc or desc
# except in the relevancy case). Add the sort: option to configure a
# custom Blacklight url parameter value separate from the Solr sort fields.
config.add_sort_field 'relevance', sort: 'score desc, year_available_itsi desc, title_si asc', label: 'relevance'
config.add_sort_field 'year', sort: 'year_available_itsi desc, title_si asc', label: 'year'
config.add_sort_field 'relevance', sort: 'score desc, issue_date_strict_ssi desc, title_si asc', label: 'relevance'
config.add_sort_field 'year', sort: 'issue_date_strict_ssi desc, title_si asc', label: 'year'
config.add_sort_field 'author', sort: 'author_si asc, title_si asc', label: 'author'
config.add_sort_field 'title', sort: 'title_si asc, year_available_itsi desc', label: 'title'
config.add_sort_field 'title', sort: 'title_si asc, issue_date_strict_ssi desc', label: 'title'

# If there are more than this many search results, no spelling ("did you
# mean") suggestion is offered.
Expand Down
6 changes: 5 additions & 1 deletion app/lib/date_normalizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,15 @@ def self.year_from_date(date_string)
Date.strptime(date_string).strftime('%Y').to_i
elsif date_string.match?(/\d{4}-\d{2}/)
Date.strptime(date_string, '%Y-%m').strftime('%Y').to_i
else
elsif date_string.match?(/^\d{4}/) && date_string.size == 4
date_string.to_i
else
time = Time.zone.parse(date_string)
time.year
end
rescue ArgumentError
# bad formatted date
Rails.logger.warn("Error parsing date #{date_string}")
nil
end
end
2 changes: 1 addition & 1 deletion lib/traject/dataspace_research_data_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
# single value is used for sorting
# Populates author_si from the item's dc.contributor.author metadata values.
to_field 'author_si' do |record, accumulator, _c|
# Text of every dc.contributor.author value, in the order the XPath query returns them.
values = record.xpath("/item/metadata/key[text()='dc.contributor.author']/../value").map(&:text)
# NOTE(review): the two concat lines below look like the removed and added sides
# of a diff whose +/- markers were lost (alphabetically-first author vs. first
# listed author) - presumably only the second one is meant to remain; confirm.
accumulator.concat [values.uniq.sort.first]
accumulator.concat [values.first]
end

# all values as strings for faceting
Expand Down
20 changes: 19 additions & 1 deletion lib/traject/pdc_describe_indexing_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
# single value is used for sorting
# Populates author_si from the creator values of the PDC Describe record.
to_field 'author_si' do |record, accumulator, _c|
# Text of every /hash/resource/creators/creator/value node, in the order the XPath query returns them.
author_names = record.xpath("/hash/resource/creators/creator/value").map(&:text)
# NOTE(review): the two concat lines below look like the removed and added sides
# of a diff whose +/- markers were lost (alphabetically-first creator vs. first
# listed creator) - presumably only the second one is meant to remain; confirm.
accumulator.concat [author_names.uniq.sort.first]
accumulator.concat [author_names.first]
end

# all values as strings for faceting
Expand Down Expand Up @@ -146,6 +146,24 @@

to_field 'pdc_created_at_dtsi', extract_xpath('/hash/created-at')

# Issue date as a single "YYYY-MM-DD" string.
#
# * Records whose /hash/resource/migrated value is "true" use January 1st of
#   /hash/resource/publication-year.
# * All other records use the date portion of /hash/created-at.
#
# Nothing is added to the accumulator when no usable date can be derived, so
# a malformed value such as "-01-01" is never indexed.
to_field "issue_date_strict_ssi" do |record, accumulator, _context|
  migrated = record.xpath("/hash/resource/migrated/text()").to_s
  date =
    if migrated == "true"
      pub_year = record.xpath("/hash/resource/publication-year/text()").to_s.strip
      # Only build a date from a real four-digit year; a missing or malformed
      # publication year leaves the field unset instead of indexing garbage.
      "#{pub_year}-01-01" if pub_year.match?(/\A\d{4}\z/)
    else
      created_at = record.xpath("/hash/created-at/text()").to_s
      begin
        # Date.parse keeps the calendar date exactly as written in the timestamp.
        Date.parse(created_at).strftime('%Y-%m-%d')
      rescue ArgumentError
        # Missing or unparseable created-at (Date::Error is an ArgumentError).
        nil
      end
    end
  accumulator << date if date
end

to_field 'pdc_updated_at_dtsi', extract_xpath('/hash/updated-at')

# ==================
Expand Down
30 changes: 12 additions & 18 deletions spec/fixtures/files/pdc_describe_data/89.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
"resource_type": "Dataset",
"resource_type_general": "Dataset",
"publisher": "Princeton Plasma Physics Laboratory, Princeton University",
"publication_year": "2022",
"publication_year": "2023",
"ark": "ark:/88435/dsp01wh246w38h",
"doi": "10.34770/bm4s-t361",
"rights_many": [
Expand Down Expand Up @@ -150,13 +150,7 @@
"errors": []
}
],
"keywords": [
"HHFW",
"3D RF modeling",
"Petra-M",
"fast wave",
"NSTX-U"
],
"keywords": ["HHFW", "3D RF modeling", "Petra-M", "fast wave", "NSTX-U"],
"contributors": [],
"organizational_contributors": [],
"funders": [
Expand All @@ -167,15 +161,12 @@
"award_uri": ""
}
],
"domains": [
"Natural Sciences"
],
"communities": [
"Princeton Plasma Physics Laboratory"
],
"domains": ["Natural Sciences"],
"communities": ["Princeton Plasma Physics Laboratory"],
"subcommunities": [
"NSTX-U",
"Spherical Torus"
"Spherical Torus",
"Tokamak Experimental Sciences"
],
"migrated": true
},
Expand Down Expand Up @@ -321,9 +312,12 @@
],
"group": {
"title": "Princeton Plasma Physics Lab (PPPL)",
"description": null,
"description": "",
"code": "PPPL",
"created_at": "2022-04-28T16:30:00.195-04:00",
"updated_at": "2023-05-18T14:20:45.272-04:00"
}
"updated_at": "2024-02-16T09:06:20.108-05:00"
},
"embargo_date": null,
"created_at": "2023-08-18T13:47:14Z",
"updated_at": "2024-02-23T11:05:11Z"
}
58 changes: 34 additions & 24 deletions spec/fixtures/files/sowing_the_seeds.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,18 @@
"title_type": null
}
],
"description": "In 2017, seven members of the Archive-It Mid-Atlantic Users Group (AITMA) conducted a study of 14 subjects representative of their stakeholder populations to assess the usability of Archive-It, a web archiving subscription service of the Internet Archive. While Archive-It is the most widely-used tool for web archiving, little is known about how users interact with the service.This study intended to teach us what users expect from web archives, which exist as another form of archival material. End-user subjects executed four search tasks using the public Archive-It interface and the Wayback Machine to access archived information on websites from the facilitators’ own harvested collections and provide feedback about their experiences. The tasks were designed to have straightforward pass or fail outcomes,\r\n and the facilitators took notes on the subjects’ behavior and commentary during the sessions.Overall, participants reported mildly positive impressions of Archive-It public user interface based on their session. The study identified several key areas of improvement for the Archive-It service pertaining to metadata options, terminology display, indexing of dates, and the site’s search box.\r\n-\r\nDownload the README.txt for a detailed description of this dataset's content.",
"description": "In 2017, seven members of the Archive-It Mid-Atlantic Users Group (AITMA) conducted a study of 14 subjects representative of their stakeholder populations to assess the usability of Archive-It, a web archiving subscription service of the Internet Archive. While Archive-It is the most widely-used tool for web archiving, little is known about how users interact with the service. This study intended to teach us what users expect from web archives, which exist as another form of archival material. End-user subjects executed four search tasks using the public Archive-It interface and the Wayback Machine to access archived information on websites from the facilitators' own harvested collections and provide feedback about their experiences. The tasks were designed to have straightforward pass or fail outcomes,\r\nand the facilitators took notes on the subjects' behavior and commentary during the sessions. Overall, participants reported mildly positive impressions of Archive-It public user interface based on their session. The study identified several key areas of improvement for the Archive-It service pertaining to metadata options, terminology display, indexing of dates, and the site's search box.\r\n\r\nDownload the README.txt for a detailed description of this dataset's content.",
"collection_tags": [],
"creators": [
{
"value": "Abrams, Samantha",
"name_type": "Personal",
"given_name": "Samantha",
"family_name": "Abrams",
"identifier": null,
"affiliations": [],
"sequence": 0
},
{
"value": "Antracoli, Alexis",
"name_type": "Personal",
Expand Down Expand Up @@ -62,40 +71,41 @@
"identifier": null,
"affiliations": [],
"sequence": 6
},
{
"value": "Abrams, Samantha",
"name_type": "Personal",
"given_name": "Samantha",
"family_name": "Abrams",
"identifier": null,
"affiliations": [],
"sequence": 7
}
],
"resource_type": "Dataset",
"resource_type_general": "",
"resource_type_general": "Dataset",
"publisher": "Princeton University",
"publication_year": "2023",
"ark": null,
"doi": "10.34770/doc-1",
"rights": {
"identifier": "GPLv3",
"uri": "https://www.gnu.org/licenses/gpl-3.0.en.html",
"name": "GNU General Public License"
},
"ark": "ark:/88435/dsp01d791sj97j",
"doi": "10.34770/00yp-2w12",
"rights_many": [
{
"identifier": "CC BY",
"uri": "https://creativecommons.org/licenses/by/4.0/",
"name": "Creative Commons Attribution 4.0 International"
}
],
"version_number": "1",
"related_objects": [],
"keywords": [],
"contributors": [],
"funders": []
"organizational_contributors": [],
"funders": [],
"domains": [],
"communities": [],
"subcommunities": [],
"migrated": true
},
"files": [],
"group": {
"title": "Research Data",
"description": null,
"title": "Princeton Research Data Service (PRDS)",
"description": "",
"code": "RD",
"created_at": "2023-01-05T11:26:07.005-05:00",
"updated_at": "2023-01-05T11:26:07.005-05:00"
}
"created_at": "2022-04-28T16:30:00.190-04:00",
"updated_at": "2024-01-23T11:29:40.724-05:00"
},
"embargo_date": null,
"created_at": "2023-07-11T11:06:10Z",
"updated_at": "2023-09-13T08:23:50Z"
}
2 changes: 2 additions & 0 deletions spec/lib/date_normalizer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
let(:years) { ['2015'] }
let(:months_and_years) { ['2015-08'] }
let(:timestamps) { ['2015-08-18T18:14:22Z'] }
let(:month_year_name) { ['August 2020'] }

describe "#format_array_for_display" do
it "formats four digit years" do
Expand All @@ -27,6 +28,7 @@
expect(described_class.years_from_dates(timestamps)).to eq [2015]
expect(described_class.years_from_dates(months_and_years)).to eq [2015]
expect(described_class.years_from_dates(years)).to eq [2015]
expect(described_class.years_from_dates(month_year_name)).to eq [2020]
end

it "handles bad dates" do
Expand Down
37 changes: 37 additions & 0 deletions spec/lib/describe_indexer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@
expect(indexed_record["pdc_updated_at_dtsi"]).to eq "2021-12-31T20:00:00Z"
end

# The indexed record carries its issue date as a YYYY-MM-DD string.
it "issue_date_strict_ssi" do
  issue_date = indexed_record["issue_date_strict_ssi"]
  expect(issue_date).to eq "2021-01-01"
end

it "publisher_ssim" do
expect(indexed_record["publisher_ssim"].first).to eq "Princeton University"
end
Expand Down Expand Up @@ -189,6 +193,39 @@
expect(response["response"]["numFound"]).to eq 2
end

# Indexes the PDC Describe records, then queries Solr sorted by
# issue_date_strict_ssi descending and checks which document comes first and last.
it "can sort by issue_date_strict_ssi" do
  Rails.configuration.pdc_discovery.index_pdc_describe = true
  indexer.index

  solr = Blacklight.default_index.connection
  response = solr.get 'select', params: { q: '*:*', sort: 'issue_date_strict_ssi desc' }
  results = response["response"]
  expect(results["numFound"]).to eq 2

  docs = results["docs"]
  expect(docs.first["pdc_created_at_dtsi"]).to eq("2023-07-11T11:06:10Z")
  expect(docs.last["pdc_created_at_dtsi"]).to eq("2021-12-31T19:00:00Z")
  expect(docs.first["issue_date_strict_ssi"]).to eq "2023-01-01"
  expect(docs.last["issue_date_strict_ssi"]).to eq "2021-01-01"
end

# Serves two multi-creator fixtures for works 6 and 20, then checks the result
# order when Solr sorts on author_si descending.
context "works with multiple creators" do
  let(:pppl1) { File.read(File.join(fixture_path, 'files', 'pppl1.json')) }
  let(:pppl2) { File.read(File.join(fixture_path, 'files', 'pppl2.json')) }

  before do
    # Stub the two work JSON endpoints with the fixture bodies.
    work_6 = stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/6.json")
    work_6.to_return(status: 200, body: pppl1, headers: {})
    work_20 = stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/20.json")
    work_20.to_return(status: 200, body: pppl2, headers: {})
  end

  it "can sort by the first author" do
    Rails.configuration.pdc_discovery.index_pdc_describe = true
    indexer.index

    solr = Blacklight.default_index.connection
    response = solr.get 'select', params: { q: '*:*', sort: 'author_si desc' }
    results = response["response"]
    expect(results["numFound"]).to eq 2

    docs = results["docs"]
    expect(docs.first["author_tesim"].first).to eq("Wang, Yin")
    expect(docs.last["author_tesim"].first).to eq("Schwartz, Jacob A.")
    expect(docs.first["issue_date_strict_ssi"]).to eq "2021-12-31"
    expect(docs.last["issue_date_strict_ssi"]).to eq "2022-01-01"
  end
end

context "when there are items which are under active embargo" do
let(:item_file_fixture) { file_fixture("pdc_describe_active_embargo.json") }
let(:embargo_resource) { item_file_fixture.read }
Expand Down